<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-14T02:42:45Z</responseDate><request verb="GetRecord" identifier="oai:www.recercat.cat:10230/69309" metadataPrefix="didl">https://recercat.cat/oai/request</request><GetRecord><record><header><identifier>oai:recercat.cat:10230/69309</identifier><datestamp>2025-12-13T21:23:35Z</datestamp><setSpec>com_2072_6</setSpec><setSpec>col_2072_452952</setSpec></header><metadata><d:DIDL xmlns:d="urn:mpeg:mpeg21:2002:02-DIDL-NS" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:doc="http://www.lyncode.com/xoai" xsi:schemaLocation="urn:mpeg:mpeg21:2002:02-DIDL-NS http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-21_schema_files/did/didl.xsd">
   <d:Item id="hdl_10230_69309">
      <d:Descriptor>
         <d:Statement mimeType="application/xml; charset=utf-8">
            <dii:Identifier xmlns:dii="urn:mpeg:mpeg21:2002:01-DII-NS" xsi:schemaLocation="urn:mpeg:mpeg21:2002:01-DII-NS http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-21_schema_files/dii/dii.xsd">urn:hdl:10230/69309</dii:Identifier>
         </d:Statement>
      </d:Descriptor>
      <d:Descriptor>
         <d:Statement mimeType="application/xml; charset=utf-8">
            <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
               <dc:title>Hierarchies of reward machines</dc:title>
               <dc:creator>Furelos Blanco, Daniel</dc:creator>
               <dc:creator>Law, Mark</dc:creator>
               <dc:creator>Jonsson, Anders</dc:creator>
               <dc:creator>Broda, Krysia</dc:creator>
               <dc:creator>Russo, Alessandra</dc:creator>
               <dc:subject>Reward machines</dc:subject>
               <dc:subject>Hierarchies</dc:subject>
               <dc:description>Reward machines (RMs) are a recent formalism for representing the reward function of a reinforcement learning task through a finite-state machine whose edges encode subgoals of the task using high-level events. The structure of RMs enables the decomposition of a task into simpler and independently solvable subtasks that help tackle long-horizon and/or sparse reward tasks. We propose a formalism for further abstracting the subtask structure by endowing an RM with the ability to call other RMs, thus composing a hierarchy of RMs (HRM). We exploit HRMs by treating each call to an RM as an independently solvable subtask using the options framework, and describe a curriculum-based method to learn HRMs from traces observed by the agent. Our experiments reveal that exploiting a handcrafted HRM leads to faster convergence than with a flat HRM, and that learning an HRM is feasible in cases where its equivalent flat representation is not.</dc:description>
               <dc:description>Anders Jonsson is partially funded by TAILOR, AGAUR SGR and Spanish grant PID2019-108141GB-I00</dc:description>
               <dc:date>2025-01-27T13:54:20Z</dc:date>
               <dc:date>2025-01-27T13:54:20Z</dc:date>
               <dc:date>2023</dc:date>
               <dc:type>info:eu-repo/semantics/conferenceObject</dc:type>
               <dc:type>info:eu-repo/semantics/publishedVersion</dc:type>
               <dc:relation>info:eu-repo/grantAgreement/ES/2PE/PID2019-108141GB-I00</dc:relation>
               <dc:rights>Copyright 2023 by the author(s).</dc:rights>
               <dc:rights>info:eu-repo/semantics/openAccess</dc:rights>
               <dc:publisher>PMLR</dc:publisher>
            </oai_dc:dc>
         </d:Statement>
      </d:Descriptor>
   </d:Item>
</d:DIDL></metadata></record></GetRecord></OAI-PMH>