<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-03T20:16:51Z</responseDate><request verb="GetRecord" identifier="oai:www.recercat.cat:10256/2178" metadataPrefix="marc">https://recercat.cat/oai/request</request><GetRecord><record><header><identifier>oai:recercat.cat:10256/2178</identifier><datestamp>2024-05-22T09:46:14Z</datestamp><setSpec>com_2072_452955</setSpec><setSpec>com_2072_2054</setSpec><setSpec>col_2072_452957</setSpec></header><metadata><record xmlns="http://www.loc.gov/MARC21/slim" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:doc="http://www.lyncode.com/xoai" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
   <leader>00925njm 22002777a 4500</leader>
   <datafield ind2=" " ind1=" " tag="042">
      <subfield code="a">dc</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="720">
      <subfield code="a">El-Fakdi Sencianes, Andrés</subfield>
      <subfield code="e">author</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="720">
      <subfield code="a">Carreras Pérez, Marc</subfield>
      <subfield code="e">author</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="260">
      <subfield code="c">2008</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="520">
      <subfield code="a">This paper proposes a field application of a high-level reinforcement learning (RL) control system for solving the action selection problem of an autonomous robot in cable tracking task. The learning system is characterized by using a direct policy search method for learning the internal state/action mapping. Policy only algorithms may suffer from long convergence times when dealing with real robotics. In order to speed up the process, the learning phase has been carried out in a simulated environment and, in a second step, the policy has been transferred and tested successfully on a real robot. Future steps plan to continue the learning process on-line while on the real robot while performing the mentioned task. We demonstrate its feasibility with real experiments on the underwater robot ICTINEU AUV</subfield>
   </datafield>
   <datafield ind1="8" ind2=" " tag="024">
      <subfield code="a">http://hdl.handle.net/10256/2178</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Aprenentatge per reforç</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Robots autònoms</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Autonomous robots</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Reinforcement learning</subfield>
   </datafield>
   <datafield ind2="0" ind1="0" tag="245">
      <subfield code="a">Policy gradient based Reinforcement Learning for real autonomous underwater cable tracking</subfield>
   </datafield>
</record></metadata></record></GetRecord></OAI-PMH>