<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-13T02:21:57Z</responseDate><request verb="GetRecord" identifier="oai:www.recercat.cat:2072/473145" metadataPrefix="marc">https://recercat.cat/oai/request</request><GetRecord><record><header><identifier>oai:recercat.cat:2072/473145</identifier><datestamp>2024-11-04T07:10:55Z</datestamp><setSpec>com_2072_98</setSpec><setSpec>col_2072_378192</setSpec></header><metadata><record xmlns="http://www.loc.gov/MARC21/slim" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:doc="http://www.lyncode.com/xoai" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
   <leader>00925njm 22002777a 4500</leader>
   <datafield ind2=" " ind1=" " tag="042">
      <subfield code="a">dc</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="720">
      <subfield code="a">Acevedo Giménez, César Esteban</subfield>
      <subfield code="e">author</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="720">
      <subfield code="a">Hernández Budé, Porfidio</subfield>
      <subfield code="e">author</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="720">
      <subfield code="a">Espinosa, Antonio</subfield>
      <subfield code="e">author</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="720">
      <subfield code="a">Méndez Muñoz, Víctor</subfield>
      <subfield code="e">author</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="260">
      <subfield code="c">2017</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="520">
      <subfield code="a">A representative set of workflows found in bioinformatics pipelines must deal with large data sets. Most scientific workflows are defined as Direct Acyclic Graphs (DAGs). Despite DAGs are useful to understand dependence relationships, they do not provide any information about input, output and temporal data files. This information about the location of files of data intensive applications helps to avoid performance issues. This paper presents a multiworkflow store-aware scheduler in a cluster environment called Critical Path File Location (CPFL) policy where the access time to disk is more relevant than network, as an extension of the classical list scheduling policies. Our purpose is to find the best location of data files in a hierarchical storage system. The resulting algorithm is tested in an HPC cluster and in a simulated cluster scenario with bioinformatics synthetic workflows, and largely used benchmarks like Montage and Epigenomics. The resulting simulator is tuned and validated with the first test results from the real infrastructure. The evaluation of our proposal shows promising results up to 70% on benchmarks in real HPC clusters using 128 cores and up to 69% of makespan improvement on simulated 512 cores clusters with a deviation between 0.9% and 3% regarding the real HPC cluster.</subfield>
   </datafield>
   <datafield ind1="8" ind2=" " tag="024">
      <subfield code="a">http://hdl.handle.net/2072/473145</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Multiworkflows</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Cluster</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Scheduler</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Simulation</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Critical path</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Data processing</subfield>
   </datafield>
   <datafield ind2="0" ind1="0" tag="245">
      <subfield code="a">A Critical Path File Location (CPFL) algorithm for data-aware multiworkflow scheduling on HPC clusters</subfield>
   </datafield>
</record></metadata></record></GetRecord></OAI-PMH>