<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-18T00:40:26Z</responseDate><request verb="GetRecord" identifier="oai:www.recercat.cat:2117/428142" metadataPrefix="didl">https://recercat.cat/oai/request</request><GetRecord><record><header><identifier>oai:recercat.cat:2117/428142</identifier><datestamp>2025-07-16T22:28:11Z</datestamp><setSpec>com_2072_1033</setSpec><setSpec>col_2072_452949</setSpec></header><metadata><d:DIDL xmlns:d="urn:mpeg:mpeg21:2002:02-DIDL-NS" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:doc="http://www.lyncode.com/xoai" xsi:schemaLocation="urn:mpeg:mpeg21:2002:02-DIDL-NS http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-21_schema_files/did/didl.xsd">
   <d:Item id="hdl_2117_428142">
      <d:Descriptor>
         <d:Statement mimeType="application/xml; charset=utf-8">
            <dii:Identifier xmlns:dii="urn:mpeg:mpeg21:2002:01-DII-NS" xsi:schemaLocation="urn:mpeg:mpeg21:2002:01-DII-NS http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-21_schema_files/dii/dii.xsd">urn:hdl:2117/428142</dii:Identifier>
         </d:Statement>
      </d:Descriptor>
      <d:Descriptor>
         <d:Statement mimeType="application/xml; charset=utf-8">
            <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
               <dc:title>Parallelizing recurrent neural networks and variants using OmpSs</dc:title>
               <dc:creator>Sharma, Robin Kumar</dc:creator>
               <dc:creator>Casas, Marc</dc:creator>
               <dc:subject>Àrees temàtiques de la UPC::Informàtica::Arquitectura de computadors</dc:subject>
               <dc:subject>High performance computing</dc:subject>
               <dc:subject>Deep neural network (DNN)</dc:subject>
               <dc:subject>wavefront parallelization</dc:subject>
               <dc:subject>task parallelism</dc:subject>
               <dc:subject>recurrent neural networks (RNNs)</dc:subject>
               <dc:subject>bidirectional recurrent neural networks (BRNNs)</dc:subject>
               <dc:subject>long-short term memory (LSTM)</dc:subject>
               <dc:subject>gated recurrent units (GRU)</dc:subject>
               <dc:subject>Càlcul intensiu (Informàtica)</dc:subject>
               <dc:description>Recurrent neural networks (RNNs) are widely used for&#xd;
natural language processing, time-series prediction, or text&#xd;
analysis tasks [1]. RNNs models have been widely used&#xd;
in combination with convolutional neural networks (CNNs).&#xd;
RNNs contain memory units that display dynamic and temporal&#xd;
connections between past and future data. The outstanding&#xd;
text and signal analysis properties of RNNs and other recurrent&#xd;
models like Long-Short Term Memories (LSTMs) [2] and&#xd;
Gated Recurrent Units (GRUs) [3] make them the prevalent&#xd;
choice to analyze sequential and unsegmented data like text or&#xd;
speech signals.&#xd;
RNNs have two widely used variants; one is uni-directional&#xd;
RNNs [1], which only preserves the information of the past&#xd;
because the only inputs it has seen are from the past, and the&#xd;
second is bi-directional RNNs (BRNNs) [4] which preserves&#xd;
both past and future information. The internal structure of&#xd;
RNNs and their variants inference and training in terms of data or&#xd;
control dependencies across their fundamental numerical kernels&#xd;
complicate the exploitation of model parallelism, which&#xd;
is why just data-parallelism has been traditionally applied to&#xd;
accelerate RNNs [1]. Model parallelism has not been fully&#xd;
exploited to accelerate the forward and backward propagation&#xd;
of RNNs on multi-core CPUs.&#xd;
We present two model parallelism-based approaches: W-Par&#xd;
(Wavefront-Parallelization), a comprehensive approach for&#xd;
uni-directional RNNs, and B-Par (Bidirectional-Parallelization)&#xd;
for bi-directional RNNs inference and training on CPUs that&#xd;
relies on applying model parallelism into RNNs models. We&#xd;
use fine-grained pipeline parallelism in terms of tasks to&#xd;
accelerate multi-layer RNNs running on multi-core CPUs.</dc:description>
               <dc:date>2023-05</dc:date>
               <dc:type>Conference report</dc:type>
               <dc:rights>http://creativecommons.org/licenses/by-nc-nd/4.0/</dc:rights>
               <dc:rights>Open Access</dc:rights>
               <dc:rights>Attribution-NonCommercial-NoDerivatives 4.0 International</dc:rights>
               <dc:publisher>Barcelona Supercomputing Center</dc:publisher>
            </oai_dc:dc>
         </d:Statement>
      </d:Descriptor>
   </d:Item>
</d:DIDL></metadata></record></GetRecord></OAI-PMH>