<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-17T03:25:32Z</responseDate><request verb="GetRecord" identifier="oai:www.recercat.cat:2117/180457" metadataPrefix="marc">https://recercat.cat/oai/request</request><GetRecord><record><header><identifier>oai:recercat.cat:2117/180457</identifier><datestamp>2026-02-07T09:30:50Z</datestamp><setSpec>com_2072_1033</setSpec><setSpec>col_2072_452950</setSpec></header><metadata><record xmlns="http://www.loc.gov/MARC21/slim" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:doc="http://www.lyncode.com/xoai" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
   <leader>00925njm 22002777a 4500</leader>
   <datafield ind2=" " ind1=" " tag="042">
      <subfield code="a">dc</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="720">
      <subfield code="a">Pascual de la Puente, Santiago</subfield>
      <subfield code="e">author</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="720">
      <subfield code="a">Serra, Joan</subfield>
      <subfield code="e">author</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="720">
      <subfield code="a">Bonafonte Cávez, Antonio</subfield>
      <subfield code="e">author</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="260">
      <subfield code="c">2019-11-01</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="520">
      <subfield code="a">Speech enhancement improves recorded voice utterances to eliminate noise that might be impeding their intelligibility or compromising their quality. Typical speech enhancement systems are based on regression approaches that subtract noise or predict clean signals. Most of them do not operate directly on waveforms. In this work, we propose a generative approach to regenerate corrupted signals into a clean version by using generative adversarial networks on the raw signal. We also explore several variations of the proposed system, obtaining insights into proper architectural choices for an adversarially trained, convolutional autoencoder applied to speech. We conduct both objective and subjective evaluations to assess the performance of the proposed method. The former helps us choose among variations and better tune hyperparameters, while the latter is used in a listening experiment with 42 subjects, confirming the effectiveness of the approach in the real world. We also demonstrate the applicability of the approach for more generalized speech enhancement, where we have to regenerate voices from whispered signals.</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="520">
      <subfield code="a">Peer Reviewed</subfield>
   </datafield>
   <datafield ind2=" " ind1=" " tag="520">
      <subfield code="a">Postprint (author's final draft)</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Àrees temàtiques de la UPC::Enginyeria de la telecomunicació</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Speech processing systems</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Neural networks (Computer science)</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Speech enhancement</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Audio transformation</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Generative adversarial network</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Neural networks</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Processament de la parla</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Reconeixement automàtic de la parla</subfield>
   </datafield>
   <datafield tag="653" ind2=" " ind1=" ">
      <subfield code="a">Xarxes neuronals (Informàtica)</subfield>
   </datafield>
   <datafield ind2="0" ind1="0" tag="245">
      <subfield code="a">Time-domain speech enhancement using generative adversarial networks</subfield>
   </datafield>
</record></metadata></record></GetRecord></OAI-PMH>