<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-17T07:46:15Z</responseDate><request verb="GetRecord" identifier="oai:www.recercat.cat:10230/28023" metadataPrefix="mets">https://recercat.cat/oai/request</request><GetRecord><record><header><identifier>oai:recercat.cat:10230/28023</identifier><datestamp>2025-12-26T11:29:48Z</datestamp><setSpec>com_2072_6</setSpec><setSpec>col_2072_452952</setSpec></header><metadata><mets xmlns="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:doc="http://www.lyncode.com/xoai" ID="&#xa;&#x9;&#x9;&#x9;&#x9;DSpace_ITEM_10230-28023" TYPE="DSpace ITEM" PROFILE="DSpace METS SIP Profile 1.0" xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd" OBJID="&#xa;&#x9;&#x9;&#x9;&#x9;hdl:10230/28023">
   <metsHdr CREATEDATE="2026-04-17T09:46:15Z">
      <agent ROLE="CUSTODIAN" TYPE="ORGANIZATION">
         <name>RECERCAT</name>
      </agent>
   </metsHdr>
   <dmdSec ID="DMD_10230_28023">
      <mdWrap MDTYPE="MODS">
         <xmlData xmlns:mods="http://www.loc.gov/mods/v3" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-1.xsd">
            <mods:mods xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-1.xsd">
               <mods:name>
                  <mods:role>
                     <mods:roleTerm type="text">author</mods:roleTerm>
                  </mods:role>
                  <mods:namePart>Gialampoukidis, Ilias</mods:namePart>
               </mods:name>
               <mods:name>
                  <mods:role>
                     <mods:roleTerm type="text">author</mods:roleTerm>
                  </mods:role>
                  <mods:namePart>Vrochidis, Stefanos</mods:namePart>
               </mods:name>
               <mods:name>
                  <mods:role>
                     <mods:roleTerm type="text">author</mods:roleTerm>
                  </mods:role>
                  <mods:namePart>Kompatsiaris, Ioannis</mods:namePart>
               </mods:name>
               <mods:name>
                  <mods:role>
                     <mods:roleTerm type="text">author</mods:roleTerm>
                  </mods:role>
                  <mods:namePart>Wanner, Leo</mods:namePart>
               </mods:name>
               <mods:originInfo>
                  <mods:dateIssued encoding="iso8601">2017-01-31T17:22:58Z2016</mods:dateIssued>
               </mods:originInfo>
               <mods:identifier type="none"/>
               <mods:abstract>Nowadays there is an important need by journalists and media monitoring companies to cluster news in large amounts of web articles, in order to ensure fast access to their topics or events of interest. Our aim in this work is to identify groups of news articles that share a common topic or event, without a priori knowledge of the number of clusters. The estimation of the correct number of topics is a challenging issue, due to the existence of “noise”, i.e. news articles which are irrelevant to all other topics. In this context, we introduce a novel density-based news clustering framework, in which the assignment of news articles to topics is done by the well-established Latent Dirichlet Allocation, but the estimation of the number of clusters is performed by our novel method, called “DBSCAN-Martingale”, which allows for extracting noise from the dataset and progressively extracts clusters from an OPTICS reachability plot. We evaluate our framework and the DBSCAN-Martingale on the 20newsgroups-mini dataset and on 220 web news articles, which are references to specific Wikipedia pages. Among twenty methods for news clustering, without knowing the number of clusters k, the framework of DBSCAN-Martingale provides the correct number of clusters and the highest Normalized Mutual Information.This  work  was  supported  by  the  projects  MULTISENSOR (FP7-610411) and KRISTINA (H2020-645012), funded by the European Commission.</mods:abstract>
               <mods:language>
                  <mods:languageTerm authority="rfc3066"/>
               </mods:language>
               <mods:accessCondition type="useAndReproduction">© Springer The final publication is available at Springer via/nhttp://dx.doi.org/10.1007/978-3-319-41920-6_13. info:eu-repo/semantics/openAccess</mods:accessCondition>
               <mods:subject>
                  <mods:topic>Clustering news articles</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Latent Dirichlet Allocation</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>DBSCAN-Martingale</mods:topic>
               </mods:subject>
               <mods:titleInfo>
                  <mods:title>A hybrid framework for news clustering based on the DBSCAN-Martingale and LDA</mods:title>
               </mods:titleInfo>
               <mods:genre>info:eu-repo/semantics/conferenceObject info:eu-repo/semantics/acceptedVersion</mods:genre>
            </mods:mods>
         </xmlData>
      </mdWrap>
   </dmdSec>
   <structMap LABEL="DSpace Object" TYPE="LOGICAL">
      <div TYPE="DSpace Object Contents" ADMID="DMD_10230_28023"/>
   </structMap>
</mets></metadata></record></GetRecord></OAI-PMH>