<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-13T07:01:34Z</responseDate><request verb="GetRecord" identifier="oai:www.recercat.cat:2117/445762" metadataPrefix="mets">https://recercat.cat/oai/request</request><GetRecord><record><header><identifier>oai:recercat.cat:2117/445762</identifier><datestamp>2025-11-08T07:19:10Z</datestamp><setSpec>com_2072_1033</setSpec><setSpec>col_2072_452951</setSpec></header><metadata><mets xmlns="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:doc="http://www.lyncode.com/xoai" ID="DSpace_ITEM_2117-445762" TYPE="DSpace ITEM" PROFILE="DSpace METS SIP Profile 1.0" xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd" OBJID="hdl:2117/445762">
   <metsHdr CREATEDATE="2026-04-13T07:01:34Z">
      <agent ROLE="CUSTODIAN" TYPE="ORGANIZATION">
         <name>RECERCAT</name>
      </agent>
   </metsHdr>
   <dmdSec ID="DMD_2117_445762">
      <mdWrap MDTYPE="MODS">
         <xmlData xmlns:mods="http://www.loc.gov/mods/v3" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-1.xsd">
            <mods:mods xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-1.xsd">
               <mods:name>
                  <mods:role>
                     <mods:roleTerm type="text">author</mods:roleTerm>
                  </mods:role>
                  <mods:namePart>Espasa Rosell, Jordi</mods:namePart>
               </mods:name>
               <mods:extension>
                  <mods:dateAccessioned encoding="iso8601">2025-11-08T07:19:10Z</mods:dateAccessioned>
               </mods:extension>
               <mods:extension>
                  <mods:dateAvailable encoding="iso8601">2025-11-08T07:19:10Z</mods:dateAvailable>
               </mods:extension>
               <mods:originInfo>
                  <mods:dateIssued encoding="iso8601">2025-10-20</mods:dateIssued>
               </mods:originInfo>
               <mods:identifier type="none"/>
               <mods:identifier type="uri">http://hdl.handle.net/2117/445762</mods:identifier>
               <mods:abstract>This Master's Thesis optimizes large language models (LLMs) for multiple-choice question answering (MCQA) to evaluate employee performance from spoken transcripts in personalized training platforms. Current LLMs achieve only 63% accuracy in dynamic assessments due to biases, reasoning failures, and inefficiencies. We develop a systematic framework balancing precision, cost, and execution time through iterative evaluation refinement, corpus preparation, baseline selection, and phased experiments, including single-factor screening (OFAT), multi-factor interactions, and parameter-efficient fine-tuning (PEFT). Key factors assessed include model scale, in-context learning, chain-of-thought (CoT), chain-of-density (CoD), self-correction, and agentic ensembles. Contributions encompass a replicable optimization pipeline and strategies to mitigate biases like positional and literal interpretation errors. Results show improvements from 63% to 80% accuracy and enhanced F1-scores, enabling ethical, scalable AI-driven assessments for enterprise individualized learning.</mods:abstract>
               <mods:language>
                  <mods:languageTerm authority="rfc3066"/>
               </mods:language>
               <mods:accessCondition type="useAndReproduction">Open Access</mods:accessCondition>
               <mods:subject>
                  <mods:topic>Àrees temàtiques de la UPC::Informàtica::Intel·ligència artificial::Aprenentatge automàtic</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Deep learning (Machine learning)</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Questions and answers</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Models de llenguatge de gran escala</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Resposta a preguntes d'opció múltiple</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Avaluació del rendiment d'empleats</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Transcripcions orals</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Plataformes de formació personalitzada</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Optimització de models</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Precisió en avaluacions dinàmiques</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Biaixos en models d'IA</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Fallades de raonament</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Marc sistemàtic</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Refinament iteratiu d'avaluació</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Preparació de corpus</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Selecció de línia base</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Experiments per fases</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Cribratge d'un sol factor</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Large language models</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Multiple-choice question answering</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Employee performance evaluation</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Spoken transcripts</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Personalized training platforms</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Model optimization</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Accuracy in dynamic assessments</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>AI model biases</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Reasoning failures</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Systematic framework</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Iterative evaluation refinement</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Corpus preparation</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Baseline selection</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Phased experiments</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>One-factor-at-a-time screening</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Multi-factor interactions</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Parameter-efficient fine-tuning</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Model scale</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>In-context learning</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Chain-of-thought</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Aprenentatge profund (Aprenentatge automàtic)</mods:topic>
               </mods:subject>
               <mods:subject>
                  <mods:topic>Preguntes i respostes</mods:topic>
               </mods:subject>
               <mods:titleInfo>
                  <mods:title>Natural language models for learning assessment from unstructured data</mods:title>
               </mods:titleInfo>
               <mods:genre>Master thesis</mods:genre>
            </mods:mods>
         </xmlData>
      </mdWrap>
   </dmdSec>
   <structMap LABEL="DSpace Object" TYPE="LOGICAL">
      <div TYPE="DSpace Object Contents" DMDID="DMD_2117_445762"/>
   </structMap>
</mets></metadata></record></GetRecord></OAI-PMH>