{"?xml":{"@version":"1.0"},"edm:RDF":{"@xmlns:dc":"http://purl.org/dc/elements/1.1/","@xmlns:edm":"http://www.europeana.eu/schemas/edm/","@xmlns:wgs84_pos":"http://www.w3.org/2003/01/geo/wgs84_pos","@xmlns:foaf":"http://xmlns.com/foaf/0.1/","@xmlns:rdaGr2":"http://rdvocab.info/ElementsGr2","@xmlns:oai":"http://www.openarchives.org/OAI/2.0/","@xmlns:owl":"http://www.w3.org/2002/07/owl#","@xmlns:rdf":"http://www.w3.org/1999/02/22-rdf-syntax-ns#","@xmlns:ore":"http://www.openarchives.org/ore/terms/","@xmlns:skos":"http://www.w3.org/2004/02/skos/core#","@xmlns:dcterms":"http://purl.org/dc/terms/","edm:WebResource":[{"@rdf:about":"http://www.dlib.si/stream/URN:NBN:SI:doc-S1M4KA4D/15718087-6215-412b-80d2-8d6ac3b90be5/PDF","dcterms:extent":"690 KB"},{"@rdf:about":"http://www.dlib.si/stream/URN:NBN:SI:doc-S1M4KA4D/2e6c2d0b-bf99-44be-962b-470cf9ad9e21/TEXT","dcterms:extent":"0 KB"}],"edm:TimeSpan":{"@rdf:about":"2002-2024","edm:begin":{"@xml:lang":"en","#text":"2002"},"edm:end":{"@xml:lang":"en","#text":"2024"}},"edm:ProvidedCHO":{"@rdf:about":"URN:NBN:SI:doc-S1M4KA4D","dcterms:isPartOf":[{"@rdf:resource":"https://www.dlib.si/details/URN:NBN:SI:spr-1R5DVDOA"},{"@xml:lang":"sl","#text":"Informatica Medica Slovenica"}],"dcterms:issued":"2022","dc:creator":["Blagus, Rok","Tupkušić, Mirza"],"dc:format":[{"@xml:lang":"sl","#text":"številka:1/2"},{"@xml:lang":"sl","#text":"letnik:27"},{"@xml:lang":"sl","#text":"str. 1-13"}],"dc:identifier":["ISSN:1318-2129","COBISSID_HOST:152443651","URN:URN:NBN:SI:doc-S1M4KA4D"],"dc:language":"sl","dc:publisher":{"@xml:lang":"sl","#text":"Slovensko društvo za medicinsko informatiko"},"dc:subject":[{"@xml:lang":"en","#text":"cross-validation"},{"@xml:lang":"sl","#text":"napovedni model"},{"@xml:lang":"sl","#text":"navzkrižno preverjanje"},{"@xml:lang":"sl","#text":"neuravnoteženi podatki"},{"@xml:lang":"en","#text":"prediction models"},{"@xml:lang":"en","#text":"rare events"}],"dcterms:temporal":{"@rdf:resource":"2002-2024"},"dc:title":{"@xml:lang":"sl","#text":"Preoptimistične ocene točnosti napovednih modelov| an illustration based on the joint use of sampling techniques and cross-validation| ilustracija na primeru skupne uporabe tehnik vzorčenja in navzkrižnega preverjanja| Over-optimistic assessment of the performance of prediction models|"},"dc:description":{"@xml:lang":"sl","#text":"Prediction models use various statistical methods for building classification rules to classify units into pre-specified groups based on the learning data. In practice, the data are often not suitable for the chosen procedure and they need to be pre-processed before training the classifier. An important example are imbalanced data where the naive approach can lead to poor accuracy for the minority class. Many data augmentation approaches have been developed to alleviate this issue. However, when using these techniques, one needs to be careful to correctly evaluate the performance of the classifier in terms of its predictive accuracy, because incorrect evaluation can lead to an overly optimistic estimate of the classifier’s performance. We explain in detail why this happens and showcase the different contributing factors. The results are illustrated using various performance measures, various data augmentation techniques, and various cross-validation techniques. Our results can help the developers of prediction models to correctly evaluate predictive ability of the derived model, as well as to understand and critically appraise whether the predictive ability of the model was correctly estimated or the evaluation was too optimistic"},"edm:type":"TEXT","dc:type":[{"@xml:lang":"sl","#text":"znanstveno časopisje"},{"@xml:lang":"en","#text":"journals"},{"@rdf:resource":"http://www.wikidata.org/entity/Q361785"}]},"ore:Aggregation":{"@rdf:about":"http://www.dlib.si/?URN=URN:NBN:SI:doc-S1M4KA4D","edm:aggregatedCHO":{"@rdf:resource":"URN:NBN:SI:doc-S1M4KA4D"},"edm:isShownBy":{"@rdf:resource":"http://www.dlib.si/stream/URN:NBN:SI:doc-S1M4KA4D/15718087-6215-412b-80d2-8d6ac3b90be5/PDF"},"edm:rights":{"@rdf:resource":"http://rightsstatements.org/vocab/InC/1.0/"},"edm:provider":"Slovenian National E-content Aggregator","edm:intermediateProvider":{"@xml:lang":"en","#text":"National and University Library of Slovenia"},"edm:dataProvider":{"@xml:lang":"sl","#text":"Slovensko društvo za medicinsko informatiko"},"edm:object":{"@rdf:resource":"http://www.dlib.si/streamdb/URN:NBN:SI:doc-S1M4KA4D/maxi/edm"},"edm:isShownAt":{"@rdf:resource":"http://www.dlib.si/details/URN:NBN:SI:doc-S1M4KA4D"}}}}