{"?xml":{"@version":"1.0"},"edm:RDF":{"@xmlns:dc":"http://purl.org/dc/elements/1.1/","@xmlns:edm":"http://www.europeana.eu/schemas/edm/","@xmlns:wgs84_pos":"http://www.w3.org/2003/01/geo/wgs84_pos","@xmlns:foaf":"http://xmlns.com/foaf/0.1/","@xmlns:rdaGr2":"http://rdvocab.info/ElementsGr2","@xmlns:oai":"http://www.openarchives.org/OAI/2.0/","@xmlns:owl":"http://www.w3.org/2002/07/owl#","@xmlns:rdf":"http://www.w3.org/1999/02/22-rdf-syntax-ns#","@xmlns:ore":"http://www.openarchives.org/ore/terms/","@xmlns:skos":"http://www.w3.org/2004/02/skos/core#","@xmlns:dcterms":"http://purl.org/dc/terms/","edm:WebResource":[{"@rdf:about":"http://www.dlib.si/stream/URN:NBN:SI:doc-TO87M5LI/c8473552-020f-4d52-9dec-50c0daa544f9/PDF","dcterms:extent":"564 KB"},{"@rdf:about":"http://www.dlib.si/stream/URN:NBN:SI:doc-TO87M5LI/3ecfb94b-ee1e-4789-a9f3-cc5cdea95217/TEXT","dcterms:extent":"54 KB"}],"edm:TimeSpan":{"@rdf:about":"2013-2025","edm:begin":{"@xml:lang":"en","#text":"2013"},"edm:end":{"@xml:lang":"en","#text":"2025"}},"edm:ProvidedCHO":{"@rdf:about":"URN:NBN:SI:doc-TO87M5LI","dcterms:isPartOf":[{"@rdf:resource":"https://www.dlib.si/details/URN:NBN:SI:spr-BR18JCH2"},{"@xml:lang":"sl","#text":"Slovenščina 2.0"}],"dcterms:issued":"2018","dc:creator":["Bajec, Marko","Žitnik, Slavko"],"dc:format":[{"@xml:lang":"sl","#text":"številka:1"},{"@xml:lang":"sl","#text":"letnik:6"},{"@xml:lang":"sl","#text":"str. 
37-66"}],"dc:identifier":["ISSN:2335-2736","COBISSID_HOST:67079778","URN:URN:NBN:SI:doc-TO87M5LI"],"dc:language":"sl","dc:publisher":{"@xml:lang":"sl","#text":"Trojina, zavod za uporabno slovenistiko"},"dc:subject":[{"@xml:lang":"sl","#text":"algoritem SkipCor"},{"@xml:lang":"sl","#text":"coref149"},{"@xml:lang":"en","#text":"coreference resolution"},{"@xml:lang":"sl","#text":"odkrivanje koreferenčnosti"},{"@xml:lang":"en","#text":"SkipCor algorithm"},{"@xml:lang":"en","#text":"Slovene"},{"@xml:lang":"sl","#text":"slovenščina"},{"@xml:lang":"sl","#text":"ssj500k"}],"dcterms:temporal":{"@rdf:resource":"2013-2025"},"dc:title":{"@xml:lang":"sl","#text":"Odkrivanje koreferenčnosti v slovenskem jeziku na označenih besedilih iz coref149|"},"dc:description":[{"@xml:lang":"sl","#text":"Coreference resolution is one of the three main tasks of the information extraction from text. Its goal is to classify all mentions of entities in a text discourse into groups where each group would represent a separate entity. Coreference resolution methods for larger languages are being developed for quite some time, while none has been proposed for the Slovene language yet. In this paper we present a new manually annotated Slovene corpus for coreference resolution - coref149. We adapt our English-based automatic coreference resolution system SkipCor to the Slovene language and achieve 76% CoNLL 2012 score. We analyse the influences of developed feature functions and check types of the most frequent errors. During the text analysis we have also developed a software library with a web interface, which offers to run all the analysis we describe in this paper and to browse their predictions. The results are promising and comparable to the results of coreference analysis for other larger languages. We show that it is possible to implement algorithms for automatic coreference resolution for the Slovene language. 
Therefore we propose to prepare a larger and better quality corpus featuring all the specifics of the language, which would enable the implementation of generally useful methods for coreference resolution"},{"@xml:lang":"sl","#text":"Odkrivanje koreferenčnosti je ena izmed treh ključnih nalog ekstrakcije informacij iz besedil, kamor spadata še prepoznavanje imenskih entitet in ekstrakcija povezav. Namen odkrivanja koreferenčnosti je prek celotnega besedila ustrezno združiti vse omenitve entitet v skupine, v katerih vsaka skupina predstavlja svojo entiteto. Metode za reševanje te naloge se za nekatere jezike z več govorci razvijajo že dalj časa, medtem ko za slovenski jezik še niso bile izdelane. V prispevku predstavljamo nov, ročno označen korpus za odkrivanje koreferenčnosti v slovenskem jeziku - korpus coref149. Za avtomatsko odkrivanje koreferenčnosti smo prilagodili sistem SkipCor, ki smo ga izdelali za angleški jezik. Sistem SkipCor je na slovenskem gradivu dosegel 76 % ocene CoNLL 2012. Ob tem smo analizirali še vplive posameznih tipov značilk in preverili, katere so pogoste napake. Pri analiziranju besedil smo razvili tudi programsko knjižnico s spletnim vmesnikom, prek katere je možno izvesti vse opisane analize in neposredno primerjati njihovo uspešnost. Rezultati analiz so obetavni in primerljivi z rezultati pri drugih, bolj razširjenih jezikih. 
S tem smo dokazali, da je avtomatsko odkrivanje koreferenčnosti v slovenskem jeziku lahko uspešno, v prihodnosti pa bi bilo potrebno izdelati še večji in kvalitetnejši korpus, v katerem bodo koreferenčno naslovljene vse posebnosti slovenskega jezika, kar bi omogočilo izgradnjo učinkovitih metod za avtomatsko reševanje koreferenčnih problemov"}],"edm:type":"TEXT","dc:type":[{"@xml:lang":"sl","#text":"znanstveno časopisje"},{"@xml:lang":"en","#text":"journals"},{"@rdf:resource":"http://www.wikidata.org/entity/Q361785"}]},"ore:Aggregation":{"@rdf:about":"http://www.dlib.si/?URN=URN:NBN:SI:doc-TO87M5LI","edm:aggregatedCHO":{"@rdf:resource":"URN:NBN:SI:doc-TO87M5LI"},"edm:isShownBy":{"@rdf:resource":"http://www.dlib.si/stream/URN:NBN:SI:doc-TO87M5LI/c8473552-020f-4d52-9dec-50c0daa544f9/PDF"},"edm:rights":{"@rdf:resource":"http://creativecommons.org/licenses/by-sa/4.0/"},"edm:provider":"Slovenian National E-content Aggregator","edm:intermediateProvider":{"@xml:lang":"en","#text":"National and University Library of Slovenia"},"edm:dataProvider":{"@xml:lang":"sl","#text":"Trojina, zavod za uporabno slovenistiko"},"edm:object":{"@rdf:resource":"http://www.dlib.si/streamdb/URN:NBN:SI:doc-TO87M5LI/maxi/edm"},"edm:isShownAt":{"@rdf:resource":"http://www.dlib.si/details/URN:NBN:SI:doc-TO87M5LI"}}}}