diff --git a/Build/Sources-Distro/registry/parlamint50_il b/Build/Sources-Distro/registry/parlamint50_il new file mode 100644 index 000000000..af0a4acb1 --- /dev/null +++ b/Build/Sources-Distro/registry/parlamint50_il @@ -0,0 +1,196 @@ +MAINTAINER "CLARIN.SI " +NAME "ParlaMint-IL 5.0 (Israeli parliament)" +INFO "Israeli parliamentary corpus ParlaMint-IL, 1992-2024 v5.0" +ENCODING "UTF-8" +DEFAULTLOCALE "he_IL.UTF-8" +LANGUAGE "Hebrew" +INFOHREF "http://hdl.handle.net/11356/2005" +TAGSETDOC "https://universaldependencies.org/guidelines.html" + +PATH "/data/manatee-data/parlamint50_il/" + +VERTICAL "| zcat /data/vert/ParlaMint-IL.5.0.vert.gz" + +DOCSTRUCTURE speech +MAXDETAIL 10000 + +SUBCORPATTRS "speech.subcorpus|speech.body|speech.date,speech.term|speech.session|speech.meeting,speech.sitting|speech.agenda,speech.speaker_mp|speech.speaker_minister,speech.speaker_role|speech.speaker_name,speech.speaker_gender|speech.speaker_birth,speech.speaker_party|speech.speaker_party_name,speech.party_status|speech.party_orientation|speech.text_id" +SHORTREF "=speech.speaker_id,=speech.date" +FULLREF "speech.id,speech.text_id,speech.title,speech.date,speech.subcorpus,speech.body,speech.term,speech.session,speech.meeting,speech.sitting,speech.agenda,speech.speaker_name,speech.speaker_gender,speech.speaker_role,speech.speaker_mp,speech.speaker_minister,speech.speaker_party,speech.speaker_party_name,speech.party_status,speech.party_orientation" + +SIMPLEQUERY '[lc="%s" | norm="%s" | lemma_lc="%s"]' + +STRUCTURE speech { + ATTRIBUTE text_id { + LABEL "Text ID" + } + ATTRIBUTE title + ATTRIBUTE subcorpus { + MULTISEP "÷" + } + ATTRIBUTE lang + ATTRIBUTE body { + LABEL "Parliamentary body" + MULTIVALUE yes + MULTISEP "|" + } + ATTRIBUTE term + ATTRIBUTE session { + TEXTBOXLENGTH 4 + } + ATTRIBUTE meeting { + TEXTBOXLENGTH 4 + } + ATTRIBUTE sitting { + TEXTBOXLENGTH 4 + } + ATTRIBUTE agenda { + TEXTBOXLENGTH 4 + } + ATTRIBUTE date + ATTRIBUTE id { + LABEL "Speech ID" + TYPE "UNIQUE" + } + ATTRIBUTE speaker_id { + DEFAULTVALUE "-" + } + ATTRIBUTE speaker_name { + DEFAULTVALUE "-" + } + ATTRIBUTE speaker_role { + DEFAULTVALUE "-" + } + ATTRIBUTE speaker_mp { + DEFAULTVALUE "-" + } + ATTRIBUTE speaker_minister { + DEFAULTVALUE "-" + } + ATTRIBUTE speaker_party { + DEFAULTVALUE "-" + } + ATTRIBUTE speaker_party_name { + DEFAULTVALUE "-" + } + ATTRIBUTE party_status { + DEFAULTVALUE "-" + } + ATTRIBUTE party_orientation { + DEFAULTVALUE "-" + } + ATTRIBUTE speaker_gender { + DEFAULTVALUE "-" + } + ATTRIBUTE speaker_birth { + DEFAULTVALUE "-" + TEXTBOXLENGTH 10 + } + DISPLAYTAG 0 + DISPLAYBEGIN "%(speaker_id): " + DISPLAYEND "" +} +STRUCTURE p { + ATTRIBUTE id { + TYPE "UNIQUE" + } + ATTRIBUTE lang + DISPLAYTAG 0 + DISPLAYBEGIN "" + DISPLAYEND " ¶ " +} +STRUCTURE s { + ATTRIBUTE id { + LABEL "Sentence ID" + TYPE "UNIQUE" + } + DISPLAYTAG 0 +} +STRUCTURE name { + ATTRIBUTE type + DISPLAYTAG 0 + DISPLAYBEGIN "[%(type):" + DISPLAYEND "]" +} +STRUCTURE note { + ATTRIBUTE type + ATTRIBUTE content + DISPLAYTAG 0 + DISPLAYBEGIN "[%(type): %(content)" + DISPLAYEND "]" +} +STRUCTURE g { + DISPLAYTAG 0 + DISPLAYBEGIN "_EMPTY_" +} + +ATTRIBUTE word +ATTRIBUTE lc { + LABEL "word (lowercase)" + DYNAMIC "utf8lowercase" + DYNLIB "internal" + ARG1 "C" + FUNTYPE "s" + FROMATTR "word" + DYNTYPE "index" + TRANSQUERY "yes" +} +ATTRIBUTE norm { + LABEL "syntactic word" + MULTIVALUE yes + MULTISEP "|" +} +ATTRIBUTE lemma { + MULTIVALUE yes + MULTISEP "|" +} +ATTRIBUTE lemma_lc { + LABEL "lemma (lowercase)" + DYNAMIC "utf8lowercase" + DYNLIB "internal" + ARG1 "C" + FUNTYPE "s" + FROMATTR "lemma" + TYPE "index" + TRANSQUERY "yes" +} +ATTRIBUTE pos { + LABEL "UD PoS tag" + MULTIVALUE yes + MULTISEP "|" +} +ATTRIBUTE feats { + LABEL "UD features" + MULTIVALUE yes + MULTISEP "|" +} +ATTRIBUTE id { + LABEL "ID of token" + MULTIVALUE yes + MULTISEP "|" +} +ATTRIBUTE dep { + LABEL "UD dependency label" + MULTIVALUE yes + MULTISEP "|" +} +ATTRIBUTE dep_head_lemma { + LABEL "Lemma of the dependency head" + MULTIVALUE yes + MULTISEP "|" +} +ATTRIBUTE dep_head_pos { + LABEL "UD PoS tag of the dependency head" + MULTIVALUE yes + MULTISEP "|" +} +ATTRIBUTE dep_head_feats { + LABEL "UD features of the dependency head" + MULTIVALUE yes + MULTISEP "|" +} +ATTRIBUTE dep_head_id { + LABEL "ID of the dependency head" + MULTIVALUE yes + MULTISEP "|" +}