-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e4a0f26
commit f999d6c
Showing
1 changed file
with
196 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
MAINTAINER "CLARIN.SI <[email protected]>" | ||
NAME "ParlaMint-IL 5.0 (Israeli parliament)" | ||
INFO "Israeli parliamentary corpus ParlaMint-IL, 1992-2024 v5.0" | ||
ENCODING "UTF-8" | ||
DEFAULTLOCALE "he_IL.UTF-8" | ||
LANGUAGE "Hebrew" | ||
INFOHREF "http://hdl.handle.net/11356/2005" | ||
TAGSETDOC "https://universaldependencies.org/guidelines.html" | ||
|
||
PATH "/data/manatee-data/parlamint50_il/" | ||
|
||
VERTICAL "| zcat /data/vert/ParlaMint-IL.5.0.vert.gz" | ||
|
||
DOCSTRUCTURE speech | ||
MAXDETAIL 10000 | ||
|
||
SUBCORPATTRS "speech.subcorpus|speech.body|speech.date,speech.term|speech.session|speech.meeting,speech.sitting|speech.agenda,speech.speaker_mp|speech.speaker_minister,speech.speaker_role|speech.speaker_name,speech.speaker_gender|speech.speaker_birth,speech.speaker_party|speech.speaker_party_name,speech.party_status|speech.party_orientation|speech.text_id" | ||
SHORTREF "=speech.speaker_id,=speech.date" | ||
FULLREF "speech.id,speech.text_id,speech.title,speech.date,speech.subcorpus,speech.body,speech.term,speech.session,speech.meeting,speech.sitting,speech.agenda,speech.speaker_name,speech.speaker_gender,speech.speaker_role,speech.speaker_mp,speech.speaker_minister,speech.speaker_party,speech.speaker_party_name,speech.party_status,speech.party_orientation" | ||
|
||
SIMPLEQUERY '[lc="%s" | norm="%s" | lemma_lc="%s"]' | ||
|
||
STRUCTURE speech { | ||
ATTRIBUTE text_id { | ||
LABEL "Text ID" | ||
} | ||
ATTRIBUTE title | ||
ATTRIBUTE subcorpus { | ||
MULTISEP "÷" | ||
} | ||
ATTRIBUTE lang | ||
ATTRIBUTE body { | ||
LABEL "Parliamentary body" | ||
MULTIVALUE yes | ||
MULTISEP "|" | ||
} | ||
ATTRIBUTE term | ||
ATTRIBUTE session { | ||
TEXTBOXLENGTH 4 | ||
} | ||
ATTRIBUTE meeting { | ||
TEXTBOXLENGTH 4 | ||
} | ||
ATTRIBUTE sitting { | ||
TEXTBOXLENGTH 4 | ||
} | ||
ATTRIBUTE agenda { | ||
TEXTBOXLENGTH 4 | ||
} | ||
ATTRIBUTE date | ||
ATTRIBUTE id { | ||
LABEL "Speech ID" | ||
TYPE "UNIQUE" | ||
} | ||
ATTRIBUTE speaker_id { | ||
DEFAULTVALUE "-" | ||
} | ||
ATTRIBUTE speaker_name { | ||
DEFAULTVALUE "-" | ||
} | ||
ATTRIBUTE speaker_role { | ||
DEFAULTVALUE "-" | ||
} | ||
ATTRIBUTE speaker_mp { | ||
DEFAULTVALUE "-" | ||
} | ||
ATTRIBUTE speaker_minister { | ||
DEFAULTVALUE "-" | ||
} | ||
ATTRIBUTE speaker_party { | ||
DEFAULTVALUE "-" | ||
} | ||
ATTRIBUTE speaker_party_name { | ||
DEFAULTVALUE "-" | ||
} | ||
ATTRIBUTE party_status { | ||
DEFAULTVALUE "-" | ||
} | ||
ATTRIBUTE party_orientation { | ||
DEFAULTVALUE "-" | ||
} | ||
ATTRIBUTE speaker_gender { | ||
DEFAULTVALUE "-" | ||
} | ||
ATTRIBUTE speaker_birth { | ||
DEFAULTVALUE "-" | ||
TEXTBOXLENGTH 10 | ||
} | ||
DISPLAYTAG 0 | ||
DISPLAYBEGIN "%(speaker_id): " | ||
DISPLAYEND "" | ||
} | ||
STRUCTURE p { | ||
ATTRIBUTE id { | ||
TYPE "UNIQUE" | ||
} | ||
ATTRIBUTE lang | ||
DISPLAYTAG 0 | ||
DISPLAYBEGIN "" | ||
DISPLAYEND " ¶ " | ||
} | ||
STRUCTURE s { | ||
ATTRIBUTE id { | ||
LABEL "Sentence ID" | ||
TYPE "UNIQUE" | ||
} | ||
DISPLAYTAG 0 | ||
} | ||
STRUCTURE name { | ||
ATTRIBUTE type | ||
DISPLAYTAG 0 | ||
DISPLAYBEGIN "[%(type):" | ||
DISPLAYEND "]" | ||
} | ||
STRUCTURE note { | ||
ATTRIBUTE type | ||
ATTRIBUTE content | ||
DISPLAYTAG 0 | ||
DISPLAYBEGIN "[%(type): %(content)" | ||
DISPLAYEND "]" | ||
} | ||
STRUCTURE g { | ||
DISPLAYTAG 0 | ||
DISPLAYBEGIN "_EMPTY_" | ||
} | ||
|
||
ATTRIBUTE word | ||
ATTRIBUTE lc { | ||
LABEL "word (lowercase)" | ||
DYNAMIC "utf8lowercase" | ||
DYNLIB "internal" | ||
ARG1 "C" | ||
FUNTYPE "s" | ||
FROMATTR "word" | ||
DYNTYPE "index" | ||
TRANSQUERY "yes" | ||
} | ||
ATTRIBUTE norm { | ||
LABEL "syntactic word" | ||
MULTIVALUE yes | ||
MULTISEP "|" | ||
} | ||
ATTRIBUTE lemma { | ||
MULTIVALUE yes | ||
MULTISEP "|" | ||
} | ||
ATTRIBUTE lemma_lc { | ||
LABEL "lemma (lowercase)" | ||
DYNAMIC "utf8lowercase" | ||
DYNLIB "internal" | ||
ARG1 "C" | ||
FUNTYPE "s" | ||
FROMATTR "lemma" | ||
TYPE "index" | ||
TRANSQUERY "yes" | ||
} | ||
ATTRIBUTE pos { | ||
LABEL "UD PoS tag" | ||
MULTIVALUE yes | ||
MULTISEP "|" | ||
} | ||
ATTRIBUTE feats { | ||
LABEL "UD features" | ||
MULTIVALUE yes | ||
MULTISEP "|" | ||
} | ||
ATTRIBUTE id { | ||
LABEL "ID of token" | ||
MULTIVALUE yes | ||
MULTISEP "|" | ||
} | ||
ATTRIBUTE dep { | ||
LABEL "UD dependency label" | ||
MULTIVALUE yes | ||
MULTISEP "|" | ||
} | ||
ATTRIBUTE dep_head_lemma { | ||
LABEL "Lemma of the dependency head" | ||
MULTIVALUE yes | ||
MULTISEP "|" | ||
} | ||
ATTRIBUTE dep_head_pos { | ||
LABEL "UD PoS tag of the dependency head" | ||
MULTIVALUE yes | ||
MULTISEP "|" | ||
} | ||
ATTRIBUTE dep_head_feats { | ||
LABEL "UD features of the dependency head" | ||
MULTIVALUE yes | ||
MULTISEP "|" | ||
} | ||
ATTRIBUTE dep_head_id { | ||
LABEL "ID of the dependency head" | ||
MULTIVALUE yes | ||
MULTISEP "|" | ||
} |