Skip to content

Commit

Permalink
Upgrade with USAS structures
Browse files Browse the repository at this point in the history
  • Loading branch information
TomazErjavec committed Oct 11, 2023
1 parent 2b4d6aa commit 2103dae
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 39 deletions.
37 changes: 24 additions & 13 deletions Schema/ParlaMint-TEI.ana.rnc
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,26 @@ include "ParlaMint-TEI.rnc" {

## The only element that contains analysed text is the segment. It can,
## however, contain transcription commentary.
seg =
element seg {
global.atts
>> a:documentation [ "Optional @corresp for MTed corpora." ],
corresp.att,
(comment | pb | sentence)+
}
seg = element seg { global.atts, (comment | pb | sentence)+ }
}

## A sentence.
sentence =
element s {
global.atts
>> a:documentation [ "Optional @corresp for MTed corpora." ],
corresp.att,
(comment | pb | ner | word | punct)+,
syntax?
global.atts, (comment | pb | ner | phr | word | punct)+, syntax?
}

## Multi-Word Unit
phr =
  element phr {
    global.atts,
    # Both @function and @ana are obligatory on a phrase (no trailing "?").
    attribute function { xsd:token },
    attribute ana { xsd:anyURI },
    
    ## Currently we only support phrases with semantic markers
    attribute type { "sem" },
    # The content is restricted to the tokens allowed by mwe_tokens.
    mwe_tokens
  }

## Named entity element
Expand Down Expand Up @@ -70,6 +73,9 @@ ner.atts =
global.atts,
attribute ana { xsd:anyURI }

# Content model of phr: a flat run of word and punct tokens only; unlike
# ner_tokens it admits no comment, pb or nested element.
## Annotations that can appear in a MWE.
mwe_tokens = (word | punct)+

## Annotations that can appear in a NE.
ner_tokens = (word | punct | comment | pb | ner)+

Expand Down Expand Up @@ -123,7 +129,12 @@ token.atts =
attribute msd { non-empty.string },
attribute ana { anyURIs }?,
attribute pos { non-empty.string }?,
attribute function { normalized-space.string }?
attribute function {
# This is different from TEI, where it is a name token (which can't contain comma!?)

# ref name="normalized-space.string"/
xsd:token
}?

## Obligatory attributes of word.
word.atts =
Expand Down
37 changes: 32 additions & 5 deletions Schema/ParlaMint-TEI.ana.rng
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@
however, contain transcription commentary.</a:documentation>
<element name="seg">
<ref name="global.atts"/>
<a:documentation>Optional @corresp for MTed corpora.</a:documentation>
<ref name="corresp.att"/>
<oneOrMore>
<choice>
<ref name="comment"/>
Expand All @@ -54,13 +52,12 @@
<a:documentation>A sentence.</a:documentation>
<element name="s">
<ref name="global.atts"/>
<a:documentation>Optional @corresp for MTed corpora.</a:documentation>
<ref name="corresp.att"/>
<oneOrMore>
<choice>
<ref name="comment"/>
<ref name="pb"/>
<ref name="ner"/>
<ref name="phr"/>
<ref name="word"/>
<ref name="punct"/>
</choice>
Expand All @@ -71,6 +68,24 @@
</element>
</define>

<define name="phr">
  <a:documentation>Multi-Word Unit</a:documentation>
  <element name="phr">
    <ref name="global.atts"/>
    <!-- Both @function and @ana are obligatory on a phrase (not wrapped in <optional>). -->
    <attribute name="function">
      <data type="token"/>
    </attribute>
    <attribute name="ana">
      <data type="anyURI"/>
    </attribute>
    <attribute name="type">
      <!-- Documentation placed as the first child so that it annotates @type itself. -->
      <a:documentation>Currently we only support phrases with semantic markers</a:documentation>
      <value>sem</value>
    </attribute>
    <!-- The content is restricted to the tokens allowed by mwe_tokens. -->
    <ref name="mwe_tokens"/>
  </element>
</define>

<define name="ner">
<a:documentation>Named entity element</a:documentation>
<choice>
Expand Down Expand Up @@ -139,6 +154,16 @@
</attribute>
</define>

<define name="mwe_tokens">
<a:documentation>Annotations that can appear in a MWE.</a:documentation>
<!-- Referenced by the phr element as its content: one or more word or punct tokens, in any order. -->
<oneOrMore>
<choice>
<ref name="word"/>
<ref name="punct"/>
</choice>
</oneOrMore>
</define>

<define name="ner_tokens">
<a:documentation>Annotations that can appear in a NE.</a:documentation>
<oneOrMore>
Expand Down Expand Up @@ -261,7 +286,9 @@
</optional>
<optional>
<attribute name="function">
<ref name="normalized-space.string"/>
<!-- This is different from TEI, where it is a name token (which can't contain comma!?) -->
<!--ref name="normalized-space.string"/-->
<data type="token"/>
</attribute>
</optional>
</define>
Expand Down
14 changes: 4 additions & 10 deletions Schema/ParlaMint-TEI.rnc
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ title =
}

## A "meeting" (e.g. session) of the parliament.
meeting = element meeting { global.atts, ana.att, corresp.att, text }
meeting = element meeting { global.atts, ana.att, text }

## Publisher of the corpus.
publisher =
Expand Down Expand Up @@ -218,9 +218,7 @@ text-body =
## The definition of the div element.
\div =
element div {
global.atts
>> a:documentation [ "Optional @corresp for MTed corpora." ],
corresp.att,
global.atts,

## A standard div with utterances.
((attribute type { "debateSection" },
Expand Down Expand Up @@ -249,18 +247,14 @@ u =
"Utterance has obligatory @ana, for the role of the speaker."
],
ana.att,
(attribute source { anyURIs }?)
>> a:documentation [ "Optional @corresp for MTed corpora." ],
corresp.att,
attribute source { anyURIs }?,
(seg | comment | pb)+
}

## A segment (i.e. paragraph) inside a speech.
seg =
element seg {
global.atts
>> a:documentation [ "Optional @corresp for MTed corpora." ],
corresp.att,
global.atts,
(comment
| pb
| text
Expand Down
7 changes: 0 additions & 7 deletions Schema/ParlaMint-TEI.rng
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,6 @@
<element name="meeting">
<ref name="global.atts"/>
<ref name="ana.att"/>
<ref name="corresp.att"/>
<text/>
</element>
</define>
Expand Down Expand Up @@ -408,8 +407,6 @@
<a:documentation>The definition of the div element.</a:documentation>
<element name="div">
<ref name="global.atts"/>
<a:documentation>Optional @corresp for MTed corpora.</a:documentation>
<ref name="corresp.att"/>
<choice>
<a:documentation>A standard div with utterances.</a:documentation>
<group>
Expand Down Expand Up @@ -479,8 +476,6 @@
<ref name="anyURIs"/>
</attribute>
</optional>
<a:documentation>Optional @corresp for MTed corpora.</a:documentation>
<ref name="corresp.att"/>
<oneOrMore>
<choice>
<ref name="seg"/>
Expand All @@ -495,8 +490,6 @@
<a:documentation>A segment (i.e. paragraph) inside a speech.</a:documentation>
<element name="seg">
<ref name="global.atts"/>
<a:documentation>Optional @corresp for MTed corpora.</a:documentation>
<ref name="corresp.att"/>
<oneOrMore>
<choice>
<ref name="comment"/>
Expand Down
3 changes: 2 additions & 1 deletion Schema/ParlaMint.rnc
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ head =
}

## A page break, possibly with its source URI.
pb = element pb { global.atts, corresp.att, source.att, empty }
pb = element pb { global.atts, source.att, empty }

## A series of paragraphs. Paragraphs are only used in the teiHeader.
paras = element p { global.atts, annotated.text }+
Expand Down Expand Up @@ -339,6 +339,7 @@ corresp.att = attribute corresp { anyURIs }?
global.atts =
id.att?,
lang.att,
corresp.att,
attribute n { xsd:string }?

## The identifier attribute, giving the unique ID of the
Expand Down
2 changes: 1 addition & 1 deletion Schema/ParlaMint.rng
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,6 @@
<a:documentation>A page break, possibly with its source URI.</a:documentation>
<element name="pb">
<ref name="global.atts"/>
<ref name="corresp.att"/>
<ref name="source.att"/>
<empty/>
</element>
Expand Down Expand Up @@ -733,6 +732,7 @@
<ref name="id.att"/>
</optional>
<ref name="lang.att"/>
<ref name="corresp.att"/>
<optional>
<attribute name="n">
<data type="string"/>
Expand Down
4 changes: 2 additions & 2 deletions Schema/compile.log
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ java -jar /usr/share/java/trang.jar ParlaMint-listOrg.rng ParlaMint-listOrg.rnc
java -jar /usr/share/java/trang.jar ParlaMint-listPerson.rng ParlaMint-listPerson.rnc
java -jar /usr/share/java/trang.jar ParlaMint-taxonomy.rng ParlaMint-taxonomy.rnc
make[1]: Leaving directory '/home/project/corpora/Parla/ParlaMint/ParlaMint/Schema'
4.92user 0.61system 0:02.46elapsed 224%CPU (0avgtext+0avgdata 67164maxresident)k
1872inputs+824outputs (0major+71458minor)pagefaults 0swaps
5.33user 0.56system 0:02.59elapsed 227%CPU (0avgtext+0avgdata 67200maxresident)k
1600inputs+872outputs (0major+71468minor)pagefaults 0swaps

0 comments on commit 2103dae

Please sign in to comment.