Skip to content

Commit

Permalink
simple notes and incidents normalization - no nested brackets, no nes…
Browse files Browse the repository at this point in the history
…ted elements (#157 #195)
  • Loading branch information
matyaskopp committed Jun 8, 2023
1 parent 794d629 commit 0763803
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 2 deletions.
14 changes: 13 additions & 1 deletion Scripts/parlamint-lib.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
xmlns:tei="http://www.tei-c.org/ns/1.0"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns="http://www.tei-c.org/ns/1.0"
xmlns:et="http://nl.ijs.si/et"
xmlns:mk="http://ufal.mff.cuni.cz/matyas-kopp"
xmlns:et="http://nl.ijs.si/et"
exclude-result-prefixes="#all"
version="2.0">

Expand Down Expand Up @@ -554,6 +555,17 @@
</xsl:choose>
</xsl:template>

<!-- Notes and incidents normalization: collapse whitespace and strip one
     enclosing pair of [...], /.../ or (...) together with any spaces and
     periods that follow the closing delimiter.
     A wrapper is removed only when the enclosed text contains no further
     delimiter of the same kind, so "[a [b] c]" is returned unchanged.
     The three patterns are applied in the order [] then // then (), each one
     to the result of the previous step.
     Parameter noteIn: the raw text; the empty sequence is treated as ''.
     Returns the normalized text as a single xs:string. -->
<xsl:function name="mk:normalize-note" as="xs:string">
  <!-- xs:string? (not xs:string) so the text of an empty element can be passed -->
  <xsl:param name="noteIn" as="xs:string?"/>
  <!-- normalize-space() maps the empty sequence to the zero-length string -->
  <xsl:variable name="collapsed" as="xs:string" select="normalize-space($noteIn)"/>
  <!-- plain notes without any inner brackets of the same type -->
  <xsl:variable name="noSquare" as="xs:string"
                select="replace($collapsed,'^\s*\[\s*([^\[\]]*?)\s*\][\s\.]*$','$1')"/>
  <xsl:variable name="noSlashes" as="xs:string"
                select="replace($noSquare,'^\s*/\s*([^/]*?)\s*/[\s\.]*$','$1')"/>
  <xsl:variable name="noParens" as="xs:string"
                select="replace($noSlashes,'^\s*\(\s*([^\(\)]*?)\s*\)[\s\.]*$','$1')"/>
  <!-- xsl:sequence returns the string itself instead of a text node -->
  <xsl:sequence select="$noParens"/>
</xsl:function>

<!-- Format number-->
<xsl:function name="et:format-number" as="xs:string">
<xsl:param name="lang" as="xs:string"/>
Expand Down
27 changes: 26 additions & 1 deletion Scripts/parlamint2release.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
xmlns:xi="http://www.w3.org/2001/XInclude"
xmlns="http://www.tei-c.org/ns/1.0"
xmlns:tei="http://www.tei-c.org/ns/1.0"
xmlns:mk="http://ufal.mff.cuni.cz/matyas-kopp"
xmlns:et="http://nl.ijs.si/et"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
exclude-result-prefixes="xsl tei et xs xi"
Expand Down Expand Up @@ -352,7 +353,31 @@
<xsl:message select="concat('WARN ', /tei:TEI/@xml:id,
': removing empty note in ', ancestor-or-self::tei:*[@xml:id][1]/@xml:id)"/>
</xsl:template>


<!-- Normalize text-only notes and the desc of incident, kinesic and vocal:
     the text is passed through mk:normalize-note and an INFO message is
     logged whenever the text changes.
     A desc that contains child elements is not normalized; it is copied
     unchanged with a WARN message, so that no markup is lost. -->
<xsl:template mode="comp" match="tei:note[normalize-space(.) and not(./element())]
                                 | tei:incident/tei:desc
                                 | tei:kinesic/tei:desc
                                 | tei:vocal/tei:desc">
  <xsl:choose>
    <xsl:when test="element()">
      <!-- Only desc can get here (the note alternative excludes child elements).
           Writing out just the normalized text would drop the children. -->
      <xsl:message select="concat('WARN ', /tei:TEI/@xml:id,
                           ': skipping desc/element() normalization ', string(.),
                           ' ancestor:', ancestor-or-self::tei:*[@xml:id][1]/@xml:id)"/>
      <xsl:copy-of select="."/>
    </xsl:when>
    <xsl:otherwise>
      <!-- string-join always yields exactly one string, also for zero or several
           text nodes (e.g. text split by a comment), which a bare text() sequence
           would not when passed to a single-string parameter -->
      <xsl:variable name="textIn" as="xs:string" select="string-join(text(), '')"/>
      <xsl:variable name="textOut" as="xs:string" select="mk:normalize-note($textIn)"/>
      <xsl:if test="not($textIn = $textOut)">
        <xsl:message select="concat('INFO ', /tei:TEI/@xml:id,
                             ': note/incident normalization ',$textIn,' to ', $textOut)"/>
      </xsl:if>
      <xsl:copy>
        <!-- NOTE(review): attributes are applied in the default mode, not in
             mode "comp"; confirm that this is intended -->
        <xsl:apply-templates select="@*"/>
        <xsl:value-of select="$textOut"/>
      </xsl:copy>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<xsl:template mode="comp" match="tei:note[./element()]">
  <!-- Notes can contain mixed content (text - time - text). Such notes are not
       normalized: a WARN message with the note's text and the xml:id of the
       nearest element carrying one is logged, and the note is copied as is. -->
  <!-- string(.) gives the same message text as copy-of(.) did, because concat()
       only uses the atomized value, and it does not need the XSLT 3.0 copy-of()
       function -->
  <xsl:message select="concat('WARN ', /tei:TEI/@xml:id,
                       ': skipping note/element() normalization ',string(.),
                       ' ancestor:', ancestor-or-self::tei:*[@xml:id][1]/@xml:id)"/>
  <xsl:copy-of select="."/>
</xsl:template>

<!-- Give IDs to segs without them (if u has ID, otherwise complain) -->
<xsl:template mode="comp" match="tei:seg[not(@xml:id)]">
<xsl:copy>
Expand Down

0 comments on commit 0763803

Please sign in to comment.