-
Notifications
You must be signed in to change notification settings - Fork 7
/
enhance-tei.xql
87 lines (77 loc) · 5.74 KB
/
enhance-tei.xql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
xquery version "3.1";
import module namespace config = "http://www.salamanca.school/xquery/config" at "modules/config.xqm";
import module namespace i18n = "http://exist-db.org/xquery/i18n" at "i18n.xqm";
import module namespace console = "http://exist-db.org/xquery/console";
declare namespace exist = "http://exist.sourceforge.net/NS/exist";
declare namespace output = "http://www.w3.org/2010/xslt-xquery-serialization";
declare namespace request = "http://exist-db.org/xquery/request";
declare namespace sal = "http://salamanca.adwmainz.de";
declare namespace tei = "http://www.tei-c.org/ns/1.0";
declare namespace itei = "https://www.salamanca.school/indexed-tei";
declare namespace util = "http://exist-db.org/xquery/util";
(: Reduces a TEI document to its structural skeleton for RDF extraction:
   elements are re-created with the "itei:" prefix, text nodes and comments are dropped,
   and xml:id attributes that occur in the work's sal:node index are enriched with
   additional attributes (title, web, citableParent, citetrail) taken from that index. :)
declare option exist:timeout '3500000'; (: ~1h; NOTE(review): presumably milliseconds (3,500,000 ms is about 58 min) - confirm against the eXist-db docs :)
(: serialize the query result as XML :)
declare option output:method 'xml';
(: local names of elements that are not reproduced in the output; local:copy() still processes their content :)
declare variable $omittableElemTypes := ('g', 'lb', 'cb', 'hi', 'choice', 'abbr', 'sic', 'orig', 'expan', 'corr', 'reg', 'ref', 'foreign');
(: names of attributes that local:copy() does not carry over when it copies an ordinary element :)
declare variable $omittableAttrTypes := ('anchored', 'rendition', 'resp', 'change', 'cert');
(:~
 : Recursively copies the element structure of $input into "itei:"-prefixed elements,
 : dropping text nodes and comments and enriching indexed nodes with data from the node index.
 :
 : For every item in $input:
 :   - elements whose local name is listed in $omittableElemTypes are unwrapped
 :     (the element itself is dropped, its content is processed recursively);
 :   - tei:text with xml:id "completeWork" keeps all of its attributes and additionally
 :     gets an @in attribute holding the xml:id of the enclosing tei:TEI;
 :   - every other element keeps its attributes except those listed in $omittableAttrTypes;
 :     if its xml:id is a key of $salNodes, the attributes title, web, citableParent and
 :     citetrail are added from the matching index entry;
 :   - processing instructions are passed through unchanged;
 :   - everything else (text nodes, comments, ...) is discarded.
 :
 : @param $input    the items (usually nodes) to process
 : @param $salNodes map from node id (the @n of a sal:node) to the sal:node element; may be empty
 : @return the reduced copy of $input
 :)
declare function local:copy($input as item()*, $salNodes as map(*)?) as item()* {
    for $node in $input
    return
        typeswitch ($node)
            case element() return
                if (local-name($node) = $omittableElemTypes) then
                    (: remove frequent, but irrelevant elements: only their content is processed further :)
                    local:copy($node/node(), $salNodes)
                else if ($node/self::tei:text and $node/@xml:id eq 'completeWork') then
                    (: since the text root itself might not be in the index, it is handled here specially :)
                    element {'itei:' || local-name($node)} {
                        (: give tei:text fragments rudimentary information about their context, so that rdf extraction doesn't need to access the respective teiHeaders :)
                        attribute in {$node/ancestor::tei:TEI/@xml:id},
                        $node/@*,
                        local:copy($node/node(), $salNodes)
                    }
                else
                    element {'itei:' || local-name($node)} {
                        (: copy all attributes that are not explicitly omittable :)
                        for $att in $node/@*[not(name(.) = $omittableAttrTypes)]
                        (: look the xml:id up in the node index once; the result is empty for other attributes,
                           for ids that are not indexed, and when no index map was supplied :)
                        let $sn :=
                            if (name($att) = "xml:id" and exists($salNodes)) then
                                map:get($salNodes, string($att))
                            else
                                ()
                        return
                            if ($sn) then (: equivalent to render:isIndexNode() :)
                                (: string(...) yields '' when the entry has no sal:citableParent, so the lookup simply misses :)
                                let $pn := map:get($salNodes, string($sn/sal:citableParent))
                                (: add (only English) label to title (also German and Spanish?) :)
                                let $title :=
                                    i18n:process(<i18n:text key="{$sn/@class/string()}"/>,'en','/db/apps/salamanca/data/i18n','en') || ' ' || $sn/sal:title/text()
                                return (
                                    attribute title {$title},
                                    (: the first five characters of @n are used as the work id; "&amp;" is the XQuery escape for a literal "&" :)
                                    (: result looks like: work.html?wid=W0030&frag=00001_W0030-00-0001-fm-03e8#W0030-00-0003-he-03ea :)
                                    attribute web {'work.html?wid=' || substring($sn/@n, 1, 5) || '&amp;frag=' || string($sn/@fragment) || '#' || string($sn/@n)},
                                    attribute citableParent {$pn/sal:citetrail},
                                    attribute citetrail {$sn/sal:citetrail},
                                    $att,
                                    (: give tei:text fragments rudimentary information about their context, so that rdf extraction doesn't need to access the respective teiHeaders :)
                                    if ($node/self::tei:text[@type eq "work_volume"]) then
                                        attribute in {$node/ancestor::tei:TEI/@xml:id}
                                    else
                                        ()
                                )
                            else
                                attribute {name($att)} {$att}
                        ,
                        (: output the content of this element recursively :)
                        local:copy($node/node(), $salNodes)
                    }
            case processing-instruction() return
                $node
            (: remove text nodes, comments and anything else :)
            default return
                ()
};
(: Main query body: produces the enhanced TEI for the work requested via the 'wid' request parameter. :)
let $wid := request:get-parameter('wid', '')
(: $wid comes straight from the HTTP request and is concatenated into document paths below,
   so only plain identifier characters are accepted; this keeps values such as "../x" from
   addressing documents outside the configured collections.
   NOTE(review): assumes work ids consist of letters, digits, "_" and "-" only - confirm :)
let $widIsSafe := matches($wid, '^[A-Za-z0-9_\-]+$')
let $debug := if ($config:debug = ("trace", "info")) then console:log("tei enhancer running, requested work " || $wid || ".") else ()
return
    if (not($widIsSafe)) then
        (: missing or malformed id: return nothing, as for a work whose document does not exist :)
        ()
    else
        (: the TEI document, passed through util:expand() before processing :)
        let $origTEI := util:expand(doc($config:tei-works-root || '/' || $wid || '.xml')/tei:TEI)
        (: the node index belonging to this work :)
        let $salNodesF := doc($config:index-root || '/' || $wid || '_nodeIndex.xml')/sal:index
        (: map from each sal:node's @n to the sal:node itself, for fast lookup by xml:id :)
        let $salNodesM := map:merge(for $n in $salNodesF/sal:node return map:entry($n/@n/string(), $n))
        return local:copy($origTEI, $salNodesM)