-
Notifications
You must be signed in to change notification settings - Fork 2
/
generate_ocr_result-master.sh
33 lines (27 loc) · 1.33 KB
/
generate_ocr_result-master.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Rebuild output_candidates: every candidate that belongs to an evaluation
# document (docid listed in eval_docs), with its probability and bucket from
# cand_label_label_inference_bucketed and a random_number used later as a
# tie-breaker.
#
# The drop and the rebuild share one -c string. psql sends a -c string as a
# single request, which the server executes as one transaction, so a failed
# rebuild rolls back the drop instead of leaving the database without the table.
# "cascade" also drops the views built on the table (maxp and output_words,
# which are re-created by the following steps).
#
# The left join keeps candidates that have no matching inference row; their
# probability and bucket are NULL.
#
# Abort on failure so later steps never export from missing or stale data.
psql -c "drop table if exists output_candidates cascade;
create table output_candidates as
select candidate.*, probability, bucket, random() as random_number
from candidate
left join cand_label_label_inference_bucketed on candidateid = candidate.id
where docid in (select * from eval_docs)
order by docid, wordid, random_number;
" ddocr || exit 1
# maxp: one row per (docid, wordid) describing the winning candidate.
#   maxp    - highest probability among the word's candidates
#   maxrand - random_number of the winner, i.e. the largest random_number
#             among the candidates that share that highest probability
#
# Both values must come from the same row. Computing max(probability) and
# max(random_number) independently makes the join in output_words succeed only
# when the best candidate also happens to hold the group's largest random
# number; in every other case the word silently disappears from the output.
#
# "nulls last" is required because PostgreSQL sorts NULLs first under DESC;
# it keeps candidates without a probability from outranking scored ones.
psql -c "create view maxp as
select distinct on (docid, wordid)
    docid,
    wordid,
    probability as maxp,
    random_number as maxrand
from output_candidates
order by docid, wordid, probability desc nulls last, random_number desc;
" ddocr
# output_words: the full output_candidates row of each word's winner, found by
# matching on the (probability, random_number) pair recorded in maxp.
# A word whose candidates all have NULL probability yields no row here, because
# the equality test against a NULL maxp is never true.
psql -c "create view output_words as
select output_candidates.*
from output_candidates
inner join maxp
    on output_candidates.docid = maxp.docid
    and output_candidates.wordid = maxp.wordid
    and output_candidates.probability = maxp.maxp
    and output_candidates.random_number = maxp.maxrand
;" ddocr
# Export the selected word of every evaluated position, ordered by document
# and word position, to a tab-separated file (COPY's default text format).
# NOTE: COPY ... TO 'file' is performed by the database server, so the /tmp
# paths below are on the server host.
psql -c "copy (select docid, wordid, word from output_words order by docid, wordid)
to '/tmp/ocr-output-words.tsv';" ddocr

# export_engine_words SOURCE_CODE OUTPUT_PATH
# Export the raw candidate words whose "source" column equals SOURCE_CODE,
# restricted to evaluation documents, ordered by docid, wordid, candid.
export_engine_words() {
    psql -c "copy (select docid, wordid, word from candidate
where source = '$1' and docid in (select * from eval_docs)
order by docid, wordid, candid) to '$2';" ddocr
}

# Per-source baselines; judging by the file names, 'T' is presumably Tesseract
# and 'C' Cuneiform (confirm against the code that fills candidate.source).
export_engine_words T /tmp/ocr-output-words-tesseract.tsv
export_engine_words C /tmp/ocr-output-words-cuneiform.tsv