-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdo_work.sh
executable file
·46 lines (35 loc) · 1.21 KB
/
do_work.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/bin/bash
# Pin a UTF-8 locale; both variables are exported so every child process
# started by this script inherits them.
export LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8

# Start-of-run marker on stdout.
printf '%s\n' "hello"
# Stage 1: out.txt -> fields.html
# out.txt is presumably the result of the (disabled) Ghostscript call below;
# TODO confirm where it is generated now.
#gs -sDEVICE=txtwrite -dTextFormat=0 -o out.txt in.pdf

#######################################
# Flatten <span>/<char> markup into one text line per span.
#
# For every span that is not bold, prints the third field of the <span line
# (split on double quotes and spaces), a space, then the fourth
# quote-delimited field of each following <char line, and a newline at
# </span. A span counts as bold when any line containing "Bold" was seen
# since the previous </span; such spans produce no output at all.
#
# Arguments: input files, passed straight to awk (stdin when none given)
# Outputs:   the flattened lines on stdout
# Returns:   exit status of awk
#######################################
extract_fields() {
  awk '
    BEGIN { FS = "[\" ]"; bold = 0 }

    $0 ~ "Bold" { bold = 1 }

    $0 ~ "<span" {
      if (!bold) {
        printf "%s ", $3
        # A new FS only applies from the next record on, so the <char
        # lines that follow are split on double quotes alone.
        FS = "\""
      }
    }

    $0 ~ "<char" {
      if (!bold) {
        # The character comes from the document: print it as data, never
        # as the printf format, otherwise a "%" breaks the output.
        printf "%s", $4
      }
    }

    $0 ~ "</span" {
      if (!bold) {
        print ""
      }
      # Back to the splitting used for <span lines; forget the bold state.
      FS = "[\" ]"
      bold = 0
    }
  ' "$@"
}

extract_fields out.txt > fields.html
echo "made fields"
# Stage 2: fields.html -> fields.txt
# Reads stdin one line at a time and writes each line back with its HTML
# entities decoded (ENT_QUOTES: both quote styles, ENT_HTML5: HTML5 entity set).
php -r '
  while (false !== ($row = fgets(STDIN))) {
    echo html_entity_decode($row, ENT_QUOTES | ENT_HTML5);
  }
' < fields.html > fields.txt
# Stage 3: fields.txt -> data.txt

#######################################
# Collect the interesting lines of each listing into one record.
#
# Input lines have the form "<number> <text>"; the rules below key on that
# leading number (presumably the horizontal position of the text -- TODO
# confirm) and on the first word(s).
#
# Every "30 Location" line closes the record collected so far: the fields
# start, end, price and location are printed joined by " \t " with no
# trailing newline, then ./lookup.sh is run with the location (spaces
# replaced by "+") as its only argument, writing to the same stdout.
#
# NOTE(review): there is no END rule, so anything collected after the last
# "30 Location" line is never printed -- confirm this is intended.
#
# Arguments: input files, passed straight to awk (stdin when none given)
# Outputs:   records interleaved with the output of ./lookup.sh on stdout
# Returns:   exit status of awk
#######################################
build_records() {
  awk '
    # Wrap s in single quotes for /bin/sh, turning each embedded single
    # quote (octal 047) into: close quote, escaped quote, reopen quote.
    function shell_quote(s,    q, out, i) {
      q = "\047"
      out = ""
      while ((i = index(s, q)) > 0) {
        out = out substr(s, 1, i - 1) q "\\" q q
        s = substr(s, i + 1)
      }
      return q out s q
    }

    BEGIN { date_from = ""; date_to = ""; loc = ""; price = ""; skip = 0 }

    $0 ~ "^30 Location" {
      printf "%s \t %s \t %s \t %s \t", date_from, date_to, price, loc
      gsub(" ", "+", loc)
      # Push the record out before the child process writes, so the order
      # of the two does not depend on the awk implementation.
      fflush()
      # loc is text taken from the document: quote it so the shell passes
      # it through verbatim instead of expanding or executing parts of it.
      system("./lookup.sh " shell_quote(loc))
      date_from = ""; date_to = ""; loc = ""; price = ""
      skip = 0
      next
    }

    # Once a Remarques line was seen, drop everything up to the next
    # Location line.
    skip { next }

    $0 ~ "^105 partir" { date_from = $4; date_to = $6 }
    $0 ~ "^305 fr."    { price = $2 }
    $0 ~ "^30 Remarques" { skip = 1; next }

    # Fixed lines that must not end up in the location text.
    $0 ~ "^30 Publication" { next }
    $0 ~ "^30 Université de Lausanne" { next }
    $0 ~ "^30 Service des affaires socio-culturelles" { next }
    $0 ~ "^30 Bâtiment Unicentre" { next }
    $0 ~ "^30 CH - 1015 Lausanne" { next }
    $0 ~ "^30 Offres de logement" { next }

    # Any other "30 ..." line is part of the location: strip the leading
    # number and append the rest.
    $0 ~ "^30 " { sub("30", "", $0); loc = loc " " $0 }
  ' "$@"
}

build_records fields.txt > data.txt
echo "rearranged into data"