From e674d70e7b55bb81ce75bfd600f836e742936950 Mon Sep 17 00:00:00 2001
From: User <ppkrauss@gmail.com>
Date: Fri, 3 Aug 2018 12:39:13 -0300
Subject: [PATCH 1/2] script for issue #53

---
 scripts/wd_countries.sh | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 scripts/wd_countries.sh

diff --git a/scripts/wd_countries.sh b/scripts/wd_countries.sh
new file mode 100644
index 00000000..d2cd038e
--- /dev/null
+++ b/scripts/wd_countries.sh
@@ -0,0 +1,17 @@
+## Retrieve the Wikidata dataset via SPARQL as CSV. With --fail, an HTTP error body is not saved as data.
+curl -o wd_countries.csv -G 'https://query.wikidata.org/sparql' \
+     --fail --header "Accept: text/csv"  \
+     --data-urlencode query='
+ SELECT DISTINCT (?simple_value AS ?iso2_code) ?wd_id
+ WHERE {
+   ?item p:P297 ?statement .
+   ?statement ps:P297 ?simple_value .
+   OPTIONAL { ?statement pq:P582 ?qualifier . }
+   FILTER ( !bound(?qualifier) )
+   BIND ( strafter(str(?item), str(wd:)) AS ?wd_id ).
+ } ORDER BY ?iso2_code
+'
+# Eliminate duplication (confusion with kingdoms and territories)
+# in the future we can use "P31 Q417175" to eliminate doublets of kingdows, but "territory vs nation" need some check.
+# so, filtering invalid doublets and saving with same name:
+grep -v 'Q756617\|Q29999\|Q29999' wd_countries.csv  | sponge wd_countries.csv

From 89421b57d0f66e754bf566a80ef924aed1d3cff9 Mon Sep 17 00:00:00 2001
From: User <ppkrauss@gmail.com>
Date: Fri, 3 Aug 2018 14:31:19 -0300
Subject: [PATCH 2/2] oops, little correction to filter more 3 items

---
 scripts/wd_countries.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/wd_countries.sh b/scripts/wd_countries.sh
index d2cd038e..576fa620 100644
--- a/scripts/wd_countries.sh
+++ b/scripts/wd_countries.sh
@@ -11,7 +11,10 @@ curl -o wd_countries.csv -G 'https://query.wikidata.org/sparql' \
    BIND ( strafter(str(?item), str(wd:)) AS ?wd_id ).
  } ORDER BY ?iso2_code
 '
+
 # Eliminate duplication (confusion with kingdoms and territories)
 # in the future we can use "P31 Q417175" to eliminate doublets of kingdows, but "territory vs nation" need some check.
 # so, filtering invalid doublets and saving with same name:
-grep -v 'Q756617\|Q29999\|Q29999' wd_countries.csv  | sponge wd_countries.csv
+grep -vw 'Q756617\|Q29999\|Q407199\|Q240592\|Q83286\|Q1246' wd_countries.csv  | sponge wd_countries.csv
+# -w matches whole QIDs only, so Q1246 does not also drop a longer ID such as Q12460.
+# Also filtered: Q83286=old YU, Yugoslavia and Q1246=XK, Kosovo, not in use at ISO; Q240592, a wrong duplicate of Macedonia.