diff --git a/scholia/config.py b/scholia/config.py
new file mode 100644
index 00000000..d3c00e2c
--- /dev/null
+++ b/scholia/config.py
@@ -0,0 +1,63 @@
+"""config.
+
+Usage:
+  scholia.config
+
+Description:
+  Builds the module-level ``config`` object shared by the package. The
+  built-in ``DEFAULTS`` are loaded first; the first file in
+  ``CONFIG_FILENAMES`` that can be read is then applied on top of them, so a
+  user file only needs to list the values it overrides.
+
+  Run as a script, the module prints the resulting configuration.
+
+"""
+
+import configparser
+
+from io import StringIO
+
+from os.path import expanduser
+
+
+# Candidate configuration files in order of precedence. Only the first one
+# that can be read is used; a leading '~' is expanded to the home directory.
+CONFIG_FILENAMES = [
+    'scholia.ini',
+    '~/etc/scholia.ini',
+    '~/scholia.ini']
+
+# Values that apply when no configuration file overrides them.
+DEFAULTS = """
+[query-server]
+sparql_endpoint = https://query.wikidata.org/sparql
+
+[requests]
+user_agent = Scholia
+
+"""
+
+
+# Interpolation is switched off so values are returned verbatim: with the
+# default BasicInterpolation a bare '%' in a value (for instance in a
+# percent-encoded endpoint URL) makes get() raise InterpolationSyntaxError.
+config = configparser.ConfigParser(interpolation=None)
+
+config.read_file(StringIO(DEFAULTS))
+
+for filename in CONFIG_FILENAMES:
+    # read() skips a file it cannot open and returns the list of files it
+    # parsed, so a non-empty result means this candidate was loaded. Testing
+    # that result, rather than exists(), lets the search continue past a
+    # file that is present but unreadable.
+    if config.read(expanduser(filename), encoding='utf-8'):
+        break
+
+
+if __name__ == '__main__':
+    # Print the effective configuration in INI layout.
+    for section in config.sections():
+        print(f"[{section}]")
+        for key in config[section]:
+            print(f"{key} = {config[section].get(key)}")
+        print()
diff --git a/scholia/network.py b/scholia/network.py
index 22a13acc..ef78257f 100644
--- a/scholia/network.py
+++ b/scholia/network.py
@@ -7,11 +7,13 @@
 
 from collections import OrderedDict
 
-from .query import SPARQL_ENDPOINT
+from .config import config
 
 import requests
 
 
+SPARQL_ENDPOINT = config['query-server'].get('sparql_endpoint')
+
 EXAMPLE_SPARQL_QUERY = """
 SELECT ?item1 ?item1Label ?item2 ?item2Label ?weight
 WITH {
diff --git a/scholia/query.py b/scholia/query.py
index 898c27db..cfdecef1 100644
--- a/scholia/query.py
+++ b/scholia/query.py
@@ -59,9 +59,12 @@
 from six import u
 
 
-SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
+from .config import config
 
-USER_AGENT = 'Scholia'
+
+SPARQL_ENDPOINT = config['query-server'].get('sparql_endpoint')
+
+USER_AGENT = config['requests'].get('user_agent')
 
 HEADERS = {'User-Agent': USER_AGENT}
 
diff --git a/scholia/rss.py b/scholia/rss.py
index 5dee6569..7705cc09 100644
--- a/scholia/rss.py
+++ b/scholia/rss.py
@@ -49,7 +49,9 @@
 
 from six import u
 
-from .query import SPARQL_ENDPOINT
+from .config import config
+
+SPARQL_ENDPOINT = config['query-server'].get('sparql_endpoint')
 
 
 WORK_ITEM_RSS = u("""
diff --git a/scholia/scrape/ceurws.py b/scholia/scrape/ceurws.py
index cbd9fd18..1eeb3b0e 100644
--- a/scholia/scrape/ceurws.py
+++ b/scholia/scrape/ceurws.py
@@ -29,12 +29,15 @@
 
 import requests
 
+from ..config import config
 from ..qs import paper_to_quickstatements, proceedings_to_quickstatements
-from ..query import iso639_to_q, SPARQL_ENDPOINT as WDQS_URL
+from ..query import iso639_to_q
 from ..utils import escape_string, pages_to_number_of_pages
 
 
-USER_AGENT = 'Scholia'
+SPARQL_ENDPOINT = config['query-server'].get('sparql_endpoint')
+
+USER_AGENT = config['requests'].get('user_agent')
 
 HEADERS = {'User-Agent': USER_AGENT}
 
@@ -264,7 +267,7 @@ def paper_to_q(paper):
     query = SHORT_TITLED_PAPER_TO_Q_QUERY.format(
         url=url)
 
-    response = requests.get(WDQS_URL,
+    response = requests.get(SPARQL_ENDPOINT,
                             params={'query': query, 'format': 'json'},
                             headers=HEADERS)
     if not response.ok:
diff --git a/scholia/scrape/nips.py b/scholia/scrape/nips.py
index 229cc942..8de35f72 100644
--- a/scholia/scrape/nips.py
+++ b/scholia/scrape/nips.py
@@ -47,9 +47,14 @@
 
 import requests
 
+from ..config import config
 from ..qs import paper_to_quickstatements
 from ..utils import escape_string
-from ..query import SPARQL_ENDPOINT as WDQS_URL
+
+
+SPARQL_ENDPOINT = config['query-server'].get('sparql_endpoint')
+
+USER_AGENT = config['requests'].get('user_agent')
 
 PAPER_TO_Q_QUERY = u("""
 SELECT ?paper WHERE {{
@@ -63,7 +68,6 @@
 
 URL_BASE = "https://papers.nips.cc"
 
-USER_AGENT = "Scholia"
 
 # Year should be the nominal year, - not the year of publication
 YEAR_TO_Q = {
@@ -146,9 +150,9 @@ def paper_to_q(paper):
         label=title, title=title,
         url=paper['url'], full_text_url=paper['full_text_url'])
 
-    response = requests.get(
-        WDQS_URL, params={'query': query, 'format': 'json'},
-        headers={'User-Agent': USER_AGENT})
+    response = requests.get(SPARQL_ENDPOINT,
+                            params={'query': query, 'format': 'json'},
+                            headers={'User-Agent': USER_AGENT})
     if not response.ok:
         raise Exception("Wikidata API response error: {}".format(
             response.status_code))
diff --git a/scholia/scrape/ojs.py b/scholia/scrape/ojs.py
index 4b5ea894..e31911da 100644
--- a/scholia/scrape/ojs.py
+++ b/scholia/scrape/ojs.py
@@ -31,12 +31,15 @@
 
 import requests
 
+from ..config import config
 from ..qs import paper_to_quickstatements
-from ..query import iso639_to_q, issn_to_qs, SPARQL_ENDPOINT as WDQS_URL
+from ..query import iso639_to_q, issn_to_qs
 from ..utils import escape_string, pages_to_number_of_pages
 
 
-USER_AGENT = 'Scholia'
+SPARQL_ENDPOINT = config['query-server'].get('sparql_endpoint')
+
+USER_AGENT = config['requests'].get('user_agent')
 
 HEADERS = {'User-Agent': USER_AGENT}
 
@@ -173,7 +176,7 @@ def paper_to_q(paper):
     query = SHORT_TITLED_PAPER_TO_Q_QUERY.format(
         url=paper['url'])
 
-    response = requests.get(WDQS_URL,
+    response = requests.get(SPARQL_ENDPOINT,
                             params={'query': query, 'format': 'json'},
                             headers=HEADERS)
     data = response.json()['results']['bindings']
diff --git a/scholia/text.py b/scholia/text.py
index 659257be..aecf4b5a 100644
--- a/scholia/text.py
+++ b/scholia/text.py
@@ -30,7 +30,7 @@
 
 from six.moves import cPickle as pickle
 
-from .query import SPARQL_ENDPOINT
+from .config import config
 
 import re
 
@@ -39,6 +39,8 @@
 import requests
 
 
+SPARQL_ENDPOINT = config['query-server'].get('sparql_endpoint')
+
 TOPIC_LABELS_SPARQL = """
 SELECT ?topic ?topic_label
 WITH {
diff --git a/scholia/wikipedia.py b/scholia/wikipedia.py
index 6f77dff7..a0386bf1 100644
--- a/scholia/wikipedia.py
+++ b/scholia/wikipedia.py
@@ -29,7 +29,10 @@
 
 from six import b, u
 
-from .query import SPARQL_ENDPOINT
+from .config import config
+
+
+SPARQL_ENDPOINT = config['query-server'].get('sparql_endpoint')
 
 
 BIBLIOGRAPHY_SPARQL_QUERY = """