diff --git a/ames/converters/epfmt.py b/ames/converters/epfmt.py index 6c51ffb..9901df6 100644 --- a/ames/converters/epfmt.py +++ b/ames/converters/epfmt.py @@ -5,11 +5,9 @@ # # For the Go package see https://github.com/caltechlibrary/eprinttools. # -import os -import io import json import sys -from subprocess import run, Popen, PIPE +from subprocess import run, Popen # diff --git a/ames/harvesters/caltechdata.py b/ames/harvesters/caltechdata.py index 245aa08..3bb832e 100644 --- a/ames/harvesters/caltechdata.py +++ b/ames/harvesters/caltechdata.py @@ -1,7 +1,7 @@ -import os,json,subprocess,shutil +import os,subprocess,shutil import requests from caltechdata_api import decustomize_schema -import dataset +from py_dataset import dataset from progressbar import progressbar def get_caltechdata(collection,production=True,datacite=False): diff --git a/ames/harvesters/caltechfeeds.py b/ames/harvesters/caltechfeeds.py index 49c5227..5341f62 100644 --- a/ames/harvesters/caltechfeeds.py +++ b/ames/harvesters/caltechfeeds.py @@ -2,7 +2,7 @@ import requests from progressbar import progressbar from datetime import datetime,timezone -import dataset +from py_dataset import dataset import zipfile def download_file(url,fname): @@ -77,6 +77,9 @@ def get_caltechfeed(feed,autoupdate=False): #We decide whether to update datev,err = dataset.read(cname,'captured') + if err != '': + print(err) + exit() if datev == {}: #No date, collection must be updated update = 'Y' diff --git a/ames/harvesters/crossref_refs.py b/ames/harvesters/crossref_refs.py index 729bf8e..cb897f1 100644 --- a/ames/harvesters/crossref_refs.py +++ b/ames/harvesters/crossref_refs.py @@ -1,6 +1,6 @@ import os,shutil,json,subprocess, datetime import requests -import dataset +from py_dataset import dataset def get_crossref_refs(new=True): #New=True will download everything from scratch and delete any existing records diff --git a/ames/harvesters/eputil.py b/ames/harvesters/eputil.py index 36c4c42..8e46ad2 100644 --- a/ames/harvesters/eputil.py +++ b/ames/harvesters/eputil.py @@ -4,12 +4,10 @@ # # For Go package see https://github.com/caltechlibrary/eprinttools. # -import os import json import sys from subprocess import run, Popen, PIPE -from datetime import datetime, timedelta - +from datetime import datetime # # get_eprint_keys returns a list of keys available from the @@ -99,8 +97,6 @@ def get_eprint(eprint_url, eprint_id): # 'https://jane.doe:secret@eprint.example.edu' # def get_eprints(eprint_url, eprint_id): - eprints = [] - eprint = {} cmd = ['eputil'] cmd.append('-json') cmd.append(eprint_url + '/rest/eprint/' + eprint_id + '.xml') diff --git a/ames/harvesters/usage.py b/ames/harvesters/usage.py index 9a441d0..2cc31bf 100644 --- a/ames/harvesters/usage.py +++ b/ames/harvesters/usage.py @@ -1,10 +1,9 @@ -import os,json,csv,subprocess +import os,json,csv import requests import pandas as pd from datetime import datetime -from caltechdata_api import decustomize_schema from progressbar import progressbar -import dataset +from py_dataset import dataset def file_mapping(source_collection,mapping_file): '''Return a dictionary that maps /tindfiles/serve urls to records. @@ -24,6 +23,9 @@ def file_mapping(source_collection,mapping_file): keys = dataset.keys(source_collection) for k in keys: record,err = dataset.read(source_collection,k) + if err != '': + print(err) + exit() #Ignore embargoed records if 'electronic_location_and_access' in record: diff --git a/ames/matchers/caltechdata.py b/ames/matchers/caltechdata.py index a1f9ea1..d240694 100644 --- a/ames/matchers/caltechdata.py +++ b/ames/matchers/caltechdata.py @@ -1,10 +1,10 @@ -import os,subprocess,json,re,copy +import os,json,re from caltechdata_api import caltechdata_edit from ames import codemeta_to_datacite from ames.harvesters import get_records from progressbar import progressbar import idutils -import dataset +from py_dataset import dataset import requests def match_cd_refs(): diff --git a/ames/matchers/datacite.py b/ames/matchers/datacite.py index a289e13..eee5978 100644 --- a/ames/matchers/datacite.py +++ b/ames/matchers/datacite.py @@ -1,5 +1,4 @@ -import os,subprocess,json -import dataset +from py_dataset import dataset import requests from datacite import DataCiteMDSClient, schema40 from datetime import date, datetime diff --git a/get_doi_badge.py b/get_doi_badge.py index 9f209f4..740f705 100644 --- a/get_doi_badge.py +++ b/get_doi_badge.py @@ -1,5 +1,5 @@ from ames.harvesters import get_github_id -import argparse,json +import argparse parser = argparse.ArgumentParser(description=\ "Get Markdown for a doi badge from CaltechDATA") diff --git a/run_coda_report.py b/run_coda_report.py index f8e01e4..cb5b4cc 100644 --- a/run_coda_report.py +++ b/run_coda_report.py @@ -1,5 +1,5 @@ import os,argparse,csv -import dataset +from py_dataset import dataset import random from progressbar import progressbar from ames.harvesters import get_caltechfeed, get_records diff --git a/run_codemeta.py b/run_codemeta.py index e1dd29e..dc8c9b5 100644 --- a/run_codemeta.py +++ b/run_codemeta.py @@ -1,7 +1,6 @@ from ames.harvesters import get_cd_github from ames.matchers import match_codemeta -import os,subprocess,json -import requests +import os if os.path.isdir('data') == False: os.mkdir('data') diff --git a/run_event_data.py b/run_event_data.py index 010b2a7..b4a1225 100644 --- a/run_event_data.py +++ b/run_event_data.py @@ -2,9 +2,9 @@ from ames.harvesters import get_caltechdata from ames.matchers import match_cd_refs from xml.sax import saxutils as su -import os,subprocess,json +import os import requests -import dataset +from py_dataset import dataset #Environment variable AWS_SDK_LOAD_CONFIG=1 must be set before running diff --git a/run_usage.py b/run_usage.py index 39a76cd..fb281e8 100644 --- a/run_usage.py +++ b/run_usage.py @@ -1,5 +1,5 @@ import requests -import dataset +from py_dataset import dataset import os, csv from ames.harvesters import file_mapping, get_usage from ames.harvesters import get_caltechdata diff --git a/setup.py b/setup.py index 5cc9612..5e61fa6 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ def read(fname): # What packages are required for this module to be executed? REQUIRED = [ - 'requests','datacite','progressbar2','idutils','caltechdata_api' + 'requests','datacite','progressbar2','idutils','caltechdata_api','py_dataset' ] # What packages are optional? diff --git a/update_CaltechPEOPLE.py b/update_CaltechPEOPLE.py index c26f073..363cb39 100644 --- a/update_CaltechPEOPLE.py +++ b/update_CaltechPEOPLE.py @@ -1,5 +1,5 @@ -import os,argparse,csv -import dataset +import os,argparse +from py_dataset import dataset from ames.harvesters import get_caltechfeed, get_records if __name__ == '__main__':