Skip to content

Commit

Permalink
Setup for py_dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
tmorrell committed Apr 26, 2019
1 parent 70165d2 commit da0b0a8
Show file tree
Hide file tree
Showing 15 changed files with 26 additions and 29 deletions.
4 changes: 1 addition & 3 deletions ames/converters/epfmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
#
# For the Go package see https://github.com/caltechlibrary/eprinttools.
#
import os
import io
import json
import sys
from subprocess import run, Popen, PIPE
from subprocess import run, Popen


#
Expand Down
4 changes: 2 additions & 2 deletions ames/harvesters/caltechdata.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os,json,subprocess,shutil
import os,subprocess,shutil
import requests
from caltechdata_api import decustomize_schema
import dataset
from py_dataset import dataset
from progressbar import progressbar

def get_caltechdata(collection,production=True,datacite=False):
Expand Down
5 changes: 4 additions & 1 deletion ames/harvesters/caltechfeeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import requests
from progressbar import progressbar
from datetime import datetime,timezone
import dataset
from py_dataset import dataset
import zipfile

def download_file(url,fname):
Expand Down Expand Up @@ -77,6 +77,9 @@ def get_caltechfeed(feed,autoupdate=False):
#We decide whether to update

datev,err = dataset.read(cname,'captured')
if err != '':
print(err)
exit()
if datev == {}:
#No date, collection must be updated
update = 'Y'
Expand Down
2 changes: 1 addition & 1 deletion ames/harvesters/crossref_refs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os,shutil,json,subprocess, datetime
import requests
import dataset
from py_dataset import dataset

def get_crossref_refs(new=True):
#New=True will download everything from scratch and delete any existing records
Expand Down
6 changes: 1 addition & 5 deletions ames/harvesters/eputil.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
#
# For Go package see https://github.com/caltechlibrary/eprinttools.
#
import os
import json
import sys
from subprocess import run, Popen, PIPE
from datetime import datetime, timedelta

from datetime import datetime

#
# get_eprint_keys returns a list of keys available from the
Expand Down Expand Up @@ -99,8 +97,6 @@ def get_eprint(eprint_url, eprint_id):
# 'https://jane.doe:secret@eprint.example.edu'
#
def get_eprints(eprint_url, eprint_id):
eprints = []
eprint = {}
cmd = ['eputil']
cmd.append('-json')
cmd.append(eprint_url + '/rest/eprint/' + eprint_id + '.xml')
Expand Down
8 changes: 5 additions & 3 deletions ames/harvesters/usage.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import os,json,csv,subprocess
import os,json,csv
import requests
import pandas as pd
from datetime import datetime
from caltechdata_api import decustomize_schema
from progressbar import progressbar
import dataset
from py_dataset import dataset

def file_mapping(source_collection,mapping_file):
'''Return a dictionary that maps /tindfiles/serve urls to records.
Expand All @@ -24,6 +23,9 @@ def file_mapping(source_collection,mapping_file):
keys = dataset.keys(source_collection)
for k in keys:
record,err = dataset.read(source_collection,k)
if err != '':
print(err)
exit()

#Ignore embargoed records
if 'electronic_location_and_access' in record:
Expand Down
4 changes: 2 additions & 2 deletions ames/matchers/caltechdata.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os,subprocess,json,re,copy
import os,json,re
from caltechdata_api import caltechdata_edit
from ames import codemeta_to_datacite
from ames.harvesters import get_records
from progressbar import progressbar
import idutils
import dataset
from py_dataset import dataset
import requests

def match_cd_refs():
Expand Down
3 changes: 1 addition & 2 deletions ames/matchers/datacite.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os,subprocess,json
import dataset
from py_dataset import dataset
import requests
from datacite import DataCiteMDSClient, schema40
from datetime import date, datetime
Expand Down
2 changes: 1 addition & 1 deletion get_doi_badge.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ames.harvesters import get_github_id
import argparse,json
import argparse

parser = argparse.ArgumentParser(description=\
"Get Markdown for a doi badge from CaltechDATA")
Expand Down
2 changes: 1 addition & 1 deletion run_coda_report.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os,argparse,csv
import dataset
from py_dataset import dataset
import random
from progressbar import progressbar
from ames.harvesters import get_caltechfeed, get_records
Expand Down
3 changes: 1 addition & 2 deletions run_codemeta.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from ames.harvesters import get_cd_github
from ames.matchers import match_codemeta
import os,subprocess,json
import requests
import os

if os.path.isdir('data') == False:
os.mkdir('data')
Expand Down
4 changes: 2 additions & 2 deletions run_event_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
from ames.harvesters import get_caltechdata
from ames.matchers import match_cd_refs
from xml.sax import saxutils as su
import os,subprocess,json
import os
import requests
import dataset
from py_dataset import dataset

#Environment variable AWS_SDK_LOAD_CONFIG=1 must be set before running

Expand Down
2 changes: 1 addition & 1 deletion run_usage.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import requests
import dataset
from py_dataset import dataset
import os, csv
from ames.harvesters import file_mapping, get_usage
from ames.harvesters import get_caltechdata
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def read(fname):

# What packages are required for this module to be executed?
REQUIRED = [
'requests','datacite','progressbar2','idutils','caltechdata_api'
'requests','datacite','progressbar2','idutils','caltechdata_api','py_dataset'
]

# What packages are optional?
Expand Down
4 changes: 2 additions & 2 deletions update_CaltechPEOPLE.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os,argparse,csv
import dataset
import os,argparse
from py_dataset import dataset
from ames.harvesters import get_caltechfeed, get_records

if __name__ == '__main__':
Expand Down

0 comments on commit da0b0a8

Please sign in to comment.