Add the metadig Python package (Python 2 / Jython) and its setup script.

Review notes for this revision of the patch:
  * checks.py: "except Exception()" named an instance and never matched;
    URLError.reason was indexed although it may be a plain string; the
    non-success branch reported "Successfully resolved"; the duplicated
    HEAD-request logic now lives in one private helper, _headRequest.
  * variable.py: isBlank always returns a bool for an ArrayList.
  * __init__.py: __version__ agrees with setup.py.
  * setup.py: classifier strings use valid trove classifier values.
  * The "index" header lines are omitted because the blob hashes they
    carried no longer describe the file contents.

diff --git a/metadig/__init__.py b/metadig/__init__.py
new file mode 100644
--- /dev/null
+++ b/metadig/__init__.py
@@ -0,0 +1,5 @@
+__version__ = '1.0.0'
+
+from .checks import getType
+from .checks import isResolvable
+from .variable import isBlank
diff --git a/metadig/checks.py b/metadig/checks.py
new file mode 100644
--- /dev/null
+++ b/metadig/checks.py
@@ -0,0 +1,100 @@
+"""Metadig check utilities
+"""
+
+import sys
+import urllib2
+from urlparse import urlparse
+
+# URL schemes considered open (non-proprietary) communication protocols.
+_KNOWN_PROTOCOLS = ('http', 'https')
+
+# HTTP status codes that count as a successful resolution.
+_SUCCESS_CODES = (200, 202, 203, 206, 301, 302, 303)
+
+# Descriptions for the HTTP error codes that get a tailored message, from
+# FAIR FM_A1.1 https://github.com/FAIRMetrics/Metrics/blob/master/Distributions/FM_A1.1.pdf
+_HTTP_ERROR_REASONS = {
+    400: 'Bad request',
+    401: 'Unauthorized',
+    404: 'Not Found',
+    500: 'Server Error',
+}
+
+
+def getType(object):
+    """Print the type of `object` to standard output."""
+    print('type: {}'.format(type(object)))
+
+
+def _headRequest(url):
+    """Send an HTTP HEAD request to `url` and summarize the outcome.
+
+    Returns a (success, message) tuple. success is True only when the
+    response status is in _SUCCESS_CODES. Failures are reported through
+    the tuple rather than raised.
+    """
+    request = urllib2.Request(url)
+    request.get_method = lambda: 'HEAD'
+    # urllib2 raises HTTPError for HTTP error statuses; the exception
+    # object carries the status code and headers of the response.
+    try:
+        response = urllib2.urlopen(request)
+    except urllib2.HTTPError as he:
+        reason = _HTTP_ERROR_REASONS.get(he.code)
+        if reason is not None:
+            return (False, "Unable to resolved URL {}: {}".format(url, reason))
+        return (False, 'Error resolving URL "{}": {} {}'.format(url, he.code, he.headers))
+    except urllib2.URLError as ue:
+        print("URLError.reason", ue.reason)
+        # reason may be a plain string or a socket error, so it cannot be
+        # indexed safely; prefer its strerror and fall back to str().
+        return (False, getattr(ue.reason, 'strerror', None) or str(ue.reason))
+    except Exception as e:
+        # Last resort, so callers always receive a (False, message) tuple.
+        print("Exception: ", e)
+        return (False, str(e))
+
+    code = response.code
+    response.close()
+    if code in _SUCCESS_CODES:
+        return (True, "Successfully resolved the URL {}: status {}".format(url, code))
+    return (False, "Unexpected HTTP status {} resolving the URL {}".format(code, url))
+
+
+def isResolvable(url):
+    """Check that `url` is an http(s) URL that answers a HEAD request.
+
+    Returns a (success, message) tuple.
+    """
+    # First parse the url for a protocol, host, port and path.
+    urlComps = urlparse(url)
+    if urlComps.netloc == "":
+        return (False, '"{}" does not appear to be a URL'.format(url))
+
+    # Check the scheme to see if it is an open one. Currently we only
+    # check for http and https.
+    if urlComps.scheme not in _KNOWN_PROTOCOLS:
+        return (False, 'Unknown or proprietary communications protocol: "{}", known protocols: {}'.format(urlComps.scheme, ", ".join(_KNOWN_PROTOCOLS)))
+
+    return _headRequest(url)
+
+
+def getSystemMetadata(identifier, memberNode):
+    """Check that DataONE system metadata can be reached for `identifier`.
+
+    Sends a HEAD request to the cn.dataone.org "meta" endpoint and returns
+    a (success, message) tuple. `memberNode` is accepted but not used.
+    """
+    url = 'https://cn.dataone.org/cn/v2/meta/{}'.format(identifier)
+    return _headRequest(url)
+
+
+# Check if an identifier has a valid, known namespace
+#def isNamespaceValid(identifier):
+#    delimiter = ':'
+#    if(identifier.find(delimeter == -1)):
+#        return False, "Missing namespace in identifier"
+#
+#    namespace, id = identfier.split(':', 1)
+#
+#    return True, "The namespace is valid"
diff --git a/metadig/variable.py b/metadig/variable.py
new file mode 100644
--- /dev/null
+++ b/metadig/variable.py
@@ -0,0 +1,47 @@
+"""Metadig check utilities
+"""
+
+import sys
+import re
+
+import java.util.ArrayList as ArrayList
+
+# Matches runs of whitespace; compiled once at import time.
+_WHITESPACE = re.compile(r'\s+')
+
+
+def _isBlankString(value):
+    """Return True if `value` is empty once all whitespace is removed."""
+    return len(re.sub(_WHITESPACE, '', value)) == 0
+
+
+def isBlank(object):
+    """Report whether a value extracted from a metadata document is blank.
+
+    Depending on the values extracted from the xpath, `object` may be:
+      - an int (single value, all numeric)
+      - a string
+      - a boolean (i.e. "Yes", "Y", "No", "N")
+      - a java.util.ArrayList (multiple values, each typed as int,
+        boolean or string)
+
+    Returns False for an int, True for a string that is empty or all
+    whitespace, and for an ArrayList True only when it holds at least one
+    value and every value is a blank string. Raises Exception for any
+    other type.
+    """
+    if isinstance(object, int):
+        return False
+    if isinstance(object, basestring):
+        return _isBlankString(object)
+    if isinstance(object, ArrayList):
+        blankFound = False
+        for thisObj in object:
+            # Return as soon as a non-blank value is found.
+            if isinstance(thisObj, int) or not _isBlankString(thisObj):
+                return False
+            blankFound = True
+        # blankFound is still False for an empty list, which is therefore
+        # reported as not blank.
+        return blankFound
+    raise Exception('Unknown variable type {}'.format(type(object)))
diff --git a/setup.py b/setup.py
new file mode 100644
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,38 @@
+from setuptools import setup
+from setuptools import find_packages
+
+long_description = '''
+Metadig provides tools that can be used by the MetaDIG Quality Engine
+'''
+#with open("README.md", "r") as fh:
+#long_description = fh.read()
+
+setup(name='metadig',
+      version='1.0.0',
+      author="Peter Slaughter",
+      author_email="slaughter@nceas.ucsb.edu",
+      description='Metadig Quality Engine Python Library',
+      long_description=long_description,
+      url='https://github.com/NCEAS/metadig-py',
+      # NOTE(review): points at NCEAS/metadig while url is NCEAS/metadig-py - confirm.
+      download_url='https://github.com/NCEAS/metadig/tarball/1.0.0',
+      license='Apache-2',
+      packages=find_packages(),
+      install_requires=[
+          #'scipy>=0.14',
+      ],
+      classifiers=[
+          # Values must match the PyPI trove classifier list exactly.
+          # NOTE(review): '3 - Alpha' stands in for the invalid '1 - Development/Unstable' - confirm.
+          'Development Status :: 3 - Alpha',
+          'License :: OSI Approved :: Apache Software License',
+          'Programming Language :: Python :: 2',
+          'Programming Language :: Python :: 2.7',
+          # NOTE(review): the package imports urllib2 and java.util.ArrayList,
+          # which are unavailable on Python 3 - confirm the next two entries.
+          'Programming Language :: Python :: 3',
+          'Programming Language :: Python :: 3.6',
+          'Topic :: Software Development :: Libraries',
+          'Topic :: Software Development :: Libraries :: Python Modules'
+      ]
+)