Skip to content

Commit

Permalink
Initial revision of library
Browse files Browse the repository at this point in the history
  • Loading branch information
gothub committed May 29, 2019
1 parent 74d9d5c commit 9227b5c
Show file tree
Hide file tree
Showing 4 changed files with 189 additions and 0 deletions.
5 changes: 5 additions & 0 deletions metadig/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Package version string. Kept identical to the ``version`` argument passed to
# setup() in setup.py ('1.0.0') so the installed metadata and the importable
# attribute agree.
__version__ = '1.0.0'

from .checks import getType
from .checks import isResolvable
from .variable import isBlank
104 changes: 104 additions & 0 deletions metadig/checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Metadig check utilities
"""

import sys
import urllib2
from urlparse import urlparse

def getType(object):
    """Print the type of ``object`` to standard output.

    The line written has the form ``type: <type>``, where ``<type>`` is the
    string form of ``type(object)``.

    Args:
        object: Any value. The parameter name shadows the builtin; it is kept
            so that keyword callers continue to work.

    Returns:
        None. The function only prints.
    """
    # A parenthesized single argument prints identically whether ``print`` is
    # the Python 2 statement or the print function, and matches the
    # ``print(...)`` calls used elsewhere in this module.
    print('type: {}'.format(type(object)))

# HTTP status codes that are reported as a successful resolution.
_RESOLVED_STATUS_CODES = frozenset([200, 202, 203, 206, 301, 302, 303])

# Check the 'scheme' to see if it is an open one. Currently only http and
# https are checked for.
_KNOWN_PROTOCOLS = ('http', 'https')

# HTTP error codes that get a dedicated, more meaningful message than the one
# provided by HTTPError.
# FYI, HTTP status codes (from FAIR FM_A1.1
# https://github.com/FAIRMetrics/Metrics/blob/master/Distributions/FM_A1.1.pdf)
_HTTP_ERROR_REASONS = {
    400: "Bad request",
    401: "Unauthorized",
    404: "Not Found",
    500: "Server Error",
}


def _describeReason(reason):
    """Return a printable description of a ``URLError.reason`` value."""
    # ``reason`` can be a plain string or an error object. Indexing it (the
    # previous ``reason[1]``) yields a single character for a string, so use
    # the error's ``strerror`` when it has one and fall back to ``str()``.
    return getattr(reason, 'strerror', None) or str(reason)


def _checkUrl(url):
    """Send an HTTP HEAD request to ``url`` and report whether it resolved.

    Args:
        url: The URL to request.

    Returns:
        A ``(resolved, message)`` tuple: ``resolved`` is True only when the
        response status is one of ``_RESOLVED_STATUS_CODES``; ``message``
        describes the outcome.
    """
    request = urllib2.Request(url)
    request.get_method = lambda: 'HEAD'
    # urllib2 raises an exception for HTTP error statuses; the status code and
    # headers are then carried by the exception object.
    try:
        response = urllib2.urlopen(request)
    except urllib2.HTTPError as he:
        # Must be handled before URLError, which HTTPError derives from.
        if he.code in _HTTP_ERROR_REASONS:
            return (False, "Unable to resolved URL {}: {}".format(url, _HTTP_ERROR_REASONS[he.code]))
        return (False, 'Error resolving URL "{}": {} {}'.format(url, he.code, he.headers))
    except urllib2.URLError as ue:
        print("URLError.reason", ue.reason)
        return (False, _describeReason(ue.reason))
    except Exception as e:
        # Was ``except Exception() as e``: an instance, not a class, so the
        # handler could never catch anything.
        print("Exception: ", e)
        return (False, str(e))

    # Read the status, then release the connection; the body is not needed.
    try:
        status = response.code
    finally:
        response.close()

    if status in _RESOLVED_STATUS_CODES:
        return (True, "Successfully resolved the URL {}: status {}".format(url, status))
    # Previously this failure branch reported "Successfully resolved the URL".
    return (False, "Unexpected response resolving the URL {}: status {}".format(url, status))


def isResolvable(url):
    """Check whether ``url`` is an http(s) URL that answers a HEAD request.

    The URL is first parsed; a value without a network location, or with a
    scheme other than http/https, is rejected without any network access.
    Otherwise an HTTP HEAD request is issued.

    Args:
        url: The URL string to check.

    Returns:
        A ``(resolved, message)`` tuple, where ``resolved`` is a bool and
        ``message`` is a human readable explanation of the result.
    """
    # First parse the url for a protocol, host, port and path.
    components = urlparse(url)
    if not components.netloc:
        return (False, '"{}" does not appear to be a URL'.format(url))

    if components.scheme not in _KNOWN_PROTOCOLS:
        return (False, 'Unknown or proprietary communications protocol: "{}", known protocols: {}'.format(components.scheme, ", ".join(_KNOWN_PROTOCOLS)))

    return _checkUrl(url)


def getSystemMetadata(identifier, memberNode):
    """Check that the DataONE CN ``meta`` endpoint resolves for ``identifier``.

    Only an HTTP HEAD request is made to
    ``https://cn.dataone.org/cn/v2/meta/<identifier>``; the system metadata
    document itself is not downloaded or returned.

    Args:
        identifier: Identifier appended to the endpoint URL.
            NOTE(review): it is inserted into the URL path as-is; presumably
            callers pass it already URL-encoded - confirm.
        memberNode: Currently unused; kept for interface compatibility.

    Returns:
        A ``(resolved, message)`` tuple, as for ``isResolvable``.
    """
    url = 'https://cn.dataone.org/cn/v2/meta/{}'.format(identifier)
    return _checkUrl(url)

# Check if an identifier has a valid, known namespace
#def isNamespaceValid(identifier):
# delimiter = ':'
# if(identifier.find(delimeter == -1)):
# return False, "Missing namespace in identifier"
#
# namespace, id = identfier.split(':', 1)
#
# return True, "The namespace is valid"
47 changes: 47 additions & 0 deletions metadig/variable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Metadig check utilities
"""

import sys
import re

import java.util.ArrayList as ArrayList

# Matches runs of whitespace; compiled once instead of on every call.
_WHITESPACE = re.compile(r'\s+')

# String types to accept. ``unicode`` only exists on Python 2 / Jython, so the
# lookup is guarded to keep the module loadable where it is absent.
try:
    _TEXT_TYPES = (str, unicode)
except NameError:
    _TEXT_TYPES = (str,)


def _isBlankText(text):
    """Return True when ``text`` is empty once all whitespace is removed."""
    return len(_WHITESPACE.sub('', text)) == 0


def isBlank(object):
    """Report whether a value extracted from a document is blank.

    Depending on the values extracted from the xpath, the following types may
    be passed in:
      - an int (single value, all numeric): never blank
      - a string: blank when it is empty or all whitespace
      - a java.util.ArrayList, or a Python list/tuple, of such values: blank
        only when it holds at least one value and every value is a blank
        string

    Args:
        object: The value to test. The parameter name shadows the builtin; it
            is kept so that keyword callers continue to work.

    Returns:
        True if the value is blank, False otherwise. An empty list returns
        False; this keeps the truthiness of the previous behaviour, which fell
        off the end of the function and returned None.
        NOTE(review): confirm whether an empty list should count as blank.

    Raises:
        Exception: If ``object`` is none of the supported types.
    """
    if isinstance(object, int):
        return False

    if isinstance(object, _TEXT_TYPES):
        return _isBlankText(object)

    # Python sequences are tested first so that ``ArrayList`` is only looked
    # up for values that are not plain lists or tuples.
    if isinstance(object, (list, tuple)) or isinstance(object, ArrayList):
        # Return as soon as a non-blank value is found.
        for item in object:
            if isinstance(item, int) or not _isBlankText(item):
                return False
        # Every value (if there was any) is blank.
        return len(object) > 0

    raise Exception('Unknown variable type {}'.format(type(object)))

33 changes: 33 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from setuptools import setup
from setuptools import find_packages

# Long description shown on the package index page.
long_description = '''
Metadig provides tools that can be used by the MetaDIG Quality Engine
'''
#with open("README.md", "r") as fh:
    #long_description = fh.read()

# Package metadata and build configuration for the ``metadig`` library.
setup(
    name='metadig',
    version='1.0.0',
    author="Peter Slaughter",
    author_email="[email protected]",
    description='Metadig Quality Engine Python Library',
    long_description=long_description,
    url='https://github.com/NCEAS/metadig-py',
    # NOTE(review): this points at the "metadig" repository while ``url``
    # points at "metadig-py" - confirm which repository hosts the tarball.
    download_url='https://github.com/NCEAS/metadig/tarball/1.0.0',
    license='Apache-2',
    packages=find_packages(),
    install_requires=[
        #'scipy>=0.14',
    ],
    # Classifiers must match the official trove classifier list exactly.
    classifiers=[
        # Was '1 - Development/Unstable', which is not a valid classifier;
        # '1 - Planning' is the valid entry for level 1.
        # NOTE(review): pick a later stage (e.g. '3 - Alpha') if that
        # describes the project better.
        'Development Status :: 1 - Planning',
        # Was 'Apache 2 ' (with a trailing space), which is not a valid
        # classifier.
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        # NOTE(review): the package imports urllib2/urlparse and uses the
        # ``unicode`` type, which are Python 2 names - confirm the Python 3
        # classifiers below are accurate.
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
        'Topic :: Software Development :: Libraries',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
)

0 comments on commit 9227b5c

Please sign in to comment.