Skip to content

Commit e862afa

Browse files
committed
Made field existence optional, fixed message with missing inequality constraints, refactor of spec. parsing
1 parent 4ec3980 commit e862afa

File tree

3 files changed

+109
-17
lines changed

3 files changed

+109
-17
lines changed

.gitignore

+4-1
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,7 @@ nosetests.xml
3838
# Mr Developer
3939
.mr.developer.cfg
4040
.project
41-
.pydevproject
41+
.pydevproject
42+
43+
# Docs
44+
_build/

matgendb/vv/validate.py

+78-8
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def compare(self, lhs_value, rhs_value):
261261
raise RuntimeError('unexpected size operator: {}'.format(self._op))
262262
if self.is_inequality():
263263
if not isinstance(lhs_value, Number):
264-
raise ValueError('Number required for inequality')
264+
return False
265265
py_op = self.PY_INEQ.get(self._op, self._op)
266266
return eval('{} {} {}'.format(lhs_value, py_op, rhs_value))
267267
if self.is_type():
@@ -774,6 +774,65 @@ def update(self, *args):
774774
self._count = 0
775775

776776

777+
class ConstraintSpec(object):
    """Specification of a set of constraints for a collection.

    The configuration is a sequence whose items are either dictionaries
    holding a 'filter' and a 'constraints' entry, or plain constraint
    expressions. Constraints are grouped by filter: iterating over an
    instance yields (filter, constraints) pairs, where the filter is a
    tuple of condition expressions, or None for constraints that were
    given without a filter.
    """
    FILTER_SECT = 'filter'
    CONSTRAINT_SECT = 'constraints'

    # Base type of strings: 'basestring' where it exists (Python 2),
    # otherwise 'str'.
    try:
        _STRING_TYPES = basestring
    except NameError:
        _STRING_TYPES = str

    def __init__(self, spec):
        """Create specification from a configuration.

        :param spec: Configuration for a single collection
        :type spec: iterable of dict or constraint expressions
        :raise: ValueError if specification is wrong
        """
        self._sections = {}
        for item in spec:
            if isinstance(item, dict):
                self._add_filtered_section(item)
            else:
                self._add_simple_section(item)

    def _add_filtered_section(self, item):
        """Add a section that has a filter and set of constraints.

        Sections sharing the same filter are merged, in the order given.

        :param item: Mapping with the 'filter' and 'constraints' keys
        :type item: dict
        :raise: ValueError if filter or constraints is missing, or if the
                filter cannot be turned into a tuple of conditions
        """
        # Check key presence explicitly, so that a filter which is present
        # but null is not reported as missing.
        if self.FILTER_SECT not in item:
            raise ValueError("configuration is missing '{}'".format(self.FILTER_SECT))
        constraints = item.get(self.CONSTRAINT_SECT)
        if constraints is None:
            raise ValueError("configuration is missing '{}'".format(self.CONSTRAINT_SECT))

        # make condition(s) into a tuple; tuples can be used as keys
        cond_raw = item[self.FILTER_SECT]
        if cond_raw is None:
            cond = None
        elif isinstance(cond_raw, self._STRING_TYPES):
            cond = (cond_raw,)
        else:
            try:
                cond = tuple(cond_raw)
            except TypeError:
                raise ValueError("bad value for '{}': {!r}"
                                 .format(self.FILTER_SECT, cond_raw))

        # a lone constraint expression is a list of one
        if isinstance(constraints, self._STRING_TYPES):
            constraints = [constraints]

        # Always accumulate into a list owned by this object, so merging
        # sections never modifies the caller's configuration.
        self._sections.setdefault(cond, []).extend(constraints)

    def _add_simple_section(self, item):
        """Add one constraint that has no filter.

        The constraint is appended to the section keyed by None, keeping
        any constraints already stored there.

        :param item: A single constraint expression
        """
        self._sections.setdefault(None, []).append(item)

    def __iter__(self):
        """When invoked as an iterator, return the key, value
        pairs of the filter and constraints.
        """
        # items() rather than iteritems(), so this does not depend on the
        # Python 2-only dict method.
        return iter(self._sections.items())
return self._sections.iteritems()
834+
835+
777836
class Validator(DoesLogging):
778837
"""Validate a collection.
779838
"""
@@ -792,7 +851,7 @@ class Validator(DoesLogging):
792851
)
793852
\s*''', re.VERBOSE)
794853

795-
def __init__(self, max_violations=50, max_dberrors=10, aliases=None):
854+
def __init__(self, max_violations=50, max_dberrors=10, aliases=None, add_exists=False):
796855
DoesLogging.__init__(self, name='mg.validator')
797856
self.set_progress(0)
798857
self._aliases = aliases if aliases else {}
@@ -803,6 +862,7 @@ def __init__(self, max_violations=50, max_dberrors=10, aliases=None):
803862
self._find_kw = {}
804863
self._max_dberr = max_dberrors
805864
self._base_report_fields = {'_id': 1, 'task_id': 1}
865+
self._add_exists = add_exists
806866

807867
def set_aliases(self, a):
808868
"""Set aliases.
@@ -824,15 +884,21 @@ def num_violations(self):
824884
return 0
825885
return self._progress._count
826886

827-
def validate(self, coll, constraint_sections, subject='collection'):
887+
def validate(self, coll, constraint_spec, subject='collection'):
828888
"""Validation of a collection.
829889
This is a generator that yields ConstraintViolationGroups.
830890
891+
:param coll: Mongo collection
892+
:type coll: pymongo.Collection
893+
:param constraint_spec: Constraint specification
894+
:type constraint_spec: ConstraintSpec
895+
:param subject: Name of the thing being validated
896+
:type subject: str
831897
:return: Sets of constraint violation, one for each constraint_section
832898
:rtype: ConstraintViolationGroup
833899
"""
834900
self._progress.set_subject(subject)
835-
self._build(constraint_sections)
901+
self._build(constraint_spec)
836902
for cond, body in self._sections:
837903
cvg = self._validate_section(subject, coll, cond, body)
838904
if cvg is not None:
@@ -919,16 +985,19 @@ def _get_violations(self, query, record):
919985
reasons.append(ConstraintViolation(clause.constraint, fval, expected))
920986
return reasons
921987

922-
def _build(self, constraint_sections):
988+
def _build(self, constraint_spec):
923989
"""Generate queries to execute.
924990
925991
Sets instance variables so that Mongo query strings, etc. can now
926992
be extracted from the object.
993+
994+
:param constraint_spec: Constraint specification
995+
:type constraint_spec: ConstraintSpec
927996
"""
928997
self._sections = []
929998
self._report_fields = self._base_report_fields
930999
# loop over each condition on the records
931-
for cond_expr_list, expr_list in constraint_sections.iteritems():
1000+
for cond_expr_list, expr_list in constraint_spec:
9321001
#print("@@ CONDS = {}".format(cond_expr_list))
9331002
#print("@@ MAIN = {}".format(expr_list))
9341003
groups = self._process_constraint_expressions(expr_list)
@@ -938,8 +1007,9 @@ def _build(self, constraint_sections):
9381007
for c in cg:
9391008
projection.add(c.field, c.op, c.value)
9401009
query.add_clause(MongoClause(c))
941-
for c in cg.existence_constraints:
942-
query.add_clause(MongoClause(c, exists_main=True))
1010+
if self._add_exists:
1011+
for c in cg.existence_constraints:
1012+
query.add_clause(MongoClause(c, exists_main=True))
9431013
self._report_fields.update(projection.to_mongo())
9441014
cond_query = MongoQuery()
9451015
if cond_expr_list is not None:

scripts/mgvv

+27-8
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import urlparse
2525
import yaml
2626
# local module
2727
from matgendb.util import get_settings
28-
from matgendb.vv.validate import Validator, DBError
28+
from matgendb.vv.validate import ConstraintSpec, Validator, DBError
2929
from matgendb.vv import report
3030

3131
# Initialize module logging
@@ -165,6 +165,9 @@ def parse_mongo_url(url):
165165

166166
def get_constraint_sections(spec):
167167
"""Parse out sections within a collection.
168+
169+
:return: Dictionary keyed by the 'filter' of the 'constraints'
170+
:rtype: dict
168171
"""
169172
sections = {}
170173
for item in spec:
@@ -218,6 +221,8 @@ def main(cmdline=None):
218221
'The "from" and "to" are required; default server is localhost. '
219222
'This information can also be in the main config file under the "_email" key, '
220223
'which should be a mapping with keys "from", "to", and "server".')
224+
parser.add_argument('--exonly', dest='must_exist', action='store_true', default=False,
225+
help='Only show results where all fields in the constraints are also present in the record')
221226
parser.add_argument('--file', '-f', dest='constfile', metavar='FILE', default=None,
222227
help='Main configuration file. Has constraints, and optionally email info.')
223228
parser.add_argument('--format', '-F', dest='report_format', metavar='FORMAT', default='html',
@@ -267,6 +272,11 @@ def main(cmdline=None):
267272
parser.error('Cannot authenticate to database as user {}'.format(user))
268273
else:
269274
_log.info('Connecting to DB "{}" without authentication'.format(config[db_key]))
275+
# get collection name if given
276+
if 'collection' in config:
277+
db_coll_name = config['collection']
278+
else:
279+
db_coll_name = None
270280

271281
# Init constraints from file or command-line
272282
constraints_from_file = args.constfile is not None
@@ -275,14 +285,18 @@ def main(cmdline=None):
275285
with Timing('load.yaml', file=f.name):
276286
constraints = yaml.safe_load(f)
277287
else:
278-
if args.coll is None:
279-
parser.error('collection name required for command-line constraints')
288+
coll_name = args.coll
289+
if coll_name is None:
290+
if db_coll_name is None:
291+
parser.error('collection name not found')
292+
else:
293+
coll_name = db_coll_name
280294
if not args.constraint:
281295
parser.error('at least one constraint is required')
282296
# re-split command-line arguments on commas (not whitespace)
283297
plist = ' '.join(args.constraint).split(',')
284298
# make dict(key=collection) of list of the constraints, just like YAML config file
285-
constraints = {args.coll: plist}
299+
constraints = {coll_name: plist}
286300

287301
# Init aliases from file or command-line
288302
if constraints_from_file:
@@ -321,19 +335,24 @@ def main(cmdline=None):
321335
.format(fmt, textlist(formatters.keys())))
322336

323337
# Run validation for each collection
324-
validator = Validator(aliases=aliases, max_violations=args.limit, max_dberrors=10)
338+
validator = Validator(aliases=aliases, max_violations=args.limit, max_dberrors=10, add_exists=args.must_exist)
325339
if args.progress > 0:
326340
validator.set_progress(args.progress)
327341
with Timing("validate"):
328-
for coll_name, constraint_spec in constraints.iteritems():
342+
for coll_name, constraint_spec_cfg in constraints.iteritems():
329343
sect_hdr = report.SectionHeader(title="Collection {}".format(coll_name))
330344
rpt_sect = report.ReportSection(sect_hdr)
331345
if coll_name.startswith(PATTERN_KEY_PREFIX_IGNORE):
332346
continue
333347
coll = db[coll_name]
334348
try:
335-
sect = get_constraint_sections(constraint_spec)
336-
for vnum, vgroup in enumerate(validator.validate(coll, sect, subject=coll_name)):
349+
try:
350+
cspec = ConstraintSpec(constraint_spec_cfg)
351+
except ValueError, err:
352+
_log.error('processing constraints for {}: {}'.format(coll_name, err))
353+
break
354+
#sect = get_constraint_sections(constraint_spec)
355+
for vnum, vgroup in enumerate(validator.validate(coll, cspec, subject=coll_name)):
337356
sect_hdr = report.SectionHeader(title='Violations {:d}'.format(vnum + 1))
338357
sect_hdr.add('Condition', str(vgroup.condition))
339358
table = report.Table(colnames=('Id', 'TaskId', 'Field', 'Constraint', 'Value'))

0 commit comments

Comments
 (0)