diff --git a/.github/workflows/test-python-package.yml b/.github/workflows/test-python-package.yml index cb973d881..42538c481 100644 --- a/.github/workflows/test-python-package.yml +++ b/.github/workflows/test-python-package.yml @@ -36,4 +36,4 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | - pytest + pytest --forked diff --git a/dataprofiler/profilers/profile_builder.py b/dataprofiler/profilers/profile_builder.py index 741b8386e..0eef8d9ac 100644 --- a/dataprofiler/profilers/profile_builder.py +++ b/dataprofiler/profilers/profile_builder.py @@ -365,16 +365,38 @@ def __init__(self, data, samples_per_update=None, min_true_samples=None, if isinstance(data, data_readers.text_data.TextData): raise TypeError("Cannot provide TextData object to Profiler") - # assign data labeler # assign data labeler data_labeler_options = self.options.structured_options.data_labeler if data_labeler_options.is_enabled \ and data_labeler_options.data_labeler_object is None: - data_labeler = DataLabeler( - labeler_type='structured', - dirpath=data_labeler_options.data_labeler_dirpath, - load_options=None) - self.options.set({'data_labeler.data_labeler_object': data_labeler}) + + try: + + data_labeler = DataLabeler( + labeler_type='structured', + dirpath=data_labeler_options.data_labeler_dirpath, + load_options=None) + self.options.set({'data_labeler.data_labeler_object': data_labeler}) + + except Exception as e: + + import warnings + warning_msg = "\n\n!!! WARNING Partial Profiler Failure !!!\n\n" + warning_msg += "Profiling Type: {}".format('data_labeler') + warning_msg += "\nException: {}".format(type(e).__name__) + warning_msg += "\nMessage: {}".format(e) + + # This is considered a major error + if type(e).__name__ == "ValueError": + raise ValueError(e) + + warning_msg += "\n\nFor labeler errors, try installing " + warning_msg += "the extra ml requirements via:\n\n" + warning_msg += "$ pip install dataprofiler[ml] --user\n\n" + + warnings.warn(warning_msg, RuntimeWarning, stacklevel=2) + + self.options.set({'data_labeler.is_enabled': False}) self.update_profile(data) diff --git a/dataprofiler/version.py b/dataprofiler/version.py index 01d9d6eb6..f9d16f77a 100644 --- a/dataprofiler/version.py +++ b/dataprofiler/version.py @@ -4,7 +4,7 @@ MAJOR = 0 MINOR = 4 -MICRO = 0 +MICRO = 1 VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)