From 650cc4988296a1a8ec23fe91d4a687859630f6c4 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Wed, 3 Jul 2019 12:14:37 +0200 Subject: [PATCH 01/16] pyDKB: move `logLevel` type to `common.types`. This type (fixed set of service words) looks like the one that will be used across the whole library, not in the `dataflow` module alone. --- Utils/Dataflow/pyDKB/common/types.py | 9 +++++++++ Utils/Dataflow/pyDKB/dataflow/communication/__init__.py | 1 - .../pyDKB/dataflow/communication/consumer/Consumer.py | 2 +- .../dataflow/communication/consumer/FileConsumer.py | 2 +- .../pyDKB/dataflow/communication/consumer/__init__.py | 1 - .../dataflow/communication/producer/FileProducer.py | 2 +- .../pyDKB/dataflow/communication/producer/Producer.py | 2 +- .../pyDKB/dataflow/communication/producer/__init__.py | 1 - .../pyDKB/dataflow/communication/stream/InputStream.py | 2 +- .../pyDKB/dataflow/communication/stream/Stream.py | 2 +- .../pyDKB/dataflow/communication/stream/__init__.py | 1 - Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py | 2 +- Utils/Dataflow/pyDKB/dataflow/stage/ProcessorStage.py | 2 +- Utils/Dataflow/pyDKB/dataflow/stage/__init__.py | 1 - Utils/Dataflow/pyDKB/dataflow/types.py | 3 +-- 15 files changed, 18 insertions(+), 15 deletions(-) create mode 100644 Utils/Dataflow/pyDKB/common/types.py diff --git a/Utils/Dataflow/pyDKB/common/types.py b/Utils/Dataflow/pyDKB/common/types.py new file mode 100644 index 000000000..a86c29208 --- /dev/null +++ b/Utils/Dataflow/pyDKB/common/types.py @@ -0,0 +1,9 @@ +""" +pyDKB.common.types + +Definitions of types used across all the library moduless. 
+""" + +from Type import Type + +logLevel = Type("TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL") diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/__init__.py b/Utils/Dataflow/pyDKB/dataflow/communication/__init__.py index 014963cdc..ef2908394 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/__init__.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/__init__.py @@ -4,7 +4,6 @@ from .. import messageType from .. import codeType -from .. import logLevel from .. import DataflowException from messages import Message diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py index 1bc235f94..cf5ab44f9 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py @@ -4,7 +4,7 @@ import sys -from . import logLevel +from pyDKB.common.types import logLevel from . import DataflowException from .. import Message diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/FileConsumer.py b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/FileConsumer.py index b0da3d579..52fb0776c 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/FileConsumer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/FileConsumer.py @@ -14,7 +14,7 @@ import os import Consumer -from . import logLevel +from pyDKB.common.types import logLevel from .. import Message diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/__init__.py b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/__init__.py index 382ebaf50..331aa1cee 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/__init__.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/__init__.py @@ -3,7 +3,6 @@ """ from .. import messageType -from .. import logLevel from .. 
import DataflowException from Consumer import Consumer diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/producer/FileProducer.py b/Utils/Dataflow/pyDKB/dataflow/communication/producer/FileProducer.py index c82d049c1..fd8232b24 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/producer/FileProducer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/producer/FileProducer.py @@ -13,7 +13,7 @@ import time from Producer import Producer, ProducerException -from . import logLevel +from pyDKB.common.types import logLevel class FileProducer(Producer): diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py b/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py index cde3f870a..a91a40773 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py @@ -4,7 +4,7 @@ import sys -from . import logLevel +from pyDKB.common.types import logLevel from . import DataflowException from .. import Message diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/producer/__init__.py b/Utils/Dataflow/pyDKB/dataflow/communication/producer/__init__.py index 0e9a1cef5..a8cef8be5 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/producer/__init__.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/producer/__init__.py @@ -3,7 +3,6 @@ """ from .. import messageType -from .. import logLevel from .. import DataflowException from Producer import Producer diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/stream/InputStream.py b/Utils/Dataflow/pyDKB/dataflow/communication/stream/InputStream.py index fcbc11e2d..121bcb79d 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/stream/InputStream.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/stream/InputStream.py @@ -3,7 +3,7 @@ """ from Stream import Stream -from . import logLevel +from pyDKB.common.types import logLevel from . 
import Message from pyDKB.common import custom_readline diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py b/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py index 75c0b87b1..63513e0f1 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py @@ -5,7 +5,7 @@ import sys from . import messageType -from . import logLevel +from pyDKB.common.types import logLevel from exceptions import StreamException diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/stream/__init__.py b/Utils/Dataflow/pyDKB/dataflow/communication/stream/__init__.py index 396a71be0..645ccdeff 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/stream/__init__.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/stream/__init__.py @@ -4,7 +4,6 @@ from .. import messageType from .. import codeType -from .. import logLevel from .. import DataflowException from .. import Message from InputStream import InputStream diff --git a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py index d3148afa1..55cd260ec 100644 --- a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py +++ b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py @@ -8,7 +8,7 @@ from collections import defaultdict import textwrap -from . import logLevel +from pyDKB.common.types import logLevel try: import argparse diff --git a/Utils/Dataflow/pyDKB/dataflow/stage/ProcessorStage.py b/Utils/Dataflow/pyDKB/dataflow/stage/ProcessorStage.py index 8373c2235..fceb5d826 100644 --- a/Utils/Dataflow/pyDKB/dataflow/stage/ProcessorStage.py +++ b/Utils/Dataflow/pyDKB/dataflow/stage/ProcessorStage.py @@ -43,7 +43,7 @@ from . import AbstractStage from . import messageType -from . 
import logLevel +from pyDKB.common.types import logLevel from pyDKB.dataflow import DataflowException from pyDKB.common import hdfs from pyDKB.dataflow import communication diff --git a/Utils/Dataflow/pyDKB/dataflow/stage/__init__.py b/Utils/Dataflow/pyDKB/dataflow/stage/__init__.py index 611ef0cd5..5c5c799f6 100644 --- a/Utils/Dataflow/pyDKB/dataflow/stage/__init__.py +++ b/Utils/Dataflow/pyDKB/dataflow/stage/__init__.py @@ -3,7 +3,6 @@ """ from .. import messageType -from .. import logLevel from AbstractStage import AbstractStage from ProcessorStage import ProcessorStage diff --git a/Utils/Dataflow/pyDKB/dataflow/types.py b/Utils/Dataflow/pyDKB/dataflow/types.py index ecc1f790a..b21e0db9a 100644 --- a/Utils/Dataflow/pyDKB/dataflow/types.py +++ b/Utils/Dataflow/pyDKB/dataflow/types.py @@ -4,9 +4,8 @@ from ..common import Type -__all__ = ["dataType", "messageType", "codeType", "logLevel"] +__all__ = ["dataType", "messageType", "codeType"] dataType = Type("DOCUMENT", "AUTHOR", "DATASET") messageType = Type("STRING", "JSON", "TTL") codeType = Type("STRING") -logLevel = Type("TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL") From 001cb71983a3fb6e9fc4a510c88d0274367dd30b Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Wed, 3 Jul 2019 13:23:13 +0200 Subject: [PATCH 02/16] pyDKB: move logging functionality to a common ancestor class. 
--- Utils/Dataflow/pyDKB/common/LoggableObject.py | 35 +++++++++++++++++++ Utils/Dataflow/pyDKB/common/__init__.py | 1 + .../communication/consumer/Consumer.py | 22 ++---------- .../communication/producer/Producer.py | 21 ++--------- .../dataflow/communication/stream/Stream.py | 22 ++---------- .../pyDKB/dataflow/stage/AbstractStage.py | 21 ++--------- Utils/Dataflow/test/pyDKB/case/20/err | 6 ++-- Utils/Dataflow/test/pyDKB/case/21/err | 6 ++-- 8 files changed, 50 insertions(+), 84 deletions(-) create mode 100644 Utils/Dataflow/pyDKB/common/LoggableObject.py diff --git a/Utils/Dataflow/pyDKB/common/LoggableObject.py b/Utils/Dataflow/pyDKB/common/LoggableObject.py new file mode 100644 index 000000000..a47f29202 --- /dev/null +++ b/Utils/Dataflow/pyDKB/common/LoggableObject.py @@ -0,0 +1,35 @@ +""" +pyDKB.common.LoggableObject +""" + +import sys + +from types import logLevel + + +class LoggableObject(object): + """ Common ancestor for all classes that need 'log' method. """ + + @classmethod + def log(cls, message, level=logLevel.INFO): + """ Output log message with given log level. 
+ + :param message: message to output + :type message: str + :param level: log level of the message + :type level: ``pyDKB.common.types.logLevel`` member + """ + if not logLevel.hasMember(level): + self.log("Unknown log level: %s" % level, logLevel.WARN) + level = logLevel.INFO + if type(message) == list: + lines = message + else: + lines = message.splitlines() + if lines: + out_message = "(%s) (%s) %s" % (logLevel.memberName(level), + cls.__name__, lines[0]) + for l in lines[1:]: + out_message += "\n(==) %s" % l + out_message += "\n" + sys.stderr.write(out_message) diff --git a/Utils/Dataflow/pyDKB/common/__init__.py b/Utils/Dataflow/pyDKB/common/__init__.py index 024e2871f..7b8a0c215 100644 --- a/Utils/Dataflow/pyDKB/common/__init__.py +++ b/Utils/Dataflow/pyDKB/common/__init__.py @@ -7,3 +7,4 @@ import json_utils as json from custom_readline import custom_readline from Type import Type +from LoggableObject import LoggableObject diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py index cf5ab44f9..20ef4bdd7 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/consumer/Consumer.py @@ -4,7 +4,7 @@ import sys -from pyDKB.common.types import logLevel +from pyDKB.common import LoggableObject from . import DataflowException from .. import Message @@ -16,7 +16,7 @@ class ConsumerException(DataflowException): pass -class Consumer(object): +class Consumer(LoggableObject): """ Data consumer implementation. """ config = None @@ -30,24 +30,6 @@ def __init__(self, config={}): self.config = config self.reconfigure() - def log(self, message, level=logLevel.INFO): - """ Output log message with given log level. 
""" - if not logLevel.hasMember(level): - self.log("Unknown log level: %s" % level, logLevel.WARN) - level = logLevel.INFO - if type(message) == list: - lines = message - else: - lines = message.splitlines() - if lines: - out_message = "(%s) (%s) %s" % (logLevel.memberName(level), - self.__class__.__name__, - lines[0]) - for l in lines[1:]: - out_message += "\n(==) %s" % l - out_message += "\n" - sys.stderr.write(out_message) - def __iter__(self): """ Initialize iteration. """ return self diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py b/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py index a91a40773..d953cf070 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/producer/Producer.py @@ -4,7 +4,7 @@ import sys -from pyDKB.common.types import logLevel +from pyDKB.common import LoggableObject from . import DataflowException from .. import Message @@ -16,7 +16,7 @@ class ProducerException(DataflowException): pass -class Producer(object): +class Producer(LoggableObject): """ Data producer implementation. """ config = None @@ -30,23 +30,6 @@ def __init__(self, config={}): self.config = config self.reconfigure() - def log(self, message, level=logLevel.INFO): - """ Output log message with given log level. """ - if not logLevel.hasMember(level): - self.log("Unknown log level: %s" % level, logLevel.WARN) - level = logLevel.INFO - if type(message) == list: - lines = message - else: - lines = message.splitlines() - if lines: - out_message = "(%s) (%s) %s" % (logLevel.memberName(level), - self.__class__.__name__, lines[0]) - for l in lines[1:]: - out_message += "\n(==) %s" % l - out_message += "\n" - sys.stderr.write(out_message) - def reconfigure(self, config={}): """ (Re)initialize producer with stage config arguments. 
""" if config: diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py b/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py index 63513e0f1..52d55096d 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/stream/Stream.py @@ -4,12 +4,12 @@ import sys +from pyDKB.common import LoggableObject from . import messageType -from pyDKB.common.types import logLevel from exceptions import StreamException -class Stream(object): +class Stream(LoggableObject): """ Abstract class for input/output streams. """ message_type = None @@ -21,24 +21,6 @@ def __init__(self, fd=None, config={}): self.reset(fd) self.configure(config) - def log(self, message, level=logLevel.INFO): - """ Output log message with given log level. """ - if not logLevel.hasMember(level): - self.log("Unknown log level: %s" % level, logLevel.WARN) - level = logLevel.INFO - if type(message) == list: - lines = message - else: - lines = message.splitlines() - if lines: - out_message = "(%s) (%s) %s" % (logLevel.memberName(level), - self.__class__.__name__, - lines[0]) - for l in lines[1:]: - out_message += "\n(==) %s" % l - out_message += "\n" - sys.stderr.write(out_message) - def configure(self, config): """ Stream configuration. 
""" if not isinstance(config, dict): diff --git a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py index 55cd260ec..9a3012e19 100644 --- a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py +++ b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py @@ -8,6 +8,7 @@ from collections import defaultdict import textwrap +from pyDKB.common import LoggableObject from pyDKB.common.types import logLevel try: @@ -17,7 +18,7 @@ raise e -class AbstractStage(object): +class AbstractStage(LoggableObject): """ Class/instance variable description: * Argument parser (argparse.ArgumentParser) @@ -51,24 +52,6 @@ def __init__(self, description="DKB Dataflow stage"): self._error = None - def log(self, message, level=logLevel.INFO): - """ Output log message with given log level. """ - if not logLevel.hasMember(level): - self.log("Unknown log level: %s" % level, logLevel.WARN) - level = logLevel.INFO - if type(message) == list: - lines = message - else: - lines = message.splitlines() - if lines: - out_message = "(%s) (%s) %s" % (logLevel.memberName(level), - self.__class__.__name__, - lines[0]) - for l in lines[1:]: - out_message += "\n(==) %s" % l - out_message += "\n" - sys.stderr.write(out_message) - def log_configuration(self): """ Log stage configuration. 
""" self.log("Configuration parameters:") diff --git a/Utils/Dataflow/test/pyDKB/case/20/err b/Utils/Dataflow/test/pyDKB/case/20/err index 794acb20a..8a893f676 100644 --- a/Utils/Dataflow/test/pyDKB/case/20/err +++ b/Utils/Dataflow/test/pyDKB/case/20/err @@ -18,11 +18,11 @@ (==) self.flush_buffer() (==) File "./../../pyDKB/dataflow/stage/ProcessorStage.py", line 341, in flush_buffer (==) self.__output.flush() -(==) File "./../../pyDKB/dataflow/communication/producer/Producer.py", line 124, in flush +(==) File "./../../pyDKB/dataflow/communication/producer/Producer.py", line 107, in flush (==) self.get_stream().flush() -(==) File "./../../pyDKB/dataflow/communication/producer/Producer.py", line 71, in get_stream +(==) File "./../../pyDKB/dataflow/communication/producer/Producer.py", line 54, in get_stream (==) self.reset_stream() -(==) File "./../../pyDKB/dataflow/communication/producer/Producer.py", line 81, in reset_stream +(==) File "./../../pyDKB/dataflow/communication/producer/Producer.py", line 64, in reset_stream (==) dest = self.get_dest() (==) File "./../../pyDKB/dataflow/communication/producer/FileProducer.py", line 60, in get_dest (==) self.reset_file() diff --git a/Utils/Dataflow/test/pyDKB/case/21/err b/Utils/Dataflow/test/pyDKB/case/21/err index 3c55e6dc6..315397776 100644 --- a/Utils/Dataflow/test/pyDKB/case/21/err +++ b/Utils/Dataflow/test/pyDKB/case/21/err @@ -19,11 +19,11 @@ (==) self.flush_buffer() (==) File "./../../pyDKB/dataflow/stage/ProcessorStage.py", line 341, in flush_buffer (==) self.__output.flush() -(==) File "./../../pyDKB/dataflow/communication/producer/Producer.py", line 124, in flush +(==) File "./../../pyDKB/dataflow/communication/producer/Producer.py", line 107, in flush (==) self.get_stream().flush() -(==) File "./../../pyDKB/dataflow/communication/producer/Producer.py", line 71, in get_stream +(==) File "./../../pyDKB/dataflow/communication/producer/Producer.py", line 54, in get_stream (==) self.reset_stream() -(==) File 
"./../../pyDKB/dataflow/communication/producer/Producer.py", line 81, in reset_stream +(==) File "./../../pyDKB/dataflow/communication/producer/Producer.py", line 64, in reset_stream (==) dest = self.get_dest() (==) File "./../../pyDKB/dataflow/communication/producer/FileProducer.py", line 60, in get_dest (==) self.reset_file() From a81664010c1d44e96fee38b767673455b1da2e17 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 9 Jul 2019 14:05:10 +0200 Subject: [PATCH 03/16] pyDKB: replace `sys.stderr.write` with `self.log` where possible. --- Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py index 9a3012e19..9063f4e14 100644 --- a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py +++ b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py @@ -161,8 +161,8 @@ def parse_args(self, args): try: self.ARGS.eom = self.ARGS.eom.decode('string_escape') except (ValueError), err: - sys.stderr.write("(ERROR) Failed to read arguments.\n" - "(ERROR) Case: %s\n" % (err)) + self.log("Failed to read arguments.\n" + "Case: %s" % (err), logLevel.ERROR) sys.exit(1) if self.ARGS.eop is None: @@ -176,8 +176,8 @@ def parse_args(self, args): try: self.ARGS.eop = self.ARGS.eop.decode('string_escape') except (ValueError), err: - sys.stderr.write("(ERROR) Failed to read arguments.\n" - "(ERROR) Case: %s\n" % (err)) + self.log("Failed to read arguments.\n" + "Case: %s" % (err), logLevel.ERROR) sys.exit(1) if self.ARGS.mode == 'm': From 399a027e2418fe3bf3e26669ef3d5c4597283baf Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Wed, 10 Jul 2019 13:45:58 +0200 Subject: [PATCH 04/16] pyDKB/common: move log implementation to a standalone function. Now and then we need to output some log message from the module body, not from an object (e.g. warn message about failed import). 
It does not look good to write something like `LoggableObject.log(msg)`; simple `log(msg)` looks better for me. --- Utils/Dataflow/pyDKB/common/LoggableObject.py | 18 +-------- Utils/Dataflow/pyDKB/common/misc.py | 40 +++++++++++++++++++ 2 files changed, 42 insertions(+), 16 deletions(-) create mode 100644 Utils/Dataflow/pyDKB/common/misc.py diff --git a/Utils/Dataflow/pyDKB/common/LoggableObject.py b/Utils/Dataflow/pyDKB/common/LoggableObject.py index a47f29202..64e4ab92f 100644 --- a/Utils/Dataflow/pyDKB/common/LoggableObject.py +++ b/Utils/Dataflow/pyDKB/common/LoggableObject.py @@ -2,9 +2,8 @@ pyDKB.common.LoggableObject """ -import sys - from types import logLevel +from misc import log class LoggableObject(object): @@ -19,17 +18,4 @@ def log(cls, message, level=logLevel.INFO): :param level: log level of the message :type level: ``pyDKB.common.types.logLevel`` member """ - if not logLevel.hasMember(level): - self.log("Unknown log level: %s" % level, logLevel.WARN) - level = logLevel.INFO - if type(message) == list: - lines = message - else: - lines = message.splitlines() - if lines: - out_message = "(%s) (%s) %s" % (logLevel.memberName(level), - cls.__name__, lines[0]) - for l in lines[1:]: - out_message += "\n(==) %s" % l - out_message += "\n" - sys.stderr.write(out_message) + log(message, level, cls.__name__) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py new file mode 100644 index 000000000..1a70cfa92 --- /dev/null +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -0,0 +1,40 @@ +""" +pyDKB.common.misc + +Miscellanious utility functions. +""" + +import sys + +from types import logLevel + + +def log(message, level=logLevel.INFO, *args): + """ Output log message with given log level. 
+ + :param message: message to output + :type message: str + :param level: log level of the message + :type level: ``pyDKB.common.types.logLevel`` member + :param *args: additional prefixes (will be output between log + level prefix and message body) + :type *args: str + """ + if not logLevel.hasMember(level): + self.log("Unknown log level: %s" % level, logLevel.WARN) + level = logLevel.INFO + if type(message) == list: + lines = message + else: + lines = message.splitlines() + if args: + prefix = ' ' + ' '.join(['(%s)' % p for p in args]) + else: + prefix = '' + if lines: + out_message = "(%s)%s %s" % (logLevel.memberName(level), + prefix, lines[0]) + for l in lines[1:]: + out_message += "\n(==) %s" % l + out_message += "\n" + sys.stderr.write(out_message) From e936a08902bb75d6e15e6f28c669c255ecc122d0 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 9 Jul 2019 14:11:05 +0200 Subject: [PATCH 05/16] pyDKB: replace `sys.stderr.write` with `log`. --- Utils/Dataflow/pyDKB/common/hdfs.py | 7 ++++--- Utils/Dataflow/pyDKB/dataflow/cds.py | 11 ++++++----- .../Dataflow/pyDKB/dataflow/communication/messages.py | 6 +++--- Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py | 3 ++- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/hdfs.py b/Utils/Dataflow/pyDKB/common/hdfs.py index 2093a8583..6ed946de4 100644 --- a/Utils/Dataflow/pyDKB/common/hdfs.py +++ b/Utils/Dataflow/pyDKB/common/hdfs.py @@ -10,6 +10,7 @@ import tempfile from . 
import HDFSException +from misc import (log, logLevel) DEVNULL = open(os.path.devnull, "w") DKB_HOME = "/user/DKB/" @@ -33,7 +34,7 @@ def check_stderr(proc, timeout=None, max_lines=1): if err: n_lines += 1 if max_lines is None or n_lines <= max_lines: - sys.stderr.write("(INFO) (proc) %s\n" % err) + log("%s" % err, logLevel.INFO, 'proc') if proc.poll(): raise subprocess.CalledProcessError(proc.returncode, None) return proc.poll() @@ -78,8 +79,8 @@ def movefile(fname, dest): try: os.remove(fname) except OSError, err: - sys.stderr.write("(WARN) Failed to remove local copy of HDFS file" - " (%s): %s" % (fname, err)) + log("Failed to remove local copy of HDFS file" + " (%s): %s" % (fname, err), logLevel.WARN) def getfile(fname): diff --git a/Utils/Dataflow/pyDKB/dataflow/cds.py b/Utils/Dataflow/pyDKB/dataflow/cds.py index 342eae719..afafb647f 100644 --- a/Utils/Dataflow/pyDKB/dataflow/cds.py +++ b/Utils/Dataflow/pyDKB/dataflow/cds.py @@ -6,6 +6,7 @@ import signal import os +from pyDKB.common.misc import (log, logLevel) __all__ = ["CDSInvenioConnector", "KerberizedCDSInvenioConnector"] @@ -19,7 +20,7 @@ from invenio_client.contrib import cds import splinter except ImportError, e: - sys.stderr.write("(WARN) %s failed (%s)\n" % (__name__, e)) + log("%s failed (%s)\n" % (__name__, e), logLevel.WARN) __all__ = [] else: @@ -80,9 +81,9 @@ def __init__(self, login="user", password="password"): try: kerberos except NameError: - sys.stderr.write("(ERROR) Kerberos Python package is not" - " installed. Can't proceed with Kerberos" - " authorization.\n") + log("Kerberos Python package is not" + " installed. 
Can't proceed with Kerberos" + " authorization.", logLevel.ERROR) sys.exit(4) super(KerberizedCDSInvenioConnector, self).__init__("user", @@ -105,5 +106,5 @@ def _init_browser(self): self.browser.find_link_by_partial_text("Sign in").click() except kerberos.GSSError, e: - sys.stderr.write("(ERROR) %s\n" % str(e)) + log("%s" % str(e), logLevel.ERROR) sys.exit(3) diff --git a/Utils/Dataflow/pyDKB/dataflow/communication/messages.py b/Utils/Dataflow/pyDKB/dataflow/communication/messages.py index 4754af2f5..bc98bdd33 100644 --- a/Utils/Dataflow/pyDKB/dataflow/communication/messages.py +++ b/Utils/Dataflow/pyDKB/dataflow/communication/messages.py @@ -6,6 +6,7 @@ from . import messageType from . import codeType +from pyDKB.common.misc import (log, logLevel) import json import sys @@ -35,9 +36,8 @@ def Message(msg_type): raise ValueError("Message type must be a member of messageType") cls = __message_class.get(msg_type) if not cls: - sys.stderr.write( - "(WARN) Message class for type %s is not implemented. " - "Using AbstractMessage instead.") + log("Message class for type %s is not implemented. 
" + "Using AbstractMessage instead.", logLevel.WARN) cls = AbstractMessage return cls diff --git a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py index 9063f4e14..1c477301f 100644 --- a/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py +++ b/Utils/Dataflow/pyDKB/dataflow/stage/AbstractStage.py @@ -10,11 +10,12 @@ from pyDKB.common import LoggableObject from pyDKB.common.types import logLevel +from pyDKB.common.misc import log try: import argparse except ImportError, e: - sys.stderr.write("(ERROR) argparse package is not installed.\n") + log("argparse package is not installed.", logLevel.ERROR) raise e From d3761de7f9e7954302e6749f957c0dd18217ab77 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 9 Jul 2019 14:15:20 +0200 Subject: [PATCH 06/16] pyDKB/common: add default prefix to `log` function (caller module name). If used from `LoggableObject`, prefix is the caller class name (passed via `LoggableObject.log` method). If no prefix passed, caller module name is used instead. `LoggableObject` method does not support additional prefixes, yet if `log` function is called directly, prefixes can be passed and in this case caller `__name__` should be passed explicitly. 
--- Utils/Dataflow/pyDKB/common/hdfs.py | 2 +- Utils/Dataflow/pyDKB/common/misc.py | 5 ++++- Utils/Dataflow/pyDKB/dataflow/cds.py | 2 +- Utils/Dataflow/test/pyDKB/test.sh | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/hdfs.py b/Utils/Dataflow/pyDKB/common/hdfs.py index 6ed946de4..3be69580c 100644 --- a/Utils/Dataflow/pyDKB/common/hdfs.py +++ b/Utils/Dataflow/pyDKB/common/hdfs.py @@ -34,7 +34,7 @@ def check_stderr(proc, timeout=None, max_lines=1): if err: n_lines += 1 if max_lines is None or n_lines <= max_lines: - log("%s" % err, logLevel.INFO, 'proc') + log("%s" % err, logLevel.INFO, __name__, 'proc') if proc.poll(): raise subprocess.CalledProcessError(proc.returncode, None) return proc.poll() diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index 1a70cfa92..915805b99 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -5,6 +5,7 @@ """ import sys +import inspect from types import logLevel @@ -30,7 +31,9 @@ def log(message, level=logLevel.INFO, *args): if args: prefix = ' ' + ' '.join(['(%s)' % p for p in args]) else: - prefix = '' + frm = inspect.stack()[1] + mod = inspect.getmodule(frm[0]) + prefix = ' (%s)' % mod.__name__ if lines: out_message = "(%s)%s %s" % (logLevel.memberName(level), prefix, lines[0]) diff --git a/Utils/Dataflow/pyDKB/dataflow/cds.py b/Utils/Dataflow/pyDKB/dataflow/cds.py index afafb647f..8e090db2f 100644 --- a/Utils/Dataflow/pyDKB/dataflow/cds.py +++ b/Utils/Dataflow/pyDKB/dataflow/cds.py @@ -20,7 +20,7 @@ from invenio_client.contrib import cds import splinter except ImportError, e: - log("%s failed (%s)\n" % (__name__, e), logLevel.WARN) + log("Submodule failed (%s)" % e, logLevel.WARN) __all__ = [] else: diff --git a/Utils/Dataflow/test/pyDKB/test.sh b/Utils/Dataflow/test/pyDKB/test.sh index ece2f74db..d0719b1a1 100755 --- a/Utils/Dataflow/test/pyDKB/test.sh +++ b/Utils/Dataflow/test/pyDKB/test.sh @@ -52,7 
+52,7 @@ test_case() { after=`cat $case/after 2>/dev/null` eval "$before $cmd; $after" 2>&1 1> out.tmp | \ - grep -a -v '(WARN) pyDKB.dataflow.cds failed (No module named invenio_client.contrib)' | \ + grep -a -v '(WARN) (pyDKB.dataflow.cds) Submodule failed (No module named invenio_client.contrib)' | \ sed -e"s#$base_dir#\$base_dir#" > err.tmp err_correct=0 From 7a4fbc154a3fcc7f33bec015e767e4d4e93ea110 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Wed, 10 Jul 2019 14:00:13 +0200 Subject: [PATCH 07/16] pyDKB/common: make `log` output current time. --- Utils/Dataflow/pyDKB/common/misc.py | 9 +++++++-- Utils/Dataflow/test/pyDKB/test.sh | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index 915805b99..719a92987 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -6,9 +6,13 @@ import sys import inspect +from datetime import datetime from types import logLevel +# Datetime format for log messages +DTFORMAT = '%Y-%m-%d %H:%M:%S' + def log(message, level=logLevel.INFO, *args): """ Output log message with given log level. 
@@ -35,8 +39,9 @@ def log(message, level=logLevel.INFO, *args): mod = inspect.getmodule(frm[0]) prefix = ' (%s)' % mod.__name__ if lines: - out_message = "(%s)%s %s" % (logLevel.memberName(level), - prefix, lines[0]) + dtime = datetime.now().strftime(DTFORMAT) + out_message = "%s (%s)%s %s" % (dtime, logLevel.memberName(level), + prefix, lines[0]) for l in lines[1:]: out_message += "\n(==) %s" % l out_message += "\n" diff --git a/Utils/Dataflow/test/pyDKB/test.sh b/Utils/Dataflow/test/pyDKB/test.sh index d0719b1a1..2ecbc6e10 100755 --- a/Utils/Dataflow/test/pyDKB/test.sh +++ b/Utils/Dataflow/test/pyDKB/test.sh @@ -53,7 +53,8 @@ test_case() { eval "$before $cmd; $after" 2>&1 1> out.tmp | \ grep -a -v '(WARN) (pyDKB.dataflow.cds) Submodule failed (No module named invenio_client.contrib)' | \ - sed -e"s#$base_dir#\$base_dir#" > err.tmp + sed -E -e"s#$base_dir#\$base_dir#" \ + -e"s#^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} ##" > err.tmp err_correct=0 out_correct=0 From 97177b55bd7cc33437f703a2caf02d9bd0ed13b1 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Wed, 10 Jul 2019 14:06:45 +0200 Subject: [PATCH 08/16] pyDKB: update version info. --- Utils/Dataflow/pyDKB/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/VERSION b/Utils/Dataflow/pyDKB/VERSION index 3a2e926bf..322186fb8 100644 --- a/Utils/Dataflow/pyDKB/VERSION +++ b/Utils/Dataflow/pyDKB/VERSION @@ -1 +1 @@ -0.3.20190703 +0.3.20190710 From 8c7e5517f6300d0549cc6315a1c20da8cf56fd1b Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 6 Aug 2019 11:34:36 +0200 Subject: [PATCH 09/16] pyDKB/misc: bug fix. 
--- Utils/Dataflow/pyDKB/common/misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index 719a92987..9c9e7ccec 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -26,7 +26,7 @@ def log(message, level=logLevel.INFO, *args): :type *args: str """ if not logLevel.hasMember(level): - self.log("Unknown log level: %s" % level, logLevel.WARN) + log("Unknown log level: %s" % level, logLevel.WARN) level = logLevel.INFO if type(message) == list: lines = message From ff7b18fbc8da78f4c8ec44ec5fb4d418867a0fa5 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 6 Aug 2019 11:38:25 +0200 Subject: [PATCH 10/16] pyDKB/misc: fix error with module name log prefix in interactive mode. --- Utils/Dataflow/pyDKB/common/misc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index 9c9e7ccec..dfa93f823 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -37,7 +37,8 @@ def log(message, level=logLevel.INFO, *args): else: frm = inspect.stack()[1] mod = inspect.getmodule(frm[0]) - prefix = ' (%s)' % mod.__name__ + modname = getattr(mod, '__name__', 'main') + prefix = ' (%s)' % modname if lines: dtime = datetime.now().strftime(DTFORMAT) out_message = "%s (%s)%s %s" % (dtime, logLevel.memberName(level), From 678604ae903c5e1dd91409629ea8e08edbbc22ec Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 27 Aug 2019 12:22:52 +0200 Subject: [PATCH 11/16] pyDKB: typo fix. 
--- Utils/Dataflow/pyDKB/common/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/common/types.py b/Utils/Dataflow/pyDKB/common/types.py index a86c29208..59111a5c4 100644 --- a/Utils/Dataflow/pyDKB/common/types.py +++ b/Utils/Dataflow/pyDKB/common/types.py @@ -1,7 +1,7 @@ """ pyDKB.common.types -Definitions of types used across all the library moduless. +Definitions of types used across all the library modules. """ from Type import Type From 5ada0e6f623391d551790ef59a94f89a62d9eb7c Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 27 Aug 2019 12:48:01 +0200 Subject: [PATCH 12/16] pyDKB/misc: allow logging of non-string objects. --- Utils/Dataflow/pyDKB/common/misc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index dfa93f823..2b98e861f 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -17,8 +17,9 @@ def log(message, level=logLevel.INFO, *args): """ Output log message with given log level. - :param message: message to output - :type message: str + :param message: message to output (string, list of strings or + any other object) + :type message: object :param level: log level of the message :type level: ``pyDKB.common.types.logLevel`` member :param *args: additional prefixes (will be output between log @@ -31,7 +32,7 @@ def log(message, level=logLevel.INFO, *args): if type(message) == list: lines = message else: - lines = message.splitlines() + lines = str(message).splitlines() if args: prefix = ' ' + ' '.join(['(%s)' % p for p in args]) else: From cb722fef86ecccb2454ba832e3999f7a1ea61d18 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Fri, 30 Aug 2019 11:38:38 +0200 Subject: [PATCH 13/16] pyDKB/log: fix logging of multiline messages in list. 
--- Utils/Dataflow/pyDKB/common/misc.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index 2b98e861f..9f89d4aec 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -29,10 +29,11 @@ def log(message, level=logLevel.INFO, *args): if not logLevel.hasMember(level): log("Unknown log level: %s" % level, logLevel.WARN) level = logLevel.INFO - if type(message) == list: - lines = message - else: - lines = str(message).splitlines() + if type(message) != list: + message = [message] + lines = [] + for m in message: + lines += str(m).splitlines() if args: prefix = ' ' + ' '.join(['(%s)' % p for p in args]) else: From 00ba3210e1b4e7cf73793b014807e73b02d9dd5b Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Fri, 30 Aug 2019 11:45:49 +0200 Subject: [PATCH 14/16] pyDKB/log: drop empty lines from the log output. Now `log('')` and `log('\n\n\n')` output nothing and multiple newline symbols are ignored: ``` >>> log('') >>> log('\n\n\n\n') >>> >>> log('a\n\n\n\n') 2019-08-30 11:45:04 (INFO) (main) a >>> log('a\n\n\nb\n') 2019-08-30 11:45:10 (INFO) (main) a (==) b >>> log(['a\n\n\nb\n', 'ccc', 'de\n\nf\n']) 2019-08-30 11:45:28 (INFO) (main) a (==) b (==) ccc (==) de (==) f ``` --- Utils/Dataflow/pyDKB/common/misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index 9f89d4aec..222f1556a 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -33,7 +33,7 @@ def log(message, level=logLevel.INFO, *args): message = [message] lines = [] for m in message: - lines += str(m).splitlines() + lines += [line for line in str(m).splitlines() if line.strip()] if args: prefix = ' ' + ' '.join(['(%s)' % p for p in args]) else: From 273a39f50448dddfbd55a684123adaa9a0857aa6 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: 
Fri, 30 Aug 2019 12:04:22 +0200 Subject: [PATCH 15/16] pyDKB/log: add more detailed docstring. --- Utils/Dataflow/pyDKB/common/misc.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index 222f1556a..08a7e8963 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -17,6 +17,13 @@ def log(message, level=logLevel.INFO, *args): """ Output log message with given log level. + In case of multiline messages or a list of messages, only the first line + (message) is prefixed with the provided prefixes and timestamp; in all + subsequent lines (messages) they are replaced with the special prefix + '(==)', indicating that these lines belong to the same log record. + + Empty lines and lines containing only whitespace characters are ignored. + :param message: message to output (string, list of strings or any other object) :type message: object From f87abccbf3408b553c2dc49f0b842c313f74929a Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 16 Sep 2019 12:45:09 +0200 Subject: [PATCH 16/16] pyDKB: update version info (0.3.20190916). --- Utils/Dataflow/pyDKB/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/VERSION b/Utils/Dataflow/pyDKB/VERSION index 322186fb8..f255823f4 100644 --- a/Utils/Dataflow/pyDKB/VERSION +++ b/Utils/Dataflow/pyDKB/VERSION @@ -1 +1 @@ -0.3.20190710 +0.3.20190916