Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] pyDKB: common logging system #135

Open
wants to merge 22 commits into
base: pyDKB-fix
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
aefdb31
pyDKB: introduce common logging system for the whole library.
mgolosova May 18, 2018
4434508
pyDKB/logging: add suffix alignment for traceback messages.
mgolosova Jul 6, 2018
073f80a
pyDKB: fix logic of submodule import.
mgolosova Jul 6, 2018
27f893b
pyDKB: set default log level to WARN.
mgolosova Jul 6, 2018
83d3af1
pyDKB/*/AbstractStage: simplify 'argparse' import error handling.
mgolosova May 21, 2018
cfc98c3
pyDKB/logging: improve 'exc_info' usage in 'logger.traceback()'.
mgolosova Jul 9, 2018
9706bf0
pyDKB/*/AbstracStage: fix/improve `set_error()` method description.
mgolosova Jul 9, 2018
c159e92
pyDKB/*/stage: embrace new logging functionality.
mgolosova Jul 9, 2018
b2a8a88
pyDKB/*/communication: embrace new logging functionality (consumer).
mgolosova Jul 9, 2018
1d94478
pyDKB/*/communication: embrace new logging functionality (producer).
mgolosova Jul 9, 2018
38f421b
pyDKB/*/communication: embrace new logging functionality (stream).
mgolosova Jul 9, 2018
b8d393d
pyDKB/*/communication: embrace new logging functionality (messages).
mgolosova Jul 9, 2018
9a0e504
pyDKB/*/communication: fix log message (missed value for `'%s'`).
mgolosova Jul 9, 2018
a5c118f
kafka: fix indentation in ExternalProcessorLogger.
mgolosova Jul 9, 2018
e0d1269
kafka: add functionality to handle `(==)` in ExternalProcessor logs.
mgolosova Jul 9, 2018
cc8ca3d
kafka: remove excessive timestamp from external process log messages.
mgolosova Jul 9, 2018
b64700a
pyDKB/logging: Python 2.6, logging.__version__ < 0.5.x.x.
mgolosova Jul 9, 2018
2fa4473
pyDKB/logging: cleanup `*args` from `traceback()` method.
mgolosova Jul 10, 2018
0d5f5b9
pyDKB/logging: change format suffix handling.
mgolosova Jul 10, 2018
c7df132
pyDKB/logging: simplify extra line formatting.
mgolosova Jul 10, 2018
890a41d
pyDKB/logging: improve extra line formatting function.
mgolosova Jul 10, 2018
4880b65
pyDKB/logging: fix issue with multiline log message with arguments...
mgolosova Jul 10, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@ public class ExternalProcessLogger implements Runnable {

private BufferedReader STDERR;

private Pattern lmt_p = Pattern.compile("\\(?(TRACE|DEBUG|INFO|"
+ "WARN(?:ING)?|ERROR|==)\\)?");

private Pattern lmt_p = Pattern.compile("\\(?(TRACE|DEBUG|INFO|"
+ "WARN(?:ING)?|ERROR)\\)?");
private String prev_type = "";

public ExternalProcessLogger(Process process, String command) {
this.process = process;
Expand All @@ -52,9 +53,12 @@ private void external_log(String line) {
String type = "TRACE";
if (m.lookingAt()) {
type = m.group(1);
line = line.replaceFirst("\\(?" + type + "\\)?", "");
line = line.replaceFirst("^(.*)?\\(?" + type + "\\)?", "");
}
line = "(" + this.command + ")" + line;
if (type == "==") {
type = prev_type;
}
switch (type) {
case "TRACE":
log.trace(line);
Expand All @@ -75,6 +79,6 @@ private void external_log(String line) {
default:
log.trace(line);
}

prev_type = type;
}
}
22 changes: 20 additions & 2 deletions Utils/Dataflow/pyDKB/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,27 @@
Common library for Data Knowledge Base development.
"""

import dataflow
import common
import sys

__version__ = "0.3-SNAPSHOT"

__all__ = ["dataflow"]

try:
import common
logMsgFormat = '%(asctime)s: (%(levelname)s) %(message)s' \
' (%(name)s)'
# Or, in case of multithreading:
# ' (%(name)s) (%(threadName)s)'
common.logging.configureRootLogger(msg_format=logMsgFormat,
level=common.logging.WARN)
logger = common.logging.getLogger(__name__)
except (SyntaxError, ImportError), err:
raise ImportError("%s (in submodule 'common')" % err)

try:
import dataflow
except (SyntaxError, ImportError), err:
logger.error("Failed to import submodule 'dataflow'")
logger.traceback()
raise ImportError("%s (in submodule 'dataflow')" % err)
1 change: 1 addition & 0 deletions Utils/Dataflow/pyDKB/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

from exceptions import *
import _logging as logging
import hdfs
import json_utils as json
from custom_readline import custom_readline
Expand Down
318 changes: 318 additions & 0 deletions Utils/Dataflow/pyDKB/common/_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,318 @@
"""
pyDKB.common._logging

A wrapper for standard 'logging' module.
"""

import logging
from logging import CRITICAL, FATAL, ERROR, WARNING, WARN, INFO, DEBUG, NOTSET
Copy link
Contributor

@anastasiakaida anastasiakaida Jun 4, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mgolosova
What is the main difference between "critical" and "fatal" errors (especially in pyDKB)? :)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@anastasiakaida,
I guess this is the answer ;)

import sys

__logging_version = int(logging.__version__.split('.')[1])

# ---------------------------------------------------
# Module variables
# ---------------------------------------------------
#
# Some variables are initialized in 'init()' function.
#
# Logger instance to be used in the module
logger = None
# Root logger for the whole library
_rootLogger = None
# Additional log level
TRACE = DEBUG / 2
logging.addLevelName(TRACE, 'TRACE')


# -------------------------------------------
# Custom implementation for 'logging' classes
# -------------------------------------------


class Logger(logging.Logger, object):
""" Logger implementation, aware of 'TRACE' log level.

New methods:
* trace() -- log with TRACE level;
* traceback() -- log traceback with DEBUG level.
"""

def trace(self, msg, *args, **kwargs):
""" Log 'msg % args' with severity 'TRACE'.

To pass exception information, use the keyword argument exc_info with
a true value, e.g.

logger.trace("Houston, we have a %s", "interesting problem",
exc_info=1)
"""
if self.isEnabledFor(TRACE):
self._log(TRACE, msg, args, **kwargs)

def traceback(self, **kwargs):
""" Log traceback without additionat messages with severity 'DEBUG'.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"additionaL".


logger.traceback()
"""
if self.isEnabledFor(DEBUG):
if not (kwargs.get('exc_info')):
kwargs['exc_info'] = 1
self.debug('Traceback info:', **kwargs)


class RootLogger(Logger):
""" Same as Logger, but must must have `Level` and be the only one. """
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Double "must".

def __init__(self, level, name='root'):
""" Initialize new root logger. """
Logger.__init__(self, name, level)


class MultilineFormatter(logging.Formatter, object):
""" Formatter for multiline messages.

Every extra line (directly in the message body, or the traceback)
is:
* prefixed with ' (==)';
* suffixed with the part of format string which goes after
'%(message)s' part.
"""

_suffix = None

def __init__(self, *args):
""" Split format string into message format and suffix. """
new_args = list(args)
if len(args):
fmt = args[0]
new_args[0], self._suffix = self.splitFormat(fmt)
super(MultilineFormatter, self).__init__(*new_args)

def splitFormat(self, fmt):
""" Split format string into msg format and suffix.

Suffix is everything that goes after the message body.
"""
format, suffix = ('', '')
splitted = fmt.split("%(message)s")
if len(splitted) > 1:
format = "%(message)s".join(splitted[:-1]) + "%(message)s"
suffix = splitted[-1]
else:
format = fmt
return format, suffix

def formatSuffix(self, record):
""" Return formatted suffix. """
return self._suffix % record.__dict__

def formatExtra(self, lines, prefix=" (==) ", suffix='', align=False):
""" Format extra lines of the log message (traceback, ...).

Parameter 'align' shows whether the suffix should be aligned
to the right (by the longest line), or to the left (as for normal
log messages).
"""
if isinstance(lines, list) and len(lines):
max_len = len(max(lines, key=len))
# Need to add prefix len (in case that the longest line is
# among those that will be prefixed).
max_len += len(prefix)
if suffix and align:
line_fmt = "%%(line)-%ds" % max_len
else:
line_fmt = "%(line)s"
extra = line_fmt % {'line': lines[0]} + suffix
for line in lines[1:]:
extra += "\n" + line_fmt % {'line': prefix + line} + suffix
else:
extra = ""
return extra

def format(self, record):
""" Format multiline message.

Second and further lines from initial message are formatted
'extra' lines.
"""
formatted = super(MultilineFormatter, self).format(record)
lines = formatted.splitlines()
suffix = self.formatSuffix(record)
result = self.formatExtra(lines, suffix=suffix, align=True)
return result


# --------------------------
# Module top-level functions
# --------------------------


def isInitialized():
""" Checks if the module (namely, root logger) is initialized. """
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe "Check if ...", to conform with the other definitions and "Do X" template?

return isinstance(_rootLogger, Logger) and _rootLogger.handlers


def getRootLogger(**kwargs):
""" Reconfigure and return library root logger. """
if kwargs or not isInitialized():
configureRootLogger(**kwargs)
return _rootLogger


def init(**kwargs):
""" Initialize module.

If already initialized, do nothing.
"""
global logger
if not isInitialized():
initRootLogger(**kwargs)
logger = getLogger(__name__)


def initRootLogger(**kwargs):
""" Initialize root logger.

If already initialized, do nothing.
"""
global _rootLogger

if isInitialized():
return _rootLogger

name = kwargs.get('name')
if not name:
name = __name__.split('.')[0]
# Create new root logger
root = RootLogger(WARNING, name)
# Set Logger class 'root' to the new root
Logger.root = root
# Create new manager (object, responsible for new loggers creation)
manager = logging.Manager(root)
if __logging_version < 5:
logging.setLoggerClass(Logger)
else:
manager.setLoggerClass(Logger)
Logger.manager = manager
root.manager = Logger.manager
# 3) reset root logger for our class.
Logger.root = root

filename = kwargs.get('filename')
if filename:
mode = kwargs.get('mode', 'a')
hdlr_name = "file_%s_%s" % (filename, mode)
# Open file without delay, as presence of 'filename'
# keyword supposes that function was called during
# intended module initialization, not as a side effect
# of calling 'getLogger()' before initializtion.
hdlr = FileHandler(filename, mode)
else:
stream = kwargs.get('stream', sys.stderr)
hdlr_name = "stream_%s" % stream.fileno()
hdlr = logging.StreamHandler(stream)

if __logging_version < 5:
hdlr._name = hdlr_name
else:
hdlr.set_name(hdlr_name)

fs = kwargs.get('msg_format', logging.BASIC_FORMAT)
dfs = kwargs.get('datefmt', None)
fmt = MultilineFormatter(fs, dfs)
hdlr.setFormatter(fmt)
root.addHandler(hdlr)

level = kwargs.get('level')
if level is not None:
root.setLevel(level)
_rootLogger = root

return _rootLogger


def configureRootLogger(**kwargs):
""" (Re)configure root logger. """
if not isInitialized():
return init(**kwargs)

root = _rootLogger
name = kwargs.get('name')
if name and name != root.name:
# Renaming is not allowed,
# as logger name is the hierarchy-forming parameter
logger.warning("Root logger ('%s') can not be renamed to '%s'.",
root.name, name)

# Root logger is supposed to have exactly one handler,
# so we count on this fact here
old_hdlr = root.handlers[0]
old_fmt = old_hdlr.formatter

filename = kwargs.get('filename')
stream = kwargs.get('stream', sys.stderr)
if filename:
mode = kwargs.get('mode', 'a')
hdlr_name = "file_%s_%s" % (filename, mode)
# Delay allows us not to open file right now,
# but only when it is actually required
hdlr = FileHandler(filename, mode, delay=True)
elif stream:
hdlr_name = "stream_%s" % stream.fileno()
hdlr = logging.StreamHandler(stream)
else:
hdlr = old_hdlr

if __logging_version < 5:
if not getattr(hdlr, '_name', None):
hdlr._name = hdlr_name
else:
if not hdlr.get_name():
hdlr.set_name(hdlr_name)

# Remove old handler or go on with it instead of the newly created one
# (if it is configured just the same way).
if old_hdlr != hdlr:
if __logging_version < 5 \
and (getattr(old_hdlr, '_name', None)
!= getattr(hdlr, '_name', '')) \
or __logging_version >= 5 \
and old_hdlr.get_name() != hdlr.get_name():
old_hdlr.close()
root.removeHandler(old_hdlr)
root.addHandler(hdlr)
else:
hdlr.close()
hdlr = old_hdlr

fs = kwargs.get("msg_format")
dfs = kwargs.get("datefmt")

# Check if handler formatter was configured earlier
# and use old values if no new specified
if isinstance(old_fmt, logging.Formatter):
if not fs:
fs = old_fmt._fmt
if not dfs:
dfs = old_fmt.datefmt
elif not fs:
fs = logging.BASIC_FORMAT

fmt = MultilineFormatter(fs, dfs)
hdlr.setFormatter(fmt)

level = kwargs.get("level")
if level is not None:
root.setLevel(level)

return root


def getLogger(name):
""" Return logger with given name. """
if not isInitialized():
init()
root = _rootLogger
if name == root.name:
return root
return root.manager.getLogger(name)
Loading