Skip to content

Commit

Permalink
Added pickling of parse tree to and from binary file
Browse files Browse the repository at this point in the history
  • Loading branch information
keiffster committed Jul 5, 2017
1 parent 7d386ae commit b9cebdc
Show file tree
Hide file tree
Showing 12 changed files with 220 additions and 64 deletions.
5 changes: 5 additions & 0 deletions bots/professor/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ brain:

#dump_to_file: /tmp/professor_braintree.txt

save_binary: false
load_binary: false
binary_filename: /tmp/professor.brain
load_aiml_on_binary_fail: false

files:
aiml:
files: $BOT_ROOT/aiml
Expand Down
2 changes: 1 addition & 1 deletion bots/professor/logging.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ handlers:
file:
class: logging.handlers.RotatingFileHandler
formatter: simple
filename: /tmp/alice2.log
filename: /tmp/professor.log

root:
level: DEBUG
Expand Down
48 changes: 45 additions & 3 deletions src/programy/brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@
"""

import logging
import os.path
import xml.etree.ElementTree as ET

# Prefer the C-accelerated pickle implementation and fall back to the
# pure-Python module only when the accelerator cannot be imported.
try:
    import _pickle as pickle
except ImportError:
    import pickle
import gc

from programy.processors.processing import ProcessorLoader
from programy.config.brain import BrainConfiguration
Expand All @@ -33,6 +37,7 @@
from programy.parser.aiml_parser import AIMLParser
from programy.utils.services.service import ServiceFactory
from programy.utils.text.text import TextUtils
import datetime

class Brain(object):

Expand Down Expand Up @@ -118,8 +123,45 @@ def postprocessors(self):
return self._postprocessors

def load(self, brain_configuration: BrainConfiguration):
    """Populate the brain from a pickled parser and/or the AIML sources.

    When ``brain_configuration.load_binary`` is True the AIML parser is
    restored from ``brain_configuration.binary_filename``. If that fails and
    ``load_aiml_on_binary_fail`` is True, the AIML source files are parsed
    instead; otherwise the original exception propagates. When binary
    loading is not requested the AIML sources are always parsed.

    When ``brain_configuration.save_binary`` is True the parser is pickled
    back to ``binary_filename`` afterwards. Collections and services are
    loaded last in every case.

    :param brain_configuration: brain settings; this method reads
        ``load_binary``, ``save_binary``, ``binary_filename`` and
        ``load_aiml_on_binary_fail`` and passes the object on to the
        parser, collection and service loaders.
    :raises Exception: whatever opening or unpickling the binary file
        raised, when ``load_aiml_on_binary_fail`` is not True.
    """
    load_aiml = True
    if brain_configuration.load_binary is True:
        logging.info("Loading binary brain from [%s]", brain_configuration.binary_filename)
        try:
            start = datetime.datetime.now()
            # Presumably gc is paused to cut collector overhead while
            # unpickling allocates a large object graph. Remember the
            # previous state so a caller that disabled gc keeps it disabled.
            gc_was_enabled = gc.isenabled()
            gc.disable()
            try:
                # SECURITY: pickle.load can execute arbitrary code; the
                # binary file must come from a trusted location.
                with open(brain_configuration.binary_filename, "rb") as binary_file:
                    self._aiml_parser = pickle.load(binary_file)
            finally:
                # Always restore gc, even when opening/unpickling fails.
                if gc_was_enabled:
                    gc.enable()
            diff = datetime.datetime.now() - start
            logging.info("Brain load took a total of %.2f sec", diff.total_seconds())
            load_aiml = False
        except Exception as e:
            logging.exception(e)
            if brain_configuration.load_aiml_on_binary_fail is True:
                load_aiml = True
            else:
                # Bare raise keeps the original traceback intact.
                raise

    if load_aiml is True:
        logging.info("Loading aiml source brain")
        self._aiml_parser.load_aiml(brain_configuration)

    if brain_configuration.save_binary is True:
        logging.info("Saving binary brain to [%s]", brain_configuration.binary_filename)
        start = datetime.datetime.now()
        with open(brain_configuration.binary_filename, "wb") as binary_file:
            pickle.dump(self._aiml_parser, binary_file)
        diff = datetime.datetime.now() - start
        logging.info("Brain save took a total of %.2f sec", diff.total_seconds())

    logging.info("Loading collections")
    self.load_collections(brain_configuration)
    logging.info("Loading services")
    self.load_services(brain_configuration)

def _load_denormals(self, brain_configuration):
Expand Down
33 changes: 33 additions & 0 deletions src/programy/config/brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,17 @@ def __init__(self):
self._allow_system_aiml = BrainConfiguration.DEFAULT_ALLOW_SYSTEM_AIML
self._allow_learn_aiml = BrainConfiguration.DEFAULT_ALLOW_LEARN_AIML
self._allow_learnf_aiml = BrainConfiguration.DEFAULT_ALLOW_LEARNF_AIML

self._pattern_nodes = None
self._template_nodes = None

self._dump_to_file = None

self._save_binary = None
self._load_binary = None
self._binary_filename = None
self._load_aiml_on_binary_fail = None

self._aiml_files = None
self._set_files = None
self._map_files = None
Expand All @@ -123,7 +131,9 @@ def __init__(self):
self._triples = None
self._preprocessors = None
self._postprocessors = None

self._services = []

BaseConfigurationData.__init__(self, "brain")

def _get_brain_file_configuration(self, config_file, section, bot_root):
Expand All @@ -149,10 +159,17 @@ def load_config_section(self, config_file, bot_root):
self._allow_learn_aiml = config_file.get_option(brain, "allow_learn_aiml", BrainConfiguration.DEFAULT_ALLOW_LEARN_AIML)
self._allow_learnf_aiml = config_file.get_option(brain, "allow_learnf_aiml", BrainConfiguration.DEFAULT_ALLOW_LEARNF_AIML)
self._allow_learnf_aiml = config_file.get_option(brain, "allow_learnf_aiml", BrainConfiguration.DEFAULT_ALLOW_LEARNF_AIML)

self._pattern_nodes = config_file.get_option(brain, "pattern_nodes", missing_value=None)
self._template_nodes = config_file.get_option(brain, "template_nodes", missing_value=None)

self._dump_to_file = config_file.get_option(brain, "dump_to_file", missing_value=None)

self._save_binary = config_file.get_bool_option(brain, "save_binary", missing_value=False)
self._load_binary = config_file.get_bool_option(brain, "load_binary", missing_value=False)
self._binary_filename = config_file.get_option(brain, "binary_filename", missing_value=None)
self._load_aiml_on_binary_fail = config_file.get_bool_option(brain, "load_aiml_on_binary_fail", missing_value=False)

files = config_file.get_section("files", brain)
if files is not None:
aiml = config_file.get_section("aiml", files)
Expand Down Expand Up @@ -222,6 +239,22 @@ def template_nodes(self):
def dump_to_file(self):
return self._dump_to_file

@property
def save_binary(self):
    """Flag read by the brain loader to decide whether to pickle the parser."""
    return self._save_binary

@property
def load_binary(self):
    """Flag read by the brain loader to decide whether to unpickle the parser."""
    return self._load_binary

@property
def binary_filename(self):
    """Path of the pickled brain file used for binary load and save."""
    return self._binary_filename

@property
def load_aiml_on_binary_fail(self):
    """Flag selecting AIML source loading as the fallback when binary load fails."""
    return self._load_aiml_on_binary_fail

@property
def aiml_files(self):
return self._aiml_files
Expand Down
20 changes: 20 additions & 0 deletions src/programy/config/file/json_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,24 @@ def get_option(self, section, option_name, missing_value=None):
logging.warning("Missing value for [%s] in config , return default value %s", option_name, missing_value)
return missing_value

def get_bool_option(self, section, option_name, missing_value=False):
    """Fetch a boolean option from a configuration section.

    :param section: container supporting ``in`` and ``[]`` lookups by name.
    :param option_name: key of the option to read.
    :param missing_value: value returned when the option is absent.
    :return: the stored ``bool``, or ``missing_value`` when absent.
    :raises Exception: when the option exists but is not a ``bool``.
    """
    if option_name not in section:
        logging.warning("Missing value for [%s] in config , return default value %s", option_name, missing_value) if False else logging.warning("Missing value for [%s] in config, return default value %s", option_name, missing_value)
        return missing_value

    candidate = section[option_name]
    if not isinstance(candidate, bool):
        raise Exception("Invalid boolean config value")
    return candidate

def get_int_option(self, section, option_name, missing_value=0):
    """Fetch an integer option from a configuration section.

    :param section: container supporting ``in`` and ``[]`` lookups by name.
    :param option_name: key of the option to read.
    :param missing_value: value returned when the option is absent.
    :return: the stored ``int``, or ``missing_value`` when absent.
    :raises Exception: when the option exists but is not an integer
        (booleans are rejected as well).
    """
    if option_name not in section:
        # %s rather than %d so that a non-integer default such as None can
        # still be formatted into the log message.
        logging.warning("Missing value for [%s] in config, return default value %s", option_name, missing_value)
        return missing_value

    value = section[option_name]
    # bool is a subclass of int; exclude it explicitly so a true/false
    # setting is not silently coerced to 1/0.
    if isinstance(value, bool) or not isinstance(value, int):
        raise Exception("Invalid integer config value")
    return int(value)
21 changes: 21 additions & 0 deletions src/programy/config/file/xml_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,24 @@ def get_option(self, section, option_name, missing_value=None):
logging.warning("Missing value for [%s] in config, return default value %s", option_name, missing_value)
return missing_value

def get_bool_option(self, section, option_name, missing_value=False):
    """Fetch a boolean option from a configuration section.

    :param section: container supporting ``in`` and ``[]`` lookups by name.
    :param option_name: key of the option to read.
    :param missing_value: value returned when the option is absent.
    :return: the stored ``bool``, or ``missing_value`` when absent.
    :raises Exception: when the option exists but is not a ``bool``.
    """
    # NOTE(review): values read from XML text are usually strings, in which
    # case the isinstance check below would never pass - confirm how
    # `section` is built for XML configuration files.
    if option_name not in section:
        logging.warning("Missing value for [%s] in config, return default value %s", option_name, missing_value)
        return missing_value

    candidate = section[option_name]
    if not isinstance(candidate, bool):
        raise Exception("Invalid boolean config value")
    return candidate

def get_int_option(self, section, option_name, missing_value=0):
    """Fetch an integer option from a configuration section.

    :param section: container supporting ``in`` and ``[]`` lookups by name.
    :param option_name: key of the option to read.
    :param missing_value: value returned when the option is absent.
    :return: the stored ``int``, or ``missing_value`` when absent.
    :raises Exception: when the option exists but is not an integer
        (booleans are rejected as well).
    """
    # NOTE(review): values read from XML text are usually strings, in which
    # case the integer check below would never pass - confirm how `section`
    # is built for XML configuration files.
    if option_name not in section:
        # %s rather than %d so that a non-integer default such as None can
        # still be formatted into the log message.
        logging.warning("Missing value for [%s] in config, return default value %s", option_name, missing_value)
        return missing_value

    value = section[option_name]
    # bool is a subclass of int; exclude it explicitly so a true/false
    # setting is not silently coerced to 1/0.
    if isinstance(value, bool) or not isinstance(value, int):
        raise Exception("Invalid integer config value")
    return int(value)
21 changes: 17 additions & 4 deletions src/programy/parser/aiml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def load_files_from_directory(self, brain_configuration):
brain_configuration.aiml_files.extension)
stop = datetime.datetime.now()
diff = stop - start
logging.info("Total processing time %f.2 secs" % diff.total_seconds())
logging.info("Total processing time %.6f secs" % diff.total_seconds())
logging.info("Loaded a total of %d aiml files with %d categories" % (len(aimls_loaded), self.num_categories))
if diff.total_seconds() > 0:
logging.info("Thats approx %f aiml files per sec" % (len(aimls_loaded) / diff.total_seconds()))
Expand All @@ -103,7 +103,7 @@ def load_single_file(self, brain_configuration):
self._aiml_loader.load_single_file_contents(brain_configuration.aiml_files.file)
stop = datetime.datetime.now()
diff = stop - start
logging.info("Total processing time %f.2 secs" % diff.total_seconds())
logging.info("Total processing time %.6f secs" % diff.total_seconds())
logging.info("Loaded a single aiml file with %d categories" % (self.num_categories))

def load_aiml(self, brain_configuration: BrainConfiguration):
Expand Down Expand Up @@ -147,7 +147,11 @@ def parse_from_file(self, filename):
if aiml is None or aiml.tag != 'aiml':
raise ParserException("Error, root tag is not <aiml>", filename=filename)
else:
self.parse_aiml(aiml, filename)
start = datetime.datetime.now()
num_categories = self.parse_aiml(aiml, filename)
stop = datetime.datetime.now()
diff = stop - start
logging.info("Processed %s with %d categories in %f.2 secs" %(filename, num_categories, diff.total_seconds()))
except Exception as e:
logging.exception(e)
logging.error("Failed to load contents of AIML file from [%s] - [%s]"%(filename, e))
Expand Down Expand Up @@ -201,10 +205,12 @@ def parse_aiml(self, aiml_xml, filename):
self.parse_version(aiml_xml)

categories_found = False
num_category = 0
for expression in aiml_xml:
if expression.tag == 'topic':
try:
self.parse_topic(expression)
num_topic_categories = self.parse_topic(expression)
num_category += num_topic_categories
categories_found = True

except DuplicateGrammarException as dupe_excep:
Expand All @@ -217,6 +223,7 @@ def parse_aiml(self, aiml_xml, filename):
try:
self.parse_category(expression)
categories_found = True
num_category += 1

except DuplicateGrammarException as dupe_excep:
self.handle_aiml_duplicate(dupe_excep, filename)
Expand All @@ -230,6 +237,8 @@ def parse_aiml(self, aiml_xml, filename):
if categories_found is False:
logging.warning("no categories in aiml file")

return num_category

#########################################################################################
#
# AIML_VERSION ::== 0.9 | 1.0 | 1.1 | 2.0
Expand Down Expand Up @@ -274,16 +283,20 @@ def parse_topic(self, topic_element):
raise ParserException("Error, missing name attribute for topic", xml_element=topic_element)

category_found = False
num_category = 0
for child in topic_element:
if child.tag == 'category':
self.parse_category(child, topic_pattern)
category_found = True
num_category += 1
else:
raise ParserException("Error unknown child node of topic, %s" % child.tag, xml_element=topic_element)

if category_found is False:
raise ParserException("Error, no categories in topic", xml_element=topic_element)

return num_category

def find_topic(self, category_xml, topic_element=None):
topics = category_xml.findall('topic')
if topic_element is not None:
Expand Down
Loading

0 comments on commit b9cebdc

Please sign in to comment.