From 0c8681f308abbb10a9a996becc132c6f064146ef Mon Sep 17 00:00:00 2001
From: Tristan Crockett <tristan.h.crockett@gmail.com>
Date: Fri, 15 Sep 2017 10:48:52 -0500
Subject: [PATCH] Increase logging to firehose level

---
 architect/builders.py                   | 12 +++++++-----
 architect/feature_dictionary_creator.py | 11 +++++++++++
 architect/feature_generators.py         | 17 +++++++++++++++++
 architect/feature_group_creator.py      | 25 ++++++++++++++++++++++++-
 architect/feature_group_mixer.py        | 13 ++++++++++---
 architect/label_generators.py           |  3 ++-
 architect/planner.py                    | 23 +++++++++++++++++++++++
 architect/state_table_generators.py     |  7 ++++++-
 8 files changed, 100 insertions(+), 11 deletions(-)

diff --git a/architect/builders.py b/architect/builders.py
index 444d63f..d984f8d 100644
--- a/architect/builders.py
+++ b/architect/builders.py
@@ -152,6 +152,7 @@ def make_entity_date_table(
             table_name=table_name,
             index_query=indices_query
         )
+        logging.info('Creating matrix-specific entity-date table for matrix %s with query %s', matrix_uuid, query)
         self.engine.execute(query)
 
         return table_name
@@ -248,7 +249,7 @@ def build_matrix(
 
         logging.info('Creating matrix %s > %s', matrix_metadata['matrix_id'], matrix_filename)
         # make the entity time table and query the labels and features tables
-        logging.info('Making entity date table')
+        logging.info('Making entity date table for matrix %s', matrix_uuid)
         entity_date_table_name = self.make_entity_date_table(
             as_of_times,
             label_name,
@@ -258,7 +259,7 @@ def build_matrix(
             matrix_uuid,
             matrix_metadata['label_window']
         )
-        logging.info('Writing feature group data')
+        logging.info('Extracting feature group data from database into file for matrix %s', matrix_uuid)
         features_csv_names = self.write_features_data(
             as_of_times,
             feature_dictionary,
@@ -266,7 +267,7 @@ def build_matrix(
             matrix_uuid
         )
         try:
-            logging.info('Writing label data')
+            logging.info('Extracting label data frmo database into file for matrix %s', matrix_uuid)
             labels_csv_name = self.write_labels_data(
                 label_name,
                 label_type,
@@ -277,7 +278,7 @@ def build_matrix(
             features_csv_names.insert(0, labels_csv_name)
 
             # stitch together the csvs
-            logging.info('Merging features data')
+            logging.info('Merging feature files for matrix %s', matrix_uuid)
             output = self.merge_feature_csvs(
                 features_csv_names,
                 matrix_directory,
@@ -289,7 +290,7 @@ def build_matrix(
                 self.remove_file(csv_name)
         try:
             # store the matrix
-            logging.info('Archiving matrix with metta')
+            logging.info('Archiving matrix %s with metta', matrix_uuid)
             metta.archive_matrix(
                 matrix_config=matrix_metadata,
                 df_matrix=output,
@@ -418,6 +419,7 @@ def write_to_csv(self, query_string, file_name, header='HEADER'):
         :rtype: none
         """
         matrix_csv = self.open_fh_for_writing(file_name)
+        logging.debug('Copying to CSV query %s', query_string)
         try:
             copy_sql = 'COPY ({query}) TO STDOUT WITH CSV {head}'.format(
                 query=query_string,
diff --git a/architect/feature_dictionary_creator.py b/architect/feature_dictionary_creator.py
index f35e86d..ee1b7e7 100644
--- a/architect/feature_dictionary_creator.py
+++ b/architect/feature_dictionary_creator.py
@@ -1,3 +1,4 @@
+import logging
 from architect.utils import str_in_sql
 
 
@@ -32,6 +33,11 @@ def feature_dictionary(self, feature_table_names, index_column_lookup):
                     )
                 )
             ]
+            logging.info(
+                'Feature names found for table %s: %s',
+                feature_table_name,
+                feature_names
+            )
             feature_dictionary[feature_table_name] = feature_names
         return(feature_dictionary)
 
@@ -57,5 +63,10 @@ def _build_feature_names_query(self, table_name, index_columns):
             schema=self.features_schema_name,
             index_columns=str_in_sql(index_columns)
         )
+        logging.info(
+            'Extracting all possible feature names for table %s with query %s',
+            table_name,
+            feature_names_query
+        )
 
         return(feature_names_query)
diff --git a/architect/feature_generators.py b/architect/feature_generators.py
index aab046b..5b858b4 100644
--- a/architect/feature_generators.py
+++ b/architect/feature_generators.py
@@ -114,10 +114,21 @@ def _compute_choices(self, choice_query):
                 for row
                 in self.db_engine.execute(choice_query)
             ]
+            logging.info(
+                'Computed list of categoricals: %s for choice query: %s',
+                self.categorical_cache[choice_query],
+                choice_query
+            )
         return self.categorical_cache[choice_query]
 
     def _build_choices(self, categorical):
+        logging.info(
+            'Building categorical choices for column %s, metrics %s',
+            categorical['column'],
+            categorical['metrics']
+        )
         if 'choices' in categorical:
+            logging.info('Found list of configured choices: %s', categorical['choices'])
             return categorical['choices']
         else:
             return self._compute_choices(categorical['choice_query'])
@@ -150,6 +161,11 @@ def _build_array_categoricals(self, categorical_config):
         ]
 
     def _aggregation(self, aggregation_config, feature_dates):
+        logging.info(
+            'Building collate.SpacetimeAggregation for config %s and as_of_dates %s',
+            aggregation_config,
+            feature_dates
+        )
         aggregates = [
             Aggregate(aggregate['quantity'], aggregate['metrics'])
             for aggregate in aggregation_config.get('aggregates', [])
@@ -267,6 +283,7 @@ def run_commands(self, command_list):
         conn = self.db_engine.connect()
         trans = conn.begin()
         for command in command_list:
+            logging.debug('Executing feature generation query: %s', command)
             conn.execute(command)
         trans.commit()
 
diff --git a/architect/feature_group_creator.py b/architect/feature_group_creator.py
index 4e3badf..f3412f9 100644
--- a/architect/feature_group_creator.py
+++ b/architect/feature_group_creator.py
@@ -1,3 +1,6 @@
+import logging
+
+
 def table_subsetter(config_item, table, features):
     "Return features matching a given table"
     if table == config_item:
@@ -65,6 +68,11 @@ def subsets(self, feature_dictionary):
         Returns: (list) subsets of the feature dictionary, in the same
             table-based structure
         """
+        logging.info(
+            'Creating feature groups. config: %s, Master feature dictionary: %s',
+            self.definition,
+            feature_dictionary
+        )
         subsets = []
         for name, config in sorted(self.definition.items()):
             for config_item in config:
@@ -72,9 +80,24 @@ def subsets(self, feature_dictionary):
                 for table, features in feature_dictionary.items():
                     matching_features =\
                         self.subsetters[name](config_item, table, features)
+                    logging.info(
+                        'Matching features for config item %s, table %s: %s',
+                        config_item,
+                        table,
+                        matching_features
+                    )
                     if len(matching_features) > 0:
                         subset[table] = matching_features
+                    else:
+                        logging.warning(
+                            'No matching features found for config item %s, table %s, master features %s',
+                            config_item,
+                            table,
+                            features
+                        )
+
                 subsets.append(subset)
         if not any(subset for subset in subsets if any(subset)):
-            raise Exception('No matching feature subsets found!')
+            raise Exception('No matching feature groups found!')
+        logging.info('Found %s total feature subsets', len(subsets))
         return subsets
diff --git a/architect/feature_group_mixer.py b/architect/feature_group_mixer.py
index ff8f6cf..5a0656e 100644
--- a/architect/feature_group_mixer.py
+++ b/architect/feature_group_mixer.py
@@ -1,3 +1,6 @@
+import logging
+
+
 def leave_one_in(feature_groups):
     """For each group, return a copy of just that group
 
@@ -65,7 +68,11 @@ def generate(self, feature_groups):
                 each representing a group
         Returns: (list) of feature dicts
         """
-        results = []
+        final_results = []
         for strategy in self.strategies:
-            results += self.strategy_lookup[strategy](feature_groups)
-        return results
+            logging.info('Mixing feature groups %s using strategy %s', feature_groups, strategy)
+            results = self.strategy_lookup[strategy](feature_groups)
+            logging.info('Mixing found new feature groups %s', results)
+            final_results += results
+
+        return final_results
diff --git a/architect/label_generators.py b/architect/label_generators.py
index 4d3edfa..4f22321 100644
--- a/architect/label_generators.py
+++ b/architect/label_generators.py
@@ -40,7 +40,7 @@ def generate(
             start_date=start_date,
             label_window=label_window,
         )
-        logging.debug(query)
+        logging.debug('Running label generation query: %s', query)
         self.db_engine.execute(query)
         return labels_table
 
@@ -70,6 +70,7 @@ def generate_all_labels(
                      len(label_windows))
         for as_of_date in as_of_dates:
             for label_window in label_windows:
+                logging.info('Generating labels for as of date %s and label window %s', as_of_date, label_window)
                 self.generate(
                     start_date=as_of_date,
                     label_window=label_window,
diff --git a/architect/planner.py b/architect/planner.py
index d77f25a..9d9b1db 100644
--- a/architect/planner.py
+++ b/architect/planner.py
@@ -1,5 +1,6 @@
 import copy
 import itertools
+import logging
 
 from metta import metta_io as metta
 
@@ -125,6 +126,14 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries):
         updated_definitions = []
         build_tasks = dict()
         for matrix_set in matrix_set_definitions:
+            logging.info('Making plans for matrix set %s', matrix_set)
+            logging.info(
+                'Iterating over %s label names, %s label_types, %s states, %s feature dictionaries',
+                len(self.label_names),
+                len(self.label_types),
+                len(self.states),
+                len(feature_dictionaries)
+            )
             train_matrix = matrix_set['train_matrix']
             for label_name, label_type, state, feature_dictionary in itertools.product(
                 self.label_names,
@@ -143,6 +152,7 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries):
                     'train',
                 )
                 train_uuid = metta.generate_uuid(train_metadata)
+                logging.info('Matrix UUID %s found for train metadata %s', train_uuid, train_metadata)
                 if train_uuid not in build_tasks:
                     build_tasks[train_uuid] = self._generate_build_task(
                         train_metadata,
@@ -150,6 +160,9 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries):
                         train_matrix,
                         feature_dictionary
                     )
+                    logging.info('Train uuid %s not found in build tasks yet, so added', train_uuid)
+                else:
+                    logging.info('Train uuid %s already found in build tasks', train_uuid)
                 matrix_set_clone['train_uuid'] = train_uuid
 
                 test_uuids = []
@@ -163,6 +176,7 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries):
                         'test',
                     )
                     test_uuid = metta.generate_uuid(test_metadata)
+                    logging.info('Matrix UUID %s found for test metadata %s', test_uuid, test_metadata)
                     if test_uuid not in build_tasks:
                         build_tasks[test_uuid] = self._generate_build_task(
                             test_metadata,
@@ -170,15 +184,24 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries):
                             test_matrix,
                             feature_dictionary
                         )
+                        logging.info('Test uuid %s not found in build tasks yet, so added', test_uuid)
+                    else:
+                        logging.info('Test uuid %s already found in build tasks', test_uuid)
 
                     test_uuids.append(test_uuid)
                 matrix_set_clone['test_uuids'] = test_uuids
                 updated_definitions.append(matrix_set_clone)
 
+        logging.info(
+            'Planner is finished generating matrix plans. %s matrix definitions and %s unique build tasks found', 
+            len(updated_definitions),
+            len(build_tasks.keys())
+        )
         return updated_definitions, build_tasks
 
     def build_all_matrices(self, *args, **kwargs):
         self.builder.build_all_matrices(*args, **kwargs)
 
     def build_matrix(self, *args, **kwargs):
+        logging.info('Building matrix with args %s', args)
         self.builder.build_matrix(*args, **kwargs)
diff --git a/architect/state_table_generators.py b/architect/state_table_generators.py
index 99519c9..dd8b58f 100644
--- a/architect/state_table_generators.py
+++ b/architect/state_table_generators.py
@@ -105,8 +105,10 @@ def sparse_table_name(self):
     @property
     def sparse_table_query_func(self):
         if self.dense_state_table:
+            logging.info('Dense state table passed to StateTableGenerator, so computing sparse table using it')
             return self._sparse_table_query_from_dense
         else:
+            logging.info('Dense state table not passed to StateTableGenerator, so computing sparse table using events table')
             return self._sparse_table_query_from_events
 
     def _all_known_states(self, dense_state_table):
@@ -150,6 +152,7 @@ def _sparse_table_query_from_dense(self, as_of_dates):
             as_of_dates=[date.isoformat() for date in as_of_dates],
             state_column_string=', '.join(state_columns)
         )
+        logging.debug('Assembled sparse state table query: %s', query)
         return query
 
     def _sparse_table_query_from_events(self, as_of_dates):
@@ -177,6 +180,7 @@ def _sparse_table_query_from_events(self, as_of_dates):
             as_of_dates=[date.isoformat() for date in as_of_dates],
             active_state=DEFAULT_ACTIVE_STATE
         )
+        logging.debug('Assembled sparse state table query: %s', query)
         return query
 
     def generate_sparse_table(self, as_of_dates):
@@ -187,6 +191,7 @@ def generate_sparse_table(self, as_of_dates):
             as_of_dates (list of datetime.dates) Dates to include in the sparse
                 state table
         """
+        logging.debug('Generating sparse table using as_of_dates: %s', as_of_dates)
         self._generate_sparse_table(self.sparse_table_query_func(as_of_dates))
 
     def _generate_sparse_table(self, generate_query):
@@ -201,7 +206,7 @@ def _generate_sparse_table(self, generate_query):
             'create index on {} (entity_id, as_of_date)'
             .format(self.sparse_table_name)
         )
-        logging.info('Indices created for sparse state table')
+        logging.info('Indices created on entity_id and as_of_date for sparse state table')
 
     def clean_up(self):
         self.db_engine.execute(