diff --git a/architect/builders.py b/architect/builders.py index 444d63f..d984f8d 100644 --- a/architect/builders.py +++ b/architect/builders.py @@ -152,6 +152,7 @@ def make_entity_date_table( table_name=table_name, index_query=indices_query ) + logging.info('Creating matrix-specific entity-date table for matrix %s with query %s', matrix_uuid, query) self.engine.execute(query) return table_name @@ -248,7 +249,7 @@ def build_matrix( logging.info('Creating matrix %s > %s', matrix_metadata['matrix_id'], matrix_filename) # make the entity time table and query the labels and features tables - logging.info('Making entity date table') + logging.info('Making entity date table for matrix %s', matrix_uuid) entity_date_table_name = self.make_entity_date_table( as_of_times, label_name, @@ -258,7 +259,7 @@ def build_matrix( matrix_uuid, matrix_metadata['label_window'] ) - logging.info('Writing feature group data') + logging.info('Extracting feature group data from database into file for matrix %s', matrix_uuid) features_csv_names = self.write_features_data( as_of_times, feature_dictionary, @@ -266,7 +267,7 @@ def build_matrix( matrix_uuid ) try: - logging.info('Writing label data') + logging.info('Extracting label data frmo database into file for matrix %s', matrix_uuid) labels_csv_name = self.write_labels_data( label_name, label_type, @@ -277,7 +278,7 @@ def build_matrix( features_csv_names.insert(0, labels_csv_name) # stitch together the csvs - logging.info('Merging features data') + logging.info('Merging feature files for matrix %s', matrix_uuid) output = self.merge_feature_csvs( features_csv_names, matrix_directory, @@ -289,7 +290,7 @@ def build_matrix( self.remove_file(csv_name) try: # store the matrix - logging.info('Archiving matrix with metta') + logging.info('Archiving matrix %s with metta', matrix_uuid) metta.archive_matrix( matrix_config=matrix_metadata, df_matrix=output, @@ -418,6 +419,7 @@ def write_to_csv(self, query_string, file_name, header='HEADER'): :rtype: none """ matrix_csv = self.open_fh_for_writing(file_name) + logging.debug('Copying to CSV query %s', query_string) try: copy_sql = 'COPY ({query}) TO STDOUT WITH CSV {head}'.format( query=query_string, diff --git a/architect/feature_dictionary_creator.py b/architect/feature_dictionary_creator.py index f35e86d..ee1b7e7 100644 --- a/architect/feature_dictionary_creator.py +++ b/architect/feature_dictionary_creator.py @@ -1,3 +1,4 @@ +import logging from architect.utils import str_in_sql @@ -32,6 +33,11 @@ def feature_dictionary(self, feature_table_names, index_column_lookup): ) ) ] + logging.info( + 'Feature names found for table %s: %s', + feature_table_name, + feature_names + ) feature_dictionary[feature_table_name] = feature_names return(feature_dictionary) @@ -57,5 +63,10 @@ def _build_feature_names_query(self, table_name, index_columns): schema=self.features_schema_name, index_columns=str_in_sql(index_columns) ) + logging.info( + 'Extracting all possible feature names for table %s with query %s', + table_name, + feature_names_query + ) return(feature_names_query) diff --git a/architect/feature_generators.py b/architect/feature_generators.py index aab046b..5b858b4 100644 --- a/architect/feature_generators.py +++ b/architect/feature_generators.py @@ -114,10 +114,21 @@ def _compute_choices(self, choice_query): for row in self.db_engine.execute(choice_query) ] + logging.info( + 'Computed list of categoricals: %s for choice query: %s', + self.categorical_cache[choice_query], + choice_query + ) return self.categorical_cache[choice_query] def _build_choices(self, categorical): + logging.info( + 'Building categorical choices for column %s, metrics %s', + categorical['column'], + categorical['metrics'] + ) if 'choices' in categorical: + logging.info('Found list of configured choices: %s', categorical['choices']) return categorical['choices'] else: return self._compute_choices(categorical['choice_query']) @@ -150,6 +161,11 @@ def _build_array_categoricals(self, categorical_config): ] def _aggregation(self, aggregation_config, feature_dates): + logging.info( + 'Building collate.SpacetimeAggregation for config %s and as_of_dates %s', + aggregation_config, + feature_dates + ) aggregates = [ Aggregate(aggregate['quantity'], aggregate['metrics']) for aggregate in aggregation_config.get('aggregates', []) @@ -267,6 +283,7 @@ def run_commands(self, command_list): conn = self.db_engine.connect() trans = conn.begin() for command in command_list: + logging.debug('Executing feature generation query: %s', command) conn.execute(command) trans.commit() diff --git a/architect/feature_group_creator.py b/architect/feature_group_creator.py index 4e3badf..f3412f9 100644 --- a/architect/feature_group_creator.py +++ b/architect/feature_group_creator.py @@ -1,3 +1,6 @@ +import logging + + def table_subsetter(config_item, table, features): "Return features matching a given table" if table == config_item: @@ -65,6 +68,11 @@ def subsets(self, feature_dictionary): Returns: (list) subsets of the feature dictionary, in the same table-based structure """ + logging.info( + 'Creating feature groups. config: %s, Master feature dictionary: %s', + self.definition, + feature_dictionary + ) subsets = [] for name, config in sorted(self.definition.items()): for config_item in config: @@ -72,9 +80,24 @@ def subsets(self, feature_dictionary): for table, features in feature_dictionary.items(): matching_features =\ self.subsetters[name](config_item, table, features) + logging.info( + 'Matching features for config item %s, table %s: %s', + config_item, + table, + matching_features + ) if len(matching_features) > 0: subset[table] = matching_features + else: + logging.warning( + 'No matching features found for config item %s, table %s, master features %s', + config_item, + table, + features + ) + subsets.append(subset) if not any(subset for subset in subsets if any(subset)): - raise Exception('No matching feature subsets found!') + raise Exception('No matching feature groups found!') + logging.info('Found %s total feature subsets', len(subsets)) return subsets diff --git a/architect/feature_group_mixer.py b/architect/feature_group_mixer.py index ff8f6cf..5a0656e 100644 --- a/architect/feature_group_mixer.py +++ b/architect/feature_group_mixer.py @@ -1,3 +1,6 @@ +import logging + + def leave_one_in(feature_groups): """For each group, return a copy of just that group @@ -65,7 +68,11 @@ def generate(self, feature_groups): each representing a group Returns: (list) of feature dicts """ - results = [] + final_results = [] for strategy in self.strategies: - results += self.strategy_lookup[strategy](feature_groups) - return results + logging.info('Mixing feature groups %s using strategy %s', feature_groups, strategy) + results = self.strategy_lookup[strategy](feature_groups) + logging.info('Mixing found new feature groups %s', results) + final_results += results + + return final_results diff --git a/architect/label_generators.py b/architect/label_generators.py index 4d3edfa..4f22321 100644 --- a/architect/label_generators.py +++ b/architect/label_generators.py @@ -40,7 +40,7 @@ def generate( start_date=start_date, label_window=label_window, ) - logging.debug(query) + logging.debug('Running label generation query: %s', query) self.db_engine.execute(query) return labels_table @@ -70,6 +70,7 @@ def generate_all_labels( len(label_windows)) for as_of_date in as_of_dates: for label_window in label_windows: + logging.info('Generating labels for as of date %s and label window %s', as_of_date, label_window) self.generate( start_date=as_of_date, label_window=label_window, diff --git a/architect/planner.py b/architect/planner.py index d77f25a..9d9b1db 100644 --- a/architect/planner.py +++ b/architect/planner.py @@ -1,5 +1,6 @@ import copy import itertools +import logging from metta import metta_io as metta @@ -125,6 +126,14 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries): updated_definitions = [] build_tasks = dict() for matrix_set in matrix_set_definitions: + logging.info('Making plans for matrix set %s', matrix_set) + logging.info( + 'Iterating over %s label names, %s label_types, %s states, %s feature dictionaries', + len(self.label_names), + len(self.label_types), + len(self.states), + len(feature_dictionaries) + ) train_matrix = matrix_set['train_matrix'] for label_name, label_type, state, feature_dictionary in itertools.product( self.label_names, @@ -143,6 +152,7 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries): 'train', ) train_uuid = metta.generate_uuid(train_metadata) + logging.info('Matrix UUID %s found for train metadata %s', train_uuid, train_metadata) if train_uuid not in build_tasks: build_tasks[train_uuid] = self._generate_build_task( train_metadata, @@ -150,6 +160,9 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries): train_matrix, feature_dictionary ) + logging.info('Train uuid %s not found in build tasks yet, so added', train_uuid) + else: + logging.info('Train uuid %s already found in build tasks', train_uuid) matrix_set_clone['train_uuid'] = train_uuid test_uuids = [] @@ -163,6 +176,7 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries): 'test', ) test_uuid = metta.generate_uuid(test_metadata) + logging.info('Matrix UUID %s found for test metadata %s', test_uuid, test_metadata) if test_uuid not in build_tasks: build_tasks[test_uuid] = self._generate_build_task( test_metadata, @@ -170,15 +184,24 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries): test_matrix, feature_dictionary ) + logging.info('Test uuid %s not found in build tasks yet, so added', test_uuid) + else: + logging.info('Test uuid %s already found in build tasks', test_uuid) test_uuids.append(test_uuid) matrix_set_clone['test_uuids'] = test_uuids updated_definitions.append(matrix_set_clone) + logging.info( + 'Planner is finished generating matrix plans. %s matrix definitions and %s unique build tasks found', + len(updated_definitions), + len(build_tasks.keys()) + ) return updated_definitions, build_tasks def build_all_matrices(self, *args, **kwargs): self.builder.build_all_matrices(*args, **kwargs) def build_matrix(self, *args, **kwargs): + logging.info('Building matrix with args %s', args) self.builder.build_matrix(*args, **kwargs) diff --git a/architect/state_table_generators.py b/architect/state_table_generators.py index 99519c9..dd8b58f 100644 --- a/architect/state_table_generators.py +++ b/architect/state_table_generators.py @@ -105,8 +105,10 @@ def sparse_table_name(self): @property def sparse_table_query_func(self): if self.dense_state_table: + logging.info('Dense state table passed to StateTableGenerator, so computing sparse table using it') return self._sparse_table_query_from_dense else: + logging.info('Dense state table not passed to StateTableGenerator, so computing sparse table using events table') return self._sparse_table_query_from_events def _all_known_states(self, dense_state_table): @@ -150,6 +152,7 @@ def _sparse_table_query_from_dense(self, as_of_dates): as_of_dates=[date.isoformat() for date in as_of_dates], state_column_string=', '.join(state_columns) ) + logging.debug('Assembled sparse state table query: %s', query) return query def _sparse_table_query_from_events(self, as_of_dates): @@ -177,6 +180,7 @@ def _sparse_table_query_from_events(self, as_of_dates): as_of_dates=[date.isoformat() for date in as_of_dates], active_state=DEFAULT_ACTIVE_STATE ) + logging.debug('Assembled sparse state table query: %s', query) return query def generate_sparse_table(self, as_of_dates): @@ -187,6 +191,7 @@ def generate_sparse_table(self, as_of_dates): as_of_dates (list of datetime.dates) Dates to include in the sparse state table """ + logging.debug('Generating sparse table using as_of_dates: %s', as_of_dates) self._generate_sparse_table(self.sparse_table_query_func(as_of_dates)) def _generate_sparse_table(self, generate_query): @@ -201,7 +206,7 @@ def _generate_sparse_table(self, generate_query): 'create index on {} (entity_id, as_of_date)' .format(self.sparse_table_name) ) - logging.info('Indices created for sparse state table') + logging.info('Indices created on entity_id and as_of_date for sparse state table') def clean_up(self): self.db_engine.execute(