Merge pull request #43 from dssg/more_logging
Increase logging to firehose level
thcrock authored Sep 15, 2017
2 parents 1fa3127 + 0c8681f commit d6ea36b
Showing 8 changed files with 100 additions and 11 deletions.
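The diff below threads parameterized logging calls through every stage of the matrix-building pipeline. As a usage note, here is a minimal sketch (not part of this commit) of how a caller could surface the new firehose-level output, assuming the standard library logging module is configured once at the application entry point:

import logging

# Emit everything from DEBUG up, including the query-level messages
# added in this commit, with timestamps and level names.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s %(message)s',
)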
12 changes: 7 additions & 5 deletions architect/builders.py
@@ -152,6 +152,7 @@ def make_entity_date_table(
table_name=table_name,
index_query=indices_query
)
logging.info('Creating matrix-specific entity-date table for matrix %s with query %s', matrix_uuid, query)
self.engine.execute(query)

return table_name
@@ -248,7 +249,7 @@ def build_matrix(

logging.info('Creating matrix %s > %s', matrix_metadata['matrix_id'], matrix_filename)
# make the entity time table and query the labels and features tables
logging.info('Making entity date table')
logging.info('Making entity date table for matrix %s', matrix_uuid)
entity_date_table_name = self.make_entity_date_table(
as_of_times,
label_name,
@@ -258,15 +259,15 @@
matrix_uuid,
matrix_metadata['label_window']
)
logging.info('Writing feature group data')
logging.info('Extracting feature group data from database into file for matrix %s', matrix_uuid)
features_csv_names = self.write_features_data(
as_of_times,
feature_dictionary,
entity_date_table_name,
matrix_uuid
)
try:
logging.info('Writing label data')
logging.info('Extracting label data from database into file for matrix %s', matrix_uuid)
labels_csv_name = self.write_labels_data(
label_name,
label_type,
@@ -277,7 +278,7 @@
features_csv_names.insert(0, labels_csv_name)

# stitch together the csvs
logging.info('Merging features data')
logging.info('Merging feature files for matrix %s', matrix_uuid)
output = self.merge_feature_csvs(
features_csv_names,
matrix_directory,
@@ -289,7 +290,7 @@
self.remove_file(csv_name)
try:
# store the matrix
logging.info('Archiving matrix with metta')
logging.info('Archiving matrix %s with metta', matrix_uuid)
metta.archive_matrix(
matrix_config=matrix_metadata,
df_matrix=output,
@@ -418,6 +419,7 @@ def write_to_csv(self, query_string, file_name, header='HEADER'):
:rtype: none
"""
matrix_csv = self.open_fh_for_writing(file_name)
logging.debug('Copying to CSV query %s', query_string)
try:
copy_sql = 'COPY ({query}) TO STDOUT WITH CSV {head}'.format(
query=query_string,
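A pattern worth noting in the builders.py changes above: each call passes the query as a separate logging argument ('... %s', query) instead of pre-formatting the message. A small illustration (not from this repository) of why that is the idiomatic choice:

import logging

query = 'SELECT entity_id FROM features'

# Lazy interpolation: the message is rendered only if the logger
# accepts DEBUG records, so large query strings cost nothing when
# debug logging is off.
logging.debug('Copying to CSV query %s', query)

# An f-string, by contrast, is always evaluated, even when the
# record is ultimately discarded.
logging.debug(f'Copying to CSV query {query}')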
11 changes: 11 additions & 0 deletions architect/feature_dictionary_creator.py
@@ -1,3 +1,4 @@
import logging
from architect.utils import str_in_sql


@@ -32,6 +33,11 @@ def feature_dictionary(self, feature_table_names, index_column_lookup):
)
)
]
logging.info(
'Feature names found for table %s: %s',
feature_table_name,
feature_names
)
feature_dictionary[feature_table_name] = feature_names
return(feature_dictionary)

@@ -57,5 +63,10 @@ def _build_feature_names_query(self, table_name, index_columns):
schema=self.features_schema_name,
index_columns=str_in_sql(index_columns)
)
logging.info(
'Extracting all possible feature names for table %s with query %s',
table_name,
feature_names_query
)

return(feature_names_query)
17 changes: 17 additions & 0 deletions architect/feature_generators.py
@@ -114,10 +114,21 @@ def _compute_choices(self, choice_query):
for row
in self.db_engine.execute(choice_query)
]
logging.info(
'Computed list of categoricals: %s for choice query: %s',
self.categorical_cache[choice_query],
choice_query
)
return self.categorical_cache[choice_query]

def _build_choices(self, categorical):
logging.info(
'Building categorical choices for column %s, metrics %s',
categorical['column'],
categorical['metrics']
)
if 'choices' in categorical:
logging.info('Found list of configured choices: %s', categorical['choices'])
return categorical['choices']
else:
return self._compute_choices(categorical['choice_query'])
@@ -150,6 +161,11 @@ def _build_array_categoricals(self, categorical_config):
]

def _aggregation(self, aggregation_config, feature_dates):
logging.info(
'Building collate.SpacetimeAggregation for config %s and as_of_dates %s',
aggregation_config,
feature_dates
)
aggregates = [
Aggregate(aggregate['quantity'], aggregate['metrics'])
for aggregate in aggregation_config.get('aggregates', [])
@@ -267,6 +283,7 @@ def run_commands(self, command_list):
conn = self.db_engine.connect()
trans = conn.begin()
for command in command_list:
logging.debug('Executing feature generation query: %s', command)
conn.execute(command)
trans.commit()

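run_commands above executes the whole command list on one connection inside a single transaction, now logging each statement at DEBUG level before it runs. A minimal sketch of the same commit-or-nothing pattern, using sqlite3 from the standard library rather than the module's SQLAlchemy engine:

import logging
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE features (entity_id INTEGER, f1 REAL)')
commands = [
    'INSERT INTO features VALUES (1, 0.5)',
    'INSERT INTO features VALUES (2, 0.7)',
]
try:
    for command in commands:
        logging.debug('Executing feature generation query: %s', command)
        conn.execute(command)
    conn.commit()    # both statements land together
except sqlite3.Error:
    conn.rollback()  # any failure undoes the whole batch
    raise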
25 changes: 24 additions & 1 deletion architect/feature_group_creator.py
@@ -1,3 +1,6 @@
import logging


def table_subsetter(config_item, table, features):
"Return features matching a given table"
if table == config_item:
@@ -65,16 +68,36 @@ def subsets(self, feature_dictionary):
Returns: (list) subsets of the feature dictionary, in the same
table-based structure
"""
logging.info(
'Creating feature groups. config: %s, Master feature dictionary: %s',
self.definition,
feature_dictionary
)
subsets = []
for name, config in sorted(self.definition.items()):
for config_item in config:
subset = {}
for table, features in feature_dictionary.items():
matching_features =\
self.subsetters[name](config_item, table, features)
logging.info(
'Matching features for config item %s, table %s: %s',
config_item,
table,
matching_features
)
if len(matching_features) > 0:
subset[table] = matching_features
else:
logging.warning(
'No matching features found for config item %s, table %s, master features %s',
config_item,
table,
features
)

subsets.append(subset)
if not any(subset for subset in subsets if any(subset)):
raise Exception('No matching feature subsets found!')
raise Exception('No matching feature groups found!')
logging.info('Found %s total feature subsets', len(subsets))
return subsets
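subsets() applies one subsetter function per config key to every (table, features) pair, keeping only non-empty matches. A compact, runnable illustration of the table-based subsetter defined at the top of this file (the sample data, and the empty-list fallback for the no-match case, are assumptions of this sketch):

def table_subsetter(config_item, table, features):
    # Return features matching a given table; returning [] on no
    # match is assumed here, consistent with the len() check above.
    if table == config_item:
        return features
    return []

feature_dictionary = {
    'arrests_aggregation': ['arrests_count_1y'],
    'demographics_aggregation': ['age'],
}
subset = {}
for table, features in feature_dictionary.items():
    matching_features = table_subsetter('arrests_aggregation', table, features)
    if len(matching_features) > 0:
        subset[table] = matching_features
print(subset)  # {'arrests_aggregation': ['arrests_count_1y']}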
13 changes: 10 additions & 3 deletions architect/feature_group_mixer.py
@@ -1,3 +1,6 @@
import logging


def leave_one_in(feature_groups):
"""For each group, return a copy of just that group
@@ -65,7 +68,11 @@ def generate(self, feature_groups):
each representing a group
Returns: (list) of feature dicts
"""
results = []
final_results = []
for strategy in self.strategies:
results += self.strategy_lookup[strategy](feature_groups)
return results
logging.info('Mixing feature groups %s using strategy %s', feature_groups, strategy)
results = self.strategy_lookup[strategy](feature_groups)
logging.info('Mixing found new feature groups %s', results)
final_results += results

return final_results
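The generate() rewrite above dispatches through a name-to-function table and accumulates results one strategy at a time so that each pass can be logged. A stripped-down sketch of the same dispatch pattern (the strategy names and functions here are illustrative, not the module's actual API):

import logging

def leave_one_in(feature_groups):
    # One subset per group, each containing only that group.
    return [[group] for group in feature_groups]

def all_groups(feature_groups):
    # A single subset containing every group.
    return [list(feature_groups)]

STRATEGY_LOOKUP = {'leave-one-in': leave_one_in, 'all': all_groups}

def generate(feature_groups, strategies):
    final_results = []
    for strategy in strategies:
        logging.info('Mixing feature groups %s using strategy %s',
                     feature_groups, strategy)
        results = STRATEGY_LOOKUP[strategy](feature_groups)
        logging.info('Mixing found new feature groups %s', results)
        final_results += results
    return final_results

print(generate(['prior_arrests', 'demographics'], ['leave-one-in', 'all']))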
3 changes: 2 additions & 1 deletion architect/label_generators.py
@@ -40,7 +40,7 @@ def generate(
start_date=start_date,
label_window=label_window,
)
logging.debug(query)
logging.debug('Running label generation query: %s', query)
self.db_engine.execute(query)
return labels_table

@@ -70,6 +70,7 @@ def generate_all_labels(
len(label_windows))
for as_of_date in as_of_dates:
for label_window in label_windows:
logging.info('Generating labels for as of date %s and label window %s', as_of_date, label_window)
self.generate(
start_date=as_of_date,
label_window=label_window,
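generate_all_labels iterates every (as_of_date, label_window) pair with nested loops, now logging each pair as it is generated. For reference, an equivalent formulation (illustrative only, with sample values) using itertools.product, the same idiom planner.py uses below:

import itertools

as_of_dates = ['2016-01-01', '2016-07-01']
label_windows = ['6month', '1y']

# Equivalent to the nested for-loops: one pass over the cartesian
# product of dates and windows.
for as_of_date, label_window in itertools.product(as_of_dates, label_windows):
    print(as_of_date, label_window)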
23 changes: 23 additions & 0 deletions architect/planner.py
@@ -1,5 +1,6 @@
import copy
import itertools
import logging

from metta import metta_io as metta

@@ -125,6 +126,14 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries):
updated_definitions = []
build_tasks = dict()
for matrix_set in matrix_set_definitions:
logging.info('Making plans for matrix set %s', matrix_set)
logging.info(
'Iterating over %s label names, %s label_types, %s states, %s feature dictionaries',
len(self.label_names),
len(self.label_types),
len(self.states),
len(feature_dictionaries)
)
train_matrix = matrix_set['train_matrix']
for label_name, label_type, state, feature_dictionary in itertools.product(
self.label_names,
@@ -143,13 +152,17 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries):
'train',
)
train_uuid = metta.generate_uuid(train_metadata)
logging.info('Matrix UUID %s found for train metadata %s', train_uuid, train_metadata)
if train_uuid not in build_tasks:
build_tasks[train_uuid] = self._generate_build_task(
train_metadata,
train_uuid,
train_matrix,
feature_dictionary
)
logging.info('Train uuid %s not found in build tasks yet, so added', train_uuid)
else:
logging.info('Train uuid %s already found in build tasks', train_uuid)
matrix_set_clone['train_uuid'] = train_uuid

test_uuids = []
@@ -163,22 +176,32 @@ def generate_plans(self, matrix_set_definitions, feature_dictionaries):
'test',
)
test_uuid = metta.generate_uuid(test_metadata)
logging.info('Matrix UUID %s found for test metadata %s', test_uuid, test_metadata)
if test_uuid not in build_tasks:
build_tasks[test_uuid] = self._generate_build_task(
test_metadata,
test_uuid,
test_matrix,
feature_dictionary
)
logging.info('Test uuid %s not found in build tasks yet, so added', test_uuid)
else:
logging.info('Test uuid %s already found in build tasks', test_uuid)

test_uuids.append(test_uuid)
matrix_set_clone['test_uuids'] = test_uuids
updated_definitions.append(matrix_set_clone)

logging.info(
'Planner is finished generating matrix plans. %s matrix definitions and %s unique build tasks found',
len(updated_definitions),
len(build_tasks.keys())
)
return updated_definitions, build_tasks

def build_all_matrices(self, *args, **kwargs):
self.builder.build_all_matrices(*args, **kwargs)

def build_matrix(self, *args, **kwargs):
logging.info('Building matrix with args %s', args)
self.builder.build_matrix(*args, **kwargs)
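The new planner logging makes its deduplication visible: build tasks are keyed on a UUID derived from the matrix metadata, so identical train or test matrices are only queued for building once. A simplified sketch of that pattern (make_key below is a hypothetical stand-in for metta.generate_uuid):

import hashlib
import json

def make_key(metadata):
    # Hypothetical stand-in for metta.generate_uuid: a stable digest
    # of the metadata, so identical definitions map to one task.
    blob = json.dumps(metadata, sort_keys=True, default=str)
    return hashlib.md5(blob.encode()).hexdigest()

definitions = [
    {'label_name': 'outcome', 'label_window': '1y'},
    {'label_name': 'outcome', 'label_window': '1y'},  # duplicate on purpose
]
build_tasks = {}
for metadata in definitions:
    key = make_key(metadata)
    if key not in build_tasks:  # mirrors the planner's membership check
        build_tasks[key] = metadata
assert len(build_tasks) == 1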
7 changes: 6 additions & 1 deletion architect/state_table_generators.py
@@ -105,8 +105,10 @@ def sparse_table_name(self):
@property
def sparse_table_query_func(self):
if self.dense_state_table:
logging.info('Dense state table passed to StateTableGenerator, so computing sparse table using it')
return self._sparse_table_query_from_dense
else:
logging.info('Dense state table not passed to StateTableGenerator, so computing sparse table using events table')
return self._sparse_table_query_from_events

def _all_known_states(self, dense_state_table):
@@ -150,6 +152,7 @@ def _sparse_table_query_from_dense(self, as_of_dates):
as_of_dates=[date.isoformat() for date in as_of_dates],
state_column_string=', '.join(state_columns)
)
logging.debug('Assembled sparse state table query: %s', query)
return query

def _sparse_table_query_from_events(self, as_of_dates):
@@ -177,6 +180,7 @@ def _sparse_table_query_from_events(self, as_of_dates):
as_of_dates=[date.isoformat() for date in as_of_dates],
active_state=DEFAULT_ACTIVE_STATE
)
logging.debug('Assembled sparse state table query: %s', query)
return query

def generate_sparse_table(self, as_of_dates):
@@ -187,6 +191,7 @@ def generate_sparse_table(self, as_of_dates):
as_of_dates (list of datetime.dates) Dates to include in the sparse
state table
"""
logging.debug('Generating sparse table using as_of_dates: %s', as_of_dates)
self._generate_sparse_table(self.sparse_table_query_func(as_of_dates))

def _generate_sparse_table(self, generate_query):
@@ -201,7 +206,7 @@ def _generate_sparse_table(self, generate_query):
'create index on {} (entity_id, as_of_date)'
.format(self.sparse_table_name)
)
logging.info('Indices created for sparse state table')
logging.info('Indices created on entity_id and as_of_date for sparse state table')

def clean_up(self):
self.db_engine.execute(
