remove print
fonhorst committed Jun 5, 2024
1 parent 391d6f6 commit 8c2e60e
Showing 7 changed files with 15 additions and 18 deletions.
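
This commit swaps ad-hoc print() calls for the standard logging module across the library, the distributed package, and the docs config. A minimal sketch of the pattern the touched files converge on (the function name and message below are illustrative, not taken from the diff):

    import logging

    # Module-level logger named after the module, so records can be filtered per file.
    logger = logging.getLogger(__name__)


    def prepare_part(path: str) -> None:
        # Progress message goes through the logger instead of a bare print().
        logger.info("Preparing vocabulary: %s", path)

An application entry point still needs to configure a handler, for example logging.basicConfig(level=logging.INFO), for these messages to become visible.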
2 changes: 0 additions & 2 deletions autotm/ontology/ontology_extractor.py
@@ -25,7 +25,6 @@ def format_attention(attention, layers=None, heads=None):
layer_attention = layer_attention.squeeze(0)
if heads:
layer_attention = layer_attention[heads]
# print(layer_attention[0])
squeezed.append(layer_attention)
# num_layers x num_heads x seq_len x seq_len
return torch.stack(squeezed)
@@ -138,7 +137,6 @@ def build_graph(autotm_model, topic_labels,
tokens = tokenizer.convert_ids_to_tokens(inputs[0])

res, tokens_new = get_attention_vals(attention, tokens, head_num=2, layer_num=0)
print(res, tokens_new)

try:
v, i = torch.topk(res.flatten(), 5)
1 change: 0 additions & 1 deletion autotm/preprocessing/dictionaries_preparation.py
@@ -156,7 +156,6 @@ def write_vw_dict(res_dict, vocab_words, fpath):
try:
fopen.write(f"{word}" + " " + " ".join(res_dict[word]) + "\n")
except:
# print(f'The word {word} is not found')
pass
logger.info(f"{fpath} is ready!")

1 change: 0 additions & 1 deletion autotm/preprocessing/text_preprocessing.py
@@ -141,7 +141,6 @@ def lemmatize_text_en(text):


def lemmatize_text(df, **kwargs):
# print(kwargs)
lang = kwargs["lang"]
col_to_process = kwargs["col_to_process"]
if lang == "ru":
9 changes: 6 additions & 3 deletions autotm/visualization/dynamic_tracker.py
@@ -1,3 +1,4 @@
import logging
import os
import time
import warnings
@@ -12,6 +13,8 @@
FITNESS_DIFF_COL = "fitness_diff"
PARAMS_DIST_COL = "params_dist"

logger = logging.getLogger(__name__)


class MetricsCollector:
def __init__(
@@ -163,7 +166,7 @@ def save_fitness(self, generation: int, params: list, fitness: float):

def get_metric_df(self):
if self.metric_df is not None:
print("Metric df already exists")
logger.info("Metric df already exists")
else:
population_max = []
for i in range(self.num_generations + 1):
@@ -177,7 +180,7 @@ def get_metric_df(self):
columns=[GENERATION_COL, FITNESS_COL],
)
if self.mutation_df is not None:
print("Mutation df already exists")
logger.info("Mutation df already exists")
else:
dfs = []
for gen in self.mutation_changes:
@@ -196,7 +199,7 @@ def get_metric_df(self):
# warnings.warn("No mutations changes have been found to save", RuntimeWarning)
self.mutation_df = pd.DataFrame([])
if self.crossover_df is not None:
print("Crossover df already exists")
logger.info("Crossover df already exists")
else:
dfs = []
for gen in self.crossover_changes:
6 changes: 3 additions & 3 deletions distributed/autotm_distributed/preprocessing.py
@@ -153,7 +153,7 @@ def prepare_voc(batches_dir, vw_path, data_path, column_name='processed_text'):
try:
for file in os.listdir(data_path):
if file.startswith('part'):
print('part_{}'.format(num_parts), end='\r')
logger.info('Preparing vocabulary: part_{}'.format(num_parts))
if file.split('.')[-1] == 'csv':
part = pd.read_csv(os.path.join(data_path, file))
else:
@@ -165,14 +165,14 @@ def prepare_voc(batches_dir, vw_path, data_path, column_name='processed_text'):
num_parts += 1

except NotADirectoryError:
print('part 1/1')
logger.info('Preparing vocabulary: part 1/1')
part = pd.read_csv(data_path)
part_processed = part[column_name].tolist()
for text in part_processed:
result = return_string_part('@default_class', text)
ofile.write(result + '\n')

print(' batches {} \n vocabulary {} \n are ready'.format(batches_dir, vw_path))
logger.info('Preparing vocabulary: batches {} \n vocabulary {} \n are ready'.format(batches_dir, vw_path))


def prepare_batch_vectorizer(batches_dir, vw_path, data_path, column_name='processed_text'):
13 changes: 6 additions & 7 deletions distributed/autotm_distributed/tm.py
@@ -431,7 +431,7 @@ def train(self, option='offline'):

if self.n1 > 0:
if self._early_stopping():
print('Early stopping is triggered')
logger.info('Early stopping is triggered')
return

# if ((self.n2 != 0) and (self.B != 0)):
@@ -470,7 +470,7 @@ def train(self, option='offline'):

if self.n1 + self.n2 + self.n3 > 0:
if self._early_stopping():
print('Early stopping is triggered')
logger.info('Early stopping is triggered')
return

if self.n4 != 0:
@@ -524,7 +524,6 @@ def get_topics(self):
def _get_avg_coherence_score(self, for_individ_fitness=False):
coherences_main, coherences_back = self.__return_all_tokens_coherence(self.model, s=self.S, b=self.B)
if for_individ_fitness:
# print('COMPONENTS: ', np.mean(list(coherences_main.values())), np.min(list(coherences_main.values())))
return np.mean(list(coherences_main.values())) + np.min(list(coherences_main.values()))
return np.mean(list(coherences_main.values()))

@@ -680,7 +679,7 @@ def metrics_get_avg_coherence_score(self, for_individ_fitness=False) -> MetricsS
# coeff = self._calculate_labels_coeff()
coeff = 1.0
if for_individ_fitness:
print('COMPONENTS: ', np.mean(list(coherences_main.values())), np.min(list(coherences_main.values())))
logger.info('COMPONENTS: %s %s', np.mean(list(coherences_main.values())), np.min(list(coherences_main.values())))
avg_coherence_score = \
np.mean(list(coherences_main.values())) + np.min(list(coherences_main.values())) * coeff
else:
@@ -709,9 +708,9 @@ def metrics_get_last_avg_vals(self, texts, total_tokens,
topic_significance_uni = np.mean(ts_uniform(topic_word_dist))
topic_significance_vacuous = np.mean(ts_vacuous(doc_topic_dist, topic_word_dist, total_tokens))
topic_significance_back = np.mean(ts_bground(doc_topic_dist))
print(f'Topic Significance - Uniform Distribution Over Words: {topic_significance_uni}')
print(f'Topic Significance - Vacuous Semantic Distribution: {topic_significance_vacuous}')
print(f'Topic Significance - Background Distribution: {topic_significance_back}')
logger.info(f'Topic Significance - Uniform Distribution Over Words: {topic_significance_uni}')
logger.info(f'Topic Significance - Vacuous Semantic Distribution: {topic_significance_vacuous}')
logger.info(f'Topic Significance - Background Distribution: {topic_significance_back}')
else:
topic_significance_uni = None
topic_significance_vacuous = None
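A note on the logger calls for the COMPONENTS and topic-significance output above: logging interpolates extra positional arguments with %-style placeholders only when a record is actually emitted, so values should be passed via placeholders or pre-formatted in an f-string. A small sketch, with illustrative values not taken from the diff:

    import logging

    logger = logging.getLogger(__name__)
    mean_val, min_val = 0.42, 0.13  # illustrative values, not taken from the diff

    # Lazy %-style formatting: interpolation happens only if the record is emitted.
    logger.info('COMPONENTS: %s %s', mean_val, min_val)

    # An f-string also works, at the cost of always building the string.
    logger.info(f'COMPONENTS: {mean_val} {min_val}')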
1 change: 0 additions & 1 deletion docs/conf.py
@@ -11,7 +11,6 @@

CURR_PATH = os.path.abspath(os.path.dirname(__file__))
LIB_PATH = os.path.join(CURR_PATH, os.path.pardir)
print(LIB_PATH)
sys.path.insert(0, LIB_PATH)

project = "AutoTM"
