remove print
fonhorst committed Jun 5, 2024
1 parent 391d6f6 commit 8c2e60e
Showing 7 changed files with 15 additions and 18 deletions.
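
This commit swaps ad-hoc print() calls for the standard logging module across the library, the distributed package, and the docs config. A minimal sketch of the pattern the touched files converge on (the function name and message below are illustrative, not taken from the diff):

    import logging

    # Module-level logger named after the module, so records can be filtered per file.
    logger = logging.getLogger(__name__)


    def prepare_part(path: str) -> None:
        # Progress message goes through the logger instead of a bare print().
        logger.info("Preparing vocabulary: %s", path)

An application entry point still needs to configure a handler, for example logging.basicConfig(level=logging.INFO), for these messages to become visible.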
2 changes: 0 additions & 2 deletions autotm/ontology/ontology_extractor.py
@@ -25,7 +25,6 @@ def format_attention(attention, layers=None, heads=None):
layer_attention = layer_attention.squeeze(0)
if heads:
layer_attention = layer_attention[heads]
# print(layer_attention[0])
squeezed.append(layer_attention)
# num_layers x num_heads x seq_len x seq_len
return torch.stack(squeezed)
@@ -138,7 +137,6 @@ def build_graph(autotm_model, topic_labels,
tokens = tokenizer.convert_ids_to_tokens(inputs[0])

res, tokens_new = get_attention_vals(attention, tokens, head_num=2, layer_num=0)
print(res, tokens_new)

try:
v, i = torch.topk(res.flatten(), 5)
1 change: 0 additions & 1 deletion autotm/preprocessing/dictionaries_preparation.py
@@ -156,7 +156,6 @@ def write_vw_dict(res_dict, vocab_words, fpath):
try:
fopen.write(f"{word}" + " " + " ".join(res_dict[word]) + "\n")
except:
# print(f'The word {word} is not found')
pass
logger.info(f"{fpath} is ready!")

1 change: 0 additions & 1 deletion autotm/preprocessing/text_preprocessing.py
@@ -141,7 +141,6 @@ def lemmatize_text_en(text):


def lemmatize_text(df, **kwargs):
# print(kwargs)
lang = kwargs["lang"]
col_to_process = kwargs["col_to_process"]
if lang == "ru":
9 changes: 6 additions & 3 deletions autotm/visualization/dynamic_tracker.py
@@ -1,3 +1,4 @@
import logging
import os
import time
import warnings
@@ -12,6 +13,8 @@
FITNESS_DIFF_COL = "fitness_diff"
PARAMS_DIST_COL = "params_dist"

logger = logging.getLogger(__name__)


class MetricsCollector:
def __init__(
@@ -163,7 +166,7 @@ def save_fitness(self, generation: int, params: list, fitness: float):

def get_metric_df(self):
if self.metric_df is not None:
print("Metric df already exists")
logger.info("Metric df already exists")
else:
population_max = []
for i in range(self.num_generations + 1):
@@ -177,7 +180,7 @@ def get_metric_df(self):
columns=[GENERATION_COL, FITNESS_COL],
)
if self.mutation_df is not None:
print("Mutation df already exists")
logger.info("Mutation df already exists")
else:
dfs = []
for gen in self.mutation_changes:
@@ -196,7 +199,7 @@ def get_metric_df(self):
# warnings.warn("No mutations changes have been found to save", RuntimeWarning)
self.mutation_df = pd.DataFrame([])
if self.crossover_df is not None:
print("Crossover df already exists")
logger.info("Crossover df already exists")
else:
dfs = []
for gen in self.crossover_changes:
6 changes: 3 additions & 3 deletions distributed/autotm_distributed/preprocessing.py
@@ -153,7 +153,7 @@ def prepare_voc(batches_dir, vw_path, data_path, column_name='processed_text'):
try:
for file in os.listdir(data_path):
if file.startswith('part'):
print('part_{}'.format(num_parts), end='\r')
logger.info('Preparing vocabulary: part_{}'.format(num_parts))
if file.split('.')[-1] == 'csv':
part = pd.read_csv(os.path.join(data_path, file))
else:
@@ -165,14 +165,14 @@ def prepare_voc(batches_dir, vw_path, data_path, column_name='processed_text'):
num_parts += 1

except NotADirectoryError:
print('part 1/1')
logger.info('Preparing vocabulary: part 1/1')
part = pd.read_csv(data_path)
part_processed = part[column_name].tolist()
for text in part_processed:
result = return_string_part('@default_class', text)
ofile.write(result + '\n')

print(' batches {} \n vocabulary {} \n are ready'.format(batches_dir, vw_path))
logger.info('Preparing vocabulary: batches {} \n vocabulary {} \n are ready'.format(batches_dir, vw_path))


def prepare_batch_vectorizer(batches_dir, vw_path, data_path, column_name='processed_text'):
13 changes: 6 additions & 7 deletions distributed/autotm_distributed/tm.py
@@ -431,7 +431,7 @@ def train(self, option='offline'):

if self.n1 > 0:
if self._early_stopping():
print('Early stopping is triggered')
logger.info('Early stopping is triggered')
return

# if ((self.n2 != 0) and (self.B != 0)):
@@ -470,7 +470,7 @@ def train(self, option='offline'):

if self.n1 + self.n2 + self.n3 > 0:
if self._early_stopping():
print('Early stopping is triggered')
logger.info('Early stopping is triggered')
return

if self.n4 != 0:
@@ -524,7 +524,6 @@ def get_topics(self):
def _get_avg_coherence_score(self, for_individ_fitness=False):
coherences_main, coherences_back = self.__return_all_tokens_coherence(self.model, s=self.S, b=self.B)
if for_individ_fitness:
# print('COMPONENTS: ', np.mean(list(coherences_main.values())), np.min(list(coherences_main.values())))
return np.mean(list(coherences_main.values())) + np.min(list(coherences_main.values()))
return np.mean(list(coherences_main.values()))

@@ -680,7 +679,7 @@ def metrics_get_avg_coherence_score(self, for_individ_fitness=False) -> MetricsS
# coeff = self._calculate_labels_coeff()
coeff = 1.0
if for_individ_fitness:
print('COMPONENTS: ', np.mean(list(coherences_main.values())), np.min(list(coherences_main.values())))
logger.info('COMPONENTS: %s %s', np.mean(list(coherences_main.values())), np.min(list(coherences_main.values())))
avg_coherence_score = \
np.mean(list(coherences_main.values())) + np.min(list(coherences_main.values())) * coeff
else:
@@ -709,9 +708,9 @@ def metrics_get_last_avg_vals(self, texts, total_tokens,
topic_significance_uni = np.mean(ts_uniform(topic_word_dist))
topic_significance_vacuous = np.mean(ts_vacuous(doc_topic_dist, topic_word_dist, total_tokens))
topic_significance_back = np.mean(ts_bground(doc_topic_dist))
print(f'Topic Significance - Uniform Distribution Over Words: {topic_significance_uni}')
print(f'Topic Significance - Vacuous Semantic Distribution: {topic_significance_vacuous}')
print(f'Topic Significance - Background Distribution: {topic_significance_back}')
logger.info(f'Topic Significance - Uniform Distribution Over Words: {topic_significance_uni}')
logger.info(f'Topic Significance - Vacuous Semantic Distribution: {topic_significance_vacuous}')
logger.info(f'Topic Significance - Background Distribution: {topic_significance_back}')
else:
topic_significance_uni = None
topic_significance_vacuous = None
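A note on the logger calls for the COMPONENTS and topic-significance output above: logging interpolates extra positional arguments with %-style placeholders only when a record is actually emitted, so values should be passed via placeholders or pre-formatted in an f-string. A small sketch, with illustrative values not taken from the diff:

    import logging

    logger = logging.getLogger(__name__)
    mean_val, min_val = 0.42, 0.13  # illustrative values, not taken from the diff

    # Lazy %-style formatting: interpolation happens only if the record is emitted.
    logger.info('COMPONENTS: %s %s', mean_val, min_val)

    # An f-string also works, at the cost of always building the string.
    logger.info(f'COMPONENTS: {mean_val} {min_val}')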
1 change: 0 additions & 1 deletion docs/conf.py
@@ -11,7 +11,6 @@

CURR_PATH = os.path.abspath(os.path.dirname(__file__))
LIB_PATH = os.path.join(CURR_PATH, os.path.pardir)
print(LIB_PATH)
sys.path.insert(0, LIB_PATH)

project = "AutoTM"
