Fix fname: rename the preprocessed dataset file from ppp.csv to prep_df.csv

ngc436 · ngc436 · commit 31c0779e37f0 · 2023-11-26T22:18:13.000+03:00
diff --git a/README.md b/README.md
@@ -1,8 +1,6 @@
 
 <p align="center">
-<picture>
 <img src="docs/img/MyLogo.png" alt="Library scheme" height="200"/>
-</picture>
 </p>
 
 <h2 align="center">
diff --git a/autotm/algorithms_for_tuning/genetic_algorithm/mutation.py b/autotm/algorithms_for_tuning/genetic_algorithm/mutation.py
@@ -11,12 +11,43 @@ def mutation_one_param(
         high_spm: float,
         low_n: int,
         high_n: int,
-        low_back: float,
-        high_back: float,
+        low_back: int,
+        high_back: int,
         low_decor: float,
         high_decor: float,
         elem_mutation_prob: float = 0.1,
 ):
+    """
+    One-point mutation
+
+    Checking the probability of mutation for each of the elements
+
+    Parameters
+    ----------
+    individ: List[float]
+        Individual to be processed
+    low_spb: float
+        The lower possible bound for sparsity regularizer of back topics
+    high_spb: float
+        The higher possible bound for sparsity regularizer of back topics
+    low_spm: float
+        The lower possible bound for sparsity regularizer of specific topics
+    high_spm: float
+        The higher possible bound for sparsity regularizer of specific topics
+    low_n: int
+        The lower possible bound for amount of iterations between stages
+    high_n: int
+        The higher possible bound for amount of iterations between stages
+    low_back: int
+        The lower possible bound for amount of back topics
+    high_back: int
+        The higher possible bound for amount of back topics
+
+
+    Returns
+    ----------
+    Updated individual with mutated elements
+    """
     for i in range(len(individ)):
         if random.random() <= elem_mutation_prob:
             if i in [2, 3]:
diff --git a/autotm/fitness/tm.py b/autotm/fitness/tm.py
@@ -59,7 +59,7 @@ class Dataset:
     _ppmi_dict_df_path: str = "ppmi_df.txt"
     _ppmi_dict_tf_path: str = "ppmi_tf.txt"
     _mutual_info_dict_path: str = "mutual_info_dict.pkl"
-    _texts_path: str = "ppp.csv"
+    _texts_path: str = "prep_df.csv"
     _labels_path = "labels.pkl"
 
     def __init__(self, base_path: str, topic_count: int):
diff --git a/autotm/preprocessing/dictionaries_preparation.py b/autotm/preprocessing/dictionaries_preparation.py
@@ -322,7 +322,7 @@ def mutual_info_dict_preparation(fname):
 
 
 def prepare_all_artifacts(save_path: str):
-    DATASET_PATH = os.path.join(save_path, "ppp.csv")
+    DATASET_PATH = os.path.join(save_path, "prep_df.csv")
     BATCHES_DIR = os.path.join(save_path, "batches")
     WV_PATH = os.path.join(save_path, "test_set_data_voc.txt")
     COOC_DICTIONARY_PATH = os.path.join(save_path, "cooc_dictionary.txt")
@@ -333,7 +333,7 @@ def prepare_all_artifacts(save_path: str):
     ppmi_dict_df = os.path.join(save_path, "ppmi_df.txt")
     ppmi_dict_tf = os.path.join(save_path, "ppmi_tf.txt")
     MUTUAL_INFO_DICT_PATH = os.path.join(save_path, "mutual_info_dict.pkl")
-    DOCUMENTS_TO_BATCH_PATH = os.path.join(save_path, "ppp.csv")
+    DOCUMENTS_TO_BATCH_PATH = os.path.join(save_path, "prep_df.csv")
 
     # TODO: check why batch vectorizer is returned (unused further)
     prepare_batch_vectorizer(
diff --git a/autotm/preprocessing/text_preprocessing.py b/autotm/preprocessing/text_preprocessing.py
@@ -164,7 +164,7 @@ def process_dataset(
     :return:
     """
     os.makedirs(save_path, exist_ok=True)
-    save_path = os.path.join(save_path, "ppp.csv")
+    save_path = os.path.join(save_path, "prep_df.csv")
     data = pd.read_csv(fname) if isinstance(fname, str) else cast(pd.DataFrame, fname)
     data = parallelize_dataframe(
         data, lemmatize_text, n_cores, lang=lang, col_to_process=col_to_process