Skip to content

Commit

Permalink
Fix fname
Browse files Browse the repository at this point in the history
  • Loading branch information
ngc436 committed Nov 26, 2023
1 parent 05e9261 commit 31c0779
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 8 deletions.
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@

<p align="center">
<picture>
<img src="docs/img/MyLogo.png" alt="Library scheme" height="200"/>
</picture>
</p>

<h2 align="center">
Expand Down
35 changes: 33 additions & 2 deletions autotm/algorithms_for_tuning/genetic_algorithm/mutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,43 @@ def mutation_one_param(
high_spm: float,
low_n: int,
high_n: int,
low_back: float,
high_back: float,
low_back: int,
high_back: int,
low_decor: float,
high_decor: float,
elem_mutation_prob: float = 0.1,
):
"""
One-point mutation.
The mutation probability is checked independently for each element of the individual.
Parameters
----------
individ: List[float]
Individual to be processed
low_spb: float
The lower possible bound for sparsity regularizer of back topics
high_spb: float
The higher possible bound for sparsity regularizer of back topics
low_spm: float
The lower possible bound for sparsity regularizer of specific topics
high_spm: float
The higher possible bound for sparsity regularizer of specific topics
low_n: int
The lower possible bound for amount of iterations between stages
high_n: int
The higher possible bound for amount of iterations between stages
low_back: int
The lower possible bound for amount of back topics
high_back: int
The higher possible bound for amount of back topics
Returns
-------
Updated individual with mutated elements
"""
for i in range(len(individ)):
if random.random() <= elem_mutation_prob:
if i in [2, 3]:
Expand Down
2 changes: 1 addition & 1 deletion autotm/fitness/tm.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class Dataset:
_ppmi_dict_df_path: str = "ppmi_df.txt"
_ppmi_dict_tf_path: str = "ppmi_tf.txt"
_mutual_info_dict_path: str = "mutual_info_dict.pkl"
_texts_path: str = "ppp.csv"
_texts_path: str = "prep_df.csv"
_labels_path = "labels.pkl"

def __init__(self, base_path: str, topic_count: int):
Expand Down
4 changes: 2 additions & 2 deletions autotm/preprocessing/dictionaries_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def mutual_info_dict_preparation(fname):


def prepare_all_artifacts(save_path: str):
DATASET_PATH = os.path.join(save_path, "ppp.csv")
DATASET_PATH = os.path.join(save_path, "prep_df.csv")
BATCHES_DIR = os.path.join(save_path, "batches")
WV_PATH = os.path.join(save_path, "test_set_data_voc.txt")
COOC_DICTIONARY_PATH = os.path.join(save_path, "cooc_dictionary.txt")
Expand All @@ -333,7 +333,7 @@ def prepare_all_artifacts(save_path: str):
ppmi_dict_df = os.path.join(save_path, "ppmi_df.txt")
ppmi_dict_tf = os.path.join(save_path, "ppmi_tf.txt")
MUTUAL_INFO_DICT_PATH = os.path.join(save_path, "mutual_info_dict.pkl")
DOCUMENTS_TO_BATCH_PATH = os.path.join(save_path, "ppp.csv")
DOCUMENTS_TO_BATCH_PATH = os.path.join(save_path, "prep_df.csv")

# TODO: check why batch vectorizer is returned (unused further)
prepare_batch_vectorizer(
Expand Down
2 changes: 1 addition & 1 deletion autotm/preprocessing/text_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def process_dataset(
:return:
"""
os.makedirs(save_path, exist_ok=True)
save_path = os.path.join(save_path, "ppp.csv")
save_path = os.path.join(save_path, "prep_df.csv")
data = pd.read_csv(fname) if isinstance(fname, str) else cast(pd.DataFrame, fname)
data = parallelize_dataframe(
data, lemmatize_text, n_cores, lang=lang, col_to_process=col_to_process
Expand Down

0 comments on commit 31c0779

Please sign in to comment.