
Commit

some fixes
SkBlaz committed Jan 17, 2025
1 parent 77821fb commit 985970a
Showing 7 changed files with 20 additions and 27 deletions.
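Every hunk in this commit follows the same two clean-ups: unused imports are deleted, and negated tests are rewritten with the "x is not None" and "x not in y" operators instead of "not x is None" and "not x in y" (the forms linters such as flake8 flag as E714/E713). A minimal sketch of the two idioms, using invented values rather than code from the repository:

# Minimal sketch of the two idioms applied throughout this commit; the
# values below are invented for illustration, not taken from autoBOTLib.
doc_limit = 1000
hyperparameters = {"distance_method": "editdistance"}

# Identity: "x is not None" reads as a single operator, while the older
# "not x is None" parses as "not (x is None)".
if doc_limit is not None:
    print("doc_limit is set to", doc_limit)

# Membership: "key not in mapping" instead of "not key in mapping".
if "max_occurrence" not in hyperparameters:
    hyperparameters["max_occurrence"] = 3
print(hyperparameters)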
autoBOTLib/features/features_document_graph.py: 4 changes (1 addition, 3 deletions)
@@ -9,7 +9,6 @@
import numpy as np
from scipy.sparse import csgraph
from sklearn.decomposition import TruncatedSVD
import matplotlib.pyplot as plt
import tqdm
from collections import OrderedDict
import networkx as nx
@@ -74,7 +73,7 @@ def fit(self, text_list):
# Subsample the document space to reduce graph size.
if len(text_list) > self.doc_limit:
if self.targets is None:
if not self.doc_limit is None:
if self.doc_limit is not None:
text_list = text_list[:self.doc_limit]

else:
@@ -214,7 +213,6 @@ def get_graph(self, wspace, ltl):

if __name__ == "__main__":

import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
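For reference, a self-contained sketch of the document subsampling guard touched in fit() above; the standalone function and the default limit of 4096 are assumptions made for this illustration, not the class from the repository.

def subsample_documents(text_list, doc_limit=4096, targets=None):
    # Mirrors the nested guard in fit(): truncate the corpus only when it
    # exceeds the limit, no targets are supplied and a limit is actually set.
    if len(text_list) > doc_limit:
        if targets is None:
            if doc_limit is not None:
                text_list = text_list[:doc_limit]
    return text_list

docs = ["document {}".format(i) for i in range(10000)]
print(len(subsample_documents(docs)))  # -> 4096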
autoBOTLib/features/features_token_relations.py: 3 changes (1 addition, 2 deletions)
@@ -11,7 +11,6 @@
import pandas as pd
from collections import defaultdict
import numpy as np
from scipy import sparse
import tqdm
import multiprocessing as mp
import scipy.sparse as sps
@@ -116,7 +115,7 @@ def witem_kernel(self, instance):
if distance > 2:
context_size = int(np.log2(distance))
encoded_witem = w1 + "--" + str(context_size) + "--" + w2
if not encoded_witem in global_distances:
if encoded_witem not in global_distances:
global_distances[encoded_witem] = 0
global_distances[encoded_witem] += 1

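The witem_kernel hunk above counts co-occurring token pairs under a key that buckets their distance by log2. A runnable sketch of that encoding; the pair enumeration over a single token list is a simplification of how the class walks its instances.

import numpy as np

def count_witems(tokens):
    # For every ordered token pair more than two positions apart, bucket the
    # distance with log2 and count the "w1--bucket--w2" key.
    global_distances = {}
    for i, w1 in enumerate(tokens):
        for j in range(i + 1, len(tokens)):
            w2 = tokens[j]
            distance = j - i
            if distance > 2:
                context_size = int(np.log2(distance))
                encoded_witem = w1 + "--" + str(context_size) + "--" + w2
                if encoded_witem not in global_distances:
                    global_distances[encoded_witem] = 0
                global_distances[encoded_witem] += 1
    return global_distances

print(count_witems("the quick brown fox jumps over the lazy dog".split()))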
autoBOTLib/features/features_topic.py: 2 changes (1 addition, 1 deletion)
@@ -80,7 +80,7 @@ def transform(self, new_documents):
for k, v in self.topic_features.items():
denominator = len(v)
overlap = len(parts.intersection(v)) / denominator
if not overlap is None:
if overlap is not None:
new_features[enx, k] = overlap

return new_features
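The transform hunk above scores each document against each topic as the share of the topic's terms the document contains. A small sketch with invented topic sets; note that after the division, overlap is always a float, so the retained is-not-None guard never actually skips an assignment.

import numpy as np

topic_features = {0: {"graph", "node", "edge"}, 1: {"topic", "model", "corpus"}}
new_documents = ["a graph has a node and an edge", "we fit a topic model"]

new_features = np.zeros((len(new_documents), len(topic_features)))
for enx, document in enumerate(new_documents):
    parts = set(document.split())
    for k, v in topic_features.items():
        denominator = len(v)
        overlap = len(parts.intersection(v)) / denominator
        if overlap is not None:  # mirrors the original guard; always true here
            new_features[enx, k] = overlap

print(new_features)  # one row per document, one column per topic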
autoBOTLib/learning/torch_sparse_nn.py: 2 changes (0 additions, 2 deletions)
@@ -384,11 +384,9 @@ def torch_learners(final_run,
if __name__ == "__main__":

import numpy as np
from scipy.sparse import csr_matrix
from numpy.random import default_rng
from scipy.sparse import random
from scipy import stats
import matplotlib.pyplot as plt

rng = default_rng()
rvs = stats.uniform().rvs
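The imports left in the __main__ demo above set up a random sparse input matrix; a plausible sketch of that setup follows, with the shape, density and labels invented for illustration.

from numpy.random import default_rng
from scipy.sparse import random
from scipy import stats

rng = default_rng()
rvs = stats.uniform().rvs

# Random sparse feature matrix plus binary labels for a quick smoke test.
X = random(100, 20, density=0.25, random_state=rng, data_rvs=rvs)
y = rng.integers(0, 2, 100)
print(X.shape, X.nnz, y.shape)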
autoBOTLib/misc/misc_keyword_detection.py: 24 changes (12 additions, 12 deletions)
@@ -25,16 +25,16 @@ def __init__(self, hyperparameters, verbose=True):
self.distance_method = hyperparameters["distance_method"]
self.hyperparameters = hyperparameters

if not "max_occurrence" in self.hyperparameters:
if "max_occurrence" not in self.hyperparameters:
self.hyperparameters['max_occurrence'] = 3

if not "max_similar" in self.hyperparameters:
if "max_similar" not in self.hyperparameters:
self.hyperparameters['max_similar'] = 3

if not 'stopwords' in self.hyperparameters:
if 'stopwords' not in self.hyperparameters:
self.hyperparameters['stopwords'] = None

if not 'connectives' in self.hyperparameters:
if 'connectives' not in self.hyperparameters:
self.hyperparameters['connectives'] = True

self.verbose = verbose
@@ -89,17 +89,17 @@ def process_line(line):
line = line.strip()
line = [i for i in word_tokenize(line.lower()) if i not in stop]
self.whole_document += line
if not stopwords is None:
line = [w for w in line if not w in stopwords]
if stopwords is not None:
line = [w for w in line if w not in stopwords]

if not stemmer is None:
if stemmer is not None:
line = [stemmer.stem(w) for w in line]

if not lemmatizer is None:
if lemmatizer is not None:
new_line = []
for x in line:
lemma = lemmatizer.lemmatize(x)
if not (lemma in self.inverse_lemmatizer_mapping):
if lemma not in self.inverse_lemmatizer_mapping:
self.inverse_lemmatizer_mapping[lemma] = set()
self.inverse_lemmatizer_mapping[lemma].add(x)
new_line.append(lemma)
@@ -295,7 +295,7 @@ def find_keywords(self, document, input_type="file", validate=False):
w2 = pair[1]
if w1 in kw_map and w2 in kw_map:
score = np.mean([kw_map[w1], kw_map[w2]])
if not w1 + " " + w2 in all_terms:
if w1 + " " + w2 not in all_terms:
higher_order_1.append((w1 + " " + w2, score))
all_terms.add(w1 + " " + w2)

@@ -309,7 +309,7 @@ def find_keywords(self, document, input_type="file", validate=False):
]
term = edge[0] + " " + pair[0] + " " + pair[1]
score = np.mean(trip_score)
if not term in all_terms:
if term not in all_terms:
higher_order_2.append((term, score))
all_terms.add(term)

@@ -320,7 +320,7 @@ def find_keywords(self, document, input_type="file", validate=False):
]
term = pair[0] + " " + pair[1] + " " + edge[1]
score = np.mean(trip_score)
if not term in all_terms:
if term not in all_terms:
higher_order_2.append((term, score))
all_terms.add(term)
else:
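process_line above lower-cases and tokenizes each line, removes stopwords, optionally stems or lemmatizes, and records which surface forms map to each lemma. A runnable sketch with plain split() standing in for nltk's word_tokenize and a toy lemmatizer standing in for the real one:

class ToyLemmatizer:
    # Stand-in for a real lemmatizer: strips a trailing "s".
    def lemmatize(self, token):
        return token[:-1] if token.endswith("s") else token

def process_line(line, stopwords=None, stemmer=None, lemmatizer=None,
                 inverse_lemmatizer_mapping=None):
    line = line.strip().lower().split()
    if stopwords is not None:
        line = [w for w in line if w not in stopwords]
    if stemmer is not None:
        line = [stemmer.stem(w) for w in line]
    if lemmatizer is not None:
        new_line = []
        for x in line:
            lemma = lemmatizer.lemmatize(x)
            if lemma not in inverse_lemmatizer_mapping:
                inverse_lemmatizer_mapping[lemma] = set()
            inverse_lemmatizer_mapping[lemma].add(x)
            new_line.append(lemma)
        line = new_line
    return line

mapping = {}
print(process_line("Graphs and their nodes", stopwords={"and", "their"},
                   lemmatizer=ToyLemmatizer(), inverse_lemmatizer_mapping=mapping))
print(mapping)  # {'graph': {'graphs'}, 'node': {'nodes'}}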
autoBOTLib/optimization/optimization_engine.py: 3 changes (1 addition, 2 deletions)
@@ -6,7 +6,6 @@
import time
import itertools
import numpy as np
import tqdm
from scipy import sparse
from collections import defaultdict, Counter
from autoBOTLib.optimization.optimization_metrics import *
@@ -377,7 +376,7 @@ def update_global_feature_importances(self):
for fx, coef in zip(subspace_features, coefficients):
space_of_the_feature = self.global_feature_name_hash[fx]

if not fx in fdict:
if fx not in fdict:
fdict[fx] = np.abs(coef)

else:
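The update_global_feature_importances hunk above collects absolute coefficients per feature name into fdict; the else branch is cut off by the diff, so the sketch below assumes it simply accumulates into the existing entry (feature names and coefficients are invented).

import numpy as np

subspace_features = ["word_tfidf", "char_tfidf", "word_tfidf"]
coefficients = [0.8, -0.3, 0.1]

fdict = {}
for fx, coef in zip(subspace_features, coefficients):
    if fx not in fdict:
        fdict[fx] = np.abs(coef)
    else:
        # Assumed continuation of the truncated else branch: repeated
        # features accumulate their absolute coefficients.
        fdict[fx] += np.abs(coef)

print(fdict)  # {'word_tfidf': ~0.9, 'char_tfidf': 0.3}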
autoBOTLib/optimization/optimization_feature_constructors.py: 9 changes (4 additions, 5 deletions)
@@ -8,8 +8,7 @@
from sklearn.pipeline import FeatureUnion
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.feature_extraction.text import TfidfVectorizer

from autoBOTLib.features.features_topic import *
@@ -418,7 +417,7 @@ def get_features(df_data,
include_image_transformer = False


if not custom_pipeline is None and combine_with_existing_representation == False:
if custom_pipeline is not None and combine_with_existing_representation == False:

features = custom_pipeline

@@ -589,7 +588,7 @@ def get_features(df_data,

## representation_type is pre-set

if not representation_type in feature_presets:
if representation_type not in feature_presets:
logging.info(
"Please, specify a valid preset! (see the documentation for the up-to-date namings)"
)
@@ -606,7 +605,7 @@ def get_features(df_data,
feature_transformer_vault[x] for x in representation_type
]

if not custom_pipeline is None and combine_with_existing_representation:
if custom_pipeline is not None and combine_with_existing_representation:
features = features + custom_pipeline

feature_names = [x[0] for x in features]
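Besides trimming the sklearn imports down to SGDClassifier, the hunks above adjust how a custom_pipeline interacts with the built-in representations in get_features. A condensed sketch of that branching, with a placeholder preset table and transformer vault standing in for autoBOTLib's real ones:

import logging

feature_presets = {"symbolic": ["word_tfidf", "char_tfidf"]}                  # placeholder
feature_transformer_vault = {"word_tfidf": object(), "char_tfidf": object()}  # placeholder

def select_features(representation_type="symbolic", custom_pipeline=None,
                    combine_with_existing_representation=False):
    # A custom pipeline on its own replaces the built-in representation.
    if custom_pipeline is not None and combine_with_existing_representation == False:
        return custom_pipeline

    # Otherwise the requested preset must exist.
    if representation_type not in feature_presets:
        logging.info("Please, specify a valid preset!")
        return None

    features = [feature_transformer_vault[x]
                for x in feature_presets[representation_type]]

    # A custom pipeline can also be appended to the preset features.
    if custom_pipeline is not None and combine_with_existing_representation:
        features = features + custom_pipeline
    return features

print(len(select_features(custom_pipeline=[object()],
                          combine_with_existing_representation=True)))  # -> 3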
