diff --git a/autoBOTLib/features/features_document_graph.py b/autoBOTLib/features/features_document_graph.py
index 354e742..409bcfd 100644
--- a/autoBOTLib/features/features_document_graph.py
+++ b/autoBOTLib/features/features_document_graph.py
@@ -9,7 +9,6 @@
 import numpy as np
 from scipy.sparse import csgraph
 from sklearn.decomposition import TruncatedSVD
-import matplotlib.pyplot as plt
 import tqdm
 from collections import OrderedDict
 import networkx as nx
@@ -74,7 +73,7 @@ def fit(self, text_list):
         # Subsample the document space to reduce graph size.
         if len(text_list) > self.doc_limit:
             if self.targets is None:
-                if not self.doc_limit is None:
+                if self.doc_limit is not None:
                     text_list = text_list[:self.doc_limit]
 
             else:
@@ -214,7 +213,6 @@ def get_graph(self, wspace, ltl):
 
 if __name__ == "__main__":
 
-    import matplotlib.pyplot as plt
     from sklearn.linear_model import LogisticRegression
     from sklearn.model_selection import cross_val_score
 
diff --git a/autoBOTLib/features/features_token_relations.py b/autoBOTLib/features/features_token_relations.py
index 1f9dbd3..5f67208 100644
--- a/autoBOTLib/features/features_token_relations.py
+++ b/autoBOTLib/features/features_token_relations.py
@@ -11,7 +11,6 @@
 import pandas as pd
 from collections import defaultdict
 import numpy as np
-from scipy import sparse
 import tqdm
 import multiprocessing as mp
 import scipy.sparse as sps
@@ -116,7 +115,7 @@ def witem_kernel(self, instance):
                 if distance > 2:
                     context_size = int(np.log2(distance))
                     encoded_witem = w1 + "--" + str(context_size) + "--" + w2
-                    if not encoded_witem in global_distances:
+                    if encoded_witem not in global_distances:
                         global_distances[encoded_witem] = 0
                     global_distances[encoded_witem] += 1
 
diff --git a/autoBOTLib/features/features_topic.py b/autoBOTLib/features/features_topic.py
index de55750..93d71f6 100644
--- a/autoBOTLib/features/features_topic.py
+++ b/autoBOTLib/features/features_topic.py
@@ -80,7 +80,7 @@ def transform(self, new_documents):
             for k, v in self.topic_features.items():
                 denominator = len(v)
                 overlap = len(parts.intersection(v)) / denominator
-                if not overlap is None:
+                if overlap is not None:
                     new_features[enx, k] = overlap
 
         return new_features
diff --git a/autoBOTLib/learning/torch_sparse_nn.py b/autoBOTLib/learning/torch_sparse_nn.py
index 0aa56c0..5cb2700 100644
--- a/autoBOTLib/learning/torch_sparse_nn.py
+++ b/autoBOTLib/learning/torch_sparse_nn.py
@@ -384,11 +384,9 @@ def torch_learners(final_run,
 if __name__ == "__main__":
 
     import numpy as np
-    from scipy.sparse import csr_matrix
     from numpy.random import default_rng
     from scipy.sparse import random
     from scipy import stats
-    import matplotlib.pyplot as plt
 
     rng = default_rng()
     rvs = stats.uniform().rvs
diff --git a/autoBOTLib/misc/misc_keyword_detection.py b/autoBOTLib/misc/misc_keyword_detection.py
index eeac8fa..58c6945 100644
--- a/autoBOTLib/misc/misc_keyword_detection.py
+++ b/autoBOTLib/misc/misc_keyword_detection.py
@@ -25,16 +25,16 @@ def __init__(self, hyperparameters, verbose=True):
         self.distance_method = hyperparameters["distance_method"]
         self.hyperparameters = hyperparameters
 
-        if not "max_occurrence" in self.hyperparameters:
+        if "max_occurrence" not in self.hyperparameters:
             self.hyperparameters['max_occurrence'] = 3
 
-        if not "max_similar" in self.hyperparameters:
+        if "max_similar" not in self.hyperparameters:
             self.hyperparameters['max_similar'] = 3
 
-        if not 'stopwords' in self.hyperparameters:
+        if 'stopwords' not in self.hyperparameters:
             self.hyperparameters['stopwords'] = None
 
-        if not 'connectives' in self.hyperparameters:
+        if 'connectives' not in self.hyperparameters:
             self.hyperparameters['connectives'] = True
 
         self.verbose = verbose
@@ -89,17 +89,17 @@ def process_line(line):
             line = line.strip()
             line = [i for i in word_tokenize(line.lower()) if i not in stop]
             self.whole_document += line
-            if not stopwords is None:
-                line = [w for w in line if not w in stopwords]
+            if stopwords is not None:
+                line = [w for w in line if w not in stopwords]
 
-            if not stemmer is None:
+            if stemmer is not None:
                 line = [stemmer.stem(w) for w in line]
 
-            if not lemmatizer is None:
+            if lemmatizer is not None:
                 new_line = []
                 for x in line:
                     lemma = lemmatizer.lemmatize(x)
-                    if not (lemma in self.inverse_lemmatizer_mapping):
+                    if lemma not in self.inverse_lemmatizer_mapping:
                         self.inverse_lemmatizer_mapping[lemma] = set()
                     self.inverse_lemmatizer_mapping[lemma].add(x)
                     new_line.append(lemma)
@@ -295,7 +295,7 @@ def find_keywords(self, document, input_type="file", validate=False):
                 w2 = pair[1]
                 if w1 in kw_map and w2 in kw_map:
                     score = np.mean([kw_map[w1], kw_map[w2]])
-                    if not w1 + " " + w2 in all_terms:
+                    if w1 + " " + w2 not in all_terms:
                         higher_order_1.append((w1 + " " + w2, score))
                         all_terms.add(w1 + " " + w2)
 
@@ -309,7 +309,7 @@ def find_keywords(self, document, input_type="file", validate=False):
                     ]
                     term = edge[0] + " " + pair[0] + " " + pair[1]
                     score = np.mean(trip_score)
-                    if not term in all_terms:
+                    if term not in all_terms:
                         higher_order_2.append((term, score))
                         all_terms.add(term)
 
@@ -320,7 +320,7 @@ def find_keywords(self, document, input_type="file", validate=False):
                     ]
                     term = pair[0] + " " + pair[1] + " " + edge[1]
                     score = np.mean(trip_score)
-                    if not term in all_terms:
+                    if term not in all_terms:
                         higher_order_2.append((term, score))
                         all_terms.add(term)
         else:
diff --git a/autoBOTLib/optimization/optimization_engine.py b/autoBOTLib/optimization/optimization_engine.py
index 0d58d81..34b1725 100644
--- a/autoBOTLib/optimization/optimization_engine.py
+++ b/autoBOTLib/optimization/optimization_engine.py
@@ -6,7 +6,6 @@
 import time
 import itertools
 import numpy as np
-import tqdm
 from scipy import sparse
 from collections import defaultdict, Counter
 from autoBOTLib.optimization.optimization_metrics import *
@@ -377,7 +376,7 @@ def update_global_feature_importances(self):
 
         for fx, coef in zip(subspace_features, coefficients):
             space_of_the_feature = self.global_feature_name_hash[fx]
-            if not fx in fdict:
+            if fx not in fdict:
                 fdict[fx] = np.abs(coef)
 
             else:
diff --git a/autoBOTLib/optimization/optimization_feature_constructors.py b/autoBOTLib/optimization/optimization_feature_constructors.py
index 447ef35..9bcc93d 100644
--- a/autoBOTLib/optimization/optimization_feature_constructors.py
+++ b/autoBOTLib/optimization/optimization_feature_constructors.py
@@ -8,8 +8,7 @@
 from sklearn.pipeline import FeatureUnion
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.model_selection import GridSearchCV
-from sklearn.svm import LinearSVC
-from sklearn.linear_model import LogisticRegression, SGDClassifier
+from sklearn.linear_model import SGDClassifier
 from sklearn.feature_extraction.text import TfidfVectorizer
 
 from autoBOTLib.features.features_topic import *
@@ -418,7 +417,7 @@ def get_features(df_data,
 
     include_image_transformer = False
 
-    if not custom_pipeline is None and combine_with_existing_representation == False:
+    if custom_pipeline is not None and combine_with_existing_representation == False:
 
         features = custom_pipeline
 
@@ -589,7 +588,7 @@ def get_features(df_data,
 
     ## representation_type is pre-set
 
-    if not representation_type in feature_presets:
+    if representation_type not in feature_presets:
         logging.info(
             "Please, specify a valid preset! (see the documentation for the up-to-date namings)"
         )
@@ -606,7 +605,7 @@ def get_features(df_data,
             feature_transformer_vault[x] for x in representation_type
         ]
 
-    if not custom_pipeline is None and combine_with_existing_representation:
+    if custom_pipeline is not None and combine_with_existing_representation:
         features = features + custom_pipeline
 
     feature_names = [x[0] for x in features]