
Commit

some fixes
SkBlaz committed Jan 17, 2025
1 parent 77821fb commit 985970a
Showing 7 changed files with 20 additions and 27 deletions.
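Every hunk in this commit follows the same two clean-ups: unused imports are deleted, and negated tests are rewritten with the "x is not None" and "x not in y" operators instead of "not x is None" and "not x in y" (the forms linters such as flake8 flag as E714/E713). A minimal sketch of the two idioms, using invented values rather than code from the repository:

# Minimal sketch of the two idioms applied throughout this commit; the
# values below are invented for illustration, not taken from autoBOTLib.
doc_limit = 1000
hyperparameters = {"distance_method": "editdistance"}

# Identity: "x is not None" reads as a single operator, while the older
# "not x is None" parses as "not (x is None)".
if doc_limit is not None:
    print("doc_limit is set to", doc_limit)

# Membership: "key not in mapping" instead of "not key in mapping".
if "max_occurrence" not in hyperparameters:
    hyperparameters["max_occurrence"] = 3
print(hyperparameters)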
autoBOTLib/features/features_document_graph.py: 4 changes (1 addition, 3 deletions)
@@ -9,7 +9,6 @@
import numpy as np
from scipy.sparse import csgraph
from sklearn.decomposition import TruncatedSVD
import matplotlib.pyplot as plt
import tqdm
from collections import OrderedDict
import networkx as nx
@@ -74,7 +73,7 @@ def fit(self, text_list):
# Subsample the document space to reduce graph size.
if len(text_list) > self.doc_limit:
if self.targets is None:
if not self.doc_limit is None:
if self.doc_limit is not None:
text_list = text_list[:self.doc_limit]

else:
@@ -214,7 +213,6 @@ def get_graph(self, wspace, ltl):

if __name__ == "__main__":

import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
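For reference, a self-contained sketch of the document subsampling guard touched in fit() above; the standalone function and the default limit of 4096 are assumptions made for this illustration, not the class from the repository.

def subsample_documents(text_list, doc_limit=4096, targets=None):
    # Mirrors the nested guard in fit(): truncate the corpus only when it
    # exceeds the limit, no targets are supplied and a limit is actually set.
    if len(text_list) > doc_limit:
        if targets is None:
            if doc_limit is not None:
                text_list = text_list[:doc_limit]
    return text_list

docs = ["document {}".format(i) for i in range(10000)]
print(len(subsample_documents(docs)))  # -> 4096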
autoBOTLib/features/features_token_relations.py: 3 changes (1 addition, 2 deletions)
@@ -11,7 +11,6 @@
import pandas as pd
from collections import defaultdict
import numpy as np
from scipy import sparse
import tqdm
import multiprocessing as mp
import scipy.sparse as sps
@@ -116,7 +115,7 @@ def witem_kernel(self, instance):
if distance > 2:
context_size = int(np.log2(distance))
encoded_witem = w1 + "--" + str(context_size) + "--" + w2
if not encoded_witem in global_distances:
if encoded_witem not in global_distances:
global_distances[encoded_witem] = 0
global_distances[encoded_witem] += 1

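The witem_kernel hunk above counts co-occurring token pairs under a key that buckets their distance by log2. A runnable sketch of that encoding; the pair enumeration over a single token list is a simplification of how the class walks its instances.

import numpy as np

def count_witems(tokens):
    # For every ordered token pair more than two positions apart, bucket the
    # distance with log2 and count the "w1--bucket--w2" key.
    global_distances = {}
    for i, w1 in enumerate(tokens):
        for j in range(i + 1, len(tokens)):
            w2 = tokens[j]
            distance = j - i
            if distance > 2:
                context_size = int(np.log2(distance))
                encoded_witem = w1 + "--" + str(context_size) + "--" + w2
                if encoded_witem not in global_distances:
                    global_distances[encoded_witem] = 0
                global_distances[encoded_witem] += 1
    return global_distances

print(count_witems("the quick brown fox jumps over the lazy dog".split()))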
autoBOTLib/features/features_topic.py: 2 changes (1 addition, 1 deletion)
@@ -80,7 +80,7 @@ def transform(self, new_documents):
for k, v in self.topic_features.items():
denominator = len(v)
overlap = len(parts.intersection(v)) / denominator
if not overlap is None:
if overlap is not None:
new_features[enx, k] = overlap

return new_features
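The transform hunk above scores each document against each topic as the share of the topic's terms the document contains. A small sketch with invented topic sets; note that after the division, overlap is always a float, so the retained is-not-None guard never actually skips an assignment.

import numpy as np

topic_features = {0: {"graph", "node", "edge"}, 1: {"topic", "model", "corpus"}}
new_documents = ["a graph has a node and an edge", "we fit a topic model"]

new_features = np.zeros((len(new_documents), len(topic_features)))
for enx, document in enumerate(new_documents):
    parts = set(document.split())
    for k, v in topic_features.items():
        denominator = len(v)
        overlap = len(parts.intersection(v)) / denominator
        if overlap is not None:  # mirrors the original guard; always true here
            new_features[enx, k] = overlap

print(new_features)  # one row per document, one column per topic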
autoBOTLib/learning/torch_sparse_nn.py: 2 changes (0 additions, 2 deletions)
@@ -384,11 +384,9 @@ def torch_learners(final_run,
if __name__ == "__main__":

import numpy as np
from scipy.sparse import csr_matrix
from numpy.random import default_rng
from scipy.sparse import random
from scipy import stats
import matplotlib.pyplot as plt

rng = default_rng()
rvs = stats.uniform().rvs
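The imports left in the __main__ demo above set up a random sparse input matrix; a plausible sketch of that setup follows, with the shape, density and labels invented for illustration.

from numpy.random import default_rng
from scipy.sparse import random
from scipy import stats

rng = default_rng()
rvs = stats.uniform().rvs

# Random sparse feature matrix plus binary labels for a quick smoke test.
X = random(100, 20, density=0.25, random_state=rng, data_rvs=rvs)
y = rng.integers(0, 2, 100)
print(X.shape, X.nnz, y.shape)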
autoBOTLib/misc/misc_keyword_detection.py: 24 changes (12 additions, 12 deletions)
@@ -25,16 +25,16 @@ def __init__(self, hyperparameters, verbose=True):
self.distance_method = hyperparameters["distance_method"]
self.hyperparameters = hyperparameters

if not "max_occurrence" in self.hyperparameters:
if "max_occurrence" not in self.hyperparameters:
self.hyperparameters['max_occurrence'] = 3

if not "max_similar" in self.hyperparameters:
if "max_similar" not in self.hyperparameters:
self.hyperparameters['max_similar'] = 3

if not 'stopwords' in self.hyperparameters:
if 'stopwords' not in self.hyperparameters:
self.hyperparameters['stopwords'] = None

if not 'connectives' in self.hyperparameters:
if 'connectives' not in self.hyperparameters:
self.hyperparameters['connectives'] = True

self.verbose = verbose
@@ -89,17 +89,17 @@ def process_line(line):
line = line.strip()
line = [i for i in word_tokenize(line.lower()) if i not in stop]
self.whole_document += line
if not stopwords is None:
line = [w for w in line if not w in stopwords]
if stopwords is not None:
line = [w for w in line if w not in stopwords]

if not stemmer is None:
if stemmer is not None:
line = [stemmer.stem(w) for w in line]

if not lemmatizer is None:
if lemmatizer is not None:
new_line = []
for x in line:
lemma = lemmatizer.lemmatize(x)
if not (lemma in self.inverse_lemmatizer_mapping):
if lemma not in self.inverse_lemmatizer_mapping:
self.inverse_lemmatizer_mapping[lemma] = set()
self.inverse_lemmatizer_mapping[lemma].add(x)
new_line.append(lemma)
@@ -295,7 +295,7 @@ def find_keywords(self, document, input_type="file", validate=False):
w2 = pair[1]
if w1 in kw_map and w2 in kw_map:
score = np.mean([kw_map[w1], kw_map[w2]])
if not w1 + " " + w2 in all_terms:
if w1 + " " + w2 not in all_terms:
higher_order_1.append((w1 + " " + w2, score))
all_terms.add(w1 + " " + w2)

@@ -309,7 +309,7 @@ def find_keywords(self, document, input_type="file", validate=False):
]
term = edge[0] + " " + pair[0] + " " + pair[1]
score = np.mean(trip_score)
if not term in all_terms:
if term not in all_terms:
higher_order_2.append((term, score))
all_terms.add(term)

@@ -320,7 +320,7 @@ def find_keywords(self, document, input_type="file", validate=False):
]
term = pair[0] + " " + pair[1] + " " + edge[1]
score = np.mean(trip_score)
if not term in all_terms:
if term not in all_terms:
higher_order_2.append((term, score))
all_terms.add(term)
else:
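process_line above lower-cases and tokenizes each line, removes stopwords, optionally stems or lemmatizes, and records which surface forms map to each lemma. A runnable sketch with plain split() standing in for nltk's word_tokenize and a toy lemmatizer standing in for the real one:

class ToyLemmatizer:
    # Stand-in for a real lemmatizer: strips a trailing "s".
    def lemmatize(self, token):
        return token[:-1] if token.endswith("s") else token

def process_line(line, stopwords=None, stemmer=None, lemmatizer=None,
                 inverse_lemmatizer_mapping=None):
    line = line.strip().lower().split()
    if stopwords is not None:
        line = [w for w in line if w not in stopwords]
    if stemmer is not None:
        line = [stemmer.stem(w) for w in line]
    if lemmatizer is not None:
        new_line = []
        for x in line:
            lemma = lemmatizer.lemmatize(x)
            if lemma not in inverse_lemmatizer_mapping:
                inverse_lemmatizer_mapping[lemma] = set()
            inverse_lemmatizer_mapping[lemma].add(x)
            new_line.append(lemma)
        line = new_line
    return line

mapping = {}
print(process_line("Graphs and their nodes", stopwords={"and", "their"},
                   lemmatizer=ToyLemmatizer(), inverse_lemmatizer_mapping=mapping))
print(mapping)  # {'graph': {'graphs'}, 'node': {'nodes'}}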
autoBOTLib/optimization/optimization_engine.py: 3 changes (1 addition, 2 deletions)
@@ -6,7 +6,6 @@
import time
import itertools
import numpy as np
import tqdm
from scipy import sparse
from collections import defaultdict, Counter
from autoBOTLib.optimization.optimization_metrics import *
@@ -377,7 +376,7 @@ def update_global_feature_importances(self):
for fx, coef in zip(subspace_features, coefficients):
space_of_the_feature = self.global_feature_name_hash[fx]

if not fx in fdict:
if fx not in fdict:
fdict[fx] = np.abs(coef)

else:
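The update_global_feature_importances hunk above collects absolute coefficients per feature name into fdict; the else branch is cut off by the diff, so the sketch below assumes it simply accumulates into the existing entry (feature names and coefficients are invented).

import numpy as np

subspace_features = ["word_tfidf", "char_tfidf", "word_tfidf"]
coefficients = [0.8, -0.3, 0.1]

fdict = {}
for fx, coef in zip(subspace_features, coefficients):
    if fx not in fdict:
        fdict[fx] = np.abs(coef)
    else:
        # Assumed continuation of the truncated else branch: repeated
        # features accumulate their absolute coefficients.
        fdict[fx] += np.abs(coef)

print(fdict)  # {'word_tfidf': ~0.9, 'char_tfidf': 0.3}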
autoBOTLib/optimization/optimization_feature_constructors.py: 9 changes (4 additions, 5 deletions)
@@ -8,8 +8,7 @@
from sklearn.pipeline import FeatureUnion
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.feature_extraction.text import TfidfVectorizer

from autoBOTLib.features.features_topic import *
@@ -418,7 +417,7 @@ def get_features(df_data,
include_image_transformer = False


if not custom_pipeline is None and combine_with_existing_representation == False:
if custom_pipeline is not None and combine_with_existing_representation == False:

features = custom_pipeline

@@ -589,7 +588,7 @@ def get_features(df_data,

## representation_type is pre-set

if not representation_type in feature_presets:
if representation_type not in feature_presets:
logging.info(
"Please, specify a valid preset! (see the documentation for the up-to-date namings)"
)
@@ -606,7 +605,7 @@ def get_features(df_data,
feature_transformer_vault[x] for x in representation_type
]

if not custom_pipeline is None and combine_with_existing_representation:
if custom_pipeline is not None and combine_with_existing_representation:
features = features + custom_pipeline

feature_names = [x[0] for x in features]
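Besides trimming the sklearn imports down to SGDClassifier, the hunks above adjust how a custom_pipeline interacts with the built-in representations in get_features. A condensed sketch of that branching, with a placeholder preset table and transformer vault standing in for autoBOTLib's real ones:

import logging

feature_presets = {"symbolic": ["word_tfidf", "char_tfidf"]}                  # placeholder
feature_transformer_vault = {"word_tfidf": object(), "char_tfidf": object()}  # placeholder

def select_features(representation_type="symbolic", custom_pipeline=None,
                    combine_with_existing_representation=False):
    # A custom pipeline on its own replaces the built-in representation.
    if custom_pipeline is not None and combine_with_existing_representation == False:
        return custom_pipeline

    # Otherwise the requested preset must exist.
    if representation_type not in feature_presets:
        logging.info("Please, specify a valid preset!")
        return None

    features = [feature_transformer_vault[x]
                for x in feature_presets[representation_type]]

    # A custom pipeline can also be appended to the preset features.
    if custom_pipeline is not None and combine_with_existing_representation:
        features = features + custom_pipeline
    return features

print(len(select_features(custom_pipeline=[object()],
                          combine_with_existing_representation=True)))  # -> 3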
