diff --git a/agents.py b/agents.py
index 2e292948b..135711249 100644
--- a/agents.py
+++ b/agents.py
@@ -354,8 +354,7 @@ def list_things_at(self, location, tclass=Thing):
             return [thing for thing in self.things
                     if thing.location == location and isinstance(thing, tclass)]
         return [thing for thing in self.things
-                if all(x==y for x,y in zip(thing.location, location))
-                    and isinstance(thing, tclass)]
+                if all(x == y for x, y in zip(thing.location, location)) and isinstance(thing, tclass)]
 
     def some_things_at(self, location, tclass=Thing):
         """Return true if at least one of the things at location
diff --git a/agents4e.py b/agents4e.py
index 7c66a6194..7308cbb59 100644
--- a/agents4e.py
+++ b/agents4e.py
@@ -354,8 +354,7 @@ def list_things_at(self, location, tclass=Thing):
             return [thing for thing in self.things
                     if thing.location == location and isinstance(thing, tclass)]
         return [thing for thing in self.things
-                if all(x==y for x,y in zip(thing.location, location))
-                    and isinstance(thing, tclass)]
+                if all(x == y for x, y in zip(thing.location, location)) and isinstance(thing, tclass)]
 
     def some_things_at(self, location, tclass=Thing):
         """Return true if at least one of the things at location
diff --git a/deep_learning4e.py b/deep_learning4e.py
index 4f8f52ad9..bea9c8d2c 100644
--- a/deep_learning4e.py
+++ b/deep_learning4e.py
@@ -1,9 +1,9 @@
 """Deep learning. (Chapters 20)"""
 
-import math
 import random
 import statistics
 
+import numpy as np
 from keras import Sequential, optimizers
 from keras.layers import Embedding, SimpleRNN, Dense
 from keras.preprocessing import sequence
@@ -249,7 +249,7 @@ def adam(dataset, net, loss, epochs=1000, rho=(0.9, 0.999), delta=1 / 10 ** 8,
             r_hat = scalar_vector_product(1 / (1 - rho[1] ** t), r)
 
             # rescale r_hat
-            r_hat = map_vector(lambda x: 1 / (math.sqrt(x) + delta), r_hat)
+            r_hat = map_vector(lambda x: 1 / (np.sqrt(x) + delta), r_hat)
 
             # delta weights
             delta_theta = scalar_vector_product(-l_rate, element_wise_product(s_hat, r_hat))
@@ -341,7 +341,7 @@ def forward(self, inputs):
         res = []
         # get normalized value of each input
         for i in range(len(self.nodes)):
-            val = [(inputs[i] - mu) * self.weights[0] / math.sqrt(self.epsilon + stderr ** 2) + self.weights[1]]
+            val = [(inputs[i] - mu) * self.weights[0] / np.sqrt(self.epsilon + stderr ** 2) + self.weights[1]]
             res.append(val)
             self.nodes[i].val = val
         return res
diff --git a/games.py b/games.py
index efc65cc67..97bceb198 100644
--- a/games.py
+++ b/games.py
@@ -1,11 +1,13 @@
-"""Games or Adversarial Search. (Chapter 5)"""
+"""Games or Adversarial Search (Chapter 5)"""
 
 import copy
 import itertools
 import random
 from collections import namedtuple
 
-from utils import vector_add, inf
+import numpy as np
+
+from utils import vector_add
 
 GameState = namedtuple('GameState', 'to_move, utility, board, moves')
 StochasticGameState = namedtuple('StochasticGameState', 'to_move, utility, board, moves, chance')
@@ -24,7 +26,7 @@ def minmax_decision(state, game):
     def max_value(state):
         if game.terminal_test(state):
             return game.utility(state, player)
-        v = -inf
+        v = -np.inf
         for a in game.actions(state):
             v = max(v, min_value(game.result(state, a)))
         return v
@@ -32,7 +34,7 @@ def max_value(state):
     def min_value(state):
         if game.terminal_test(state):
             return game.utility(state, player)
-        v = inf
+        v = np.inf
         for a in game.actions(state):
             v = min(v, max_value(game.result(state, a)))
         return v
@@ -53,13 +55,13 @@ def expect_minmax(state, game):
     player = game.to_move(state)
 
     def max_value(state):
-        v = -inf
+        v = -np.inf
         for a in game.actions(state):
             v = max(v, chance_node(state, a))
         return v
 
     def min_value(state):
-        v = inf
+        v = np.inf
         for a in game.actions(state):
             v = min(v, chance_node(state, a))
         return v
@@ -94,7 +96,7 @@ def alpha_beta_search(state, game):
     def max_value(state, alpha, beta):
         if game.terminal_test(state):
             return game.utility(state, player)
-        v = -inf
+        v = -np.inf
         for a in game.actions(state):
             v = max(v, min_value(game.result(state, a), alpha, beta))
             if v >= beta:
@@ -105,7 +107,7 @@ def max_value(state, alpha, beta):
     def min_value(state, alpha, beta):
         if game.terminal_test(state):
             return game.utility(state, player)
-        v = inf
+        v = np.inf
         for a in game.actions(state):
             v = min(v, max_value(game.result(state, a), alpha, beta))
             if v <= alpha:
@@ -114,8 +116,8 @@ def min_value(state, alpha, beta):
         return v
 
     # Body of alpha_beta_search:
-    best_score = -inf
-    beta = inf
+    best_score = -np.inf
+    beta = np.inf
     best_action = None
     for a in game.actions(state):
         v = min_value(game.result(state, a), best_score, beta)
@@ -135,7 +137,7 @@ def alpha_beta_cutoff_search(state, game, d=4, cutoff_test=None, eval_fn=None):
     def max_value(state, alpha, beta, depth):
         if cutoff_test(state, depth):
             return eval_fn(state)
-        v = -inf
+        v = -np.inf
         for a in game.actions(state):
             v = max(v, min_value(game.result(state, a), alpha, beta, depth + 1))
             if v >= beta:
@@ -146,7 +148,7 @@ def max_value(state, alpha, beta, depth):
     def min_value(state, alpha, beta, depth):
         if cutoff_test(state, depth):
             return eval_fn(state)
-        v = inf
+        v = np.inf
         for a in game.actions(state):
             v = min(v, max_value(game.result(state, a), alpha, beta, depth + 1))
             if v <= alpha:
@@ -158,8 +160,8 @@ def min_value(state, alpha, beta, depth):
     # The default test cuts off at depth d or at a terminal state
     cutoff_test = (cutoff_test or (lambda state, depth: depth > d or game.terminal_test(state)))
     eval_fn = eval_fn or (lambda state: game.utility(state, player))
-    best_score = -inf
-    beta = inf
+    best_score = -np.inf
+    beta = np.inf
     best_action = None
     for a in game.actions(state):
         v = min_value(game.result(state, a), best_score, beta, 1)
diff --git a/games4e.py b/games4e.py
index 3fb000862..aba5b0eb3 100644
--- a/games4e.py
+++ b/games4e.py
@@ -1,11 +1,13 @@
-"""Games or Adversarial Search. (Chapter 5)"""
+"""Games or Adversarial Search (Chapter 5)"""
 
 import copy
 import itertools
 import random
 from collections import namedtuple
 
-from utils4e import vector_add, MCT_Node, ucb, inf
+import numpy as np
+
+from utils4e import vector_add, MCT_Node, ucb
 
 GameState = namedtuple('GameState', 'to_move, utility, board, moves')
 StochasticGameState = namedtuple('StochasticGameState', 'to_move, utility, board, moves, chance')
@@ -24,7 +26,7 @@ def minmax_decision(state, game):
     def max_value(state):
         if game.terminal_test(state):
             return game.utility(state, player)
-        v = -inf
+        v = -np.inf
         for a in game.actions(state):
             v = max(v, min_value(game.result(state, a)))
         return v
@@ -32,7 +34,7 @@ def max_value(state):
     def min_value(state):
         if game.terminal_test(state):
             return game.utility(state, player)
-        v = inf
+        v = np.inf
         for a in game.actions(state):
             v = min(v, max_value(game.result(state, a)))
         return v
@@ -53,13 +55,13 @@ def expect_minmax(state, game):
     player = game.to_move(state)
 
     def max_value(state):
-        v = -inf
+        v = -np.inf
         for a in game.actions(state):
             v = max(v, chance_node(state, a))
         return v
 
     def min_value(state):
-        v = inf
+        v = np.inf
         for a in game.actions(state):
             v = min(v, chance_node(state, a))
         return v
@@ -94,7 +96,7 @@ def alpha_beta_search(state, game):
     def max_value(state, alpha, beta):
         if game.terminal_test(state):
             return game.utility(state, player)
-        v = -inf
+        v = -np.inf
         for a in game.actions(state):
             v = max(v, min_value(game.result(state, a), alpha, beta))
             if v >= beta:
@@ -105,7 +107,7 @@ def max_value(state, alpha, beta):
     def min_value(state, alpha, beta):
         if game.terminal_test(state):
             return game.utility(state, player)
-        v = inf
+        v = np.inf
         for a in game.actions(state):
             v = min(v, max_value(game.result(state, a), alpha, beta))
             if v <= alpha:
@@ -114,8 +116,8 @@ def min_value(state, alpha, beta):
         return v
 
     # Body of alpha_beta_search:
-    best_score = -inf
-    beta = inf
+    best_score = -np.inf
+    beta = np.inf
     best_action = None
     for a in game.actions(state):
         v = min_value(game.result(state, a), best_score, beta)
@@ -135,7 +137,7 @@ def alpha_beta_cutoff_search(state, game, d=4, cutoff_test=None, eval_fn=None):
     def max_value(state, alpha, beta, depth):
         if cutoff_test(state, depth):
             return eval_fn(state)
-        v = -inf
+        v = -np.inf
         for a in game.actions(state):
             v = max(v, min_value(game.result(state, a), alpha, beta, depth + 1))
             if v >= beta:
@@ -146,7 +148,7 @@ def max_value(state, alpha, beta, depth):
     def min_value(state, alpha, beta, depth):
         if cutoff_test(state, depth):
             return eval_fn(state)
-        v = inf
+        v = np.inf
         for a in game.actions(state):
             v = min(v, max_value(game.result(state, a), alpha, beta, depth + 1))
             if v <= alpha:
@@ -158,8 +160,8 @@ def min_value(state, alpha, beta, depth):
     # The default test cuts off at depth d or at a terminal state
     cutoff_test = (cutoff_test or (lambda state, depth: depth > d or game.terminal_test(state)))
     eval_fn = eval_fn or (lambda state: game.utility(state, player))
-    best_score = -inf
-    beta = inf
+    best_score = -np.inf
+    beta = np.inf
     best_action = None
     for a in game.actions(state):
         v = min_value(game.result(state, a), best_score, beta, 1)
diff --git a/gui/romania_problem.py b/gui/romania_problem.py
index 55efa1837..08219bb55 100644
--- a/gui/romania_problem.py
+++ b/gui/romania_problem.py
@@ -1,14 +1,10 @@
+from copy import deepcopy
 from tkinter import *
 import sys
 import os.path
-import math
 sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 from search import *
-from search import breadth_first_tree_search as bfts, depth_first_tree_search as dfts, \
-    depth_first_graph_search as dfgs, breadth_first_graph_search as bfs, uniform_cost_search as ucs, \
-    astar_search as asts
 from utils import PriorityQueue
-from copy import deepcopy
 
 root = None
 city_coord = {}
@@ -289,7 +285,6 @@ def make_rectangle(map, x0, y0, margin, city_name):
 
 
 def make_legend(map):
-
     rect1 = map.create_rectangle(600, 100, 610, 110, fill="white")
     text1 = map.create_text(615, 105, anchor=W, text="Un-explored")
 
@@ -325,13 +320,11 @@ def tree_search(problem):
         display_current(node)
     if counter % 3 == 1 and counter >= 0:
         if problem.goal_test(node.state):
-
             return node
         frontier.extend(node.expand(problem))
 
         display_frontier(frontier)
     if counter % 3 == 2 and counter >= 0:
-
         display_explored(node)
     return None
 
@@ -562,7 +555,7 @@ def astar_search(problem, h=None):
 
 # TODO:
 # Remove redundant code.
-# Make the interchangbility work between various algorithms at each step.
+# Make the interchangeability work between various algorithms at each step.
 def on_click():
     """
     This function defines the action of the 'Next' button.
@@ -572,7 +565,7 @@ def on_click():
     if "Breadth-First Tree Search" == algo.get():
         node = breadth_first_tree_search(romania_problem)
         if node is not None:
-            final_path = bfts(romania_problem).solution()
+            final_path = node.solution()  # reuse the goal node found above instead of re-running the search
             final_path.append(start.get())
             display_final(final_path)
             next_button.config(state="disabled")
@@ -580,7 +573,7 @@ def on_click():
     elif "Depth-First Tree Search" == algo.get():
         node = depth_first_tree_search(romania_problem)
         if node is not None:
-            final_path = dfts(romania_problem).solution()
+            final_path = node.solution()  # reuse the goal node found above instead of re-running the search
             final_path.append(start.get())
             display_final(final_path)
             next_button.config(state="disabled")
@@ -588,7 +581,7 @@ def on_click():
     elif "Breadth-First Graph Search" == algo.get():
         node = breadth_first_graph_search(romania_problem)
         if node is not None:
-            final_path = bfs(romania_problem).solution()
+            final_path = node.solution()  # reuse the goal node found above instead of re-running the search
             final_path.append(start.get())
             display_final(final_path)
             next_button.config(state="disabled")
@@ -596,7 +589,7 @@ def on_click():
     elif "Depth-First Graph Search" == algo.get():
         node = depth_first_graph_search(romania_problem)
         if node is not None:
-            final_path = dfgs(romania_problem).solution()
+            final_path = node.solution()  # reuse the goal node found above instead of re-running the search
             final_path.append(start.get())
             display_final(final_path)
             next_button.config(state="disabled")
@@ -604,7 +597,7 @@ def on_click():
     elif "Uniform Cost Search" == algo.get():
         node = uniform_cost_search(romania_problem)
         if node is not None:
-            final_path = ucs(romania_problem).solution()
+            final_path = node.solution()  # reuse the goal node found above instead of re-running the search
             final_path.append(start.get())
             display_final(final_path)
             next_button.config(state="disabled")
@@ -612,7 +605,7 @@ def on_click():
     elif "A* - Search" == algo.get():
         node = astar_search(romania_problem)
         if node is not None:
-            final_path = asts(romania_problem).solution()
+            final_path = node.solution()  # reuse the goal node found above instead of re-running the search
             final_path.append(start.get())
             display_final(final_path)
             next_button.config(state="disabled")
@@ -626,6 +619,7 @@ def reset_map():
         city_map.itemconfig(city_coord[city], fill="white")
     next_button.config(state="normal")
 
+
 # TODO: Add more search algorithms in the OptionMenu
 
 
diff --git a/knowledge.py b/knowledge.py
index 945f27d3d..8c27c3eb8 100644
--- a/knowledge.py
+++ b/knowledge.py
@@ -1,23 +1,23 @@
 """Knowledge in learning (Chapter 19)"""
 
-from random import shuffle
-from math import log
-from utils import power_set
 from collections import defaultdict
-from itertools import combinations, product
-from logic import (FolKB, constant_symbols, predicate_symbols, standardize_variables,
-                   variables, is_definite_clause, subst, expr, Expr)
 from functools import partial
+from itertools import combinations, product
+from random import shuffle
 
+import numpy as np
 
-# ______________________________________________________________________________
+from logic import (FolKB, constant_symbols, predicate_symbols, standardize_variables,
+                   variables, is_definite_clause, subst, expr, Expr)
+from utils import power_set
 
 
 def current_best_learning(examples, h, examples_so_far=None):
     """
     [Figure 19.2]
     The hypothesis is a list of dictionaries, with each dictionary representing
-    a disjunction."""
+    a disjunction.
+    """
     if examples_so_far is None:
         examples_so_far = []
     if not examples:
@@ -128,7 +128,8 @@ def version_space_learning(examples):
     """
     [Figure 19.3]
     The version space is a list of hypotheses, which in turn are a list
-    of dictionaries/disjunctions."""
+    of dictionaries/disjunctions.
+    """
     V = all_hypotheses(examples)
     for e in examples:
         if V:
@@ -314,7 +315,6 @@ def new_literals(self, clause):
 
     def choose_literal(self, literals, examples):
         """Choose the best literal based on the information gain."""
-
         return max(literals, key=partial(self.gain, examples=examples))
 
     def gain(self, l, examples):
@@ -345,8 +345,8 @@ def gain(self, l, examples):
             represents = lambda d: all(d[x] == example[x] for x in example)
             if any(represents(l_) for l_ in post_pos):
                 T += 1
-        value = T * (log(len(post_pos) / (len(post_pos) + len(post_neg)) + 1e-12, 2) -
-                     log(pre_pos / (pre_pos + pre_neg), 2))
+        value = T * (np.log2(len(post_pos) / (len(post_pos) + len(post_neg)) + 1e-12) -
+                     np.log2(pre_pos / (pre_pos + pre_neg)))
         return value
 
     def update_examples(self, target, examples, extended_examples):
diff --git a/learning.py b/learning.py
index 401729cb9..bcaf0961e 100644
--- a/learning.py
+++ b/learning.py
@@ -1,20 +1,13 @@
-"""Learning from examples. (Chapters 18)"""
+"""Learning from examples (Chapters 18)"""
 
 import copy
-import heapq
-import math
-import random
 from collections import defaultdict
-from statistics import mean, stdev
+from statistics import stdev
 
-import numpy as np
 from qpsolvers import solve_qp
 
 from probabilistic_learning import NaiveBayesLearner
-from utils import (remove_all, unique, mode, argmax_random_tie, isclose, dot_product, vector_add, clip, sigmoid,
-                   scalar_vector_product, weighted_sample_with_replacement, num_or_str, normalize, print_table,
-                   open_data, sigmoid_derivative, probability, relu, relu_derivative, tanh, tanh_derivative, leaky_relu,
-                   leaky_relu_derivative, elu, elu_derivative, mean_boolean_error, random_weights, linear_kernel, inf)
+from utils import *
 
 
 class DataSet:
@@ -272,7 +265,7 @@ def cross_validation_wrapper(learner, dataset, k=10, trials=1):
     while True:
         errT, errV = cross_validation(learner, dataset, size, k, trials)
         # check for convergence provided err_val is not empty
-        if errT and not isclose(errT[-1], errT, rel_tol=1e-6):
+        if errT and not np.isclose(errT[-1], errT, rtol=1e-6):
             best_size = 0
             min_val = inf
             i = 0
@@ -462,7 +455,7 @@ def split_by(attr, examples):
 def information_content(values):
     """Number of bits to represent the probability distribution in values."""
     probabilities = normalize(remove_all(0, values))
-    return sum(-p * math.log2(p) for p in probabilities)
+    return sum(-p * np.log2(p) for p in probabilities)
 
 
 def DecisionListLearner(dataset):
@@ -980,7 +973,7 @@ def ada_boost(dataset, L, K):
             if example[target] == h_k(example):
                 w[j] *= error / (1 - error)
         w = normalize(w)
-        z.append(math.log((1 - error) / error))
+        z.append(np.log((1 - error) / error))
     return weighted_majority(h, z)
 
 
diff --git a/learning4e.py b/learning4e.py
index bd3bcf50a..01d9ea290 100644
--- a/learning4e.py
+++ b/learning4e.py
@@ -1,20 +1,14 @@
-"""Learning from examples. (Chapters 18)"""
+"""Learning from examples (Chapters 18)"""
 
 import copy
-import heapq
-import math
-import random
 from collections import defaultdict
-from statistics import mean, stdev
+from statistics import stdev
 
-import numpy as np
 from qpsolvers import solve_qp
 
 from probabilistic_learning import NaiveBayesLearner
 from utils import sigmoid, sigmoid_derivative
-from utils4e import (remove_all, unique, mode, argmax_random_tie, isclose, dot_product, num_or_str, normalize, clip,
-                     weighted_sample_with_replacement, print_table, open_data, probability, random_weights,
-                     mean_boolean_error, linear_kernel, inf)
+from utils4e import *
 
 
 class DataSet:
@@ -457,7 +451,7 @@ def split_by(attr, examples):
 def information_content(values):
     """Number of bits to represent the probability distribution in values."""
     probabilities = normalize(remove_all(0, values))
-    return sum(-p * math.log2(p) for p in probabilities)
+    return sum(-p * np.log2(p) for p in probabilities)
 
 
 def DecisionListLearner(dataset):
@@ -754,7 +748,7 @@ def ada_boost(dataset, L, K):
             if example[target] == h_k(example):
                 w[j] *= error / (1 - error)
         w = normalize(w)
-        z.append(math.log((1 - error) / error))
+        z.append(np.log((1 - error) / error))
     return weighted_majority(h, z)
 
 
diff --git a/making_simple_decision4e.py b/making_simple_decision4e.py
index a3b50e57c..4a35f94bd 100644
--- a/making_simple_decision4e.py
+++ b/making_simple_decision4e.py
@@ -1,4 +1,4 @@
-"""Making Simple Decisions. (Chapter 15)"""
+"""Making Simple Decisions (Chapter 15)"""
 
 import random
 
diff --git a/mdp.py b/mdp.py
index f558c8d40..1003e26b5 100644
--- a/mdp.py
+++ b/mdp.py
@@ -1,5 +1,5 @@
 """
-Markov Decision Processes. (Chapter 17)
+Markov Decision Processes (Chapter 17)
 
 First we define an MDP, and the special case of a GridMDP, in which
 states are laid out in a 2-dimensional grid. We also represent a policy
diff --git a/mdp4e.py b/mdp4e.py
index afa87ea0a..f8871bdc9 100644
--- a/mdp4e.py
+++ b/mdp4e.py
@@ -1,5 +1,5 @@
 """
-Markov Decision Processes. (Chapter 16)
+Markov Decision Processes (Chapter 16)
 
 First we define an MDP, and the special case of a GridMDP, in which
 states are laid out in a 2-dimensional grid. We also represent a policy
diff --git a/nlp.py b/nlp.py
index d883f3566..03aabf54b 100644
--- a/nlp.py
+++ b/nlp.py
@@ -1,4 +1,4 @@
-"""Natural Language Processing; Chart Parsing and PageRanking. (Chapter 22-23)"""
+"""Natural Language Processing; Chart Parsing and PageRanking (Chapter 22-23)"""
 
 from collections import defaultdict
 from utils import weighted_choice
diff --git a/notebook.py b/notebook.py
index b28e97230..507aec330 100644
--- a/notebook.py
+++ b/notebook.py
@@ -11,7 +11,7 @@
 from PIL import Image
 from matplotlib import lines
 
-from games import TicTacToe, alpha_beta_player, random_player, Fig52Extended, inf
+from games import TicTacToe, alpha_beta_player, random_player, Fig52Extended
 from learning import DataSet
 from logic import parse_definite_clause, standardize_variables, unify_mm, subst
 from search import GraphProblem, romania_map
@@ -642,7 +642,7 @@ def max_value(node, alpha, beta):
                 self.change_list.append(('h',))
                 self.change_list.append(('p',))
                 return game.utility(node, player)
-            v = -inf
+            v = -np.inf
             self.change_list.append(('a', node))
             self.change_list.append(('ab', node, v, beta))
             self.change_list.append(('h',))
@@ -671,7 +671,7 @@ def min_value(node, alpha, beta):
                 self.change_list.append(('h',))
                 self.change_list.append(('p',))
                 return game.utility(node, player)
-            v = inf
+            v = np.inf
             self.change_list.append(('a', node))
             self.change_list.append(('ab', node, alpha, v))
             self.change_list.append(('h',))
@@ -694,7 +694,7 @@ def min_value(node, alpha, beta):
             self.change_list.append(('h',))
             return v
 
-        return max_value(node, -inf, inf)
+        return max_value(node, -np.inf, np.inf)
 
     def stack_manager_gen(self):
         self.alpha_beta_search(0)
diff --git a/notebook4e.py b/notebook4e.py
index 8a5d92cd6..fa19b12d2 100644
--- a/notebook4e.py
+++ b/notebook4e.py
@@ -12,7 +12,7 @@
 from matplotlib import lines
 from matplotlib.colors import ListedColormap
 
-from games import TicTacToe, alpha_beta_player, random_player, Fig52Extended, inf
+from games import TicTacToe, alpha_beta_player, random_player, Fig52Extended
 from learning import DataSet
 from logic import parse_definite_clause, standardize_variables, unify_mm, subst
 from search import GraphProblem, romania_map
@@ -678,7 +678,7 @@ def max_value(node, alpha, beta):
                 self.change_list.append(('h',))
                 self.change_list.append(('p',))
                 return game.utility(node, player)
-            v = -inf
+            v = -np.inf
             self.change_list.append(('a', node))
             self.change_list.append(('ab', node, v, beta))
             self.change_list.append(('h',))
@@ -707,7 +707,7 @@ def min_value(node, alpha, beta):
                 self.change_list.append(('h',))
                 self.change_list.append(('p',))
                 return game.utility(node, player)
-            v = inf
+            v = np.inf
             self.change_list.append(('a', node))
             self.change_list.append(('ab', node, alpha, v))
             self.change_list.append(('h',))
@@ -730,7 +730,7 @@ def min_value(node, alpha, beta):
             self.change_list.append(('h',))
             return v
 
-        return max_value(node, -inf, inf)
+        return max_value(node, -np.inf, np.inf)
 
     def stack_manager_gen(self):
         self.alpha_beta_search(0)
diff --git a/perception4e.py b/perception4e.py
index a36461cf6..d5bc15718 100644
--- a/perception4e.py
+++ b/perception4e.py
@@ -1,4 +1,4 @@
-"""Perception. (Chapter 24)"""
+"""Perception (Chapter 24)"""
 
 import cv2
 import keras
@@ -9,7 +9,7 @@
 from keras.layers import Dense, Activation, Flatten, InputLayer, Conv2D, MaxPooling2D
 from keras.models import Sequential
 
-from utils4e import gaussian_kernel_2D, inf
+from utils4e import gaussian_kernel_2D
 
 
 # ____________________________________________________
@@ -86,8 +86,8 @@ def sum_squared_difference(pic1, pic2):
     pic1 = np.asarray(pic1)
     pic2 = np.asarray(pic2)
     assert pic1.shape == pic2.shape
-    min_ssd = inf
-    min_dxy = (inf, inf)
+    min_ssd = np.inf
+    min_dxy = (np.inf, np.inf)
 
     # consider picture shift from -30 to 30
     for Dx in range(-30, 31):
@@ -241,7 +241,7 @@ def min_cut(self, source, sink):
         max_flow = 0
 
         while self.bfs(source, sink, parent):
-            path_flow = inf
+            path_flow = np.inf
             # find the minimum flow of s-t path
             for s, t in parent:
                 path_flow = min(path_flow, self.flow[s][t])
diff --git a/planning.py b/planning.py
index 5d57c3f55..1e4a19209 100644
--- a/planning.py
+++ b/planning.py
@@ -1,17 +1,17 @@
-"""
-Planning (Chapters 10-11)
-"""
+"""Planning (Chapters 10-11)"""
 
 import copy
 import itertools
 from collections import deque, defaultdict
 from functools import reduce as _reduce
 
+import numpy as np
+
 import search
 from csp import sat_up, NaryCSP, Constraint, ac_search_solver, is_constraint
 from logic import FolKB, conjuncts, unify_mm, associate, SAT_plan, cdcl_satisfiable
 from search import Node
-from utils import Expr, expr, first, inf
+from utils import Expr, expr, first
 
 
 class PlanningProblem:
@@ -593,7 +593,7 @@ def h(self, state):
         try:
             return len(linearize(GraphPlan(relaxed_planning_problem).execute()))
         except:
-            return inf
+            return np.inf
 
 
 class BackwardPlan(search.Problem):
@@ -646,7 +646,7 @@ def h(self, subgoal):
         try:
             return len(linearize(GraphPlan(relaxed_planning_problem).execute()))
         except:
-            return inf
+            return np.inf
 
 
 def CSPlan(planning_problem, solution_length, CSP_solver=ac_search_solver, arc_heuristic=sat_up):
diff --git a/probability.py b/probability.py
index 9925079a2..e1e77d224 100644
--- a/probability.py
+++ b/probability.py
@@ -1,14 +1,10 @@
-"""Probability models. (Chapter 13-15)"""
+"""Probability models (Chapter 13-15)"""
 
-import random
 from collections import defaultdict
 from functools import reduce
 
-import numpy as np
-
 from agents import Agent
-from utils import (product, element_wise_product, matrix_multiplication, vector_add, scalar_vector_product,
-                   weighted_sample_with_replacement, isclose, probability, normalize, extend)
+from utils import *
 
 
 def DTAgentProgram(belief_state):
@@ -68,7 +64,7 @@ def normalize(self):
         Returns the normalized distribution.
         Raises a ZeroDivisionError if the sum of the values is 0."""
         total = sum(self.prob.values())
-        if not isclose(total, 1.0):
+        if not np.isclose(total, 1.0):
             for val in self.prob:
                 self.prob[val] /= total
         return self
diff --git a/probability4e.py b/probability4e.py
index cd1ff2022..d413a55ae 100644
--- a/probability4e.py
+++ b/probability4e.py
@@ -1,12 +1,13 @@
-"""Probability models."""
+"""Probability models (Chapter 12-13)"""
 
 import copy
 import random
 from collections import defaultdict
 from functools import reduce
-from math import sqrt, pi, exp
 
-from utils4e import product, isclose, probability, extend
+import numpy as np
+
+from utils4e import product, probability, extend
 
 
 # ______________________________________________________________________________
@@ -69,7 +70,7 @@ def normalize(self):
         Returns the normalized distribution.
         Raises a ZeroDivisionError if the sum of the values is 0."""
         total = sum(self.prob.values())
-        if not isclose(total, 1.0):
+        if not np.isclose(total, 1.0):
             for val in self.prob:
                 self.prob[val] /= total
         return self
@@ -385,7 +386,7 @@ def gaussian_probability(param, event, value):
     for k, v in event.items():
         # buffer varianle to calculate h1*a_h1 + h2*a_h2
         buff += param['a'][k] * v
-    res = 1 / (param['sigma'] * sqrt(2 * pi)) * exp(-0.5 * ((value - buff - param['b']) / param['sigma']) ** 2)
+    res = 1 / (param['sigma'] * np.sqrt(2 * np.pi)) * np.exp(-0.5 * ((value - buff - param['b']) / param['sigma']) ** 2)
     return res
 
 
@@ -403,7 +404,7 @@ def logistic_probability(param, event, value):
         # buffer variable to calculate (value-mu)/sigma
 
         buff *= (v - param['mu']) / param['sigma']
-    p = 1 - 1 / (1 + exp(-4 / sqrt(2 * pi) * buff))
+    p = 1 - 1 / (1 + np.exp(-4 / np.sqrt(2 * np.pi) * buff))
     return p if value else 1 - p
 
 
@@ -456,8 +457,7 @@ def continuous_p(self, value, c_event, d_event):
     ('Cost', 'Subsidy', 'Harvest',
      {True: {'sigma': 0.5, 'b': 1, 'a': {'Harvest': 0.5}},
       False: {'sigma': 0.6, 'b': 1, 'a': {'Harvest': 0.5}}}, 'c'),
-    ('Buys', '', 'Cost', {T: {'mu': 0.5, 'sigma': 0.5}, F: {'mu': 0.6, 'sigma': 0.6}}, 'd'),
-])
+    ('Buys', '', 'Cost', {T: {'mu': 0.5, 'sigma': 0.5}, F: {'mu': 0.6, 'sigma': 0.6}}, 'd')])
 
 
 # ______________________________________________________________________________
diff --git a/reinforcement_learning.py b/reinforcement_learning.py
index a640ac39a..4cb91af0f 100644
--- a/reinforcement_learning.py
+++ b/reinforcement_learning.py
@@ -1,4 +1,4 @@
-"""Reinforcement Learning. (Chapter 21)"""
+"""Reinforcement Learning (Chapter 21)"""
 
 import random
 from collections import defaultdict
diff --git a/reinforcement_learning4e.py b/reinforcement_learning4e.py
index fecfdaa32..eaaba3e5a 100644
--- a/reinforcement_learning4e.py
+++ b/reinforcement_learning4e.py
@@ -1,4 +1,4 @@
-"""Reinforcement Learning. (Chapter 21)"""
+"""Reinforcement Learning (Chapter 21)"""
 
 import random
 from collections import defaultdict
diff --git a/search.py b/search.py
index 999dc8f57..0104eb341 100644
--- a/search.py
+++ b/search.py
@@ -6,14 +6,10 @@
 functions.
 """
 
-import bisect
-import math
-import random
 import sys
 from collections import deque
 
-from utils import (is_in, argmax_random_tie, probability, weighted_sampler, memoize, print_table, open_data,
-                   PriorityQueue, name, distance, vector_add, inf)
+from utils import *
 
 
 class Problem:
@@ -331,7 +327,7 @@ def bidirectional_search(problem):
     gF, gB = {problem.initial: 0}, {problem.goal: 0}
     openF, openB = [problem.initial], [problem.goal]
     closedF, closedB = [], []
-    U = inf
+    U = np.inf
 
     def extend(U, open_dir, open_other, g_dir, g_other, closed_dir):
         """Extend search in given direction"""
@@ -357,7 +353,7 @@ def extend(U, open_dir, open_other, g_dir, g_other, closed_dir):
 
     def find_min(open_dir, g):
         """Finds minimum priority, g and f values in open_dir"""
-        m, m_f = inf, inf
+        m, m_f = np.inf, np.inf
         for n in open_dir:
             f = g[n] + problem.h(n)
             pr = max(f, 2 * g[n])
@@ -369,7 +365,7 @@ def find_min(open_dir, g):
     def find_key(pr_min, open_dir, g):
         """Finds key in open_dir with value equal to pr_min
         and minimum g value."""
-        m = inf
+        m = np.inf
         state = -1
         for n in open_dir:
             pr = max(g[n] + problem.h(n), 2 * g[n])
@@ -395,7 +391,7 @@ def find_key(pr_min, open_dir, g):
             # Extend backward
             U, openB, closedB, gB = extend(U, openB, openF, gB, gF, closedB)
 
-    return inf
+    return np.inf
 
 
 # ______________________________________________________________________________
@@ -605,7 +601,7 @@ def RBFS(problem, node, flimit):
             return node, 0  # (The second value is immaterial)
         successors = node.expand(problem)
         if len(successors) == 0:
-            return None, inf
+            return None, np.inf
         for s in successors:
             s.f = max(s.path_cost + h(s), node.f)
         while True:
@@ -617,14 +613,14 @@ def RBFS(problem, node, flimit):
             if len(successors) > 1:
                 alternative = successors[1].f
             else:
-                alternative = inf
+                alternative = np.inf
             result, best.f = RBFS(problem, best, min(flimit, alternative))
             if result is not None:
                 return result, best.f
 
     node = Node(problem.initial)
     node.f = h(node)
-    result, bestf = RBFS(problem, node, inf)
+    result, bestf = RBFS(problem, node, np.inf)
     return result
 
 
@@ -648,7 +644,7 @@ def hill_climbing(problem):
 
 def exp_schedule(k=20, lam=0.005, limit=100):
     """One possible schedule function for simulated annealing"""
-    return lambda t: (k * math.exp(-lam * t) if t < limit else 0)
+    return lambda t: (k * np.exp(-lam * t) if t < limit else 0)
 
 
 def simulated_annealing(problem, schedule=exp_schedule()):
@@ -664,7 +660,7 @@ def simulated_annealing(problem, schedule=exp_schedule()):
             return current.state
         next_choice = random.choice(neighbors)
         delta_e = problem.value(next_choice.state) - problem.value(current.state)
-        if delta_e > 0 or probability(math.exp(delta_e / T)):
+        if delta_e > 0 or probability(np.exp(delta_e / T)):
             current = next_choice
 
 
@@ -683,7 +679,7 @@ def simulated_annealing_full(problem, schedule=exp_schedule()):
             return current.state
         next_choice = random.choice(neighbors)
         delta_e = problem.value(next_choice.state) - problem.value(current.state)
-        if delta_e > 0 or probability(math.exp(delta_e / T)):
+        if delta_e > 0 or probability(np.exp(delta_e / T)):
             current = next_choice
 
 
@@ -1080,7 +1076,7 @@ def RandomGraph(nodes=list(range(10)), min_links=2, width=400, height=300,
 
                 def distance_to_node(n):
                     if n is node or g.get(node, n):
-                        return inf
+                        return np.inf
                     return distance(g.locations[n], here)
 
                 neighbor = min(nodes, key=distance_to_node)
@@ -1188,11 +1184,11 @@ def result(self, state, action):
         return action
 
     def path_cost(self, cost_so_far, A, action, B):
-        return cost_so_far + (self.graph.get(A, B) or inf)
+        return cost_so_far + (self.graph.get(A, B) or np.inf)
 
     def find_min_edge(self):
         """Find minimum value of edges."""
-        m = inf
+        m = np.inf
         for d in self.graph.graph_dict.values():
             local_min = min(d.values())
             m = min(m, local_min)
@@ -1208,7 +1204,7 @@ def h(self, node):
 
             return int(distance(locs[node.state], locs[self.goal]))
         else:
-            return inf
+            return np.inf
 
 
 class GraphProblemStochastic(GraphProblem):
@@ -1368,7 +1364,7 @@ def boggle_neighbors(n2, cache={}):
 
 def exact_sqrt(n2):
     """If n2 is a perfect square, return its square root, else raise error."""
-    n = int(math.sqrt(n2))
+    n = int(np.sqrt(n2))
     assert n * n == n2
     return n
 
diff --git a/tests/test_search.py b/tests/test_search.py
index 978894fa3..d37f8fa38 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -156,15 +156,13 @@ def test_recursive_best_first_search():
         romania_problem).solution() == ['Sibiu', 'Rimnicu', 'Pitesti', 'Bucharest']
     assert recursive_best_first_search(
         EightPuzzle((2, 4, 3, 1, 5, 6, 7, 8, 0))).solution() == [
-               'UP', 'LEFT', 'UP', 'LEFT', 'DOWN', 'RIGHT', 'RIGHT', 'DOWN'
-           ]
+               'UP', 'LEFT', 'UP', 'LEFT', 'DOWN', 'RIGHT', 'RIGHT', 'DOWN']
 
     def manhattan(node):
         state = node.state
         index_goal = {0: [2, 2], 1: [0, 0], 2: [0, 1], 3: [0, 2], 4: [1, 0], 5: [1, 1], 6: [1, 2], 7: [2, 0], 8: [2, 1]}
         index_state = {}
         index = [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2], [2, 0], [2, 1], [2, 2]]
-        x, y = 0, 0
 
         for i in range(len(state)):
             index_state[state[i]] = index[i]
@@ -260,12 +258,10 @@ def test_LRTAStarAgent():
 
 def test_genetic_algorithm():
     # Graph coloring
-    edges = {
-        'A': [0, 1],
-        'B': [0, 3],
-        'C': [1, 2],
-        'D': [2, 3]
-    }
+    edges = {'A': [0, 1],
+             'B': [0, 3],
+             'C': [1, 2],
+             'D': [2, 3]}
 
     def fitness(c):
         return sum(c[n1] != c[n2] for (n1, n2) in edges.values())
diff --git a/tests/test_text.py b/tests/test_text.py
index 0d8e3b6ab..3aaa007f6 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -1,9 +1,10 @@
 import random
 
+import numpy as np
 import pytest
 
 from text import *
-from utils import isclose, open_data
+from utils import open_data
 
 random.seed("aima-python")
 
@@ -31,9 +32,9 @@ def test_text_models():
                          (13, ('as', 'well', 'as'))]
 
     # Test isclose
-    assert isclose(P1['the'], 0.0611, rel_tol=0.001)
-    assert isclose(P2['of', 'the'], 0.0108, rel_tol=0.01)
-    assert isclose(P3['so', 'as', 'to'], 0.000323, rel_tol=0.001)
+    assert np.isclose(P1['the'], 0.0611, rtol=0.001)
+    assert np.isclose(P2['of', 'the'], 0.0108, rtol=0.01)
+    assert np.isclose(P3['so', 'as', 'to'], 0.000323, rtol=0.001)
 
     # Test cond_prob.get
     assert P2.cond_prob.get(('went',)) is None
diff --git a/tests/test_utils.py b/tests/test_utils.py
index e7a22b562..31b5848f0 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -116,10 +116,10 @@ def test_cross_entropy():
 
 def test_rms_error():
     assert rms_error([2, 2], [2, 2]) == 0
-    assert rms_error((0, 0), (0, 1)) == math.sqrt(0.5)
+    assert rms_error((0, 0), (0, 1)) == np.sqrt(0.5)
     assert rms_error((1, 0), (0, 1)) == 1
-    assert rms_error((0, 0), (0, -1)) == math.sqrt(0.5)
-    assert rms_error((0, 0.5), (0, -0.5)) == math.sqrt(0.5)
+    assert rms_error((0, 0), (0, -1)) == np.sqrt(0.5)
+    assert rms_error((0, 0.5), (0, -0.5)) == np.sqrt(0.5)
 
 
 def test_manhattan_distance():
diff --git a/text.py b/text.py
index 58918bb4d..11a5731f1 100644
--- a/text.py
+++ b/text.py
@@ -1,5 +1,5 @@
 """
-Statistical Language Processing tools. (Chapter 22)
+Statistical Language Processing tools (Chapter 22)
 
 We define Unigram and Ngram text models, use them to generate random text,
 and show the Viterbi algorithm for segmentation of letters into words.
@@ -7,15 +7,16 @@
 working on a tiny sample of Unix manual pages.
 """
 
-from utils import hashabledict
-from probabilistic_learning import CountingProbDist
-import search
-
-from math import log, exp
-from collections import defaultdict
 import heapq
-import re
 import os
+import re
+from collections import defaultdict
+
+import numpy as np
+
+import search
+from probabilistic_learning import CountingProbDist
+from utils import hashabledict
 
 
 class UnigramWordModel(CountingProbDist):
@@ -184,7 +185,7 @@ def query(self, query_text, n=10):
     def score(self, word, docid):
         """Compute a score for this word on the document with this docid."""
         # There are many options; here we take a very simple approach
-        return log(1 + self.index[word][docid]) / log(1 + self.documents[docid].nwords)
+        return np.log(1 + self.index[word][docid]) / np.log(1 + self.documents[docid].nwords)
 
     def total_score(self, words, docid):
         """Compute the sum of the scores of these words on the document with this docid."""
@@ -385,10 +386,10 @@ def score(self, code):
 
         # add small positive value to prevent computing log(0)
         # TODO: Modify the values to make score more accurate
-        logP = (sum(log(self.Pwords[word] + 1e-20) for word in words(text)) +
-                sum(log(self.P1[c] + 1e-5) for c in text) +
-                sum(log(self.P2[b] + 1e-10) for b in bigrams(text)))
-        return -exp(logP)
+        logP = (sum(np.log(self.Pwords[word] + 1e-20) for word in words(text)) +
+                sum(np.log(self.P1[c] + 1e-5) for c in text) +
+                sum(np.log(self.P2[b] + 1e-10) for b in bigrams(text)))
+        return -np.exp(logP)
 
 
 class PermutationDecoderProblem(search.Problem):
diff --git a/utils.py b/utils.py
index 04fbd303c..1d7f1e4f5 100644
--- a/utils.py
+++ b/utils.py
@@ -1,11 +1,10 @@
-"""Provides some utilities widely used by other modules."""
+"""Provides some utilities widely used by other modules"""
 
 import bisect
 import collections
 import collections.abc
 import functools
 import heapq
-import math
 import operator
 import os.path
 import random
@@ -14,11 +13,6 @@
 
 import numpy as np
 
-try:  # math.inf was added in Python 3.5
-    from math import inf
-except ImportError:  # Python 3.4
-    inf = float('inf')
-
 
 # ______________________________________________________________________________
 # Functions on Sequences and Iterables
@@ -236,15 +230,15 @@ def num_or_str(x):  # TODO: rename as `atom`
 
 
 def euclidean_distance(x, y):
-    return math.sqrt(sum((_x - _y) ** 2 for _x, _y in zip(x, y)))
+    return np.sqrt(sum((_x - _y) ** 2 for _x, _y in zip(x, y)))
 
 
 def cross_entropy_loss(x, y):
-    return (-1.0 / len(x)) * sum(x * math.log(y) + (1 - x) * math.log(1 - y) for x, y in zip(x, y))
+    return (-1.0 / len(x)) * sum(x * np.log(y) + (1 - x) * np.log(1 - y) for x, y in zip(x, y))
 
 
 def rms_error(x, y):
-    return math.sqrt(ms_error(x, y))
+    return np.sqrt(ms_error(x, y))
 
 
 def ms_error(x, y):
@@ -299,15 +293,15 @@ def sigmoid_derivative(value):
 
 def sigmoid(x):
     """Return activation value of x with sigmoid function."""
-    return 1 / (1 + math.exp(-x))
+    return 1 / (1 + np.exp(-x))
 
 
 def elu(x, alpha=0.01):
-    return x if x > 0 else alpha * (math.exp(x) - 1)
+    return x if x > 0 else alpha * (np.exp(x) - 1)
 
 
 def elu_derivative(value, alpha=0.01):
-    return 1 if value > 0 else alpha * math.exp(value)
+    return 1 if value > 0 else alpha * np.exp(value)
 
 
 def tanh(x):
@@ -341,7 +335,7 @@ def step(x):
 
 def gaussian(mean, st_dev, x):
     """Given the mean and standard deviation of a distribution, it returns the probability of x."""
-    return 1 / (math.sqrt(2 * math.pi) * st_dev) * math.e ** (-0.5 * (float(x - mean) / st_dev) ** 2)
+    return 1 / (np.sqrt(2 * np.pi) * st_dev) * np.e ** (-0.5 * (float(x - mean) / st_dev) ** 2)
 
 
 def linear_kernel(x, y=None):
@@ -366,13 +360,6 @@ def rbf_kernel(x, y=None, gamma=None):
                             np.sum(x * x, axis=1).reshape((-1, 1)) + np.sum(y * y, axis=1).reshape((1, -1))))
 
 
-try:  # math.isclose was added in Python 3.5
-    from math import isclose
-except ImportError:  # Python 3.4
-    def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
-        """Return true if numbers a and b are close to each other."""
-        return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
-
 # ______________________________________________________________________________
 # Grid Functions
 
@@ -397,7 +384,7 @@ def distance(a, b):
     """The distance between two (x, y) points."""
     xA, yA = a
     xB, yB = b
-    return math.hypot((xA - xB), (yA - yB))
+    return np.hypot((xA - xB), (yA - yB))
 
 
 def distance_squared(a, b):
diff --git a/utils4e.py b/utils4e.py
index 3aec273f8..6ed4a7f79 100644
--- a/utils4e.py
+++ b/utils4e.py
@@ -1,11 +1,10 @@
-"""Provides some utilities widely used by other modules."""
+"""Provides some utilities widely used by other modules"""
 
 import bisect
 import collections
 import collections.abc
 import functools
 import heapq
-import math
 import os.path
 import random
 from itertools import chain, combinations
@@ -13,11 +12,6 @@
 
 import numpy as np
 
-try:  # math.inf was added in Python 3.5
-    from math import inf
-except ImportError:  # Python 3.4
-    inf = float('inf')
-
 
 # part1. General data structures and their functions
 # ______________________________________________________________________________
@@ -318,11 +312,11 @@ def num_or_str(x):  # TODO: rename as `atom`
 
 
 def euclidean_distance(x, y):
-    return math.sqrt(sum((_x - _y) ** 2 for _x, _y in zip(x, y)))
+    return np.sqrt(sum((_x - _y) ** 2 for _x, _y in zip(x, y)))
 
 
 def rms_error(x, y):
-    return math.sqrt(ms_error(x, y))
+    return np.sqrt(ms_error(x, y))
 
 
 def ms_error(x, y):
@@ -350,7 +344,7 @@ def hamming_distance(x, y):
 
 def cross_entropy_loss(x, y):
     """Example of cross entropy loss. x and y are 1D iterable objects."""
-    return (-1.0 / len(x)) * sum(x * math.log(y) + (1 - x) * math.log(1 - y) for x, y in zip(x, y))
+    return (-1.0 / len(x)) * sum(x * np.log(y) + (1 - x) * np.log(1 - y) for x, y in zip(x, y))
 
 
 def mse_loss(x, y):
@@ -419,7 +413,7 @@ def clip(x, lowest, highest):
 
 def softmax1D(x):
     """Return the softmax vector of input vector x."""
-    exps = [math.exp(_x) for _x in x]
+    exps = [np.exp(_x) for _x in x]
     sum_exps = sum(exps)
     return [exp / sum_exps for exp in exps]
 
@@ -431,7 +425,7 @@ def f(self, x):
             return 1
         if x <= -100:
             return 0
-        return 1 / (1 + math.exp(-x))
+        return 1 / (1 + np.exp(-x))
 
     def derivative(self, value):
         return value * (1 - value)
@@ -449,10 +443,10 @@ def derivative(self, value):
 class elu(Activation):
 
     def f(self, x, alpha=0.01):
-        return x if x > 0 else alpha * (math.exp(x) - 1)
+        return x if x > 0 else alpha * (np.exp(x) - 1)
 
     def derivative(self, value, alpha=0.01):
-        return 1 if value > 0 else alpha * math.exp(value)
+        return 1 if value > 0 else alpha * np.exp(value)
 
 
 class tanh(Activation):
@@ -480,7 +474,7 @@ def step(x):
 
 def gaussian(mean, st_dev, x):
     """Given the mean and standard deviation of a distribution, it returns the probability of x."""
-    return 1 / (math.sqrt(2 * math.pi) * st_dev) * math.exp(-0.5 * (float(x - mean) / st_dev) ** 2)
+    return 1 / (np.sqrt(2 * np.pi) * st_dev) * np.exp(-0.5 * (float(x - mean) / st_dev) ** 2)
 
 
 def gaussian_2D(means, sigma, point):
@@ -489,7 +483,7 @@ def gaussian_2D(means, sigma, point):
     assert det != 0
     x_u = vector_add(point, scalar_vector_product(-1, means))
     buff = matrix_multiplication(matrix_multiplication([x_u], inverse), np.array(x_u).T)
-    return 1 / (math.sqrt(det) * 2 * math.pi) * math.exp(-0.5 * buff[0][0])
+    return 1 / (np.sqrt(det) * 2 * np.pi) * np.exp(-0.5 * buff[0][0])
 
 
 def linear_kernel(x, y=None):
@@ -514,13 +508,6 @@ def rbf_kernel(x, y=None, gamma=None):
                             np.sum(x * x, axis=1).reshape((-1, 1)) + np.sum(y * y, axis=1).reshape((1, -1))))
 
 
-try:  # math.isclose was added in Python 3.5
-    from math import isclose
-except ImportError:  # Python 3.4
-    def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
-        """Return true if numbers a and b are close to each other."""
-        return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
-
 # part4. Self defined data structures
 # ______________________________________________________________________________
 # Grid Functions
@@ -546,7 +533,7 @@ def distance(a, b):
     """The distance between two (x, y) points."""
     xA, yA = a
     xB, yB = b
-    return math.hypot((xA - xB), (yA - yB))
+    return np.hypot((xA - xB), (yA - yB))
 
 
 def distance_squared(a, b):
@@ -907,7 +894,7 @@ def __init__(self, parent=None, state=None, U=0, N=0):
 
 
 def ucb(n, C=1.4):
-    return inf if n.N == 0 else n.U / n.N + C * math.sqrt(math.log(n.parent.N) / n.N)
+    return np.inf if n.N == 0 else n.U / n.N + C * np.sqrt(np.log(n.parent.N) / n.N)
 
 
 # ______________________________________________________________________________