Skip to content

Commit

Permalink
Merge pull request #398 from EugeneMMF/main
Browse files Browse the repository at this point in the history
sudoku solver script.
  • Loading branch information
larymak authored Sep 9, 2024
2 parents f1b1de7 + 2609693 commit 7d8a459
Show file tree
Hide file tree
Showing 22 changed files with 1,321 additions and 0 deletions.
28 changes: 28 additions & 0 deletions MachineLearning Projects/sudoku_solver/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Sudoku Solver

* This app was built to allow users to solve their sudokus using a computer.
* There is a Flask based webserver `web_interface.py` which when run gives a web interface to upload an image of a sudoku to be solved. The response is a solved sudoku.
* There is a file `full_stack_http.py` which needs to be run alongside the webserver for the full app to run. This is in charge of opening multiple process channels to process the images that are sent to the webserver.
* The app relies on Pytesseract to identify the characters in the sudoku image.

# Operation

* The image is first stripped of color.
* It is then cropped to select the section containing the sudoku. NOTE: The crop region is hardcoded; it is not detected from the uploaded image.
* The resulting image is passed to `Pytesseract` to extract the characters and their position.
* Using the characters and their position the grid size is determined.
* The appropriate grid is created and filled with the discovered characters.
* The grid is then solved with an algorithm contained in `sudoku.py`.
* A snapshot of the solved grid is then created and sent back to the user.
* The resultant snapshot is rendered on the browser page.

# To Run

* First install `Pytesseract`
* Install `Flask`
* Then run the `full_stack_http.py` file.
* Then run the `web_interface.py` file.
* Go to the browser and load the URL provided in the previous step.
* Click the upload button.
* Select your image and submit the form.
* Wait for the result to be loaded.
Binary file not shown.
Binary file not shown.
Binary file not shown.
4 changes: 4 additions & 0 deletions MachineLearning Projects/sudoku_solver/config.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
UPLOAD_FOLDER="uploads"
SECRET_KEY="secret"
SOLVER_IP="localhost"
SOLVER_PORT=3535
Binary file added MachineLearning Projects/sudoku_solver/f1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added MachineLearning Projects/sudoku_solver/f2.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
136 changes: 136 additions & 0 deletions MachineLearning Projects/sudoku_solver/full_stack_http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import multiprocessing.util
import socket
from perspective import resolve_image
from sudoku import Grid
import argparse
import multiprocessing
import os

# Base file name for the rendered solution image. Manager.run uses it as-is;
# process_handle_transaction inserts its process number before the extension.
temp_result_file = "resultfile.png"
# Base file name under which the bytes received from a client are stored
# before being handed to resolve_image (same per-process naming as above).
temp_input_file = "tempfile.jpg"

def process_handle_transaction(proc_num:int, sock:socket.socket):
    """Serve exactly one client connection inside a worker process.

    Accepts a connection on the shared listening socket and reads the
    uploaded image until the client stays silent for one second (the
    receive timeout marks the end of the upload). The image is then solved
    and the rendered solution is sent back.

    Args:
        proc_num: Worker number. Used as a log prefix and inserted into the
            temp file names so concurrent workers do not share files.
        sock: Listening socket (already bound and listening) to accept on.
    """
    print(f"[{proc_num}] Waiting for client...")
    sock2, address2 = sock.accept()
    print(f"[{proc_num}] Connected to client with address: {address2}")
    # Per-worker file names of the form "<stem><proc_num>.<extension>".
    in_stem, _, in_ext = temp_input_file.rpartition('.')
    my_temp_input_file = f"{in_stem}{proc_num}.{in_ext}"
    out_stem, _, out_ext = temp_result_file.rpartition('.')
    my_temp_result_file = f"{out_stem}{proc_num}.{out_ext}"
    try:
        sock2.settimeout(1)
        # Collect chunks and join once instead of growing a bytes object
        # one byte at a time.
        chunks = []
        while True:
            try:
                rec = sock2.recv(4096)
            except socket.timeout:
                # One second of silence: the upload is considered complete.
                break
            if not rec:
                # Peer closed the connection; there is nobody to answer.
                print(f"[{proc_num}] Lost connection")
                return
            chunks.append(rec)
        with open(my_temp_input_file, "wb") as f:
            f.write(b"".join(chunks))
        grid_size, points = resolve_image(my_temp_input_file)
        grid = Grid(rows=grid_size[0], columns=grid_size[1])
        # points yields (value, location) pairs; preassign wants location -> value.
        assignment_values = {}
        for val, loc in points:
            assignment_values[loc] = val
        grid.preassign(assignment_values)
        grid.solve()
        grid.save_grid_image(path=my_temp_result_file, size=(400,400))
        with open(my_temp_result_file, "rb") as f:
            # sendall: a plain send() may transmit only part of the image.
            sock2.sendall(f.read())
        print(f"[{proc_num}] Finished!")
    finally:
        sock2.close()
        # Remove temp files even when solving failed part-way through.
        for leftover in (my_temp_input_file, my_temp_result_file):
            try:
                os.remove(leftover)
            except FileNotFoundError:
                pass

class Manager():
    """TCP server that receives sudoku images and replies with solved grids.

    The server can run either as a single blocking loop (`run`) or with a
    pool of worker processes that each serve one connection
    (`run_multiprocessing`).
    """

    def __init__(self, address:tuple[str,int]):
        """Create the (not yet bound) listening socket.

        Args:
            address: (host, port) pair the server binds to when started.
        """
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.address = address

    def wait_for_connect(self):
        """Block until a client connects and store its socket and address."""
        print("Waiting for client...")
        self.sock2, self.address2 = self.sock.accept()
        print(f"Connected to client with address: {self.address2}")
        # The one-second receive timeout is what marks the end of an upload.
        self.sock2.settimeout(1)

    def _serve_connected_client(self):
        """Receive one image on self.sock2, solve it and send the result back."""
        chunks = []
        while True:
            try:
                rec = self.sock2.recv(4096)
            except socket.timeout:
                # One second of silence: the upload is considered complete.
                break
            if not rec:
                # Peer closed the connection; there is nobody to answer.
                print("Lost connection")
                return
            chunks.append(rec)
        try:
            with open(temp_input_file, "wb") as f:
                f.write(b"".join(chunks))
            grid_size, points = resolve_image(temp_input_file)
            grid = Grid(rows=grid_size[0], columns=grid_size[1])
            # points yields (value, location) pairs; preassign wants location -> value.
            assignment_values = {}
            for val, loc in points:
                assignment_values[loc] = val
            grid.preassign(assignment_values)
            grid.solve()
            grid.save_grid_image(path=temp_result_file, size=(400,400))
            with open(temp_result_file, "rb") as f:
                # sendall: a plain send() may transmit only part of the image.
                self.sock2.sendall(f.read())
        finally:
            # Remove temp files even when solving failed part-way through.
            for leftover in (temp_input_file, temp_result_file):
                try:
                    os.remove(leftover)
                except FileNotFoundError:
                    pass

    def run(self):
        """Serve clients one at a time in the current process, forever."""
        self.sock.bind(self.address)
        self.sock.listen()
        print(f"Listening from address: {self.address}")
        try:
            while True:
                self.wait_for_connect()
                try:
                    self._serve_connected_client()
                finally:
                    # Always release the client socket, including on the
                    # "Lost connection" path.
                    self.sock2.close()
        finally:
            self.sock.close()

    @staticmethod
    def _next_free_proc_num(in_use, start, limit):
        """Return the first number >= start (wrapping at limit) not in in_use.

        The caller must guarantee that in_use holds fewer than limit keys.
        """
        candidate = start % limit
        while candidate in in_use:
            candidate = (candidate + 1) % limit
        return candidate

    def run_multiprocessing(self, max_clients:int=8):
        """Serve clients with up to max_clients worker processes, forever.

        Each worker runs process_handle_transaction, which accepts and serves
        a single connection and then exits; finished workers are replaced.

        Args:
            max_clients: Maximum number of concurrently running workers.

        Raises:
            ValueError: If max_clients is smaller than 1.
        """
        if max_clients < 1:
            raise ValueError("max_clients must be at least 1")
        self.sock.bind(self.address)
        self.sock.listen()
        print(f"Listening from address: {self.address}")
        processes:dict[int,multiprocessing.Process]= {}
        proc_num = 0
        # Worker numbers are recycled from a pool twice the worker limit so
        # temp file names stay bounded.
        id_space = max_clients*2
        try:
            while True:
                # Reap workers that have finished their single transaction.
                finished = [n for n, p in processes.items() if not p.is_alive()]
                for proc_n in finished:
                    processes.pop(proc_n).join()
                if len(processes) < max_clients:
                    # Never hand out a number that a live worker still owns:
                    # it would overwrite the bookkeeping entry and make two
                    # workers share temp files.
                    proc_num = self._next_free_proc_num(processes, proc_num, id_space)
                    proc = multiprocessing.Process(target=process_handle_transaction, args=(proc_num, self.sock))
                    proc.start()
                    processes[proc_num] = proc
                    proc_num = (proc_num + 1) % id_space
                else:
                    # At capacity: wait briefly instead of spinning at 100% CPU.
                    next(iter(processes.values())).join(timeout=0.05)
        finally:
            for proc in processes.values():
                proc.kill()
            for proc in processes.values():
                proc.join()
            self.sock.close()

if __name__ == "__main__":
    # Command-line entry point: start the solver backend with one worker
    # slot per CPU core on the requested host and port.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--port",
        type=int,
        default=3535,
        help="The port to host the server.",
    )
    cli.add_argument(
        "--host",
        type=str,
        default="localhost",
        help="The host or ip-address to host the server.",
    )
    options = cli.parse_args()
    manager = Manager((options.host, options.port))
    manager.run_multiprocessing(max_clients=multiprocessing.cpu_count())
141 changes: 141 additions & 0 deletions MachineLearning Projects/sudoku_solver/image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import torch
from torch.utils.data import Dataset, DataLoader
import PIL.Image as Image
import pandas as pd
from tqdm import tqdm
import numpy as np


class SudokuDataset(Dataset):
    """Dataset of grayscale sudoku images with normalised grid coordinates.

    Rows are read from a CSV file that has a 'filepath' column. Every column
    after the first one of a row is taken as that image's grid-location
    values. Rows whose image file does not exist are dropped.
    """

    def __init__(self, grid_locations_file:str, input_shape:tuple[int, int]) -> None:
        """Load the CSV index and keep only rows whose image can be found.

        Args:
            grid_locations_file: Path of the CSV file (first row is a header).
            input_shape: Size passed to PIL's resize for every image.
        """
        super().__init__()
        self.input_shape = input_shape
        self.all_data = pd.read_csv(grid_locations_file, header=0)
        self.image_filenames = []
        self.grid_locations = []
        filenames = self.all_data['filepath'].to_numpy()
        for file, row in zip(filenames, self.all_data.values):
            try:
                # Open only to check that the file exists; close it right
                # away so file handles are not leaked.
                with Image.open(file):
                    pass
            except FileNotFoundError:
                print(f"{file} not found.")
                continue
            self.image_filenames.append(file)
            self.grid_locations.append(list(row[1:]))

    def __len__(self) -> int:
        """Return the number of usable samples."""
        return len(self.image_filenames)

    @staticmethod
    def _normalize_location(location, size):
        """Return a new list with the values scaled by the image size.

        Values at even positions are divided by size[0] and values at odd
        positions by size[1]. The input list is left untouched.
        """
        return [value / size[i % 2] for i, value in enumerate(location)]

    def __getitem__(self, index) -> dict[str, torch.Tensor]:
        """Return the sample at index as {"image": ..., "grid": ...}.

        "image" is the resized grayscale image scaled to [0, 1] with a leading
        channel axis of length 1; "grid" holds the location values divided by
        the original image size.
        """
        with Image.open(self.image_filenames[index]) as source:
            size = source.size
            image = source.convert("L").resize(self.input_shape)
        pixels = np.array(image)
        pixels = pixels.reshape((1,*pixels.shape))
        # Normalise into a fresh list: dividing the stored list in place
        # would shrink the labels again on every repeated access.
        location = self._normalize_location(self.grid_locations[index], size)
        return {
            "image": torch.tensor(pixels, dtype=torch.float32)/255.,
            "grid": torch.tensor(location, dtype=torch.float32)
        }

class Model(torch.nn.Module):
    """Small CNN that regresses 8 sigmoid-activated values from a 1-channel image.

    The network is one 3x3 convolution followed by `number_of_layers - 1`
    stages of (3x3 convolution, LeakyReLU, 2x2 max-pool, batch-norm), then a
    flatten and a single linear layer with a sigmoid.
    """

    def __init__(self, input_shape:tuple[int,int], number_of_layers:int, dims:int, *args, **kwargs) -> None:
        """Build the layers.

        Args:
            input_shape: Spatial size of the input images.
            number_of_layers: Total number of convolution layers.
            dims: Number of channels used by every convolution.
            *args, **kwargs: Forwarded to torch.nn.Module.
        """
        super().__init__(*args, **kwargs)
        self.input_shape = input_shape
        stages = [torch.nn.Conv2d(1, dims, (3,3), padding='same')]
        for _ in range(number_of_layers-1):
            stages.append(torch.nn.Conv2d(dims, dims, (3,3), padding='same'))
            stages.append(torch.nn.LeakyReLU(negative_slope=0.01))
            stages.append(torch.nn.MaxPool2d((2,2)))
            stages.append(torch.nn.BatchNorm2d(dims))
        self.flatten = torch.nn.Flatten()
        # 'same' convolutions keep the spatial size and every 2x2 max-pool
        # halves it (rounding down), so the flattened feature count follows
        # from the constructor arguments. For (300, 300), 4 layers and 3 dims
        # this is 3 * 37 * 37 = 4107, the value that used to be hard-coded.
        downscale = 2 ** max(number_of_layers - 1, 0)
        flat_features = dims * (input_shape[0] // downscale) * (input_shape[1] // downscale)
        self.conv_layers = torch.nn.ModuleList(stages)
        self.location = torch.nn.ModuleList([
            torch.nn.Linear(flat_features, 8),
            torch.nn.Sigmoid()
        ])

    def forward(self, x:torch.Tensor) -> torch.Tensor:
        """Run the network on x and return the 8 predicted values per sample."""
        for layer in self.conv_layers:
            x = layer(x)
        location = self.flatten(x)
        for layer in self.location:
            location = layer(location)
        return location

def create_model(input_shape:tuple[int,int], number_of_layers:int, dims:int):
    """Build a Model and Xavier-initialise each of its multi-dimensional parameters."""
    net = Model(input_shape, number_of_layers, dims)
    # Parameters with a single dimension are left at their default values.
    weight_tensors = (param for param in net.parameters() if param.dim() > 1)
    for tensor in weight_tensors:
        torch.nn.init.xavier_uniform_(tensor)
    return net

def get_dataset(filename:str, input_shape:tuple[int,int], batch_size:int) -> DataLoader:
    """Return a shuffling DataLoader over the SudokuDataset described by filename."""
    return DataLoader(
        SudokuDataset(filename, input_shape),
        batch_size,
        shuffle=True,
    )

def train(epochs:int, config:dict, model:None|Model = None) -> Model:
    """Train a model on the dataset described by config and return it.

    Args:
        epochs: Number of passes over the dataset.
        config: Needs the keys 'input_shape', 'number_of_layers', 'dims',
            'lr', 'filename' and 'batch_size'.
        model: Model to continue training; a new one is created when omitted.

    Returns:
        The trained model. This may be a different object from the one passed
        in, because the model is re-created whenever a batch loss lands
        within 0.05 of 0.5.

    Note:
        The model is written to "model.pt" only when training is interrupted
        with Ctrl-C; after a normal run the caller has to save the result.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if model is None:
        print("========== Using new model =========")
        model = create_model(config['input_shape'], config['number_of_layers'], config['dims'])
    # Also moves a caller-supplied model, which may still live on the CPU.
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
    loss = torch.nn.MSELoss().to(device)
    dataset = get_dataset(config['filename'], config['input_shape'], config['batch_size'])
    try:
        for epoch in range(1, epochs+1):
            batch_iterator = tqdm(dataset, f"Epoch {epoch}/{epochs}:")
            for batch in batch_iterator:
                x = batch['image'].to(device)
                y_true = batch['grid'].to(device)
                # Clear the previous batch's gradients; otherwise they are
                # summed into this batch's update.
                optimizer.zero_grad()
                y_pred = model(x)
                error = loss(y_pred, y_true)
                batch_iterator.set_postfix({"loss":f"Loss: {error.item():6.6f}"})
                error.backward()
                optimizer.step()
                if abs(error.item()-0.5) < 0.05:
                    # Restart from a fresh initialisation. The optimizer has
                    # to be rebuilt too, or it would keep updating the
                    # parameters of the discarded model.
                    model = create_model(config['input_shape'], config['number_of_layers'], config['dims']).to(device)
                    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
                    print("New model created")
    except KeyboardInterrupt:
        torch.save(model, "model.pt")
    return model

def test(config:dict, model_filename:str):
    """Evaluate a saved model on the dataset described by config.

    Args:
        config: Needs the keys 'filename', 'input_shape' and 'batch_size'.
        model_filename: Path of a model written with torch.save. An already
            loaded torch.nn.Module is accepted as well and used directly.

    Returns:
        The mean squared error averaged over all samples, or NaN when the
        dataset is empty.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if isinstance(model_filename, torch.nn.Module):
        model = model_filename
    else:
        # NOTE(review): torch.load unpickles the file, so only load
        # checkpoints from a trusted source. Recent PyTorch releases may
        # need weights_only=False to load a whole pickled model - confirm
        # against the installed version.
        model = torch.load(model_filename)
    model = model.to(device)
    model.eval()
    loss = torch.nn.MSELoss().to(device)
    dataset = get_dataset(config['filename'], config['input_shape'], config['batch_size'])
    total_error = 0.0
    sample_count = 0
    with torch.no_grad():
        for batch in dataset:
            x = batch['image'].to(device)
            y_true = batch['grid'].to(device)
            error = loss(model(x), y_true)
            # Weight by batch size so a short final batch does not skew the mean.
            total_error += error.item() * len(x)
            sample_count += len(x)
    mean_error = total_error / sample_count if sample_count else float("nan")
    print(f"Mean test loss: {mean_error:6.6f}")
    return mean_error


if __name__ == '__main__':
    # Settings shared by training and evaluation.
    config = {
        "input_shape": (300,300),
        "filename": "archive/outlines_sorted.csv",
        "number_of_layers": 4,
        "dims": 3,
        "batch_size": 8,
        "lr": 1e-5
    }
    # To (re)train instead of only evaluating, enable the next line:
    # model = train(50, config)
    # test() takes the checkpoint path as its second argument; handing it an
    # already loaded model only caused the file to be read twice.
    test(config, "model.pt")
Binary file added MachineLearning Projects/sudoku_solver/model.pt
Binary file not shown.
Loading

0 comments on commit 7d8a459

Please sign in to comment.