convert to rust

tinmarr · Jan 18, 2023 · b8c4cd5 · b8c4cd5
1 parent f205e95
commit b8c4cd5
Show file tree

Hide file tree

Showing 9 changed files with 821 additions and 105 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "licenser.license": "GPLv3",
+    "licenser.projectName": "word_unscrambler",
+    "licenser.author": "Martin Chaperot"
+}
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "word_unscrambler"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/LICENSE b/LICENSE
diff --git a/README.md b/README.md
@@ -1,29 +1,40 @@
 # Word-Unscrambler
-This python program outputs a list of the possible options a scrambled word could be.
+
+This Rust program outputs a list of the possible options a scrambled word could be.
+
+_Looking for the Python version? It's been archived to the
+[python-version](https://github.com/tinmarr/Word-Unscrambler/tree/python-version) branch._
 
 ![Demo Gif](ezgif.com-gif-maker.gif)
 
-## About 
+## About
+
 This program was created by [Martin Chaperot-Merino](https://github.com/tinmarr)
 
-This word unscrambler can clean unstructured text before employing a named entities recognition (NER) algorithm. For example, the word unscrambler function can be applied to every word in a text file before looking these words up in a gazetteer (a list of entities such as cities, organizations, days of the week, etc.)
+This word unscrambler can clean unstructured text before employing a named entities recognition (NER) algorithm. For
+example, the word unscrambler function can be applied to every word in a text file before looking these words up in a
+gazetteer (a list of entities such as cities, organizations, days of the week, etc.)
 
 # How to use
+
 1. Open the IDE: [https://replit.com/@Tinmarr/Word-Unscrambler?v=1](https://replit.com/@Tinmarr/Word-Unscrambler?v=1)
-2. Wait for the Prompt <br />
-![The code asks to enter a scrambled word](step1.png)
-3. Enter a scrambled word <br />
-![The entered word is lleho](step2.png)
-4. Hit enter <br />
-![The code return hello and asks if you want to restart](step3.png)
+2. Wait for the Prompt <br /> ![The code asks to enter a scrambled word](step1.png)
+3. Enter a scrambled word <br /> ![The entered word is lleho](step2.png)
+4. Hit enter <br /> ![The code returns hello and asks if you want to restart](step3.png)
 
 # How it works
-It takes words from a text file and uses a lookup function to find words with the same letters (where the order of words does not matter).
+
+It takes words from a text file and uses a lookup function to find words with the same letters (where the order of
+the letters does not matter).
 
 ## The key to its speed
-It converts all the words into integers (which is based on the letters) and groups words with the same integer in a dictionary. Then it converts the typed word into an integer and looks up that integer in the dictionary.
 
-A first function Word2Vect converts a word into a 26 dimensions vector. Each dimension represents the number of occurrences of a letter ('a', 'b', 'c'...). 
+It converts every word into an integer (derived from the word's letters) and groups words with the same integer in a
+dictionary. Then it converts the typed word into an integer and looks up that integer in the dictionary.
+
+A first function, Word2Vect, converts a word into a 26-dimensional vector. Each dimension holds the number of
+occurrences of a letter ('a', 'b', 'c'...).
+
 ```
 def Word2Vect(word):
     l = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
@@ -36,7 +47,10 @@ def Word2Vect(word):
             v[ind] += 1
     return v
 ```
-Then a second function Vect2Int converts a 26 dimensions vector into an integer. Each dimension is reduced to a 4 bits. All bits of the Integer are used to code the vector.
+
+Then a second function, Vect2Int, converts a 26-dimensional vector into an integer. Each dimension is reduced to 4 bits.
+All bits of the integer are used to encode the vector.
+
 ```
 def Vect2Int(vect):
     pv = 0
@@ -47,4 +61,5 @@ def Vect2Int(vect):
         pv += 4
     return f
 ```
+
 Using an integer as lookup value in a dictionary makes it run really fast!
diff --git a/DL.txt → assets/DL.txt b/DL.txt → assets/DL.txt
diff --git a/main.py b/main.py
diff --git a/src/main.rs b/src/main.rs
@@ -0,0 +1,98 @@
+// Copyright (C) 2023 Martin Chaperot
+//
+// This file is part of word_unscrambler.
+//
+// word_unscrambler is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// word_unscrambler is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with word_unscrambler.  If not, see <http://www.gnu.org/licenses/>.
+use std::collections::BTreeMap;
+use std::fs;
+use std::io::{self, Write};
+
+const LETTERS: [char; 26] = [
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
+    't', 'u', 'v', 'w', 'x', 'y', 'z',
+];
+
+/// Loads the word list from `assets/DL.txt` and groups the words by letter signature.
+///
+/// Every line is trimmed and lowercased; blank lines are skipped. Each remaining word
+/// is stored under the key returned by `word_2_int`, so words that share a key end up
+/// in the same `Vec`, in file order.
+///
+/// # Panics
+///
+/// Panics if `assets/DL.txt` (relative to the current working directory) cannot be read.
+fn load_dictionary() -> BTreeMap<u128, Vec<String>> {
+    let data = fs::read_to_string("assets/DL.txt").expect("Unable to read dictionary file");
+    let mut map: BTreeMap<u128, Vec<String>> = BTreeMap::new();
+    for line in data.lines() {
+        // Trim so stray surrounding whitespace in the word list is not stored or printed,
+        // matching how `main` trims the user's input.
+        let word = line.trim().to_lowercase();
+        if word.is_empty() {
+            continue;
+        }
+        // The entry API finds or creates the bucket with a single map lookup.
+        map.entry(word_2_int(&word)).or_default().push(word);
+    }
+    map
+}
+
+/// Encodes the letter counts of `word` as a single `u128` key.
+///
+/// Each letter in `LETTERS` owns a 4-bit field (`'a'` in bits 0-3, `'b'` in bits 4-7,
+/// and so on) that is incremented once per occurrence, so two words made of the same
+/// letters get the same key regardless of letter order. Characters that are not in
+/// `LETTERS` (uppercase, digits, punctuation, ...) are ignored.
+///
+/// A letter occurring 16 or more times carries into the next letter's field, so the
+/// key is only exact for words with at most 15 of any one letter.
+fn word_2_int(word: &str) -> u128 {
+    word.chars()
+        // `LETTERS` is sorted, so a successful search yields the alphabet index (0..=25).
+        .filter_map(|letter| LETTERS.binary_search(&letter).ok())
+        // 26 fields * 4 bits = 104 bits, so the shift always stays inside a u128.
+        .map(|index| 1u128 << (4 * index))
+        .sum()
+}
+
+/// Interactive entry point: loads the dictionary once, then repeatedly prompts for a
+/// scrambled word and prints every dictionary word that shares its letter key.
+///
+/// The loop ends when the user answers `n` to the "Try again?" prompt or when standard
+/// input reaches end of file.
+///
+/// # Panics
+///
+/// Panics if the dictionary cannot be loaded, if stdout cannot be flushed, or if
+/// reading from stdin fails.
+fn main() {
+    let map = load_dictionary();
+    loop {
+        let mut word = String::new();
+
+        print!("Enter a scrambled word: ");
+        io::stdout().flush().unwrap();
+
+        // `read_line` returns Ok(0) at end of input (Ctrl-D, closed pipe). Without this
+        // check the loop would spin forever, printing prompts against an exhausted stdin.
+        let bytes_read = io::stdin()
+            .read_line(&mut word)
+            .expect("Failed to read line");
+        if bytes_read == 0 {
+            println!();
+            break;
+        }
+
+        let word = word.trim().to_lowercase();
+        match map.get(&word_2_int(&word)) {
+            Some(vector) => {
+                println!("{:?}", vector);
+            }
+            None => {
+                println!("No match found");
+            }
+        }
+
+        print!("Try again? [Y/n]: ");
+        io::stdout().flush().unwrap();
+
+        let mut again = String::new();
+
+        let bytes_read = io::stdin()
+            .read_line(&mut again)
+            .expect("Failed to read line");
+
+        // Anything other than "n" (case-insensitive) means "yes"; EOF also stops the loop.
+        if bytes_read == 0 || again.trim().to_lowercase() == "n" {
+            break;
+        }
+    }
+}