Update code in PORT/src/cpp/mod.rs, PORT/src/rust/config.rs, PORT/src/rust/token/keywords.rs, PORT/src/rust/token/mod.rs, PORT/src/rust/token/token_value.rs, PORT/src/rust/panic_hook.rs, PORT/src/python/mod.rs, PORT/src/cpp/src/file_stream.cpp, and PORT/src/main.rs
kneorain · Apr 2, 2024 · ca439d4 · ca439d4
1 parent 30a8964
commit ca439d4
Show file tree

Hide file tree

Showing 16 changed files with 323 additions and 337 deletions.
diff --git a/PORT/src/cpp/mod.rs b/PORT/src/cpp/mod.rs
@@ -1,4 +1,3 @@
-#[no_mangle]
 mod private;
 pub mod shared;
 

diff --git a/PORT/src/cpp/src/file_stream.cpp b/PORT/src/cpp/src/file_stream.cpp
@@ -146,7 +146,7 @@ static inline std::string rl(char*& data, int line) {
     return (line == 0) ? std::string(line_start, cursor - line_start) : std::string();
 }
 
-static std::_Iter_diff_t<char *> tl(char const *fname) {
+static long long tl(char const *fname) {
     #ifdef __unix__ // tell kernel the access pattern.
         posix_fadvise(fd, 0, 0, 1);  // FDADVICE_SEQUENTIAL
     #endif
@@ -199,7 +199,7 @@ static std::_Iter_diff_t<char *> tl(char const *fname) {
     #endif
 
     // get the total number of lines
-    auto total_lines = std::count(
+    long long total_lines = std::count(
         data,
         data + size,
         '\n'

diff --git a/PORT/src/main.rs b/PORT/src/main.rs
@@ -9,11 +9,10 @@ extern crate pretty_env_logger;
 #[macro_use]
 extern crate log;
 
-use std::fs::File;
-use std::io::{BufRead, Read};
 use std::{ io, panic, thread };
 use std::time;
 
+#[allow(dead_code)]
 fn test_threads() {
     let _handler = thread::Builder
         ::new()
@@ -54,17 +53,20 @@ fn main() -> io::Result<()> {
     rust::init_rust();
     python::init_python();
 
-    ////python::test::test_kwargs(1, 2, None);
-    ////println!("{:?}", python::test::test_args(1, 2));
-    ////
-    ////cpp::test::c("hello from Rust");
-    ////
-    ////println!("Result: {}", cpp::test::add_sum(2, 62)); // 2^62 is the max value for i64
-    ////
-    ////// print the cwd
-    ////let cwd = std::env::current_dir().unwrap();
-    ////println!("Current working directory: {}", cwd.display());
-
+    println!("\n---------- PYTHON ----------");
+    python::test::test_kwargs(1, 2, None);
+    println!("{:?}", python::test::test_args(1, 2));
+
+    println!("\n---------- C++ ----------");
+    cpp::test::c("hello from Rust");
+
+    println!("Result: {}", cpp::test::add_sum(2, 62)); // 2^62 is the max value for i64
+
+    // print the cwd
+    println!("\n---------- RUST ----------");
+    let cwd = std::env::current_dir().unwrap();
+    println!("Current working directory: {}", cwd.display());
+
     //let inst = cpp::file_stream::new_file_stream("PORT/src/test.hlx");
 
 
@@ -74,17 +76,20 @@ fn main() -> io::Result<()> {
     //let inst = cpp::file_stream::new_file_stream("PORT/src/test.hlx");
     //let _reader = inst.get_data_from_chunk(0);
 
-    let start = time::Instant::now();
+    // let start = time::Instant::now();
 
-    let input = File::open("PORT/src/copy.hlx")?;
-    let buffered = std::io::BufReader::new(input);
-    let line_count = buffered.lines().count();
-
-    let elapsed = start.elapsed();
-
-    println!("Rust IO Elapsed: {:?}", elapsed);
-    println!("total lines {}", line_count);
+    // let input = File::open("PORT/src/copy.hlx")?;
+    // let buffered = std::io::BufReader::new(input);
+    // let line_count = buffered.lines().count();
+
+    // let elapsed = start.elapsed();
+
+    // println!("Rust IO Elapsed: {:?}", elapsed);
+    // println!("total lines {}", line_count);
+
+    // FIXME: C++ IS UNDERCOUNTING LINES
 
+    println!("\n---------- C++ ----------");
     let start = time::Instant::now();
 
     let inst = cpp::file_stream::new_file_stream("PORT/src/copy.hlx");

diff --git a/PORT/src/python/mod.rs b/PORT/src/python/mod.rs
@@ -1,4 +1,4 @@
-use std::{fmt::Display, process::exit, time};
+use std::fmt::Display;
 
 use pyo3::{
     types::{PyDict, PyList, PyModule}, Python,
@@ -11,8 +11,6 @@ pub use crate::python::shared::unknown_int::NumericType;
 pub use private::python_import;
 use private::python_import::repr_python;
 use std::env;
-use std::sync::mpsc;
-use std::thread;
 pub use crate::__panic__;
 
 // --------------------------- BEGIN PYTHON IMPORTS ----------------------------

diff --git a/PORT/src/rust/config.rs b/PORT/src/rust/config.rs
@@ -1,5 +1,4 @@
-use serde::de;
-use serde::{de::Error, Deserialize, Serialize};
+use serde::{Deserialize, Serialize};
 use serde_repr::*;
 use std::fs;
 use std::path::Path;

diff --git a/PORT/src/rust/panic_hook.rs b/PORT/src/rust/panic_hook.rs
@@ -65,13 +65,6 @@ pub fn better_panic(panic_info: &panic::PanicInfo) {
                     0
                 }
             };
-
-            let column_no = match panic_info.location() {
-                Some(s) => s.column(),
-                None => {
-                    0
-                }
-            };
 
             let pos = 0;
 

diff --git a/PORT/src/rust/token/keywords.rs b/PORT/src/rust/token/keywords.rs
@@ -61,7 +61,7 @@ pub struct Keyword {
     pub ty: KeywordType,
 }
 #[derive(Debug, PartialEq, Eq, Hash)]
-enum KeywordType {
+pub enum KeywordType {
     ControlFlow,
     Loop,
     LoopControl,

diff --git a/PORT/src/rust/token/lexer.rs b/PORT/src/rust/token/lexer.rs
@@ -1,136 +1,22 @@
-use super::{Token, TokenType};
-use regex::Regex;
-use std::sync::Arc;
-use std::{fs::File, io::self};
-use std::sync::Mutex;
-use std::thread;
-use std::sync::mpsc;
-use std::io::BufRead;
-
-
-pub struct Lexer {
-    file_name: Arc<str>,
+use super::Token;
+
+// lazy_static::lazy_static! {
+//     static ref STRING_RE: Regex = Regex::new(r#"([fbur]*"[^"\\]*(?:\\.[^"\\]*)*")"#).unwrap();
+//     static ref CHARACTER_RE: Regex = Regex::new(r#"([fbur]*'[^'\\]*(?:\\.[^'\\]*)*')"#).unwrap();
+//     static ref NUMERIC_RE: Regex = Regex::new(r#"(\b\d+\.\d+\b|\b\d+[fdui]?\b)"#).unwrap();
+//     static ref IDENTIFIER_RE: Regex = Regex::new(r#"(\b[a-zA-Z][a-zA-Z0-9_]*\b)"#).unwrap();
+//     static ref DELIMITER_RE: Regex = Regex::new(r#"([\(\)\{\}\[\];,])"#).unwrap();
+//     static ref OPERATOR_RE: Regex = Regex::new(r#"([+\-*/%=&|!<>^])"#).unwrap();
+//     static ref COMMENT_RE: Regex = Regex::new(r#"((?://[^\n]*)|(/\*[\s\S]*?\*/))"#).unwrap();
+// }
+
+// make an interface for a lexer
+
+pub trait Lexer {
+    fn lexer    (&self, file:    &str) -> Vec<Token>;
+    fn lexer_str(&self, content: &str) -> Vec<Token>;
 }
 
-
-
-lazy_static::lazy_static! {
-    static ref STRING_RE: Regex = Regex::new(r#"([fbur]*"[^"\\]*(?:\\.[^"\\]*)*")"#).unwrap();
-    static ref CHARACTER_RE: Regex = Regex::new(r#"([fbur]*'[^'\\]*(?:\\.[^'\\]*)*')"#).unwrap();
-    static ref NUMERIC_RE: Regex = Regex::new(r#"(\b\d+\.\d+\b|\b\d+[fdui]?\b)"#).unwrap();
-    static ref IDENTIFIER_RE: Regex = Regex::new(r#"(\b[a-zA-Z][a-zA-Z0-9_]*\b)"#).unwrap();
-    static ref DELIMITER_RE: Regex = Regex::new(r#"([\(\)\{\}\[\];,])"#).unwrap();
-    static ref OPERATOR_RE: Regex = Regex::new(r#"([+\-*/%=&|!<>^])"#).unwrap();
-    static ref COMMENT_RE: Regex = Regex::new(r#"((?://[^\n]*)|(/\*[\s\S]*?\*/))"#).unwrap();
-}
-
-// make an unsafe but VERY fast io reader for the lexer
-
-impl Lexer {
-    pub fn new(file_name: &str) -> Self {
-        Self {
-            file_name: Arc::from(file_name.to_owned()),
-        }
-    }
-
-    pub fn tokenize(&self) -> Vec<Token> {
-        let file = File::open(&*self.file_name).unwrap();
-        let reader = io::BufReader::new(file);
-        let tokens = Arc::new(Mutex::new(Vec::new()));
-        let mut line_number = 1;
-
-        let (sender, receiver) = mpsc::channel();
-
-        // Spawn multiple threads to process lines concurrently
-        let num_threads = 4;
-        let receiver = Arc::new(Mutex::new(receiver));
-
-        for _ in 0..num_threads {
-            let tokens = Arc::clone(&tokens);
-            let file_name = Arc::clone(&self.file_name);
-            let receiver = Arc::clone(&receiver);
-
-            let handle = thread::spawn(move || {
-                while let Ok(line) = receiver.lock().unwrap().recv() {
-                    let line_tokens = Lexer::tokenize_line(file_name.clone(), Arc::from(line), line_number);
-                    line_number += 1;
-
-                    tokens.lock().unwrap().extend(line_tokens);
-                }
-            });
-
-            handle.join().unwrap();
-        }
-
-        // Read the file line by line and send each line to the threads
-        for line in reader.lines() {
-            sender.send(line.unwrap()).unwrap();
-        }
-
-        // Drop the sender to close the channel
-        drop(sender);
-
-        // Wait for all threads to finish
-        for _ in 0..num_threads {
-            receiver.lock().unwrap().recv().unwrap();
-        }
-
-        // Extract the tokens from the Arc<Mutex<Vec<Token>>>
-        Arc::try_unwrap(tokens).unwrap().into_inner().unwrap()
-    }
-
-    fn tokenize_line(file_name: Arc<str>, line: Arc<str>, line_number: i32) -> Vec<Token> {
-        let mut tokens = Vec::new();
-        let mut start = 0;
-
-        while start < line.len() {
-            if let Some((length, token_type)) = Lexer::find_next_token(&line[start..]) {
-                tokens.push(Token::new(
-                    line.clone(),
-                    file_name.clone(),
-                    line_number,
-                    start as u16,
-                    line[start..start + length].trim().to_owned(),
-                    token_type,
-                ));
-                start += length;
-            } else {
-                start += 1; // Skip to next character
-            }
-        }
-
-        return tokens;
-    }
-
-    fn find_next_token(s: &str) -> Option<(usize, TokenType)> {
-        if let Some(captures) = STRING_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::STRING));
-        }
-
-        if let Some(captures) = CHARACTER_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::CHARACTER));
-        }
-
-        if let Some(captures) = NUMERIC_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::NUMERIC));
-        }
-
-        if let Some(captures) = IDENTIFIER_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::IDENTIFIER));
-        }
-
-        if let Some(captures) = DELIMITER_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::DELIMITER));
-        }
-
-        if let Some(captures) = OPERATOR_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::OPERATOR));
-        }
-
-        if let Some(captures) = COMMENT_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::COMMENT));
-        }
-
-        return None;
-    }
-}
+trait Tokenizer {
+    fn determine_size(&self, content: &str) -> usize;
+}
diff --git a/PORT/src/rust/token/mod.rs b/PORT/src/rust/token/mod.rs
@@ -1,7 +1,7 @@
 mod token;
 use token::*;
-pub mod token_list;
 pub mod token_value;
 pub mod keywords;
 pub mod primitive_types;
 pub mod lexer;
+// removed token_list.rs since it is not required when using an AST and a lexer/parser