Update code in PORT/src/cpp/mod.rs, PORT/src/rust/config.rs, PORT/src/rust/token/keywords.rs, PORT/src/rust/token/mod.rs, PORT/src/rust/token/token_value.rs, PORT/src/rust/panic_hook.rs, PORT/src/python/mod.rs, PORT/src/cpp/src/file_stream.cpp, and PORT/src/main.rs

Ze7111 committed Apr 2, 2024
1 parent 30a8964 commit ca439d4
Showing 16 changed files with 323 additions and 337 deletions.
1 change: 0 additions & 1 deletion PORT/src/cpp/mod.rs
@@ -1,4 +1,3 @@
-#[no_mangle]
 mod private;
 pub mod shared;
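Note: dropping #[no_mangle] here is the right fix, not just cleanup. The attribute only affects items that have linkage, such as extern "C" functions and statics; on a mod declaration it does nothing, and recent rustc reports it as an unused attribute. A minimal sketch of where the attribute does belong (the function below is hypothetical, not part of this commit):

#[no_mangle]
pub extern "C" fn port_add(a: i64, b: i64) -> i64 {
    // Exported under the unmangled symbol name `port_add`, so the C++ side
    // of the project could link against it directly.
    a.wrapping_add(b)
}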
4 changes: 2 additions & 2 deletions PORT/src/cpp/src/file_stream.cpp
@@ -146,7 +146,7 @@ static inline std::string rl(char*& data, int line) {
     return (line == 0) ? std::string(line_start, cursor - line_start) : std::string();
 }
 
-static std::_Iter_diff_t<char *> tl(char const *fname) {
+static long long tl(char const *fname) {
 #ifdef __unix__ // tell kernel the access pattern.
     posix_fadvise(fd, 0, 0, 1); // FDADVICE_SEQUENTIAL
 #endif
@@ -199,7 +199,7 @@ static std::_Iter_diff_t<char *> tl(char const *fname) {
 #endif
 
     // get the total number of lines
-    auto total_lines = std::count(
+    long long total_lines = std::count(
         data,
         data + size,
         '\n'
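Note: std::_Iter_diff_t is an internal MSVC STL alias (the leading underscore plus capital letter marks a reserved, implementation-only name), so the old signature would not compile against libstdc++ or libc++; long long is a portable stand-in. The standard name for what std::count returns over char* iterators is the iterator's difference type, std::ptrdiff_t. A minimal portable sketch (the helper name is invented for illustration):

#include <algorithm>
#include <cstddef>

// std::count over char* returns std::iterator_traits<const char*>::difference_type,
// which is std::ptrdiff_t on a conforming implementation.
static std::ptrdiff_t count_newlines(const char *data, std::size_t size) {
    return std::count(data, data + size, '\n');
}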
49 changes: 27 additions & 22 deletions PORT/src/main.rs
@@ -9,11 +9,10 @@ extern crate pretty_env_logger;
 #[macro_use]
 extern crate log;
 
-use std::fs::File;
-use std::io::{BufRead, Read};
 use std::{ io, panic, thread };
 use std::time;
 
+#[allow(dead_code)]
 fn test_threads() {
     let _handler = thread::Builder
         ::new()
@@ -54,17 +53,20 @@ fn main() -> io::Result<()> {
     rust::init_rust();
     python::init_python();
 
-    ////python::test::test_kwargs(1, 2, None);
-    ////println!("{:?}", python::test::test_args(1, 2));
-    ////
-    ////cpp::test::c("hello from Rust");
-    ////
-    ////println!("Result: {}", cpp::test::add_sum(2, 62)); // 2^62 is the max value for i64
-    ////
-    ////// print the cwd
-    ////let cwd = std::env::current_dir().unwrap();
-    ////println!("Current working directory: {}", cwd.display());
+    println!("\n---------- PYTHON ----------");
+    python::test::test_kwargs(1, 2, None);
+    println!("{:?}", python::test::test_args(1, 2));
+
+    println!("\n---------- C++ ----------");
+    cpp::test::c("hello from Rust");
+
+    println!("Result: {}", cpp::test::add_sum(2, 62)); // 2^62 is the max value for i64
+
+    // print the cwd
+    println!("\n---------- RUST ----------");
+    let cwd = std::env::current_dir().unwrap();
+    println!("Current working directory: {}", cwd.display());
 
     //let inst = cpp::file_stream::new_file_stream("PORT/src/test.hlx");
@@ -74,17 +76,20 @@ fn main() -> io::Result<()> {
     //let inst = cpp::file_stream::new_file_stream("PORT/src/test.hlx");
     //let _reader = inst.get_data_from_chunk(0);
 
-    let start = time::Instant::now();
+    // let start = time::Instant::now();
 
-    let input = File::open("PORT/src/copy.hlx")?;
-    let buffered = std::io::BufReader::new(input);
-    let line_count = buffered.lines().count();
-
-    let elapsed = start.elapsed();
-
-    println!("Rust IO Elapsed: {:?}", elapsed);
-    println!("total lines {}", line_count);
+    // let input = File::open("PORT/src/copy.hlx")?;
+    // let buffered = std::io::BufReader::new(input);
+    // let line_count = buffered.lines().count();
+
+    // let elapsed = start.elapsed();
+
+    // println!("Rust IO Elapsed: {:?}", elapsed);
+    // println!("total lines {}", line_count);
 
+    // FIXME: C++ IS UNDER COUNTING LINES
+
+    println!("\n---------- C++ ----------");
     let start = time::Instant::now();
 
     let inst = cpp::file_stream::new_file_stream("PORT/src/copy.hlx");
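On the FIXME above: one plausible cause, an assumption rather than anything confirmed in this commit, is the classic trailing-newline mismatch. tl() counts '\n' bytes, while the now commented-out Rust path used BufReader::lines(), which also yields a final line that has no terminator; a file whose last line lacks a trailing newline therefore contains one fewer '\n' than it has lines. A hedged sketch of the usual correction:

#include <algorithm>
#include <cstddef>

// Sketch only: assumes data/size describe the whole file in memory, as in
// file_stream.cpp. Count newline bytes, then compensate for a missing
// trailing '\n' so the result matches line-oriented readers.
static long long total_lines(const char *data, std::size_t size) {
    long long lines = std::count(data, data + size, '\n');
    if (size > 0 && data[size - 1] != '\n') {
        lines += 1; // final line has no terminator but still counts
    }
    return lines;
}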
4 changes: 1 addition & 3 deletions PORT/src/python/mod.rs
@@ -1,4 +1,4 @@
-use std::{fmt::Display, process::exit, time};
+use std::fmt::Display;
 
 use pyo3::{
     types::{PyDict, PyList, PyModule}, Python,
@@ -11,8 +11,6 @@ pub use crate::python::shared::unknown_int::NumericType;
 pub use private::python_import;
 use private::python_import::repr_python;
 use std::env;
-use std::sync::mpsc;
-use std::thread;
 pub use crate::__panic__;
 
 // --------------------------- BEGIN PYTHON IMPORTS ----------------------------
3 changes: 1 addition & 2 deletions PORT/src/rust/config.rs
@@ -1,5 +1,4 @@
-use serde::de;
-use serde::{de::Error, Deserialize, Serialize};
+use serde::{Deserialize, Serialize};
 use serde_repr::*;
 use std::fs;
 use std::path::Path;
7 changes: 0 additions & 7 deletions PORT/src/rust/panic_hook.rs
@@ -65,13 +65,6 @@ pub fn better_panic(panic_info: &panic::PanicInfo) {
             0
         }
     };
 
-    let column_no = match panic_info.location() {
-        Some(s) => s.column(),
-        None => {
-            0
-        }
-    };
-
     let pos = 0;
2 changes: 1 addition & 1 deletion PORT/src/rust/token/keywords.rs
@@ -61,7 +61,7 @@ pub struct Keyword {
     pub ty: KeywordType,
 }
 #[derive(Debug, PartialEq, Eq, Hash)]
-enum KeywordType {
+pub enum KeywordType {
     ControlFlow,
     Loop,
     LoopControl,
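Note: making KeywordType public is required rather than stylistic. Keyword is a pub struct whose pub field ty exposes the enum, and a private type in a public interface is flagged by rustc (historically hard error E0446, surfaced as the private_interfaces lint on newer toolchains). A minimal reproduction of the constraint:

pub struct Keyword {
    pub ty: KeywordType, // the public field leaks the type to downstream code
}

// Without `pub` here, rustc flags KeywordType as a private type in a
// public interface (E0446 / private_interfaces).
pub enum KeywordType {
    ControlFlow,
    Loop,
}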
154 changes: 20 additions & 134 deletions PORT/src/rust/token/lexer.rs
@@ -1,136 +1,22 @@
-use super::{Token, TokenType};
-use regex::Regex;
-use std::sync::Arc;
-use std::{fs::File, io::self};
-use std::sync::Mutex;
-use std::thread;
-use std::sync::mpsc;
-use std::io::BufRead;
-
-
-pub struct Lexer {
-    file_name: Arc<str>,
-}
+use super::Token;
+
+// lazy_static::lazy_static! {
+//     static ref STRING_RE: Regex = Regex::new(r#"([fbur]*"[^"\\]*(?:\\.[^"\\]*)*")"#).unwrap();
+//     static ref CHARACTER_RE: Regex = Regex::new(r#"([fbur]*'[^'\\]*(?:\\.[^'\\]*)*')"#).unwrap();
+//     static ref NUMERIC_RE: Regex = Regex::new(r#"(\b\d+\.\d+\b|\b\d+[fdui]?\b)"#).unwrap();
+//     static ref IDENTIFIER_RE: Regex = Regex::new(r#"(\b[a-zA-Z][a-zA-Z0-9_]*\b)"#).unwrap();
+//     static ref DELIMITER_RE: Regex = Regex::new(r#"([\(\)\{\}\[\];,])"#).unwrap();
+//     static ref OPERATOR_RE: Regex = Regex::new(r#"([+\-*/%=&|!<>^])"#).unwrap();
+//     static ref COMMENT_RE: Regex = Regex::new(r#"((?://[^\n]*)|(/\*[\s\S]*?\*/))"#).unwrap();
+// }
+
+// make an interface for a lexer
+
+pub trait Lexer {
+    fn lexer (&self, file: &str) -> Vec<Token>;
+    fn lexer_str(&self, content: &str) -> Vec<Token>;
+}
-
-lazy_static::lazy_static! {
-    static ref STRING_RE: Regex = Regex::new(r#"([fbur]*"[^"\\]*(?:\\.[^"\\]*)*")"#).unwrap();
-    static ref CHARACTER_RE: Regex = Regex::new(r#"([fbur]*'[^'\\]*(?:\\.[^'\\]*)*')"#).unwrap();
-    static ref NUMERIC_RE: Regex = Regex::new(r#"(\b\d+\.\d+\b|\b\d+[fdui]?\b)"#).unwrap();
-    static ref IDENTIFIER_RE: Regex = Regex::new(r#"(\b[a-zA-Z][a-zA-Z0-9_]*\b)"#).unwrap();
-    static ref DELIMITER_RE: Regex = Regex::new(r#"([\(\)\{\}\[\];,])"#).unwrap();
-    static ref OPERATOR_RE: Regex = Regex::new(r#"([+\-*/%=&|!<>^])"#).unwrap();
-    static ref COMMENT_RE: Regex = Regex::new(r#"((?://[^\n]*)|(/\*[\s\S]*?\*/))"#).unwrap();
-}
-
-// make an unsafe but VERY fast io reader for the lexer
-
-impl Lexer {
-    pub fn new(file_name: &str) -> Self {
-        Self {
-            file_name: Arc::from(file_name.to_owned()),
-        }
-    }
-
-    pub fn tokenize(&self) -> Vec<Token> {
-        let file = File::open(&*self.file_name).unwrap();
-        let reader = io::BufReader::new(file);
-        let tokens = Arc::new(Mutex::new(Vec::new()));
-        let mut line_number = 1;
-
-        let (sender, receiver) = mpsc::channel();
-
-        // Spawn multiple threads to process lines concurrently
-        let num_threads = 4;
-        let receiver = Arc::new(Mutex::new(receiver));
-
-        for _ in 0..num_threads {
-            let tokens = Arc::clone(&tokens);
-            let file_name = Arc::clone(&self.file_name);
-            let receiver = Arc::clone(&receiver);
-
-            let handle = thread::spawn(move || {
-                while let Ok(line) = receiver.lock().unwrap().recv() {
-                    let line_tokens = Lexer::tokenize_line(file_name.clone(), Arc::from(line), line_number);
-                    line_number += 1;
-
-                    tokens.lock().unwrap().extend(line_tokens);
-                }
-            });
-
-            handle.join().unwrap();
-        }
-
-        // Read the file line by line and send each line to the threads
-        for line in reader.lines() {
-            sender.send(line.unwrap()).unwrap();
-        }
-
-        // Drop the sender to close the channel
-        drop(sender);
-
-        // Wait for all threads to finish
-        for _ in 0..num_threads {
-            receiver.lock().unwrap().recv().unwrap();
-        }
-
-        // Extract the tokens from the Arc<Mutex<Vec<Token>>>
-        Arc::try_unwrap(tokens).unwrap().into_inner().unwrap()
-    }
-
-    fn tokenize_line(file_name: Arc<str>, line: Arc<str>, line_number: i32) -> Vec<Token> {
-        let mut tokens = Vec::new();
-        let mut start = 0;
-
-        while start < line.len() {
-            if let Some((length, token_type)) = Lexer::find_next_token(&line[start..]) {
-                tokens.push(Token::new(
-                    line.clone(),
-                    file_name.clone(),
-                    line_number,
-                    start as u16,
-                    line[start..start + length].trim().to_owned(),
-                    token_type,
-                ));
-                start += length;
-            } else {
-                start += 1; // Skip to next character
-            }
-        }
-
-        return tokens;
-    }
-
-    fn find_next_token(s: &str) -> Option<(usize, TokenType)> {
-        if let Some(captures) = STRING_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::STRING));
-        }
-
-        if let Some(captures) = CHARACTER_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::CHARACTER));
-        }
-
-        if let Some(captures) = NUMERIC_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::NUMERIC));
-        }
-
-        if let Some(captures) = IDENTIFIER_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::IDENTIFIER));
-        }
-
-        if let Some(captures) = DELIMITER_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::DELIMITER));
-        }
-
-        if let Some(captures) = OPERATOR_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::OPERATOR));
-        }
-
-        if let Some(captures) = COMMENT_RE.captures(s) {
-            return Some((captures.get(0).unwrap().as_str().len(), TokenType::COMMENT));
-        }
-
-        return None;
-    }
-}
-trait Tokenizer {
-    fn determine_size(&self, content: &str) -> usize;
-}
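Note: the deleted tokenize() could not have worked as written: each worker thread was join()ed inside the spawn loop before any line had been sent, so the first join blocks on a recv() that never completes (the sender is still alive and empty) and the loop deadlocks; line_number was also an i32 moved into every closure, giving each thread its own private count. Switching to a trait leaves the tokenization strategy open. A hypothetical implementor of the new interface (the struct name and method bodies are invented for illustration):

struct FileLexer;

impl Lexer for FileLexer {
    fn lexer(&self, file: &str) -> Vec<Token> {
        // Read the whole file, then defer to the string-based entry point.
        let content = std::fs::read_to_string(file).unwrap_or_default();
        self.lexer_str(&content)
    }

    fn lexer_str(&self, _content: &str) -> Vec<Token> {
        // Real tokenization (regex table, cursor scan) would go here.
        Vec::new()
    }
}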
2 changes: 1 addition & 1 deletion PORT/src/rust/token/mod.rs
@@ -1,7 +1,7 @@
 mod token;
 use token::*;
-pub mod token_list;
 pub mod token_value;
 pub mod keywords;
 pub mod primitive_types;
 pub mod lexer;
+// removed token_list.rs since it is not required when using an AST and a lexer/parser