From 3cb647fc7b881ccd0c4b77754d78a6050f5e5fc9 Mon Sep 17 00:00:00 2001 From: fansehep Date: Mon, 29 Jan 2024 20:44:18 +0800 Subject: [PATCH 1/2] feat: init plan --- .gitignore | 4 +- Cargo.lock | 224 +++++++++++++++++++++++++++++++-- Cargo.toml | 1 + rustfmt.toml | 12 ++ src/bin/lexer_repl.rs | 9 +- src/bin/parser_repl.rs | 4 +- src/bin/plan_repl.rs | 65 ++++++++++ src/catalog/column.rs | 16 +++ src/catalog/mod.rs | 126 +++++++++++++++++++ src/catalog/schema.rs | 16 +++ src/catalog/tableinfo.rs | 28 +++++ src/error.rs | 14 ++- src/lib.rs | 3 + src/parser/column.rs | 3 +- src/parser/expression.rs | 72 +++++++++-- src/parser/keyword.rs | 3 + src/parser/lexer.rs | 3 +- src/parser/mod.rs | 50 ++++---- src/parser/operator.rs | 6 +- src/parser/stmt.rs | 4 +- src/planner/context.rs | 101 +++++++++++++++ src/planner/logical_plan.rs | 34 +++++ src/planner/mod.rs | 211 +++++++++++++++++++++++++++++++ src/planner/plan_expression.rs | 162 ++++++++++++++++++++++++ src/planner/plan_select.rs | 191 ++++++++++++++++++++++++++++ src/planner/plan_table.rs | 3 + src/store/mod.rs | 2 + src/store/rocks.rs | 101 +++++++++++++++ src/store/table.rs | 23 ++++ src/types/expr.rs | 49 ++++++++ src/types/mod.rs | 130 +++++++++++++++++++ src/types/schema.rs | 0 src/types/value.rs | 21 ++++ 33 files changed, 1631 insertions(+), 60 deletions(-) create mode 100644 src/bin/plan_repl.rs create mode 100644 src/catalog/column.rs create mode 100644 src/catalog/mod.rs create mode 100644 src/catalog/schema.rs create mode 100644 src/catalog/tableinfo.rs create mode 100644 src/planner/context.rs create mode 100644 src/planner/logical_plan.rs create mode 100644 src/planner/mod.rs create mode 100644 src/planner/plan_expression.rs create mode 100644 src/planner/plan_select.rs create mode 100644 src/planner/plan_table.rs create mode 100644 src/store/mod.rs create mode 100644 src/store/rocks.rs create mode 100644 src/store/table.rs create mode 100644 src/types/expr.rs delete mode 100644 src/types/schema.rs diff --git a/.gitignore b/.gitignore index af45bab..15c0621 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /target -.shaun_parser_history \ No newline at end of file +.shaun_parser_history +/.idea +/.rocksdata \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 7c3949b..2ea6bad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -43,6 +43,27 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "bindgen" +version = "0.65.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" +dependencies = [ + "bitflags 1.3.2", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -61,15 +82,36 @@ version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "cc" version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ + "jobserver", "libc", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -78,9 +120,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.30" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defd4e7873dbddba6c7c91e199c7fcb946abc4a6a4ac3195400bcfb01b5de877" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ "android-tzdata", "iana-time-zone", @@ -90,6 +132,17 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "clang-sys" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clipboard-win" version = "4.5.0" @@ -168,6 +221,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -215,6 +274,15 @@ dependencies = [ "cc", ] +[[package]] +name = "jobserver" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.64" @@ -224,11 +292,60 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libc" -version = "0.2.147" +version = "0.2.151" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" + +[[package]] +name = "libloading" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" +dependencies = [ + "cfg-if", + "windows-sys", +] + +[[package]] +name = "librocksdb-sys" +version = "0.11.0+8.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3386f101bcb4bd252d8e9d2fb41ec3b0862a15a62b478c355b2982efa469e3e" +dependencies = [ + "bindgen", + "bzip2-sys", + "cc", + "glob", + "libc", + "libz-sys", + "lz4-sys", + "zstd-sys", +] + +[[package]] +name = "libz-sys" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] [[package]] name = "linux-raw-sys" @@ -242,12 +359,28 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "lz4-sys" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "memchr" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "nibble_vec" version = "0.1.0" @@ -268,6 +401,16 @@ dependencies = [ "libc", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-traits" version = "0.2.16" @@ -283,20 +426,42 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "pkg-config" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" + +[[package]] +name = "prettyplease" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "2de98502f212cfcea8d0bb305bd0f49d7ebdd75b64ba0a68f937d888f4e0d6db" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -340,6 +505,22 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +[[package]] +name = "rocksdb" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe" +dependencies = [ + "libc", + "librocksdb-sys", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustix" version = "0.38.13" @@ -389,9 +570,16 @@ dependencies = [ "chrono", "env_logger", "log", + "rocksdb", "rustyline", ] +[[package]] +name = "shlex" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380" + [[package]] name = "smallvec" version = "1.11.1" @@ -406,9 +594,9 @@ checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" [[package]] name = "syn" -version = "2.0.30" +version = "2.0.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ddc1f908d32ec46858c2d3b3daa00cc35bf4b6841ce4355c7bb3eedf2283a68" +checksum = "89456b690ff72fddcecf231caedbe615c59480c93358a93dfae7fc29e3ebbf0e" dependencies = [ "proc-macro2", "quote", @@ -448,6 +636,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "wasm-bindgen" version = "0.2.87" @@ -607,3 +801,13 @@ name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "zstd-sys" +version = "2.0.9+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index f0d23ff..7de2826 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,4 @@ log = "0.4.20" env_logger = "0.9.3" chrono = "0.4.30" rustyline = "12.0.0" +rocksdb = "0.21.0" diff --git a/rustfmt.toml b/rustfmt.toml index 3f8f4f2..1a21580 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,4 +1,16 @@ version = "Two" +unstable_features = true comment_width = 80 +wrap_comments = true +format_code_in_doc_comments = true +format_macro_bodies = true +format_macro_matchers = true +normalize_comments = true +normalize_doc_attributes = true +condense_wildcard_suffixes = true newline_style = "Unix" +use_field_init_shorthand = true +use_try_shorthand = true +imports_granularity = "Crate" +group_imports = "StdExternalCrate" \ No newline at end of file diff --git a/src/bin/lexer_repl.rs b/src/bin/lexer_repl.rs index f11c8f7..87973a3 100644 --- a/src/bin/lexer_repl.rs +++ b/src/bin/lexer_repl.rs @@ -1,8 +1,9 @@ -use std::io; -use std::io::Write; +use std::{io, io::Write}; -use shaun::parser::lexer::{self}; -use shaun::parser::token::Token; +use shaun::parser::{ + lexer::{self}, + token::Token, +}; fn main() { env_logger::Builder::new() diff --git a/src/bin/parser_repl.rs b/src/bin/parser_repl.rs index fd2bb06..99dc8f0 100644 --- a/src/bin/parser_repl.rs +++ b/src/bin/parser_repl.rs @@ -1,6 +1,6 @@ use std::io::Write; -use shaun::parser::Parser; +use shaun::parser::{stmt::Statement, Parser}; const PARSER_HISTORY_NAME: &str = ".shaun_parser_history"; @@ -38,7 +38,7 @@ fn main() { p.update(&line); match p.parse_stmt() { Ok(s) => { - dbg!(s); + dbg!(&s); } Err(e) => { println!("{:?}", e); diff --git a/src/bin/plan_repl.rs b/src/bin/plan_repl.rs new file mode 100644 index 0000000..1f3937d --- /dev/null +++ b/src/bin/plan_repl.rs @@ -0,0 +1,65 @@ +use std::{io::Write, sync::Arc}; + +use shaun::{ + catalog::{self, CataLog}, + parser::{stmt::Statement, Parser}, + planner::Planner, +}; + +const PLANNER_HISTORY_NAME: &str = ".shaun_planner_history"; + +fn main() { + env_logger::Builder::new() + .format(|buf, record| { + writeln!( + buf, + "{} {} {}:{} {}", + chrono::Local::now().format("%Y-%m-%d %H:%M:%S%.3f"), + record.level(), + record.file().unwrap(), + record.line().unwrap(), + record.args() + ) + }) + .filter(None, log::LevelFilter::Info) + .init(); + + let green = "\x1b[32m"; + let default = "\x1b[0m"; + let mut p = Parser::new_parser("".to_owned()); + let mut planner = Planner::new(&Arc::new(CataLog::default())); + let mut reader = rustyline::DefaultEditor::new().unwrap(); + if reader.load_history(PLANNER_HISTORY_NAME).is_err() { + println!("No previous history."); + } + + loop { + match reader.readline(&format!("{green}planner> {default}")) { + Ok(line) => { + let _ = reader.add_history_entry(line.as_str()); + if line.trim() == "quit" { + break; + } + p.update(&line); + match p.parse_stmt() { + Ok(s) => match planner.plan(&s) { + Ok(plan) => { + dbg!(plan); + } + Err(e) => { + println!("{e:?}"); + } + }, + Err(e) => { + println!("{:?}", e); + } + } + } + Err(e) => { + println!("error: {e}"); + break; + } + } + } + reader.save_history(PLANNER_HISTORY_NAME).unwrap(); +} diff --git a/src/catalog/column.rs b/src/catalog/column.rs new file mode 100644 index 0000000..be023ef --- /dev/null +++ b/src/catalog/column.rs @@ -0,0 +1,16 @@ +use crate::types::LogicalType; + +#[derive(Debug, Clone, PartialEq)] +pub struct Column { + pub column_type: LogicalType, + pub name: String, +} + +impl Column { + pub fn new(logical_type: LogicalType, name: &str) -> Self { + Self { + column_type: logical_type, + name: name.to_owned(), + } + } +} \ No newline at end of file diff --git a/src/catalog/mod.rs b/src/catalog/mod.rs new file mode 100644 index 0000000..c1afa65 --- /dev/null +++ b/src/catalog/mod.rs @@ -0,0 +1,126 @@ +use std::{ + collections::HashMap, + io::IntoInnerError, + sync::{ + atomic::{self, AtomicI32}, + Arc, + }, +}; + +use log::error; + +use self::{ + schema::Schema, + tableinfo::{IndexInfo, TableInfo}, +}; +use crate::{ + error::{Error, Error::Internal, Result}, + fmt_err, +}; +pub mod column; +pub mod schema; +pub mod tableinfo; + +pub type TableID = i32; +pub type TableInfoRef = Arc; +pub type IndexInfoRef = Arc; + +#[derive(Debug)] +pub struct CataLog { + id_table_map: HashMap>, + name_id_map: HashMap, + next_table_id: AtomicI32, + id_index_map: HashMap, + index_names_map: HashMap>, +} + +impl CataLog { + pub fn default() -> Self { + Self { + id_index_map: HashMap::new(), + name_id_map: HashMap::new(), + next_table_id: 0.into(), + id_table_map: HashMap::new(), + index_names_map: HashMap::new(), + } + } + + pub fn create_table(&mut self, table_name: &String, schema: &Schema) -> Result { + // 表不能已经存在 + if self.name_id_map.contains_key(table_name) { + return Err(Internal(fmt_err!("{table_name} has been exist"))); + } + + let table_id = self.next_table_id.fetch_add(1, atomic::Ordering::Release); + let table_info = Arc::new(TableInfo::new(table_name, table_id, schema)); + + // 更新元数据信息 + self.id_table_map.insert(table_id, Arc::clone(&table_info)); + self.name_id_map.insert(table_name.clone(), table_id); + self.index_names_map + .insert(table_name.clone(), HashMap::new()); + + Ok(table_info) + } + + pub fn is_table_exist(&self, table_name: &str) -> bool { + match self.name_id_map.get(table_name) { + Some(_) => true, + None => false, + } + } + + pub fn table_by_name(&self, table_name: &str) -> Option { + match self.name_id_map.get(table_name) { + Some(id) => match self.id_table_map.get(id) { + Some(table) => Some(Arc::clone(table)), + None => { + error!("table_name: {table_name} {id} is not exist"); + None + } + }, + None => None, + } + } + + pub fn table_by_id(&self, table_id: TableID) -> Option { + match self.id_table_map.get(&table_id) { + Some(table_info) => Some(Arc::clone(table_info)), + None => None, + } + } + + pub fn create_index( + &mut self, + index_name: &String, + table_name: &String, + shema: &Schema, + key_shema: &Schema, + key_attrs: Vec, + key_size: usize, + is_primary_key: bool, + ) -> Result { + match self.name_id_map.get(table_name) { + Some(_) => {} + None => { + return Err(Internal(fmt_err!("{table_name} is not exist in catalog"))); + } + }; + + match self.index_names_map.get(table_name) { + Some(table_index) => match table_index.get(index_name) { + Some(_) => {} + None => { + return Err(Internal(fmt_err!("{table_name} is not exist"))); + } + }, + None => { + return Err(Internal(fmt_err!("{table_name} should be exist but not"))); + } + }; + + let table_meta = self.table_by_name(table_name).unwrap(); + + unimplemented!() + } +} diff --git a/src/catalog/schema.rs b/src/catalog/schema.rs new file mode 100644 index 0000000..cdfc369 --- /dev/null +++ b/src/catalog/schema.rs @@ -0,0 +1,16 @@ +use std::sync::Arc; + +use super::column::Column; + +#[derive(Debug, Clone, PartialEq)] +pub struct Schema { + pub columns: Vec, +} + +pub type SchemaRef = Arc; + +impl Schema { + pub fn default() -> Self { + Self { columns: vec![] } + } +} diff --git a/src/catalog/tableinfo.rs b/src/catalog/tableinfo.rs new file mode 100644 index 0000000..0f0b299 --- /dev/null +++ b/src/catalog/tableinfo.rs @@ -0,0 +1,28 @@ +use super::schema::Schema; + +#[derive(Debug)] +pub struct TableInfo { + pub name: String, + pub id: i32, + pub schema: Schema, +} + +impl TableInfo { + pub fn new(table_name: &String, id: i32, schema: &Schema) -> Self { + Self { + name: table_name.to_owned(), + id, + schema: schema.clone(), + } + } +} + +#[derive(Debug, Clone)] +pub struct IndexInfo { + pub key_schema: Schema, + pub name: String, + pub id: i32, + pub table_name: String, + pub key_size: usize, + pub is_primary_key: bool, +} diff --git a/src/error.rs b/src/error.rs index dcf97ab..b80b3fb 100644 --- a/src/error.rs +++ b/src/error.rs @@ -5,20 +5,26 @@ pub type Result = core::result::Result; #[derive(Clone, Debug, PartialEq)] pub enum Error { Parse(String), - Storage(String), + Store(String), Internal(String), Conf(String), Other(String), + Function(String), + Cast(String), + Plan(String), } impl Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Parse(err) - | Self::Storage(err) + | Self::Store(err) | Self::Internal(err) | Self::Conf(err) - | Self::Other(err) => { + | Self::Other(err) + | Self::Cast(err) + | Self::Function(err) + | Self::Plan(err) => { write!(f, "{}", err) } } @@ -28,6 +34,6 @@ impl Display for Error { #[macro_export] macro_rules! fmt_err { ($($arg:tt)*) => { - format!("{}:{} {}", file!(), line!(), format!($($arg)*)) + format!("[{}:{}:{}] {}", file!(), line!(), column!(), format!($($arg)*)) }; } diff --git a/src/lib.rs b/src/lib.rs index 5369881..3014f2a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,6 @@ +pub mod catalog; pub mod error; pub mod parser; +pub mod planner; +pub mod store; pub mod types; diff --git a/src/parser/column.rs b/src/parser/column.rs index 93d3ad3..3114dad 100644 --- a/src/parser/column.rs +++ b/src/parser/column.rs @@ -1,5 +1,4 @@ -use crate::parser::expression::Expression; -use crate::parser::DataType; +use crate::parser::{expression::Expression, DataType}; #[derive(PartialEq, Debug, Clone)] pub struct Column { diff --git a/src/parser/expression.rs b/src/parser/expression.rs index b47af69..8238981 100644 --- a/src/parser/expression.rs +++ b/src/parser/expression.rs @@ -1,4 +1,8 @@ -use crate::parser::operation::Operation; +use crate::{ + error::{Error, Result}, + fmt_err, + parser::operation::Operation, +}; #[derive(PartialEq, Debug, Clone)] // 字面量 @@ -20,14 +24,66 @@ pub enum Expression { Operation(Operation), } +impl Expression { + pub fn has_aggregation(&self) -> Result { + Ok(match self { + Self::Field(..) => false, + Self::Column(_) => false, + Self::Literal(_) => false, + Self::Function(name, args) => { + match name.clone().to_uppercase().as_str() { + "SUM" | "AVG" | "COUNT" | "MIN" | "ONLY" => return Ok(true), + _ => return Err(Error::Parse(fmt_err!("function_name: {name} is not valid"))), + }; + } + Self::Operation(op) => match op { + Operation::And(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::Not(e) => e.has_aggregation()?, + Operation::Or(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::NotEqual(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::Equal(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::GreaterThan(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::GreaterThanOrEqual(l, r) => { + l.has_aggregation()? || r.has_aggregation()? + } + Operation::LessThan(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::LessThanOrEqual(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::IsNull(e) => e.has_aggregation()?, + Operation::Add(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::Subtract(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::Multiply(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::Divide(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::Assert(e) => e.has_aggregation()?, + Operation::Like(l, r) => l.has_aggregation()? || r.has_aggregation()?, + Operation::Negate(e) => e.has_aggregation()?, + Operation::BitWiseNot(e) => e.has_aggregation()?, + Operation::Modulo(l, r) => l.has_aggregation()? || r.has_aggregation()?, + }, + }) + } +} + #[cfg(test)] mod test { - use super::*; - use crate::parser::stmt::*; - use crate::parser::test::init; - use crate::parser::Parser; use log::error; + use super::*; + use crate::parser::{stmt::*, test::init, Parser}; + + #[test] + fn has_aggreagtion_test() { + let mut expr = Expression::Column(10); + assert_eq!(expr.has_aggregation().unwrap(), false); + expr = Expression::Operation(Operation::And( + Box::new(Expression::Function( + "AVG".to_owned(), + vec![Expression::Literal(Literal::All)], + )), + Box::new(Expression::Column(1)), + )); + assert_eq!(expr.has_aggregation().unwrap(), true); + } + #[test] fn parse_expression_test() { init(); @@ -95,9 +151,9 @@ mod test { } // - - // + 3 - // * / - // * 3 456 4 + // + 3 + // * / + // * 3 456 4 // 1 2 parser.update("SELECT 1 * 2 * 3 + 456 / 4 - 3 c1;"); let mut res_expr = Expression::Operation(Operation::Subtract( diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs index a53b160..f124df7 100644 --- a/src/parser/keyword.rs +++ b/src/parser/keyword.rs @@ -22,6 +22,7 @@ pub enum Keyword { Desc, Double, Drop, + Database, Databases, Describe, Explain, @@ -124,6 +125,7 @@ impl fmt::Display for Keyword { Self::Desc => "Desc", Self::Double => "Double", Self::Drop => "Drop", + Self::Database => "Database", Self::Databases => "Databases", Self::Describe => "Describe", Self::Explain => "Explain", @@ -227,6 +229,7 @@ pub fn find_keyword(keyword_str: &str) -> Keyword { "DESC" => Keyword::Desc, "DOUBLE" => Keyword::Double, "DROP" => Keyword::Drop, + "DATABASE" => Keyword::Database, "DATABASES" => Keyword::Databases, "DESCRIBE" => Keyword::Describe, "EXPLAIN" => Keyword::Explain, diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs index 905dfd1..6d81239 100644 --- a/src/parser/lexer.rs +++ b/src/parser/lexer.rs @@ -1,5 +1,4 @@ -use super::keyword::Keyword; -use super::token::Token; +use super::{keyword::Keyword, token::Token}; use crate::parser::keyword::find_keyword; const STOP_CHAR: char = 0 as char; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 27b247a..1e2f917 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,38 +1,39 @@ mod column; mod data_type; -mod expression; +pub mod expression; mod keyword; pub mod lexer; -mod operation; +pub mod operation; mod operator; -mod stmt; +pub mod stmt; pub mod token; -use crate::parser::operator::{is_infix_oper, is_prefix_oper}; -use crate::parser::stmt::{AlterStmt, AlterType, CreateIndexStmt, DeleteTableStmt}; -use crate::parser::{operation::Operation, operator::match_precedence}; +use std::collections::BTreeMap; -use crate::{ - error::{Error, Result}, - fmt_err, -}; use data_type::DataType; -use expression::Expression; -use expression::Literal; +use expression::{Expression, Literal}; use keyword::Keyword; use lexer::Lexer; -use std::collections::BTreeMap; use stmt::Statement; use token::Token; -use self::stmt::{ - DropTableStmt, ExplainStmt, SetStmt, SetVariableType, TransactionIsolationLevel, UpdateStmt, -}; use self::{ column::Column, operator::Precedence, - stmt::{FromItem, JoinType, OrderByType, SelectStmt}, + stmt::{ + DropTableStmt, ExplainStmt, FromItem, JoinType, OrderByType, SelectStmt, SetStmt, + SetVariableType, TransactionIsolationLevel, UpdateStmt, + }, +}; +use crate::{ + error::{Error, Result}, + fmt_err, + parser::{ + operation::Operation, + operator::{is_infix_oper, is_prefix_oper, match_precedence}, + stmt::{AlterStmt, AlterType, CreateIndexStmt, DeleteTableStmt}, + }, }; pub struct Parser { @@ -200,7 +201,6 @@ impl Parser { Token::KeyWord(Keyword::Column) => { // ALTER TABLE table_name ADD COLUMN new_column_name column_data_type self.next_token(); - dbg!(&self.pre_token); let new_column = self.parse_column()?; Ok(Statement::Alter(AlterStmt { @@ -538,6 +538,11 @@ impl Parser { Token::KeyWord(Keyword::Unique) | Token::KeyWord(Keyword::Index) => { self.parse_create_index_stmt() } + Token::KeyWord(Keyword::Database) => { + self.next_token(); + + Ok(Statement::CreateDatabase(self.next_ident()?)) + } _ => Err(Error::Parse(fmt_err!( "unexpected token: {}", self.peek_token @@ -1474,7 +1479,8 @@ impl Parser { // 如果 ( 是一个中缀运算符, 则是一个函数 Token::LeftParen => Ok(Expression::Function( match exp { - Expression::Literal(Literal::String(s)) => s, + // uppercase all function_name in parser + Expression::Literal(Literal::String(s)) => s.to_uppercase(), _ => { return Err(Error::Parse(fmt_err!( "Operation::LeftParen exp is not Literal::String" @@ -1571,11 +1577,9 @@ pub mod test { }; } - use std::vec; + use std::{io::Write, vec}; - use super::stmt::*; - use super::*; - use std::io::Write; + use super::{stmt::*, *}; #[cfg(test)] static LOG_INIT: std::sync::Once = std::sync::Once::new(); diff --git a/src/parser/operator.rs b/src/parser/operator.rs index a5a08e4..c3ccf6b 100644 --- a/src/parser/operator.rs +++ b/src/parser/operator.rs @@ -1,5 +1,7 @@ -use super::keyword::Keyword; -use super::token::{self, Token}; +use super::{ + keyword::Keyword, + token::{self, Token}, +}; #[derive(Eq, PartialEq, Debug, PartialOrd, Ord)] pub enum Precedence { diff --git a/src/parser/stmt.rs b/src/parser/stmt.rs index c139c64..f19605e 100644 --- a/src/parser/stmt.rs +++ b/src/parser/stmt.rs @@ -1,8 +1,7 @@ use std::collections::BTreeMap; -use crate::parser::column::Column; - use super::expression::Expression; +use crate::parser::column::Column; #[derive(PartialEq, Debug)] pub enum Statement { @@ -20,6 +19,7 @@ pub enum Statement { CreateIndex(CreateIndexStmt), ShowDatabase, ShowTables, + CreateDatabase(String), Set(SetStmt), DescribeTable(String), } diff --git a/src/planner/context.rs b/src/planner/context.rs new file mode 100644 index 0000000..d594333 --- /dev/null +++ b/src/planner/context.rs @@ -0,0 +1,101 @@ +use std::{ + collections::{HashMap, HashSet}, + hash::Hash, + sync::Arc, +}; + +use crate::{ + catalog::{self, column::Column, tableinfo::TableInfo, TableInfoRef}, + error::{ + Error, + Error::{Internal, Plan}, + Result, + }, + fmt_err, + store::table::{self, Table}, + types::expr::Expr, +}; + +#[derive(Debug)] +pub struct PlanContext { + /// table_name => tableinfo + pub(crate) table_map: HashMap, + // column_name => table_name, + pub(crate) column_map: HashMap>, +} + +impl PlanContext { + pub(crate) fn new() -> Self { + Self { + table_map: HashMap::new(), + column_map: HashMap::new(), + } + } + + pub(crate) fn info_by_name(&self, name: &str) -> Option { + match self.table_map.get(name) { + Some(info) => Some(Arc::clone(info)), + None => None, + } + } + + pub(crate) fn tablenames_by_columnname(&self, column_name: &str) -> Option> { + match self.column_map.get(column_name) { + Some(table_names) => { + return Some(Vec::clone(table_names)); + } + None => None, + } + } + + fn insert_column_info(&mut self, table_info: &TableInfoRef) { + for col in &table_info.schema.columns { + if let Some(table_names) = self.column_map.get_mut(&col.name) { + table_names.push(table_info.name.clone()); + } else { + self.column_map + .insert(col.name.to_owned(), vec![table_info.name.to_owned()]); + } + } + + } + + pub(crate) fn insert_table_info( + &mut self, + table_name: &str, + alias: &Option, + info: &TableInfoRef, + ) -> Result<()> { + match alias { + Some(alias_name) => { + match ( + self.table_map.get(table_name), + self.table_map.get(alias_name), + ) { + (None, None) => { + self.table_map + .insert(table_name.to_owned(), Arc::clone(info)); + self.table_map + .insert(alias_name.to_owned(), Arc::clone(info)); + + self.insert_column_info(info); + Ok(()) + } + (None, Some(_)) | (Some(_), None) | (Some(_), Some(_)) => { + Err(Plan(fmt_err!("{table_name} {alias_name} is exist"))) + } + } + } + None => match self.table_map.get(table_name) { + Some(_) => Err(Plan(fmt_err!("{table_name} is exist"))), + None => { + self.table_map + .insert(table_name.to_owned(), Arc::clone(info)); + + self.insert_column_info(info); + Ok(()) + } + }, + } + } +} diff --git a/src/planner/logical_plan.rs b/src/planner/logical_plan.rs new file mode 100644 index 0000000..b2ec0d3 --- /dev/null +++ b/src/planner/logical_plan.rs @@ -0,0 +1,34 @@ +use crate::{catalog::{schema::SchemaRef, TableID}, parser::stmt::{JoinType, OrderByType}, types::expr::Expr}; + + +#[derive(Debug, PartialEq)] +pub enum PlanNode { + Scan { + table_name: String, + table_id: TableID, + predicates: Option, + }, + Aggregation { + input: Box, + schema: SchemaRef, + }, + Filter { + input: Box, + predicates: Expr, + }, + Join { + left: Box, + right: Box, + join_type: JoinType, + predicates: Expr, + }, + Projection { + exprs: Vec<(Expr, Option)>, + input: Box, + }, + Sort { + input: Box, + by_column: Vec<(Expr, OrderByType)>, + }, + Null, +} diff --git a/src/planner/mod.rs b/src/planner/mod.rs new file mode 100644 index 0000000..e610fe0 --- /dev/null +++ b/src/planner/mod.rs @@ -0,0 +1,211 @@ +use std::sync::Arc; + +use self::{context::PlanContext, logical_plan::PlanNode}; +use crate::{catalog::CataLog, parser::stmt::Statement}; + +mod context; +pub mod logical_plan; +mod plan_expression; +mod plan_select; +mod plan_table; + +use crate::error::Result; + +#[derive(Debug)] +pub struct Planner { + pub catalog: Arc, + pub context: PlanContext, +} + +impl Planner { + pub fn new(catalog: &Arc) -> Self { + Self { + catalog: Arc::clone(catalog), + context: PlanContext::new(), + } + } + + pub fn plan(&mut self, ast: &Statement) -> Result { + match ast { + Statement::Select(select_ast) => self.plan_select(&select_ast), + _ => todo!(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + catalog::{column::Column, schema::Schema}, + parser::{stmt::JoinType, Parser}, + types::{ + expr::{Expr, Operator}, + value::Value, + LogicalType, + }, + }; + + #[test] + fn test_select() { + let mut parser = Parser::new_parser("".to_owned()); + let mut catalog = CataLog::default(); + catalog + .create_table( + &"user".to_owned(), + &Schema { + columns: vec![ + Column::new(LogicalType::UInt64, "id"), + Column::new(LogicalType::String, "user"), + Column::new(LogicalType::Int32, "year"), + Column::new(LogicalType::UInt16, "account"), + ], + }, + ) + .unwrap(); + catalog + .create_table( + &"cccc".to_owned(), + &Schema { + columns: vec![Column::new(LogicalType::UInt32, "id")], + }, + ) + .unwrap(); + let catalog = Arc::new(catalog); + let mut planner = Planner::new(&catalog); + match parser + .update(r#"select user.id, user from user join cccc on cccc.id = user.id;"#) + .parse_stmt() + { + Ok(ast) => { + assert_eq!( + planner.plan(&ast).unwrap(), + PlanNode::Projection { + exprs: vec![ + ( + Expr::ColumnExpr { + column_index: 0, + table_name: "user".to_owned(), + column_name: "id".to_owned(), + }, + None, + ), + ( + Expr::ColumnExpr { + column_index: 1, + table_name: "user".to_owned(), + column_name: "user".to_owned() + }, + None + ) + ], + input: Box::new(PlanNode::Join { + left: Box::new(PlanNode::Scan { + table_name: "user".to_owned(), + table_id: 0, + predicates: None, + }), + right: Box::new(PlanNode::Scan { + table_name: "cccc".to_owned(), + table_id: 1, + predicates: None, + }), + join_type: JoinType::Inner, + predicates: Expr::BinaryExpr { + left: Box::new(Expr::ColumnExpr { + column_index: 0, + table_name: "cccc".to_owned(), + column_name: "id".to_owned(), + }), + op: Operator::Eq, + right: Box::new(Expr::ColumnExpr { + column_index: 0, + table_name: "user".to_owned(), + column_name: "id".to_owned(), + }) + } + }), + } + ); + } + Err(err_msg) => { + println!("parse error: {err_msg:?}"); + assert!(false); + } + } + planner.context = PlanContext::new(); + match parser + .update( + r#"select user.id, user from user join cccc on cccc.id = user.id where cccc.id > 10"#, + ) + .parse_stmt() + { + Ok(ast) => { + assert_eq!( + planner.plan(&ast).unwrap(), + PlanNode::Projection { + exprs: vec![ + ( + Expr::ColumnExpr { + column_index: 0, + table_name: "user".to_owned(), + column_name: "id".to_owned(), + }, + None, + ), + ( + Expr::ColumnExpr { + column_index: 1, + table_name: "user".to_owned(), + column_name: "user".to_owned() + }, + None + ) + ], + input: Box::new(PlanNode::Filter { + input: Box::new(PlanNode::Join { + left: Box::new(PlanNode::Scan { + table_name: "user".to_owned(), + table_id: 0, + predicates: None, + }), + right: Box::new(PlanNode::Scan { + table_name: "cccc".to_owned(), + table_id: 1, + predicates: None, + }), + join_type: JoinType::Inner, + predicates: Expr::BinaryExpr { + left: Box::new(Expr::ColumnExpr { + column_index: 0, + table_name: "cccc".to_owned(), + column_name: "id".to_owned(), + }), + op: Operator::Eq, + right: Box::new(Expr::ColumnExpr { + column_index: 0, + table_name: "user".to_owned(), + column_name: "id".to_owned(), + }) + } + }), + predicates: Expr::BinaryExpr { + left: Box::new(Expr::ColumnExpr { + column_index: 0, + table_name: "cccc".to_owned(), + column_name: "id".to_owned() + }), + op: Operator::Gt, + right: Box::new(Expr::Literal(Value::Int(10))), + }, + }) + } + ); + } + Err(err_msg) => { + println!("parse error: {err_msg:?}"); + assert!(false); + } + } + } +} diff --git a/src/planner/plan_expression.rs b/src/planner/plan_expression.rs new file mode 100644 index 0000000..32258e0 --- /dev/null +++ b/src/planner/plan_expression.rs @@ -0,0 +1,162 @@ +use super::Planner; +use crate::{ + catalog::{column, TableID}, + error::{ + Error::{Internal, Plan}, + Result, + }, + fmt_err, + parser::{ + expression::{Expression, *}, + operation::{self, Operation}, + stmt::ExplainStmt, + }, + types::{ + self, + expr::{self, AggExpr, Expr, Operator, SUPPORT_AGG_NAME}, + value::Value, + }, +}; + +impl Planner { + pub fn plan_expression(&self, expr: &Expression) -> Result { + match expr { + Expression::Field(table_name, column_name) => match table_name { + Some(table_name) => match self.context.info_by_name(&table_name) { + Some(info) => { + for (idx, col) in info.schema.columns.iter().enumerate() { + if (*col).name == (*column_name) { + return Ok(Expr::ColumnExpr { + column_index: idx as TableID, + table_name: table_name.clone(), + column_name: column_name.clone(), + }); + } + } + + return Err(Plan(fmt_err!("{column_name} is not exist in {table_name}"))); + } + None => return Err(Plan(fmt_err!("{table_name} is not exist"))), + }, + None => { + // eg: `SELECT * FROM t1 JOIN t2 ON (t1.)column1 = (t2.)column2;` + // when plan the expression, we don't known the column_name belone. + match self.context.tablenames_by_columnname(&column_name) { + Some(table_names) => { + if table_names.len() == 1 { + match self.context.info_by_name(&table_names[0]) { + Some(info) => { + for (idx, col) in info.schema.columns.iter().enumerate() { + if (*col).name == (*column_name) { + return Ok(Expr::ColumnExpr { + column_index: idx as TableID, + table_name: table_names[0].clone(), + column_name: column_name.clone(), + }); + } + } + } + None => { + return Err(Internal(fmt_err!( + "{table_names:?} must exist in table_map" + ))); + } + } + } else { + return Err(Plan(fmt_err!( + "{column_name} is fuzzy, {table_names:?}" + ))); + } + } + None => { + return Err(Plan(fmt_err!("{column_name} is not exist"))); + } + } + + todo!() + } + }, + Expression::Column(_) => todo!(), + Expression::Literal(literal) => match literal { + Literal::All => Ok(Expr::Literal(Value::All)), + Literal::Null => Ok(Expr::Literal(Value::Null)), + Literal::Bool(v) => Ok(Expr::Literal(Value::Bool(*v))), + Literal::Int(v) => Ok(Expr::Literal(Value::Int(*v))), + Literal::Float(v) => Ok(Expr::Literal(Value::Float(*v))), + Literal::String(v) => Ok(Expr::Literal(Value::String(v.clone()))), + }, + Expression::Function(func_name, args) => match func_name.as_str() { + "MIN" => Ok(Expr::Agg { + agg_type: AggExpr::Min, + args: self.plan_expression_list(args)?, + }), + "MAX" => Ok(Expr::Agg { + agg_type: AggExpr::Max, + args: self.plan_expression_list(args)?, + }), + "COUNT" => Ok(Expr::Agg { + agg_type: AggExpr::Count, + args: self.plan_expression_list(args)?, + }), + "SUM" => Ok(Expr::Agg { + agg_type: AggExpr::Sum, + args: self.plan_expression_list(args)?, + }), + _ => { + return Err(Plan(fmt_err!( + "{func_name} unknown function_name, only support {SUPPORT_AGG_NAME:#?}" + ))); + } + }, + Expression::Operation(op) => self.plan_operation(op), + } + } + + fn plan_binary_op( + &self, + l: &Box, + r: &Box, + op: Operator, + ) -> Result { + Ok(Expr::BinaryExpr { + left: Box::new(self.plan_expression(&*l)?), + op, + right: Box::new(self.plan_expression(&*r)?), + }) + } + + fn plan_operation(&self, op: &Operation) -> Result { + Ok(match op { + Operation::And(l, r) => self.plan_binary_op(l, r, Operator::And)?, + Operation::Not(_) => { + todo!() + } + Operation::Or(l, r) => self.plan_binary_op(l, r, Operator::Or)?, + Operation::NotEqual(l, r) => self.plan_binary_op(l, r, Operator::NotEq)?, + Operation::Equal(l, r) => self.plan_binary_op(l, r, Operator::Eq)?, + Operation::GreaterThan(l, r) => self.plan_binary_op(l, r, Operator::Gt)?, + Operation::GreaterThanOrEqual(l, r) => self.plan_binary_op(l, r, Operator::GtEq)?, + Operation::LessThan(l, r) => self.plan_binary_op(l, r, Operator::Lt)?, + Operation::LessThanOrEqual(l, r) => self.plan_binary_op(l, r, Operator::LtEq)?, + Operation::IsNull(_) => todo!(), + Operation::Add(l, r) => self.plan_binary_op(l, r, Operator::Plus)?, + Operation::Subtract(l, r) => self.plan_binary_op(l, r, Operator::Minus)?, + Operation::Multiply(l, r) => self.plan_binary_op(l, r, Operator::Multiply)?, + Operation::Divide(l, r) => self.plan_binary_op(l, r, Operator::Divide)?, + Operation::Assert(_) => todo!(), + Operation::Like(..) => todo!(), + Operation::Negate(_) => todo!(), + Operation::BitWiseNot(_) => todo!(), + Operation::Modulo(..) => todo!(), + }) + } + + fn plan_expression_list(&self, exprs_list: &Vec) -> Result> { + let mut res = vec![]; + for expr in exprs_list { + res.push(self.plan_expression(expr)?); + } + + Ok(res) + } +} diff --git a/src/planner/plan_select.rs b/src/planner/plan_select.rs new file mode 100644 index 0000000..15e5d2d --- /dev/null +++ b/src/planner/plan_select.rs @@ -0,0 +1,191 @@ +use std::sync::Arc; + +use super::{ + logical_plan::{self, PlanNode}, + Planner, +}; +use crate::{ + error::{Error::Plan, Result}, + fmt_err, + parser::{ + expression::Expression, + stmt::{self, FromItem, SelectStmt, *}, + }, +}; + +impl Planner { + // planbuilder.go:243 + /// if sql has not `from` item, there are 4 examples for this: + /// ``` + /// SELECT function_name(args); + /// SELECT constant_expr; + /// SELECT @vars; + /// and because we don't support subquery, + /// SELECT (SELECT ...) is error. + /// ``` + pub(crate) fn plan_select(&mut self, select_stmt: &SelectStmt) -> Result { + // let node = match &select_stmt.froms { + // Some(froms) => self.plan_from(&froms[0]).unwrap(), + // None => PlanNode::Null, + // }; + + // let node = match &select_stmt.wheres { + // Some(where_expr) => PlanNode::Filter(Filter { + // source: Box::new(node), + // predicate: self.plan_expression(where_expr)?, + // }), + // None => PlanNode::Null, + // }; + // // in shaun-parser, actually here is only a group by expr + + // let mut is_has_agg = false; + // for (expr, _) in &select_stmt.selects { + // if expr.has_aggregation()? { + // is_has_agg = true; + // break; + // } + // } + if select_stmt.selects.is_empty() { + return Err(Plan("can't select empty".to_owned())); + } + let node = match &select_stmt.froms { + Some(froms) => self.plan_from(&froms[0])?, + None => { + // if the 'from' is empty + // it must like: + // 1. `SELECT constant_expresssion;` + // 2. `SELECT @variable_name;` + todo!() + } + }; + let node = match &select_stmt.wheres { + Some(where_expr) => PlanNode::Filter { + input: Box::new(node), + predicates: self.plan_expression(where_expr)?, + }, + None => node, + }; + + let mut is_has_agg = false; + for (expr, _) in &select_stmt.selects { + if expr.has_aggregation()? { + is_has_agg = true; + break; + } + } + let node = if is_has_agg || select_stmt.group_by.is_some() { + todo!() + } else { + self.plan_normal_select(select_stmt, node)? + }; + + let node = match &select_stmt.order { + Some(order_by) => PlanNode::Sort { + input: Box::new(node), + by_column: { + let mut exprs = vec![]; + for (expr, orderby_type) in order_by { + exprs.push((self.plan_expression(&expr)?, orderby_type.clone())); + } + exprs + }, + }, + None => node, + }; + + Ok(node) + } + + pub(crate) fn plan_normal_select( + &mut self, + select_stmt: &SelectStmt, + node: PlanNode, + ) -> Result { + Ok(PlanNode::Projection { + exprs: { + let mut exprs = vec![]; + for (expr, alias) in &select_stmt.selects { + exprs.push((self.plan_expression(&expr)?, alias.to_owned())); + } + + exprs + }, + input: Box::new(node), + }) + } + + /// for example, we have this SQL: + /// ``` + /// SELECT v3, MAX(v1) + MAX(v2) + /// FROM user + /// WHERE condition_expression + /// GROUP BY v3 + /// HAVING COUNT(v4) > COUNT(v5); + /// ``` + /// we hope generate this logical plan: + /// ``` + /// Projection([v3, MAX(v1) + MAX(v2)]) + /// GroupBy(v3) agg_types = [MAX(v1), MAX(v2), COUNT(v4), COUNT(v5)] + /// Filter(condition_expression) + /// TableScan(user) + /// ``` + /// and actuall here is only has a GroupBy expresssion + pub(crate) fn plan_select_agg( + &mut self, + select_stmt: &SelectStmt, + node: &mut PlanNode, + ) -> Result { + let mut group_by_exprs = vec![]; + match &select_stmt.group_by { + Some(exprs) => { + group_by_exprs.push(self.plan_expression(&exprs[0])?); + } + None => {} + }; + + todo!() + } + + pub(crate) fn plan_from(&mut self, join_item: &FromItem) -> Result { + Ok(match join_item { + FromItem::Join { + left, + right, + join_type, + predicate, + } => { + let left_node = self.plan_from(left)?; + let right_node = self.plan_from(right)?; + + // in mysql, if don't write the join condition in SQL, the default way + // is join with two tables primary key. + // but here if you write this `SELECT * FROM t1 JOIN t2;` It's an error + match predicate { + Some(expr) => PlanNode::Join { + left: Box::new(left_node), + right: Box::new(right_node), + join_type: join_type.clone(), + predicates: self.plan_expression(expr)?, + }, + None => { + return Err(Plan(fmt_err!("join must take with condition expression"))); + } + } + } + FromItem::Table { name, alias } => PlanNode::Scan { + table_name: name.clone(), + table_id: match self.catalog.table_by_name(name) { + Some(info) => { + self.context.insert_table_info(name, alias, &info)?; + + info.id + } + None => { + return Err(Plan(fmt_err!("table: {name} is not exist"))); + } + }, + predicates: None, + }, + }) + } +} diff --git a/src/planner/plan_table.rs b/src/planner/plan_table.rs new file mode 100644 index 0000000..c3eea32 --- /dev/null +++ b/src/planner/plan_table.rs @@ -0,0 +1,3 @@ +use std::iter::FilterMap; + + diff --git a/src/store/mod.rs b/src/store/mod.rs new file mode 100644 index 0000000..4985f18 --- /dev/null +++ b/src/store/mod.rs @@ -0,0 +1,2 @@ +pub mod rocks; +pub mod table; diff --git a/src/store/rocks.rs b/src/store/rocks.rs new file mode 100644 index 0000000..f32d8f4 --- /dev/null +++ b/src/store/rocks.rs @@ -0,0 +1,101 @@ +use std::{path::Iter, sync::Arc}; + +use rocksdb::{ + DBAccess, DBCommon, DBIteratorWithThreadMode, IteratorMode, MultiThreaded, SingleThreaded, + ThreadMode, DB, +}; + +use crate::{ + error::{Error, Error::Store, Result}, + fmt_err, +}; + +#[derive(Debug)] +pub struct Rocks { + pub path: String, + pub handle: DB, +} + +type RocksIter<'a> = DBIteratorWithThreadMode<'a, DB>; + +pub type RocksRef = Arc; + +impl Rocks { + pub(crate) fn new(path: &str) -> Result { + Ok(Self { + path: path.to_string(), + handle: match DB::open_default(path) { + Ok(db) => db, + Err(e) => { + return Err(Store(fmt_err!("{e} init rocksdb fail"))); + } + }, + }) + } + + pub(crate) fn put( + &mut self, + key: &[u8], + value: &[u8], + ) -> std::result::Result<(), rocksdb::Error> { + self.handle.put(key, value) + } + + pub(crate) fn get>( + &mut self, + key: K, + ) -> std::result::Result>, rocksdb::Error> { + self.handle.get(key) + } + + pub(crate) fn iter<'a>(&self, mode: IteratorMode<'a>) -> RocksIter { + self.handle.iterator(mode) + } + + // pub(crate) fn prefix_iter(&self, prefix: &str) -> RocksPrefixIter { + // let pp = self.handle.prefix_iterator(prefix); + // } +} + +#[cfg(test)] +mod tests { + + use super::*; + + fn type_of(_: T) -> &'static str { + std::any::type_name::() + } + + #[test] + pub fn test() { + // DBCommon 实现了 Drop + // let mut db = Rocks::new("./rocksdata").unwrap(); + // db.put(b"123", b"dada").unwrap(); + // db.put(b"123dakld", b"dakdlakldjakl").unwrap(); + // db.put(b"bbb", b"daldald").unwrap(); + // let data = db.get("123").unwrap().unwrap(); + // assert_eq!(data, Vec::from("dada")); + + // let _ = db + // .iter(IteratorMode::Start) + // .map(|item| match item { + // Ok((k, v)) => { + // println!( + // "{} {}", + // std::str::from_utf8(&*k).unwrap(), + // std::str::from_utf8(&*v).unwrap() + // ); + // } + // Err(err) => { + // println!("{err}"); + // } + // }) + // .for_each(drop); + // println!("prefix_iter"); + // const a: &[u8] = b"da"; + // let pp = db.handle.prefix_iterator("12"); + // let mut got = pp.collect::, >>().unwrap(); + // got.reverse(); + // assert_eq!(got.as_slice(), &[Box::new(a), Box::new(a)]); + } +} diff --git a/src/store/table.rs b/src/store/table.rs new file mode 100644 index 0000000..43442dc --- /dev/null +++ b/src/store/table.rs @@ -0,0 +1,23 @@ +use super::rocks::{Rocks, RocksRef}; +use crate::{ + catalog::schema::SchemaRef, + error::{Error, Result}, + types::value::{ScalarValue, Value}, +}; + +#[derive(Debug)] +pub struct Table { + pub name: String, + pub db: RocksRef, + pub schema: SchemaRef, +} + +// impl Table { +// pub fn next_tuple(&self) -> Option> { + +// } + +// pub fn insert_tuple() -> Result<()> { +// unimplemented!() +// } +// } diff --git a/src/types/expr.rs b/src/types/expr.rs new file mode 100644 index 0000000..3348fc6 --- /dev/null +++ b/src/types/expr.rs @@ -0,0 +1,49 @@ +use super::value::Value; + +#[derive(Debug, Clone, PartialEq)] +pub enum Expr { + ColumnExpr { + column_index: i32, + table_name: String, + column_name: String, + }, + Alias(Box, String), + Literal(Value), + BinaryExpr { + left: Box, + op: Operator, + right: Box, + }, + Agg { + agg_type: AggExpr, + args: Vec, + }, +} + +pub static SUPPORT_AGG_NAME: &[&str] = &["MIN", "MAX", "COUNT", "SUM"]; + +#[derive(Debug, Clone, PartialEq)] +pub enum AggExpr { + Min, + Max, + Count, + Sum, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Operator { + Eq, + NotEq, + Lt, + LtEq, + Gt, + GtEq, + Plus, + Minus, + Multiply, + Divide, + Modules, + And, + Or, + Xor, +} diff --git a/src/types/mod.rs b/src/types/mod.rs index fc45e21..b6c3c74 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1 +1,131 @@ +pub mod expr; pub mod value; + +#[derive(Debug, Clone, PartialEq)] +pub enum LogicalType { + Null, + Bool, + Int8, + Int16, + Int32, + Int64, + UInt8, + UInt16, + UInt32, + UInt64, + Float32, + Float64, + String, + VarChar(usize), +} + +pub fn is_can_cast(from: &LogicalType, to: &LogicalType) -> bool { + if from == to { + return true; + } + + match from { + LogicalType::Null => true, + LogicalType::Bool => false, + LogicalType::Int8 => matches!( + to, + LogicalType::Int16 + | LogicalType::Int32 + | LogicalType::Int64 + | LogicalType::UInt8 + | LogicalType::UInt16 + | LogicalType::UInt32 + | LogicalType::UInt64 + | LogicalType::Float32 + | LogicalType::Float64 + ), + LogicalType::Int16 => matches!( + to, + LogicalType::Int32 + | LogicalType::Int64 + | LogicalType::UInt8 + | LogicalType::UInt16 + | LogicalType::UInt32 + | LogicalType::UInt64 + | LogicalType::Float32 + | LogicalType::Float64 + ), + LogicalType::Int32 => matches!( + to, + LogicalType::Int8 + | LogicalType::Int16 + | LogicalType::Int32 + | LogicalType::Int64 + | LogicalType::UInt8 + | LogicalType::UInt16 + | LogicalType::UInt32 + | LogicalType::UInt64 + | LogicalType::Float32 + | LogicalType::Float64 + ), + LogicalType::Int64 => matches!( + to, + LogicalType::Int8 + | LogicalType::Int16 + | LogicalType::Int32 + | LogicalType::UInt8 + | LogicalType::UInt16 + | LogicalType::UInt32 + | LogicalType::UInt64 + | LogicalType::Float32 + | LogicalType::Float64 + ), + LogicalType::UInt8 => matches!( + to, + LogicalType::Int8 + | LogicalType::Int16 + | LogicalType::Int32 + | LogicalType::Int64 + | LogicalType::UInt16 + | LogicalType::UInt32 + | LogicalType::UInt64 + | LogicalType::Float32 + | LogicalType::Float64 + ), + LogicalType::UInt16 => matches!( + to, + LogicalType::Int8 + | LogicalType::Int16 + | LogicalType::Int32 + | LogicalType::Int64 + | LogicalType::UInt8 + | LogicalType::UInt32 + | LogicalType::UInt64 + | LogicalType::Float32 + | LogicalType::Float64 + ), + LogicalType::UInt32 => matches!( + to, + LogicalType::Int8 + | LogicalType::Int16 + | LogicalType::Int32 + | LogicalType::Int64 + | LogicalType::UInt8 + | LogicalType::UInt32 + | LogicalType::UInt64 + | LogicalType::Float32 + | LogicalType::Float64 + ), + LogicalType::UInt64 => matches!( + to, + LogicalType::Int8 + | LogicalType::Int16 + | LogicalType::Int32 + | LogicalType::Int64 + | LogicalType::UInt8 + | LogicalType::UInt16 + | LogicalType::UInt32 + | LogicalType::Float32 + | LogicalType::Float64 + ), + LogicalType::Float32 => matches!(to, LogicalType::Float64), + LogicalType::Float64 => false, + LogicalType::VarChar(_) => matches!(to, LogicalType::String), + LogicalType::String => matches!(to, LogicalType::VarChar(_)), + } +} diff --git a/src/types/schema.rs b/src/types/schema.rs deleted file mode 100644 index e69de29..0000000 diff --git a/src/types/value.rs b/src/types/value.rs index 10241e0..c174dde 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -1,8 +1,29 @@ #[derive(Clone, Debug, PartialEq)] pub enum Value { + All, Null, Bool(bool), Int(i64), Float(f64), String(String), } + + + + +#[derive(Debug, Clone)] +pub enum ScalarValue { + Null, + Bool(Option), + Int8(Option), + Int16(Option), + Int32(Option), + Int64(Option), + UInt8(Option), + UInt16(Option), + UInt32(Option), + UInt64(Option), + Float32(Option), + Float64(Option), + String(String), +} From bb1ceab2abf1de81701edac311da99df7614ac5b Mon Sep 17 00:00:00 2001 From: fansehep Date: Mon, 29 Jan 2024 21:42:19 +0800 Subject: [PATCH 2/2] feat: update planner --- src/bin/parser_repl.rs | 2 +- src/bin/plan_repl.rs | 4 +-- src/lib.rs | 1 + src/optimizer/mod.rs | 0 src/optimizer/trans.rs | 0 src/planner/mod.rs | 18 ++++++++-- src/planner/optimize/constant_folding.rs | 0 src/planner/optimize/filter_pushdown.rs | 24 ++++++++++++++ src/planner/optimize/mod.rs | 8 +++++ src/planner/plan_createtable.rs | 42 ++++++++++++++++++++++++ src/planner/plan_expression.rs | 2 +- src/store/rocks.rs | 2 +- 12 files changed, 96 insertions(+), 7 deletions(-) create mode 100644 src/optimizer/mod.rs create mode 100644 src/optimizer/trans.rs create mode 100644 src/planner/optimize/constant_folding.rs create mode 100644 src/planner/optimize/filter_pushdown.rs create mode 100644 src/planner/optimize/mod.rs create mode 100644 src/planner/plan_createtable.rs diff --git a/src/bin/parser_repl.rs b/src/bin/parser_repl.rs index 99dc8f0..2496cca 100644 --- a/src/bin/parser_repl.rs +++ b/src/bin/parser_repl.rs @@ -1,6 +1,6 @@ use std::io::Write; -use shaun::parser::{stmt::Statement, Parser}; +use shaun::parser::Parser; const PARSER_HISTORY_NAME: &str = ".shaun_parser_history"; diff --git a/src/bin/plan_repl.rs b/src/bin/plan_repl.rs index 1f3937d..79a6961 100644 --- a/src/bin/plan_repl.rs +++ b/src/bin/plan_repl.rs @@ -1,8 +1,8 @@ use std::{io::Write, sync::Arc}; use shaun::{ - catalog::{self, CataLog}, - parser::{stmt::Statement, Parser}, + catalog::CataLog, + parser::Parser, planner::Planner, }; diff --git a/src/lib.rs b/src/lib.rs index 3014f2a..0b464e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,3 +4,4 @@ pub mod parser; pub mod planner; pub mod store; pub mod types; +pub mod optimizer; \ No newline at end of file diff --git a/src/optimizer/mod.rs b/src/optimizer/mod.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/optimizer/trans.rs b/src/optimizer/trans.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/planner/mod.rs b/src/planner/mod.rs index e610fe0..04cce05 100644 --- a/src/planner/mod.rs +++ b/src/planner/mod.rs @@ -7,7 +7,8 @@ mod context; pub mod logical_plan; mod plan_expression; mod plan_select; -mod plan_table; +mod plan_createtable; +mod optimize; use crate::error::Result; @@ -25,6 +26,14 @@ impl Planner { } } + /// planner will catch the sql metainfo + /// use it after reset_ctx + pub fn reset_ctx(&mut self) -> &mut Self { + self.context = PlanContext::new(); + + self + } + pub fn plan(&mut self, ast: &Statement) -> Result { match ast { Statement::Select(select_ast) => self.plan_select(&select_ast), @@ -133,7 +142,7 @@ mod tests { assert!(false); } } - planner.context = PlanContext::new(); + planner.reset_ctx(); match parser .update( r#"select user.id, user from user join cccc on cccc.id = user.id where cccc.id > 10"#, @@ -208,4 +217,9 @@ mod tests { } } } + + #[test] + fn test_multi_join() { + + } } diff --git a/src/planner/optimize/constant_folding.rs b/src/planner/optimize/constant_folding.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/planner/optimize/filter_pushdown.rs b/src/planner/optimize/filter_pushdown.rs new file mode 100644 index 0000000..5d2bd99 --- /dev/null +++ b/src/planner/optimize/filter_pushdown.rs @@ -0,0 +1,24 @@ +use super::LogicalOptimizer; +use crate::{planner::logical_plan::PlanNode, types::expr::Expr}; + +impl LogicalOptimizer { + pub fn filter_pushdown(&self, node: &PlanNode, predicate: Option) { + match node { + PlanNode::Projection { input, exprs: _ } => { + self.filter_pushdown(input, None); + } + PlanNode::Filter { input, predicates } => { + self.filter_pushdown(input, Some(predicates.clone())); + } + PlanNode::Join { + left, + right, + join_type, + predicates, + } => { + + } + _ => return, + } + } +} diff --git a/src/planner/optimize/mod.rs b/src/planner/optimize/mod.rs new file mode 100644 index 0000000..e1d296a --- /dev/null +++ b/src/planner/optimize/mod.rs @@ -0,0 +1,8 @@ +pub mod constant_folding; +pub mod filter_pushdown; + + +pub struct LogicalOptimizer { + +} + diff --git a/src/planner/plan_createtable.rs b/src/planner/plan_createtable.rs new file mode 100644 index 0000000..2ea5c45 --- /dev/null +++ b/src/planner/plan_createtable.rs @@ -0,0 +1,42 @@ +use std::{collections::HashSet, iter::FilterMap}; + +use super::Planner; +use crate::{ + error::{Error::Plan, Result}, + fmt_err, + parser::stmt::CreateTableStmt, +}; + +impl Planner { + /// check the create table + /// ``` + /// 1. table can't already exist in catalog + /// 2. table can't have multi same column name + /// 3. table only support one primary key + /// ``` + pub(crate) fn check_create_table(&mut self, create_stmt: &CreateTableStmt) -> Result<()> { + if self.catalog.is_table_exist(&create_stmt.table_name) { + return Err(Plan(fmt_err!( + "{} is already exist in database", + &create_stmt.table_name + ))); + } + + let mut column_name_set = HashSet::new(); + for col in &create_stmt.columns { + if column_name_set.contains(&col.name) { + return Err(Plan(fmt_err!("{} is fuzzy in your SQL", col.name))); + } + column_name_set.insert(&col.name); + } + + if create_stmt.columns.iter().filter(|x| x.primary_key).count() != 1 { + return Err(Plan(fmt_err!( + "table {} only support one primary key", + create_stmt.table_name + ))); + } + + Ok(()) + } +} diff --git a/src/planner/plan_expression.rs b/src/planner/plan_expression.rs index 32258e0..5948a52 100644 --- a/src/planner/plan_expression.rs +++ b/src/planner/plan_expression.rs @@ -40,7 +40,7 @@ impl Planner { }, None => { // eg: `SELECT * FROM t1 JOIN t2 ON (t1.)column1 = (t2.)column2;` - // when plan the expression, we don't known the column_name belone. + // plan the expression, but don't known the column_name belone. match self.context.tablenames_by_columnname(&column_name) { Some(table_names) => { if table_names.len() == 1 { diff --git a/src/store/rocks.rs b/src/store/rocks.rs index f32d8f4..e704016 100644 --- a/src/store/rocks.rs +++ b/src/store/rocks.rs @@ -48,7 +48,7 @@ impl Rocks { self.handle.get(key) } - pub(crate) fn iter<'a>(&self, mode: IteratorMode<'a>) -> RocksIter { + pub(crate) fn iter(&self, mode: IteratorMode<'_>) -> RocksIter { self.handle.iterator(mode) }