diff --git a/examples/Cargo.toml b/examples/Cargo.toml index ba58bdb..49fcac6 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -13,30 +13,6 @@ thriller_utils = { path = "../thriller-utils" } name = "topo_sort" path = "graph/topo_sort.rs" -[[example]] -name = "access" -path = "loop/access.rs" - -[[example]] -name = "multiloops" -path = "loop/multiloops.rs" - [[example]] name = "gemm" path = "ops/gemm.rs" - -[[example]] -name = "rf_block" -path = "gemm/rf_block.rs" - -[[example]] -name = "shared_block" -path = "gemm/shared_block.rs" - -[[example]] -name = "global_block" -path = "gemm/global_block.rs" - -[[example]] -name = "gemm_codegen" -path = "gemm/gemm_codegen.rs" diff --git a/examples/gemm/gemm_codegen.rs b/examples/gemm/gemm_codegen.rs deleted file mode 100644 index 8aa3c76..0000000 --- a/examples/gemm/gemm_codegen.rs +++ /dev/null @@ -1,83 +0,0 @@ -use std::{cell::RefCell, rc::Rc}; - -use thriller_core::{ - initialize, BlockLayout, BlockShape, BlockType, MemoryLevel, RegularVar, ThrillerBlock, - ThrillerEngine, ThrillerGraph, ThrillerNode, ThrillerNodeInner, -}; -use thriller_utils::{BufBuilder, ThrillerUtils}; - -fn main() { - initialize(); - - let g_a = Rc::new(BufBuilder::row_major_global_tile("gA", &[256, 256])); - let g_b = Rc::new(BufBuilder::col_major_global_tile("gB", &[256, 256])); - let g_c = Rc::new(BufBuilder::row_major_global_tile("gC", &[256, 256])); - - let shared_block = - ThrillerUtils::build_shared_gemm_block(g_a.clone(), g_b.clone(), g_c.clone()); - let shared_block = Rc::new(shared_block); - - let shared_block_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Block( - shared_block.clone(), - )))); - - let mut global_graph = ThrillerGraph::new(MemoryLevel::Global); - - global_graph.add_nodes(vec![shared_block_node.clone()]); - - let global_graph = Rc::new(global_graph); - - let global_block = ThrillerBlock::new( - vec![], - vec![], - MemoryLevel::Global, - global_graph.clone(), - BlockType::Map, - ); - - let mut engine = ThrillerEngine::new(global_block); - - let var_a = Rc::new(RegularVar::new(String::from("A"))); - let var_b = Rc::new(RegularVar::new(String::from("B"))); - let var_c = Rc::new(RegularVar::new(String::from("C"))); - - let block_layout_a = Rc::new(BlockLayout::new([ - BlockShape::Num(1), - BlockShape::Num(1), - BlockShape::Num(1), - ])); - - let block_layout_b = Rc::new(BlockLayout::new([ - BlockShape::Num(1), - BlockShape::Num(1), - BlockShape::Num(1), - ])); - - let block_layout_c = Rc::new(BlockLayout::new([ - BlockShape::Num(1), - BlockShape::Num(1), - BlockShape::Num(1), - ])); - - engine.add_inputs(vec![ - (var_a.clone(), g_a.clone()), - (var_b.clone(), g_b.clone()), - ]); - engine.add_outputs(vec![(var_c.clone(), g_c.clone())]); - - engine.add_input_blocks(vec![block_layout_a, block_layout_b]); - engine.add_output_blocks(vec![block_layout_c]); - - let code = engine.emit_dataflow("thriller_gemm").unwrap(); - println!("{}", code); - - let repo_dir = engine.install_library().unwrap(); - println!("Library installed at: {}", repo_dir); - - engine - .persist( - format!("{}/{}", repo_dir, "src/kernels/thriller_gemm.cu"), - "thriller_gemm".to_string(), - ) - .unwrap(); -} diff --git a/examples/gemm/global_block.rs b/examples/gemm/global_block.rs deleted file mode 100644 index 6b1fd09..0000000 --- a/examples/gemm/global_block.rs +++ /dev/null @@ -1,74 +0,0 @@ -use std::{cell::RefCell, rc::Rc}; - -use thriller_core::{ - initialize, BlockLayout, BlockShape, BlockType, MemoryLevel, RegularVar, ThrillerBlock, - ThrillerEngine, ThrillerGraph, ThrillerNode, ThrillerNodeInner, -}; -use thriller_utils::{BufBuilder, ThrillerUtils}; - -fn main() { - initialize(); - - let g_a = Rc::new(BufBuilder::row_major_global_tile("gA", &[256, 256])); - let g_b = Rc::new(BufBuilder::row_major_global_tile("gB", &[256, 256])); - let g_c = Rc::new(BufBuilder::row_major_global_tile("gC", &[256, 256])); - - let shared_block = - ThrillerUtils::build_shared_gemm_block(g_a.clone(), g_b.clone(), g_c.clone()); - let shared_block = Rc::new(shared_block); - - let shared_block_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Block( - shared_block.clone(), - )))); - - let mut global_graph = ThrillerGraph::new(MemoryLevel::Global); - - global_graph.add_nodes(vec![shared_block_node.clone()]); - - let global_graph = Rc::new(global_graph); - - let global_block = ThrillerBlock::new( - vec![], - vec![], - MemoryLevel::Global, - global_graph.clone(), - BlockType::Map, - ); - - let mut engine = ThrillerEngine::new(global_block); - - let var_a = Rc::new(RegularVar::new(String::from("A"))); - let var_b = Rc::new(RegularVar::new(String::from("B"))); - let var_c = Rc::new(RegularVar::new(String::from("C"))); - - let block_layout_a = Rc::new(BlockLayout::new([ - BlockShape::Num(1), - BlockShape::Num(1), - BlockShape::Num(1), - ])); - - let block_layout_b = Rc::new(BlockLayout::new([ - BlockShape::Num(1), - BlockShape::Num(1), - BlockShape::Num(1), - ])); - - let block_layout_c = Rc::new(BlockLayout::new([ - BlockShape::Num(1), - BlockShape::Num(1), - BlockShape::Num(1), - ])); - - engine.add_inputs(vec![ - (var_a.clone(), g_a.clone()), - (var_b.clone(), g_b.clone()), - ]); - engine.add_outputs(vec![(var_c.clone(), g_c.clone())]); - - engine.add_input_blocks(vec![block_layout_a, block_layout_b]); - engine.add_output_blocks(vec![block_layout_c]); - - let code = engine.emit_dataflow("thriller_gemm").unwrap(); - - println!("{}", code); -} diff --git a/examples/gemm/rf_block.rs b/examples/gemm/rf_block.rs deleted file mode 100644 index dd849c0..0000000 --- a/examples/gemm/rf_block.rs +++ /dev/null @@ -1,101 +0,0 @@ -use std::vec; -use std::{cell::RefCell, rc::Rc}; - -use thriller_core::{ - initialize, AccessMap, AccessMatrix, AccessOffset, AttachedEdge, BlockType, Gemm, - IterationBound, IterationVar, MemoryLevel, Task, ThrillerBlock, ThrillerEdge, ThrillerGraph, - ThrillerNode, ThrillerNodeInner, -}; - -use thriller_utils::BufBuilder; - -fn main() { - initialize(); - let s_a = Rc::new(BufBuilder::row_major_shared_tile("sA", &[256, 256])); - let r_a = Rc::new(BufBuilder::row_major_reg_tile("rA", &[64, 64])); - let s_b = Rc::new(BufBuilder::col_major_shared_tile("sB", &[256, 256])); - let r_b = Rc::new(BufBuilder::row_major_reg_tile("rB", &[64, 64])); - let mut in_edge0 = AttachedEdge::new(s_a, r_a.clone(), None); - let mut in_edge1 = AttachedEdge::new(s_b, r_b.clone(), None); - - let acc = Rc::new(BufBuilder::row_major_reg_tile("rC", &[64, 64])); - let s_c = Rc::new(BufBuilder::row_major_shared_tile("sC", &[256, 256])); - let out_edge = AttachedEdge::new(acc.clone(), s_c, None); - - let iter_var = Rc::new(IterationVar::new( - "i", - (IterationBound::Fixed(0), IterationBound::Fixed(10)), - )); - - let mut access_map = AccessMap::new(1, vec![1]); - access_map.add_iter_var(iter_var); - - access_map.add_access_matrix(AccessMatrix(vec![vec![1]])); - access_map.add_access_matrix(AccessMatrix(vec![vec![1]])); - - access_map.add_access_offset(AccessOffset(vec![0])); - access_map.add_access_offset(AccessOffset(vec![0])); - - let access_map = Rc::new(access_map); - - in_edge0.replace_access_map(access_map.clone()); - in_edge1.replace_access_map(access_map.clone()); - - let mut subgraph = ThrillerGraph::new(MemoryLevel::Register); - - let r_a_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - r_a.clone(), - )))); - let r_b_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - r_b.clone(), - )))); - let acc_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - acc.clone(), - )))); - - let gemm = Gemm::new( - vec![r_a_node.clone(), r_b_node.clone()], - acc_node.clone(), - access_map.clone(), - ); - - let gemm_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Op( - Box::new(gemm), - )))); - - let ra_gemm_edge = ThrillerEdge::new(r_a_node.clone(), gemm_node.clone()); - let rb_gemm_edge = ThrillerEdge::new(r_b_node.clone(), gemm_node.clone()); - let gemm_acc_edge = ThrillerEdge::new(gemm_node.clone(), acc_node.clone()); - - let ra_gemm_edge_ref = Rc::new(ra_gemm_edge); - let rb_gemm_edge_ref = Rc::new(rb_gemm_edge); - let gemm_acc_edge_ref = Rc::new(gemm_acc_edge); - - subgraph.add_nodes(vec![ - r_a_node.clone(), - r_b_node.clone(), - acc_node.clone(), - gemm_node.clone(), - ]); - subgraph.add_edges(vec![ - ra_gemm_edge_ref.clone(), - rb_gemm_edge_ref.clone(), - gemm_acc_edge_ref.clone(), - ]); - - subgraph.connect(); - - let mut block = ThrillerBlock::new( - vec![Rc::new(in_edge0), Rc::new(in_edge1)], - vec![Rc::new(out_edge)], - MemoryLevel::Register, - Rc::new(subgraph), - BlockType::Reduce, - ); - - block.merge_access_map(); - - let code = block.emit().unwrap(); - - println!("{}", code); -} diff --git a/examples/gemm/shared_block.rs b/examples/gemm/shared_block.rs deleted file mode 100644 index ab69099..0000000 --- a/examples/gemm/shared_block.rs +++ /dev/null @@ -1,99 +0,0 @@ -use std::vec; -use std::{cell::RefCell, rc::Rc}; - -use thriller_core::{ - initialize, AccessMap, AccessMatrix, AccessOffset, AttachedEdge, BlockType, IterationBound, - IterationVar, MemoryLevel, Task, ThrillerBlock, ThrillerEdge, ThrillerGraph, ThrillerNode, - ThrillerNodeInner, -}; -use thriller_utils::{BufBuilder, ThrillerUtils}; - -fn main() { - initialize(); - - let iter_var = Rc::new(IterationVar::new( - "j", - (IterationBound::Fixed(0), IterationBound::Fixed(10)), - )); - - let mut access_map = AccessMap::new(1, vec![1]); - access_map.add_iter_var(iter_var); - - access_map.add_access_matrix(AccessMatrix(vec![vec![1]])); - access_map.add_access_matrix(AccessMatrix(vec![vec![1]])); - - access_map.add_access_offset(AccessOffset(vec![0])); - access_map.add_access_offset(AccessOffset(vec![0])); - - let access_map = Rc::new(access_map); - - let s_a = Rc::new(BufBuilder::row_major_shared_tile("sA", &[256, 256])); - let s_b = Rc::new(BufBuilder::col_major_shared_tile("sB", &[256, 256])); - let s_c = Rc::new(BufBuilder::row_major_shared_tile("sC", &[256, 256])); - let g_a = Rc::new(BufBuilder::row_major_global_tile("gA", &[256, 256])); - let g_b = Rc::new(BufBuilder::col_major_global_tile("gB", &[256, 256])); - let g_c = Rc::new(BufBuilder::row_major_global_tile("gC", &[256, 256])); - - let in_edge0 = AttachedEdge::new(g_a.clone(), s_a.clone(), Some(access_map.clone())); - let in_edge1 = AttachedEdge::new(g_b.clone(), s_b.clone(), Some(access_map.clone())); - let out_edge = AttachedEdge::new(g_c.clone(), s_c.clone(), None); - - let rf_gemm_graph = ThrillerUtils::build_gemm_rf_block(s_a.clone(), s_b.clone(), s_c.clone()); - - // let rf_code = rf_gemm_graph.emit().unwrap(); - - // println!("{}", rf_code); - - let s_a_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - s_a.clone(), - )))); - - let s_b_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - s_b.clone(), - )))); - - let s_c_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - s_c.clone(), - )))); - - let rf_block_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Block( - Rc::new(rf_gemm_graph), - )))); - - let sa_block_edge = Rc::new(ThrillerEdge::new(s_a_node.clone(), rf_block_node.clone())); - - let sb_block_edge = Rc::new(ThrillerEdge::new(s_b_node.clone(), rf_block_node.clone())); - - let block_sc_edge = Rc::new(ThrillerEdge::new(rf_block_node.clone(), s_c_node.clone())); - - let mut subgraph = ThrillerGraph::new(MemoryLevel::Shared); - - subgraph.add_nodes(vec![ - s_a_node.clone(), - s_b_node.clone(), - s_c_node.clone(), - rf_block_node.clone(), - ]); - - subgraph.add_edges(vec![ - sa_block_edge.clone(), - sb_block_edge.clone(), - block_sc_edge.clone(), - ]); - - subgraph.connect(); - - let mut shared_block = ThrillerBlock::new( - vec![Rc::new(in_edge0), Rc::new(in_edge1)], - vec![Rc::new(out_edge)], - MemoryLevel::Shared, - Rc::new(subgraph), - BlockType::Map, - ); - - shared_block.merge_access_map(); - - let code = shared_block.emit().unwrap(); - - println!("{}", code); -} diff --git a/examples/loop/access.rs b/examples/loop/access.rs deleted file mode 100644 index 15fd9f0..0000000 --- a/examples/loop/access.rs +++ /dev/null @@ -1,108 +0,0 @@ -use std::{rc::Rc, vec}; - -use thriller_core::{ - initialize, AccessMap, AccessMatrix, AccessOffset, IterationBound, IterationVar, ThrillerError, - ThrillerResult, Var, -}; - -fn main() { - initialize(); - - /* - * for(i1 = 0; i1 < M; i1++){ - * for(i2 = 0; i2 < N; i2++){ - * for(i3 = 0; i3 < K; i3++){ - * } - * } - * } - */ - - let iter_var1 = Rc::new(IterationVar::new( - "i1", - (IterationBound::Fixed(0), IterationBound::Fixed(10)), - )); - let iter_var2 = Rc::new(IterationVar::new( - "i2", - (IterationBound::Fixed(0), IterationBound::Fixed(10)), - )); - let iter_var3 = Rc::new(IterationVar::new( - "i3", - (IterationBound::Fixed(0), IterationBound::Fixed(10)), - )); - - let mut access_map = AccessMap::new(3, vec![2, 2, 2]); - access_map.add_iter_var(iter_var1); - access_map.add_iter_var(iter_var2); - access_map.add_iter_var(iter_var3); - - access_map.add_access_matrix(AccessMatrix(vec![vec![1, 0, 0], vec![0, 0, 1]])); - access_map.add_access_matrix(AccessMatrix(vec![vec![0, 0, 1], vec![0, 1, 0]])); - access_map.add_access_matrix(AccessMatrix(vec![vec![1, 0, 0], vec![0, 1, 0]])); - - access_map.add_access_offset(AccessOffset(vec![0, 1])); - access_map.add_access_offset(AccessOffset(vec![1, 0])); - access_map.add_access_offset(AccessOffset(vec![0, 0])); - - let mul_add_op = |access_map: &AccessMap| -> ThrillerResult { - let access_matrixs = access_map.get_access_matrixs(); - let access_offsets = access_map.get_access_offsets(); - if access_matrixs.len() != 3 || access_offsets.len() != 3 { - return Err(ThrillerError::InvalidAccessPattern); - } - - let mut access_codes = vec![String::new(); 3]; - let iter_vars = access_map.get_iter_vars(); - - for (i, matrix) in access_matrixs.iter().enumerate() { - for (j, access) in matrix.0.iter().enumerate() { - if access[0] != 0 { - access_codes[i] += format!( - "[{access} * {iter_var} + {offset}]", - access = access[0], - iter_var = iter_vars[j].get_name(), - offset = access_offsets[0].0[j] - ) - .as_str(); - } - - if access[1] != 0 { - access_codes[i] += format!( - "[{access} * {iter_var} + {offset}]", - access = access[1], - iter_var = iter_vars[j].get_name(), - offset = access_offsets[1].0[j] - ) - .as_str(); - } - - if access[2] != 0 { - access_codes[i] += format!( - "[{access} * {iter_var} + {offset}]", - access = access[2], - iter_var = iter_vars[2].get_name(), - offset = access_offsets[i].0[j] - ) - .as_str(); - } - } - } - - let mut code = String::new(); - - code += format!( - "C{c} += A{a} * B{b};\n", - c = access_codes[2], - a = access_codes[0], - b = access_codes[1] - ) - .as_str(); - - Ok(code) - }; - - let mul_add_code = mul_add_op(&access_map).unwrap(); - - let code = access_map.gen_loop_access(mul_add_code).unwrap(); - - println!("{}", code); -} diff --git a/examples/loop/multiloops.rs b/examples/loop/multiloops.rs deleted file mode 100644 index 0090899..0000000 --- a/examples/loop/multiloops.rs +++ /dev/null @@ -1,69 +0,0 @@ -use std::rc::Rc; - -use thriller_core::{ - initialize, AccessMap, AccessMatrix, AccessOffset, AttachedEdge, BlockType, IterationBound, - IterationVar, MemoryLevel, Task, ThrillerBlock, ThrillerGraph, -}; - -use thriller_utils::BufBuilder; - -fn main() { - initialize(); - - let iter_var1 = Rc::new(IterationVar::new( - "i1", - (IterationBound::Fixed(0), IterationBound::Fixed(10)), - )); - - let iter_var2 = Rc::new(IterationVar::new( - "i2", - (IterationBound::Fixed(0), IterationBound::Fixed(20)), - )); - - let mut access_map1 = AccessMap::new(1, vec![1]); - access_map1.add_iter_var(iter_var1); - - access_map1.add_access_matrix(AccessMatrix(vec![vec![1]])); - access_map1.add_access_matrix(AccessMatrix(vec![vec![1]])); - - access_map1.add_access_offset(AccessOffset(vec![0])); - access_map1.add_access_offset(AccessOffset(vec![0])); - - let access_map1 = Rc::new(access_map1); - - let mut access_map2 = AccessMap::new(1, vec![1]); - access_map2.add_iter_var(iter_var2); - - access_map2.add_access_matrix(AccessMatrix(vec![vec![1]])); - access_map2.add_access_matrix(AccessMatrix(vec![vec![1]])); - - access_map2.add_access_offset(AccessOffset(vec![0])); - access_map2.add_access_offset(AccessOffset(vec![0])); - - let access_map2 = Rc::new(access_map2); - - let s_a = Rc::new(BufBuilder::row_major_shared_tile("sA", &[256, 256])); - let r_a = Rc::new(BufBuilder::row_major_reg_tile("sB", &[64, 64])); - let s_b = Rc::new(BufBuilder::col_major_shared_tile("sC", &[256, 256])); - let r_b = Rc::new(BufBuilder::row_major_reg_tile("rB", &[64, 64])); - - let in_edge0 = AttachedEdge::new(s_a.clone(), r_a.clone(), Some(access_map1.clone())); - let in_edge1 = AttachedEdge::new(s_b.clone(), r_b.clone(), Some(access_map2.clone())); - let in_edge2 = AttachedEdge::new(s_a.clone(), r_b.clone(), Some(access_map1.clone())); - - let subgraph = ThrillerGraph::new(MemoryLevel::Register); - - let mut block = ThrillerBlock::new( - vec![Rc::new(in_edge0), Rc::new(in_edge1), Rc::new(in_edge2)], - vec![], - MemoryLevel::Register, - Rc::new(subgraph), - BlockType::Reduce, - ); - - block.merge_loops(); - - let code = block.emit().unwrap(); - - println!("{}", code); -} diff --git a/scripts/run_examples.sh b/scripts/run_examples.sh index 8d293ce..79b432a 100755 --- a/scripts/run_examples.sh +++ b/scripts/run_examples.sh @@ -10,6 +10,6 @@ function run_examples(){ run_examples "examples/" run_examples "examples/graph" run_examples "examples/ops" -run_examples "examples/loop" -run_examples "examples/gemm" +# run_examples "examples/loop" +# run_examples "examples/gemm" diff --git a/thriller-bindings/examples/gemm/__pycache__/context.cpython-312.pyc b/thriller-bindings/examples/gemm/__pycache__/context.cpython-312.pyc new file mode 100644 index 0000000..b655eb7 Binary files /dev/null and b/thriller-bindings/examples/gemm/__pycache__/context.cpython-312.pyc differ diff --git a/thriller-bindings/examples/gemm/context.py b/thriller-bindings/examples/gemm/context.py new file mode 100644 index 0000000..03e79d0 --- /dev/null +++ b/thriller-bindings/examples/gemm/context.py @@ -0,0 +1,5 @@ +import os +import sys + +sys.path.insert( + 0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))) diff --git a/thriller-bindings/examples/gemm/gemm_g2r.py b/thriller-bindings/examples/gemm/gemm_g2r.py new file mode 100644 index 0000000..013f517 --- /dev/null +++ b/thriller-bindings/examples/gemm/gemm_g2r.py @@ -0,0 +1,88 @@ +import context + +import pythriller + +if __name__ == '__main__': + pythriller.initialize_thriller_flow() + + LayoutA = pythriller.PyLayout.RowMajor + LayoutB = pythriller.PyLayout.RowMajor + LayoutC = pythriller.PyLayout.RowMajor + + GlobalLayoutA = pythriller.PyLayout.RowMajor + GlobalLayoutB = pythriller.PyLayout.ColMajor + GlobalLayoutC = pythriller.PyLayout.RowMajor + + BufTypeA = pythriller.PyBufType.RegTile + BufTypeB = pythriller.PyBufType.RegTile + BufTypeC = pythriller.PyBufType.RegTile + + GlobalTypeA = pythriller.PyBufType.GlobalTile + GlobalTypeB = pythriller.PyBufType.GlobalTile + GlobalTypeC = pythriller.PyBufType.GlobalTile + + DimA = [64, 64] + DimB = [64, 64] + DimC = [64, 64] + + GlobalDimA = [256, 256] + GlobalDimB = [256, 256] + GlobalDimC = [256, 256] + + rA = pythriller.PyBuffer("rA", DimA, LayoutA, BufTypeA) + rB = pythriller.PyBuffer("rB", DimB, LayoutB, BufTypeB) + acc = pythriller.PyBuffer("acc", DimC, LayoutC, BufTypeC) + + gA = pythriller.PyBuffer("gA", GlobalDimA, GlobalLayoutA, GlobalTypeA) + gB = pythriller.PyBuffer("gB", GlobalDimB, GlobalLayoutB, GlobalTypeB) + gC = pythriller.PyBuffer("gC", GlobalDimC, GlobalLayoutC, GlobalTypeC) + + print(rA) + print(rB) + print(acc) + + print(gA) + print(gB) + print(gC) + + MemoryLevel = pythriller.PyMemoryLevel.Register + RegGraph = pythriller.PyGraph(MemoryLevel) + + NodeA = pythriller.PyNode(rA) + NodeB = pythriller.PyNode(rB) + NodeAcc = pythriller.PyNode(acc) + + GemmNode = pythriller.PyNode.gemm(NodeA, NodeB, NodeAcc) + + LoopIter = pythriller.IterationVar('i', (0, 4)) + + access_dims = [1] + access_map = [[1]] + access_offset = [0] + + AccessMap = pythriller.AccessMap( + access_dims, access_map, access_offset, [LoopIter]) + + EdgeA_Gemm = pythriller.PyEdge(NodeA, GemmNode) + EdgeB_GEMM = pythriller.PyEdge(NodeB, GemmNode) + EdgeGemm_Acc = pythriller.PyEdge(GemmNode, NodeAcc) + + RegGraph.add_nodes([NodeA, NodeB, NodeAcc, GemmNode]) + RegGraph.add_edges([EdgeA_Gemm, EdgeB_GEMM, EdgeGemm_Acc]) + + RegGraph.connect() + + LoadGlobalToRegEdgeA = pythriller.AttachedEdge(gA, rA, AccessMap) + LoadGlobalToRegEdgeB = pythriller.AttachedEdge(gB, rB, AccessMap) + StoreRegToGlobalEdgeC = pythriller.AttachedEdge(acc, gC, AccessMap) + G2RBlockMemLevel = pythriller.PyMemoryLevel.Register + + G2RBlockType = pythriller.BlockType.Reduce + + GlobalToRegBlock = pythriller.Block( + [LoadGlobalToRegEdgeA, LoadGlobalToRegEdgeB], [StoreRegToGlobalEdgeC], RegGraph, G2RBlockType, [LoopIter]) + + code = GlobalToRegBlock.codegen() + + print("================Codegen=================") + print(code) diff --git a/thriller-bindings/pythriller/__init__.py b/thriller-bindings/pythriller/__init__.py index da712be..9bccce6 100644 --- a/thriller-bindings/pythriller/__init__.py +++ b/thriller-bindings/pythriller/__init__.py @@ -1,2 +1 @@ -from .context import initialize_thriller_flow, PyLayout, PyBufType -from .buffer import create_buffer +from .context import initialize_thriller_flow, PyLayout, PyBufType, PyBuffer, PyGraph, PyNode, PyEdge, PyMemoryLevel, Gemm, AttachedEdge, Block, BlockType, IterationVar, AccessMap diff --git a/thriller-bindings/pythriller/buffer.py b/thriller-bindings/pythriller/buffer.py deleted file mode 100644 index 4497662..0000000 --- a/thriller-bindings/pythriller/buffer.py +++ /dev/null @@ -1,5 +0,0 @@ -from .context import PyBuffer, PyLayout, PyBufType - - -def create_buffer(name, dim, layout, buf_type): - return PyBuffer(name, dim, layout, buf_type) diff --git a/thriller-bindings/src/access.rs b/thriller-bindings/src/access.rs new file mode 100644 index 0000000..1bd8362 --- /dev/null +++ b/thriller-bindings/src/access.rs @@ -0,0 +1,58 @@ +use std::rc::Rc; + +use thriller_core::{AccessMap, AccessMatrix, AccessOffset}; + +use pyo3::{prelude::*, types::PyList}; + +use crate::var::PyIterationVar; + +#[pyclass(unsendable, module = "access", name = "AccessMap")] +pub struct PyAccessMap(pub Rc); + +#[pymethods] +impl PyAccessMap { + #[new] + fn new( + dims: Bound, + access: Bound, + offset: Bound, + vars: Bound, + ) -> Self { + let dims = dims + .into_iter() + .map(|d| d.extract::().unwrap()) + .collect::>(); + + let vars = vars + .into_iter() + .map(|v| v.extract::>().unwrap().0.clone()) + .collect::>(); + + let access = access + .into_iter() + .map(|a| { + a.extract::>() + .unwrap() + .into_iter() + .map(|i| i.extract::().unwrap()) + .collect::>() + }) + .collect::>(); + + let access = AccessMatrix(access); + + let offset = offset + .into_iter() + .map(|o| o.extract::().unwrap()) + .collect::>(); + let offset = AccessOffset(offset); + + let mut map = AccessMap::new(dims.len(), dims); + + map.add_iter_vars(vars); + map.add_access_matrix(access); + map.add_access_offset(offset); + + PyAccessMap(Rc::new(map)) + } +} diff --git a/thriller-bindings/src/block.rs b/thriller-bindings/src/block.rs new file mode 100644 index 0000000..6f299b2 --- /dev/null +++ b/thriller-bindings/src/block.rs @@ -0,0 +1,86 @@ +use std::rc::Rc; + +use thriller_core::{AttachedEdge, BlockType, Task, ThrillerBlock}; + +use pyo3::{prelude::*, types::PyList}; + +use crate::{access::PyAccessMap, buffer::PyBuffer, graph::PyGraph, var::PyIterationVar}; + +#[pyclass(module = "block", name = "BlockType")] +pub enum PyBlockType { + Reduce, + Map, +} + +#[pyclass(unsendable, module = "block", name = "Block")] +pub struct PyBlock(pub ThrillerBlock); + +#[pyclass(unsendable, module = "block", name = "AttachedEdge")] +pub struct PyAttachedEdge(pub Rc); + +#[pymethods] +impl PyBlock { + #[new] + fn new( + inputs: &Bound, + outputs: &Bound, + subgraph: PyRef, + block_type: PyRef, + ivars: &Bound, + ) -> PyResult { + let block_type = match *block_type { + PyBlockType::Reduce => BlockType::Reduce, + PyBlockType::Map => BlockType::Map, + }; + + let inputs = inputs + .into_iter() + .map(|edge| { + // TODO(KuangjuX): fix `unwarp()`. + let edge = edge.extract::>().unwrap(); + Rc::clone(&edge.0) + }) + .collect::>(); + + let outputs = outputs + .into_iter() + .map(|edge| { + // TODO(KuangjuX): fix `unwarp()`. + let edge = edge.extract::>().unwrap(); + Rc::clone(&edge.0) + }) + .collect::>(); + + let ivars = ivars + .into_iter() + .map(|ivar| { + // TODO(KuangjuX): fix `unwarp()`. + let ivar = ivar.extract::>().unwrap(); + Rc::clone(&ivar.0) + }) + .collect::>(); + + let subgraph = Rc::clone(&subgraph.0); + + let block = ThrillerBlock::new(inputs, outputs, subgraph, block_type, ivars); + + Ok(PyBlock(block)) + } + + fn codegen(&self) -> PyResult { + self.0 + .emit() + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("{:?}", e))) + } +} + +#[pymethods] +impl PyAttachedEdge { + #[new] + fn new(src: PyRef, dst: PyRef, map: PyRef) -> Self { + let src = Rc::clone(&src.0); + let dst = Rc::clone(&dst.0); + let map = Rc::clone(&map.0); + PyAttachedEdge(Rc::new(AttachedEdge::new(src, dst, Some(map)))) + } +} diff --git a/thriller-bindings/src/buffer.rs b/thriller-bindings/src/buffer.rs index 749fce8..6688991 100644 --- a/thriller-bindings/src/buffer.rs +++ b/thriller-bindings/src/buffer.rs @@ -1,8 +1,10 @@ +use std::rc::Rc; + use pyo3::prelude::*; use thriller_core::{BufType, Buffer, Dim, Layout}; -#[pyclass] -pub struct PyBuffer(pub Buffer); +#[pyclass(unsendable)] +pub struct PyBuffer(pub Rc); #[pyclass] pub enum PyLayout { @@ -34,7 +36,7 @@ impl PyBuffer { PyBufType::RegVec => BufType::RegVec, }; - Self(Buffer::new(name.as_str(), buf_type, &dim, layout)) + Self(Rc::new(Buffer::new(name.as_str(), buf_type, &dim, layout))) } fn __str__(&self) -> PyResult { diff --git a/thriller-bindings/src/graph.rs b/thriller-bindings/src/graph.rs new file mode 100644 index 0000000..83646ff --- /dev/null +++ b/thriller-bindings/src/graph.rs @@ -0,0 +1,120 @@ +use pyo3::prelude::*; +use pyo3::types::PyList; + +use thriller_core::{ + AccessMap, Gemm, MemoryLevel, Task, ThrillerEdge, ThrillerGraph, ThrillerNode, + ThrillerNodeInner, +}; + +use crate::buffer::PyBuffer; + +use std::{cell::RefCell, rc::Rc}; + +#[pyclass] +pub enum PyMemoryLevel { + Register, + Shared, + Global, +} + +#[pyclass(unsendable)] +pub struct PyGraph(pub Rc>); + +#[pymethods] +impl PyGraph { + #[new] + fn new(mem_level: &PyMemoryLevel) -> PyGraph { + let mem_level = match mem_level { + PyMemoryLevel::Register => MemoryLevel::Register, + PyMemoryLevel::Shared => MemoryLevel::Shared, + PyMemoryLevel::Global => MemoryLevel::Global, + }; + + PyGraph(Rc::new(RefCell::new(ThrillerGraph::new(mem_level)))) + } + + fn add_nodes(&mut self, nodes: &Bound<'_, PyList>) -> PyResult<()> { + let nodes = nodes + .into_iter() + .map(|node| { + // TODO(KuangjuX): fix `unwarp`. + let node = node.extract::>().unwrap(); + Rc::clone(&node.0) + }) + .collect::>(); + + self.0.borrow_mut().add_nodes(nodes); + Ok(()) + } + + fn add_edges(&mut self, edges: &Bound<'_, PyList>) -> PyResult<()> { + let edges = edges + .into_iter() + .map(|edge| { + // TODO(KuangjuX): fix `unwarp`. + let edge = edge.extract::>().unwrap(); + Rc::clone(&edge.0) + }) + .collect::>(); + + self.0.borrow_mut().add_edges(edges); + Ok(()) + } + + fn connect(&mut self) { + self.0.borrow_mut().connect(); + } + + fn codegen(&self) -> PyResult { + self.0 + .borrow() + .emit() + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("{:?}", e))) + } +} + +#[pyclass(unsendable)] +pub struct PyNode(pub Rc>); + +#[pymethods] +impl PyNode { + #[new] + fn buffer(buf: &PyBuffer) -> Self { + let node = ThrillerNode::new(thriller_core::ThrillerNodeInner::Buffer(Rc::clone(&buf.0))); + PyNode(Rc::new(RefCell::new(node))) + } + + fn gemm(a: PyRef, b: PyRef, c: PyRef) -> Self { + let access_map = AccessMap::new(0, vec![]); + + let node_a = Rc::clone(&a.0); + let node_b = Rc::clone(&b.0); + let node_c = Rc::clone(&c.0); + + let gemm = Gemm::new(vec![node_a, node_b], node_c, Rc::new(access_map)); + + let node = ThrillerNode::new(ThrillerNodeInner::Op(Box::new(gemm))); + + PyNode(Rc::new(RefCell::new(node))) + } + + fn codegen(&self) -> PyResult { + let node = self.0.borrow(); + node.emit() + .map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("{:?}", e))) + } +} + +#[pyclass(unsendable)] +pub struct PyEdge(pub Rc); + +#[pymethods] +impl PyEdge { + #[new] + fn new(src: PyRef, dst: PyRef) -> Self { + let src = Rc::clone(&src.0); + let dst = Rc::clone(&dst.0); + let edge = ThrillerEdge::new(src, dst); + PyEdge(Rc::new(edge)) + } +} diff --git a/thriller-bindings/src/lib.rs b/thriller-bindings/src/lib.rs index 591a07c..522cbab 100644 --- a/thriller-bindings/src/lib.rs +++ b/thriller-bindings/src/lib.rs @@ -1,9 +1,19 @@ +use access::PyAccessMap; use pyo3::prelude::*; +use block::{PyAttachedEdge, PyBlock, PyBlockType}; use buffer::{PyBufType, PyBuffer, PyLayout}; +use graph::{PyEdge, PyGraph, PyMemoryLevel, PyNode}; +use op::PyGemm; use thriller_core::initialize; +use var::PyIterationVar; +mod access; +mod block; mod buffer; +mod graph; +mod op; +mod var; #[pyfunction] fn initialize_thriller_flow() -> PyResult<()> { @@ -20,5 +30,21 @@ fn thriller_flow(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + m.add_class::()?; + + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + m.add_class::()?; + + m.add_class::()?; + Ok(()) } diff --git a/thriller-bindings/src/op.rs b/thriller-bindings/src/op.rs new file mode 100644 index 0000000..84a60e8 --- /dev/null +++ b/thriller-bindings/src/op.rs @@ -0,0 +1,31 @@ +use pyo3::prelude::*; +use thriller_core::{AccessMap, Gemm, Task}; + +use crate::graph::PyNode; + +use std::rc::Rc; + +#[pyclass(unsendable, module = "operators", name = "Gemm")] +pub struct PyGemm(pub Gemm); + +#[pymethods] +impl PyGemm { + #[new] + fn new(a: PyRef, b: PyRef, c: PyRef) -> Self { + let access_map = AccessMap::new(0, vec![]); + + let node_a = Rc::clone(&a.0); + let node_b = Rc::clone(&b.0); + let node_c = Rc::clone(&c.0); + + let gemm = Gemm::new(vec![node_a, node_b], node_c, Rc::new(access_map)); + + PyGemm(gemm) + } + + fn codegen(&self) -> PyResult { + self.0 + .emit() + .map_err(|e| PyErr::new::(format!("{:?}", e))) + } +} diff --git a/thriller-bindings/src/var.rs b/thriller-bindings/src/var.rs new file mode 100644 index 0000000..09977a2 --- /dev/null +++ b/thriller-bindings/src/var.rs @@ -0,0 +1,23 @@ +use std::rc::Rc; + +use thriller_core::{IterationBound, IterationVar}; + +use pyo3::prelude::*; +use pyo3::types::PyTuple; + +#[pyclass(unsendable, module = "var", name = "IterationVar")] +pub struct PyIterationVar(pub Rc); + +#[pymethods] +impl PyIterationVar { + #[new] + fn new(name: String, domain: &Bound<'_, PyTuple>) -> Self { + let domain = domain.extract::<(usize, usize)>().unwrap(); + let domain_bound = ( + IterationBound::Fixed(domain.0), + IterationBound::Fixed(domain.1), + ); + let var = IterationVar::new(&name, domain_bound); + PyIterationVar(Rc::new(var)) + } +} diff --git a/thriller-bindings/tests/test_bindings.py b/thriller-bindings/tests/test_bindings.py index 42558ea..848329a 100644 --- a/thriller-bindings/tests/test_bindings.py +++ b/thriller-bindings/tests/test_bindings.py @@ -18,9 +18,9 @@ DimB = [256, 256] DimC = [256, 256] - gA = pythriller.create_buffer("gA", DimA, LayoutA, BufTypeA) - gB = pythriller.create_buffer("gB", DimB, LayoutB, BufTypeB) - gC = pythriller.create_buffer("gC", DimC, LayoutC, BufTypeC) + gA = pythriller.PyBuffer("gA", DimA, LayoutA, BufTypeA) + gB = pythriller.PyBuffer("gB", DimB, LayoutB, BufTypeB) + gC = pythriller.PyBuffer("gC", DimC, LayoutC, BufTypeC) print(gA) print(gB) diff --git a/thriller-core/src/access.rs b/thriller-core/src/access.rs index f9b7ae6..a8b145d 100644 --- a/thriller-core/src/access.rs +++ b/thriller-core/src/access.rs @@ -36,6 +36,11 @@ impl AccessMap { self.iter_vars.push(iter_var); } + /// Add iter vars to access map. + pub fn add_iter_vars(&mut self, iter_vars: Vec>) { + self.iter_vars.extend(iter_vars); + } + /// Get iter vars in access map. pub fn get_iter_vars(&self) -> &Vec> { &self.iter_vars @@ -67,10 +72,7 @@ impl AccessMap { } /// Generate loop based on `AccessMap` information. - pub fn gen_loop_access(&self, inner_code: String) -> ThrillerResult -// where - // F: Fn(&AccessMap) -> ThrillerResult, - { + pub fn gen_loop_access(&self, inner_code: String) -> ThrillerResult { let mut code = String::new(); let mut indent = 0; if self.loop_depth != self.iter_vars.len() { @@ -91,10 +93,6 @@ impl AccessMap { indent += 4; } - // let mut access_code = String::new(); - // for f in op { - // access_code.push_str(f(self)?.as_str()); - // } let access_lines: Vec<&str> = inner_code.lines().collect(); access_lines.iter().for_each(|line| { diff --git a/thriller-core/src/buffer.rs b/thriller-core/src/buffer.rs index 1139572..0ad218e 100644 --- a/thriller-core/src/buffer.rs +++ b/thriller-core/src/buffer.rs @@ -16,6 +16,7 @@ pub enum BufType { /// A Buffer data structure. #[allow(dead_code)] +#[derive(Clone, Debug)] pub struct Buffer { name: String, id: usize, diff --git a/thriller-core/src/dataflow/block.rs b/thriller-core/src/dataflow/block.rs index 45238ee..f807b46 100644 --- a/thriller-core/src/dataflow/block.rs +++ b/thriller-core/src/dataflow/block.rs @@ -1,15 +1,13 @@ +use std::cell::RefCell; use std::rc::Rc; use std::vec::Vec; use crate::dataflow::{AttachedEdge, ThrillerGraph}; use crate::error::{ThrillerError, ThrillerResult}; +use crate::kernels::sync::Sync; use crate::task::Task; use crate::var::Var; -use crate::{next_id, AccessMap, MemoryLevel}; - -use crate::kernels::sync::Sync; - -use super::loop_analysis::LoopGroup; +use crate::{next_id, BufType, IterationBound, IterationVar}; #[derive(PartialEq, Clone, Copy)] /// A map relation from inputs into outputs. @@ -25,11 +23,9 @@ pub struct ThrillerBlock { id: usize, pub(crate) inputs: Vec>, pub(crate) outputs: Vec>, - pub(crate) mem_level: MemoryLevel, - pub(crate) subgraph: Rc, + pub(crate) subgraph: Rc>, pub(crate) block_type: BlockType, - pub(crate) unified_access_map: Option>, - pub(crate) loop_groups: Vec, + pub(crate) ivars: Vec>, } impl ThrillerBlock { @@ -37,19 +33,17 @@ impl ThrillerBlock { pub fn new( inputs: Vec>, outputs: Vec>, - mem_level: MemoryLevel, - subgraph: Rc, + subgraph: Rc>, block_type: BlockType, + ivars: Vec>, ) -> Self { ThrillerBlock { inputs, outputs, - mem_level, subgraph, block_type, + ivars, id: next_id(), - unified_access_map: None, - loop_groups: vec![], } } @@ -58,214 +52,268 @@ impl ThrillerBlock { self.block_type } - /// Merge the same access maps into a unified access map. - pub fn merge_access_map(&mut self) { - // Iterate over the inputs and check if the access maps are the same. - // If they are the same, then we can merge them into a unified access map. + fn emit_loop(&self) -> ThrillerResult { + let mut code = String::new(); + + let mut indent = 0; + + // Generate loop. + for ivar in self.ivars.iter() { + let (upper, lower) = ivar.get_domain(); + + code += match (upper, lower) { + (IterationBound::Fixed(upper), IterationBound::Fixed(lower)) => { + format!( + "{indent}for(int {ivar} = {lower}; {ivar} < {upper}; {ivar}++){{\n", + indent = " ".repeat(indent), + ivar = ivar.get_name(), + lower = lower, + upper = upper + ) + } - // TODO: Implement this function. + _ => todo!(), + } + .as_str(); - self.merge_loops(); - if self.loop_groups.len() == 1 { - self.unified_access_map = Some(self.inputs[0].get_access().as_ref().unwrap().clone()); - } else { - self.unified_access_map = None; + indent += 4; } - } - pub(crate) fn get_inputs(&self) -> &Vec> { - &self.inputs + Ok(code) } - pub(crate) fn gen_loop_load(&self) -> ThrillerResult { + fn emit_loop_closure(&self) -> ThrillerResult { + let mut indent = ((self.ivars.len() - 1) * 4) as isize; let mut code = String::new(); - for edge in self.inputs.iter() { - if edge.get_access().is_some() { - // TODO: Add access pattern support for load operation. - code += self.gen_load(edge)?.as_str(); - } + while indent >= 0 { + code += format!("{indent}}}\n", indent = " ".repeat(indent as usize)).as_str(); + indent -= 4; } + Ok(code) } - /// Generate load code for the block inputs. - pub(crate) fn gen_load(&self, edge: &Rc) -> ThrillerResult { - // TODO: This is not a final version of the load code generation. It is just a pseudocode representation of the formalized data flow. + fn emit_load(&self) -> ThrillerResult { let mut code = String::new(); - // Generate load inputs. - - match self.mem_level { - MemoryLevel::Register => { - let access_map = edge - .get_access() - .as_ref() - .ok_or(ThrillerError::MissingAccessMap)?; - - let loop_depth = access_map.get_loop_depth(); - if loop_depth != 1 { - return Err(ThrillerError::InvalidLoadAccess); - } + let indent = " ".repeat(self.ivars.len() * 4); - let offsets = access_map.get_access_offsets(); - let matrixs = access_map.get_access_matrixs(); - - let iter_vars = access_map.get_iter_vars(); - - code.push_str(&format!( - "copy_2d_tile_s2r({src}[{src_access} * {src_index} + {src_offset}], {dst}[{dst_access} * {dst_index} + {dst_offset}]);\n", - src = edge.get_src_name(), - src_access = matrixs[0].0[0][0], - src_offset = offsets[0].0[0], - src_index = iter_vars[0].get_name(), - dst = edge.get_dst_name(), - dst_index = iter_vars[0].get_name(), - dst_offset = offsets[1].0[0], - dst_access = matrixs[1].0[0][0] - )); - } + let mut insert_copy_async = false; + let mut insert_syncthreads = false; - MemoryLevel::Shared => { - let access_map = edge - .get_access() - .as_ref() - .ok_or(ThrillerError::MissingAccessMap)?; + for edge in self.inputs.iter() { + // Insert `syncthreads()` when loading tiles. + insert_syncthreads = true; + + let sbuf = &edge.src; + let dbuf = &edge.dst; + + // TODO(KuangjuX): Support Access Memory code generation. + #[allow(unused_variables)] + let access_map = edge + .get_access() + .as_ref() + .ok_or(ThrillerError::MissingAccessMap)?; + + let sbuf_var = sbuf.get_name(); + let dbuf_var = dbuf.get_name(); + + let sbuf_id = sbuf.get_id(); + let dbuf_id = dbuf.get_id(); + + match (sbuf.get_typing(), dbuf.get_typing()) { + (BufType::GlobalTile, BufType::RegTile) => { + code += format!( + "{indent}loader_tile_g2r_{sid}_to_{did}({sbuf_var}, {dbuf_var});\n", + indent = indent, + sid = sbuf_id, + did = dbuf_id, + sbuf_var = sbuf_var, + dbuf_var = dbuf_var + ) + .as_str(); + } - let loop_depth = access_map.get_loop_depth(); - if loop_depth != 1 { - return Err(ThrillerError::InvalidLoadAccess); + (BufType::SharedTile, BufType::RegTile) => { + insert_copy_async = true; + code += format!( + "{indent}loader_tile_s2r_{sid}_to_{did}({sbuf_var}, {dbuf_var});\n", + indent = indent, + sid = sbuf_id, + did = dbuf_id, + sbuf_var = sbuf_var, + dbuf_var = dbuf_var + ) + .as_str(); } - let offsets = access_map.get_access_offsets(); - let matrixs = access_map.get_access_matrixs(); - - let iter_vars = access_map.get_iter_vars(); - - code.push_str(&format!( - "copy_2d_tile_g2s({src}[{src_access} * {src_index} + {src_offset}], {dst}[{dst_access} * {dst_index} + {dst_offset}]);\n", - src = edge.get_src_name(), - src_access = matrixs[0].0[0][0], - src_offset = offsets[0].0[0], - src_index = iter_vars[0].get_name(), - dst = edge.get_dst_name(), - dst_index = iter_vars[0].get_name(), - dst_offset = offsets[1].0[0], - dst_access = matrixs[1].0[0][0] - )); + _ => todo!(), } + } - MemoryLevel::Global => { - unimplemented!(); - } + if insert_copy_async { + code += format!( + "{indent}{copy_async}", + indent = indent, + copy_async = Sync::emit_copy_async() + ) + .as_str(); + } + + if insert_syncthreads { + code += format!( + "{indent}{syncthreads}", + indent = indent, + syncthreads = Sync::emit_sync() + ) + .as_str(); } + Ok(code) } - pub(crate) fn emit_store(&self, edge: &Rc) -> ThrillerResult { + fn emit_store(&self) -> ThrillerResult { let mut code = String::new(); - if self.block_type == BlockType::Reduce { - return Ok(code); - } - // Generate store outputs. - match self.block_type { - BlockType::Map => match self.mem_level { - MemoryLevel::Register => { - code.push_str(&format!( - "copy_2d_tile_r2s({}, {});\n", - edge.get_src_name(), - edge.get_dst_name() - )); - } - MemoryLevel::Shared => { - code.push_str(&format!( - "copy_2d_tile_s2g({}, {});\n", - edge.get_src_name(), - edge.get_dst_name() - )); + for edge in self.outputs.iter() { + let sbuf = &edge.src; + let dbuf = &edge.dst; + + // TODO(KuangjuX): Support Access Memory code generation. + #[allow(unused_variables)] + let access_map = edge + .get_access() + .as_ref() + .ok_or(ThrillerError::MissingAccessMap)?; + + let sbuf_var = sbuf.get_name(); + let dbuf_var = dbuf.get_name(); + + let sbuf_id = sbuf.get_id(); + let dbuf_id = dbuf.get_id(); + + match (sbuf.get_typing(), dbuf.get_typing()) { + (BufType::RegTile, BufType::GlobalTile) => { + code += format!( + "storer_tile_r2g_{sid}_to_{did}({sbuf_var}, {dbuf_var});\n", + sid = sbuf_id, + did = dbuf_id, + sbuf_var = sbuf_var, + dbuf_var = dbuf_var + ) + .as_str(); } - _ => {} - }, - - BlockType::Reduce => {} + _ => todo!(), + } } + Ok(code) } - /// Generate store code for the block outputs. - pub(crate) fn gen_store(&self) -> ThrillerResult { + fn emit_sync(&self) -> ThrillerResult { let mut code = String::new(); - if self.block_type == BlockType::Reduce { - return Ok(code); - } - for edge in self.outputs.iter() { - code += &self.emit_store(edge)?; - } - Ok(code) - } - #[allow(dead_code)] - pub(crate) fn reduce(&self) -> Option<&Vec>> { - match self.block_type { - BlockType::Reduce => Some(&self.outputs), - _ => None, - } - } + // TODO(KuangjuX): Check Memory Hiercary and insert sync primitive. + code += Sync::emit_sync().as_str(); - #[allow(dead_code)] - pub(crate) fn split_subgraph(&mut self) { - unimplemented!() + Ok(code) } + // /// Generate load code for the block inputs. + // #[allow(dead_code)] + // fn gen_load(&self, edge: &Rc) -> ThrillerResult { + // // TODO: This is not a final version of the load code generation. It is just a pseudocode representation of the formalized data flow. + // let mut code = String::new(); + // // Generate load inputs. + + // match self.mem_level { + // MemoryLevel::Register => { + // let access_map = edge + // .get_access() + // .as_ref() + // .ok_or(ThrillerError::MissingAccessMap)?; + + // let loop_depth = access_map.get_loop_depth(); + // if loop_depth != 1 { + // return Err(ThrillerError::InvalidLoadAccess); + // } + + // let offsets = access_map.get_access_offsets(); + // let matrixs = access_map.get_access_matrixs(); + + // let iter_vars = access_map.get_iter_vars(); + + // code.push_str(&format!( + // "copy_2d_tile_s2r({src}[{src_access} * {src_index} + {src_offset}], {dst}[{dst_access} * {dst_index} + {dst_offset}]);\n", + // src = edge.get_src_name(), + // src_access = matrixs[0].0[0][0], + // src_offset = offsets[0].0[0], + // src_index = iter_vars[0].get_name(), + // dst = edge.get_dst_name(), + // dst_index = iter_vars[0].get_name(), + // dst_offset = offsets[1].0[0], + // dst_access = matrixs[1].0[0][0] + // )); + // } + + // MemoryLevel::Shared => { + // let access_map = edge + // .get_access() + // .as_ref() + // .ok_or(ThrillerError::MissingAccessMap)?; + + // let loop_depth = access_map.get_loop_depth(); + // if loop_depth != 1 { + // return Err(ThrillerError::InvalidLoadAccess); + // } + + // let offsets = access_map.get_access_offsets(); + // let matrixs = access_map.get_access_matrixs(); + + // let iter_vars = access_map.get_iter_vars(); + + // code.push_str(&format!( + // "copy_2d_tile_g2s({src}[{src_access} * {src_index} + {src_offset}], {dst}[{dst_access} * {dst_index} + {dst_offset}]);\n", + // src = edge.get_src_name(), + // src_access = matrixs[0].0[0][0], + // src_offset = offsets[0].0[0], + // src_index = iter_vars[0].get_name(), + // dst = edge.get_dst_name(), + // dst_index = iter_vars[0].get_name(), + // dst_offset = offsets[1].0[0], + // dst_access = matrixs[1].0[0][0] + // )); + // } + + // MemoryLevel::Global => { + // unimplemented!(); + // } + // } + // Ok(code) + // } + pub(crate) fn emit_block(&self) -> ThrillerResult { let mut code = String::new(); - if let Some(access_map) = &self.unified_access_map { - let mut inner_code = String::new(); - inner_code += &self.gen_loop_load()?; - inner_code += Sync::emit_sync().as_str(); - if self.mem_level == MemoryLevel::Shared { - inner_code += Sync::emit_copy_async().as_str(); - } - inner_code += self.subgraph.emit()?.as_str(); - code += access_map.gen_loop_access(inner_code)?.as_str(); - - code += Sync::emit_sync().as_str(); - if let Some(reduce_outputs) = self.subgraph.reduce_block_outputs() { - // self.outputs.extend(reduce_outputs); - for output in reduce_outputs { - code += &self.emit_store(&output)?; - } - } + code += self.emit_loop()?.as_str(); + let indent = " ".repeat(self.ivars.len() * 4); - code += &self.gen_store()?; - Ok(code) - } else { - // TODO: Handle cases without an unified access map. - if self.inputs.is_empty() && self.outputs.is_empty() { - let code = self.subgraph.emit()?; - Ok(code) - } else { - // unimplemented!(); - let mut code = String::new(); - for group in self.loop_groups.iter() { - let edges = &group.edges; - let mut inner_code = String::new(); - - for edge in edges.iter() { - inner_code += &self.gen_load(edge)?; - } - - let access_map = group.edges[0].get_access().as_ref().unwrap(); - code += access_map.gen_loop_access(inner_code)?.as_str(); - } + code += self.emit_load()?.as_str(); - // TODO: Add codegen for subgraph and split subgraph into different loops. + let subgraph_code = self.subgraph.borrow().emit()?; - Ok(code) - } + for line in subgraph_code.lines() { + code += format!("{indent}{line}\n", indent = indent, line = line).as_str() } + + code += self.emit_loop_closure()?.as_str(); + + code += self.emit_sync()?.as_str(); + + code += self.emit_store()?.as_str(); + + Ok(code) } } diff --git a/thriller-core/src/dataflow/edge.rs b/thriller-core/src/dataflow/edge.rs index c0eb85f..1ba3915 100644 --- a/thriller-core/src/dataflow/edge.rs +++ b/thriller-core/src/dataflow/edge.rs @@ -5,7 +5,6 @@ use crate::access::AccessMap; use crate::buffer::Buffer; use crate::dataflow::ThrillerNode; use crate::next_id; -use crate::var::Var; /// AttachedEdge is an edge that connects a source and destination buffer /// with additional access pattern information `AccessMap`. @@ -47,32 +46,6 @@ impl AttachedEdge { pub fn replace_access_map(&mut self, access: Rc) { self.access = Some(access); } - - pub(crate) fn check_loop_equal(&self, other: &AttachedEdge) -> bool { - if let (Some(this), Some(other)) = (self.get_access(), other.get_access()) { - // Check `loop_depth` is the same. - if this.get_loop_depth() != other.get_loop_depth() { - return false; - } - // Check if iter_vars are the same. - if this.get_iter_vars().len() != other.get_iter_vars().len() { - return false; - } - - for (this_iter_var, other_iter_var) in this - .get_iter_vars() - .iter() - .zip(other.get_iter_vars().iter()) - { - if this_iter_var.get_id() != other_iter_var.get_id() { - return false; - } - } - - return true; - } - false - } } /// `ThrillerEdge` repersent load/store in dataflow graph. @@ -95,20 +68,4 @@ impl ThrillerEdge { pub(crate) fn get_dst(&self) -> Rc> { self.dst.clone() } - - // /// Get the source node name of the edge. - // pub fn get_src_name(&self) -> &String { - // match self.get_src().borrow().get_inner() { - // ThrillerNodeInner::Buffer(buffer) => buffer.get_name(), - // _ => panic!("Source is not a buffer"), - // } - // } - - // /// Get the destination node name of the edge. - // pub fn get_dst_name(&self) -> &String { - // match self.dst.borrow().get_inner() { - // ThrillerNodeInner::Buffer(buffer) => buffer.get_name(), - // _ => panic!("Destination is not a buffer"), - // } - // } } diff --git a/thriller-core/src/dataflow/graph.rs b/thriller-core/src/dataflow/graph.rs index 248eb1a..85d1446 100644 --- a/thriller-core/src/dataflow/graph.rs +++ b/thriller-core/src/dataflow/graph.rs @@ -4,17 +4,18 @@ use std::rc::Rc; use std::vec::Vec; use crate::dataflow::{ThrillerEdge, ThrillerNode, ThrillerNodeInner}; +use crate::debug; use crate::task::Task; -use crate::{debug, AttachedEdge}; use crate::{next_id, MemoryLevel, ThrillerResult}; /// Thriller Dataflow Graph structure. -#[allow(dead_code)] #[derive(Default)] pub struct ThrillerGraph { + #[allow(dead_code)] id: usize, nodes: Vec>>, edges: Vec>, + #[allow(dead_code)] mem_level: MemoryLevel, } @@ -94,27 +95,6 @@ impl ThrillerGraph { sorted_nodes } - - /// Reduce the block outputs in the graph. - pub fn reduce_block_outputs(&self) -> Option>> { - let sorted_nodes = self.topo_sort(); - - for node in sorted_nodes { - if let ThrillerNodeInner::Block(block) = node.borrow().get_inner() { - let outputs = block.reduce(); - let mut reduced_outputs = Vec::new(); - if let Some(outputs) = outputs { - for output in outputs { - reduced_outputs.push(output.clone()); - } - return Some(reduced_outputs); - } - return None; - } - } - - None - } } impl Task for ThrillerGraph { @@ -128,19 +108,6 @@ impl Task for ThrillerGraph { code += op.emit()?.as_str(); } ThrillerNodeInner::Block(block) => { - // let indent = 4; - // let block_code = block.emit()?; - // let lines = block_code.lines().collect::>(); - // code += "{\n"; - // for line in lines { - // code.push_str(&format!( - // "{indent}{line}\n", - // indent = " ".repeat(indent), - // line = line - // )); - // } - // code += "}\n"; - code += block.emit()?.as_str(); } _ => {} diff --git a/thriller-core/src/dataflow/loop_analysis.rs b/thriller-core/src/dataflow/loop_analysis.rs deleted file mode 100644 index aefde31..0000000 --- a/thriller-core/src/dataflow/loop_analysis.rs +++ /dev/null @@ -1,50 +0,0 @@ -use std::rc::Rc; - -use log::info; - -use crate::{next_id, AttachedEdge}; - -use super::block::ThrillerBlock; - -pub struct LoopGroup { - #[allow(dead_code)] - index: usize, - pub(crate) edges: Vec>, -} - -impl ThrillerBlock { - /// Merge the same nest loop pattern into a loop group. - pub fn merge_loops(&mut self) { - let mut loop_groups = vec![]; - for input in self.get_inputs().iter() { - if let Some(group_index) = self.check_loop_edge_equal(input, &loop_groups) { - loop_groups[group_index].edges.push(input.clone()); - } else { - let new_group = LoopGroup { - index: next_id(), - edges: vec![input.clone()], - }; - loop_groups.push(new_group); - } - } - - self.loop_groups = loop_groups; - - info!("loop groups size: {}", self.loop_groups.len()); - } - - pub(crate) fn check_loop_edge_equal( - &self, - ref_edge: &Rc, - loop_groups: &[LoopGroup], - ) -> Option { - for (index, group) in loop_groups.iter().enumerate() { - for edge in group.edges.iter() { - if ref_edge.check_loop_equal(edge) { - return Some(index); - } - } - } - None - } -} diff --git a/thriller-core/src/dataflow/mod.rs b/thriller-core/src/dataflow/mod.rs index 2276d67..e84073a 100644 --- a/thriller-core/src/dataflow/mod.rs +++ b/thriller-core/src/dataflow/mod.rs @@ -1,7 +1,6 @@ mod block; mod edge; mod graph; -mod loop_analysis; mod node; pub use block::{BlockType, ThrillerBlock}; diff --git a/thriller-core/src/dataflow/node.rs b/thriller-core/src/dataflow/node.rs index f116283..58aafa9 100644 --- a/thriller-core/src/dataflow/node.rs +++ b/thriller-core/src/dataflow/node.rs @@ -68,8 +68,8 @@ impl ThrillerNode { &self.nexts } - #[allow(dead_code)] - pub(crate) fn get_inner(&self) -> &ThrillerNodeInner { + #[doc(hidden)] + pub fn get_inner(&self) -> &ThrillerNodeInner { &self.inner } diff --git a/thriller-core/src/task/compute/gemm.rs b/thriller-core/src/task/compute/gemm.rs index 962ca4d..7272d10 100644 --- a/thriller-core/src/task/compute/gemm.rs +++ b/thriller-core/src/task/compute/gemm.rs @@ -4,10 +4,8 @@ use std::rc::Rc; use crate::{next_id, AccessMap, Task, ThrillerError, ThrillerNode, ThrillerResult, Var}; /// Gemm is a task that performs matrix multiplication. -// #[derive(Clone, Copy)] +#[derive(Clone)] pub struct Gemm { - // inputs: Vec>, - // output: Rc, prevs: Vec>>, next: Rc>, access_map: Rc, @@ -65,7 +63,7 @@ impl Task for Gemm { } code += format!( - "cute::gemm(mma, {buf_a}{a}, {buf_b}{b}, {buf_c}{c});\n", + "compute::gemm_({buf_a}{a}, {buf_b}{b}, {buf_c}{c});\n", a = access_codes[0], b = access_codes[1], c = access_codes[2], diff --git a/thriller-core/src/var/iteration.rs b/thriller-core/src/var/iteration.rs index cd61199..16c274d 100644 --- a/thriller-core/src/var/iteration.rs +++ b/thriller-core/src/var/iteration.rs @@ -4,6 +4,7 @@ use super::{regular::RegularVar, Var}; use crate::next_id; /// A bound of the iteration variable. +#[derive(Clone)] pub enum IterationBound { /// A fixed bound. Fixed(usize), @@ -30,6 +31,7 @@ impl Display for IterationBound { } /// A Variable that represents a loop index. +#[derive(Clone)] pub struct IterationVar { name: String, id: usize, diff --git a/thriller-core/src/var/regular.rs b/thriller-core/src/var/regular.rs index e66d0b8..22917cc 100644 --- a/thriller-core/src/var/regular.rs +++ b/thriller-core/src/var/regular.rs @@ -1,6 +1,7 @@ use crate::{next_id, Var}; /// A regular variable. +#[derive(Clone)] pub struct RegularVar { name: String, id: usize, diff --git a/thriller-utils/src/gemm.rs b/thriller-utils/src/gemm.rs deleted file mode 100644 index 50fd072..0000000 --- a/thriller-utils/src/gemm.rs +++ /dev/null @@ -1,187 +0,0 @@ -use std::cell::RefCell; -use std::rc::Rc; - -use thriller_core::{ - AccessMap, AccessMatrix, AccessOffset, AttachedEdge, BlockType, Buffer, Gemm, IterationBound, - IterationVar, MemoryLevel, ThrillerBlock, ThrillerEdge, ThrillerGraph, ThrillerNode, - ThrillerNodeInner, -}; - -use crate::BufBuilder; - -use crate::ThrillerUtils; - -impl ThrillerUtils { - /// Build a RF level GEMM graph. - pub fn build_gemm_rf_block(s_a: Rc, s_b: Rc, s_c: Rc) -> ThrillerBlock { - let r_a = Rc::new(BufBuilder::row_major_reg_tile("rA", &[64, 64])); - let r_b = Rc::new(BufBuilder::col_major_reg_tile("rB", &[64, 64])); - - let mut in_edge0 = AttachedEdge::new(s_a, r_a.clone(), None); - let mut in_edge1 = AttachedEdge::new(s_b, r_b.clone(), None); - - let acc = Rc::new(BufBuilder::row_major_reg_tile("acc", &[64, 64])); - - let out_edge = AttachedEdge::new(acc.clone(), s_c, None); - - let iter_var = Rc::new(IterationVar::new( - "i", - (IterationBound::Fixed(0), IterationBound::Fixed(10)), - )); - - let mut access_map = AccessMap::new(1, vec![1]); - access_map.add_iter_var(iter_var); - - access_map.add_access_matrix(AccessMatrix(vec![vec![1]])); - access_map.add_access_matrix(AccessMatrix(vec![vec![1]])); - - access_map.add_access_offset(AccessOffset(vec![0])); - access_map.add_access_offset(AccessOffset(vec![0])); - - let access_map = Rc::new(access_map); - - in_edge0.replace_access_map(access_map.clone()); - in_edge1.replace_access_map(access_map.clone()); - - let mut subgraph = ThrillerGraph::new(MemoryLevel::Register); - - let r_a_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - r_a.clone(), - )))); - let r_b_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - r_b.clone(), - )))); - let acc_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - acc.clone(), - )))); - - let gemm = Gemm::new( - vec![r_a_node.clone(), r_b_node.clone()], - acc_node.clone(), - access_map.clone(), - ); - - let gemm_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Op( - Box::new(gemm), - )))); - - let ra_gemm_edge = ThrillerEdge::new(r_a_node.clone(), gemm_node.clone()); - let rb_gemm_edge = ThrillerEdge::new(r_b_node.clone(), gemm_node.clone()); - let gemm_acc_edge = ThrillerEdge::new(gemm_node.clone(), acc_node.clone()); - - let ra_gemm_edge_ref = Rc::new(ra_gemm_edge); - let rb_gemm_edge_ref = Rc::new(rb_gemm_edge); - let gemm_acc_edge_ref = Rc::new(gemm_acc_edge); - - subgraph.add_nodes(vec![ - r_a_node.clone(), - r_b_node.clone(), - acc_node.clone(), - gemm_node.clone(), - ]); - subgraph.add_edges(vec![ - ra_gemm_edge_ref.clone(), - rb_gemm_edge_ref.clone(), - gemm_acc_edge_ref.clone(), - ]); - - subgraph.connect(); - - let mut block = ThrillerBlock::new( - vec![Rc::new(in_edge0), Rc::new(in_edge1)], - vec![Rc::new(out_edge)], - MemoryLevel::Register, - Rc::new(subgraph), - BlockType::Reduce, - ); - - block.merge_access_map(); - - block - } - - /// Build a shared level GEMM graph. - pub fn build_shared_gemm_block( - g_a: Rc, - g_b: Rc, - g_c: Rc, - ) -> ThrillerBlock { - let iter_var = Rc::new(IterationVar::new( - "j", - (IterationBound::Fixed(0), IterationBound::Fixed(10)), - )); - - let mut access_map = AccessMap::new(1, vec![1]); - access_map.add_iter_var(iter_var); - - access_map.add_access_matrix(AccessMatrix(vec![vec![1]])); - access_map.add_access_matrix(AccessMatrix(vec![vec![1]])); - - access_map.add_access_offset(AccessOffset(vec![0])); - access_map.add_access_offset(AccessOffset(vec![0])); - - let access_map = Rc::new(access_map); - - let s_a = Rc::new(BufBuilder::row_major_shared_tile("sA", &[256, 256])); - let s_b = Rc::new(BufBuilder::col_major_shared_tile("sB", &[256, 256])); - let s_c = Rc::new(BufBuilder::row_major_shared_tile("sC", &[256, 256])); - - let in_edge0 = AttachedEdge::new(g_a.clone(), s_a.clone(), Some(access_map.clone())); - let in_edge1 = AttachedEdge::new(g_b.clone(), s_b.clone(), Some(access_map.clone())); - let out_edge = AttachedEdge::new(g_c.clone(), s_c.clone(), None); - - let rf_gemm_graph = - ThrillerUtils::build_gemm_rf_block(s_a.clone(), s_b.clone(), s_c.clone()); - - let s_a_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - s_a.clone(), - )))); - - let s_b_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - s_b.clone(), - )))); - - let s_c_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Buffer( - s_c.clone(), - )))); - - let rf_block_node = Rc::new(RefCell::new(ThrillerNode::new(ThrillerNodeInner::Block( - Rc::new(rf_gemm_graph), - )))); - - let sa_block_edge = Rc::new(ThrillerEdge::new(s_a_node.clone(), rf_block_node.clone())); - - let sb_block_edge = Rc::new(ThrillerEdge::new(s_b_node.clone(), rf_block_node.clone())); - - let block_sc_edge = Rc::new(ThrillerEdge::new(rf_block_node.clone(), s_c_node.clone())); - - let mut subgraph = ThrillerGraph::new(MemoryLevel::Shared); - - subgraph.add_nodes(vec![ - s_a_node.clone(), - s_b_node.clone(), - s_c_node.clone(), - rf_block_node.clone(), - ]); - - subgraph.add_edges(vec![ - sa_block_edge.clone(), - sb_block_edge.clone(), - block_sc_edge.clone(), - ]); - - subgraph.connect(); - - let mut shared_block = ThrillerBlock::new( - vec![Rc::new(in_edge0), Rc::new(in_edge1)], - vec![Rc::new(out_edge)], - MemoryLevel::Shared, - Rc::new(subgraph), - BlockType::Map, - ); - - shared_block.merge_access_map(); - - shared_block - } -} diff --git a/thriller-utils/src/lib.rs b/thriller-utils/src/lib.rs index e86c15e..280bfa3 100644 --- a/thriller-utils/src/lib.rs +++ b/thriller-utils/src/lib.rs @@ -4,9 +4,5 @@ #![deny(warnings)] mod buf; -mod gemm; pub use buf::BufBuilder; - -/// Thriller utilities. -pub struct ThrillerUtils;