From dab19b7c07849e2525e0e52b49709a7ab5a5e144 Mon Sep 17 00:00:00 2001
From: Agent Smith <mstmole@163.com>
Date: Wed, 1 Jan 2025 11:53:46 +0800
Subject: [PATCH] feat: precise mark
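
Teach the allocation fast path to write an 8-byte object header inline so the
collector has per-object metadata for precise marking:

- raise the slow-path size threshold from 128 to 7936 bytes and add 8 header
  bytes to every fast-path allocation ((size + 7) / 8 * 8 + 8);
- drop the per-line (LINE_SIZE = 128) straddle check from the fast path;
- zero returned memory on both the fast and slow paths via llvm.memset.p1.i64;
- pass the immix object type (i8) to DioGC__register_global instead of the
  stored byte size, and update the immix submodule accordingly;
- bump the JSON encode stringbuilder capacity to 100 and disable
  overflow-checks / debug-assertions in the release profile.

For reference, a sketch of the header layout the fast path stores; the struct
and field names below are illustrative only, not types from the immix crate:

    // Hypothetical Rust view of the 8 bytes written before the returned
    // pointer (the IR returns %cursor + 8).
    #[repr(C)]
    struct ObjHeader {
        mark: u8,     // byte 0: zeroed at allocation time
        obj_type: u8, // byte 1: the obj_type argument of DioGC__malloc
        size: u16,    // bytes 2-3: alloc_size truncated to 16 bits
        addr_lo: u32, // bytes 4-7: lower 32 bits of the header address
    }
    // The header is exactly 8 bytes, matching the +8 added to alloc_size.
    const _: () = assert!(std::mem::size_of::<ObjHeader>() == 8);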

---
 .vscode/launch.json            |  2 +-
 Cargo.toml                     |  2 +
 alloc-aarch64.ll               | 70 +++++++++++++++++-----------------
 alloc-jit-aarch64.ll           | 70 +++++++++++++++++-----------------
 alloc-jit-x64.ll               | 70 +++++++++++++++++-----------------
 alloc-x64.ll                   | 68 +++++++++++++++++----------------
 immix                          |  2 +-
 planglib/core/gc.pi            |  2 +-
 planglib/std/json/encode.pi    |  2 +-
 src/ast/builder/llvmbuilder.rs | 22 +++--------
 10 files changed, 155 insertions(+), 155 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index e3415113..6231d35e 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -87,7 +87,7 @@
             "cwd": "${workspaceFolder}",
             "env": {
                 // "GC_LOG": "info"
-                // "PL_IMMIX_HEAP_SIZE": "80000000"
+                "PL_IMMIX_HEAP_SIZE": "70000000"
             }
             // "stopOnEntry": true
         },
diff --git a/Cargo.toml b/Cargo.toml
index ba6dd7ed..55cafc85 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -101,6 +101,8 @@ members = ["internal_macro", "vm", "pl_linker", "immix", "kagari"]
 lto = "fat"
 opt-level = 3
 debug = "line-tables-only"
+overflow-checks = false
+debug-assertions = false
 
 [profile.bench]
 opt-level = 3
diff --git a/alloc-aarch64.ll b/alloc-aarch64.ll
index ee51d440..adfaff10 100644
--- a/alloc-aarch64.ll
+++ b/alloc-aarch64.ll
@@ -32,7 +32,7 @@ define void @gc_thread_init() {
     ret void
 }
 
-declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
+declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture, i8, i64, i1) nounwind
 
 
 
@@ -46,9 +46,9 @@ define double @sqrt_64(double %Val) {
 ; define new DioGC__malloc
 define ptr addrspace(1) @DioGC__malloc(i64 %size, i8 %obj_type, i64 %rsp) noinline optnone allockind("alloc") {
 entry:
-    ; if size > 128, call slowpath
+    ; if size > 7936, call slowpath
     ; call void @printi64ln(i64 2222)
-    %size_gt_128 = icmp ugt i64 %size, 128
+    %size_gt_128 = icmp ugt i64 %size, 7936
     br i1 %size_gt_128, label %call_slowpath, label %check_collector
 check_collector:
     ; Load collector from gc_handle
@@ -63,6 +63,7 @@ call_slowpath:
     %innerrsp = tail call ptr asm alignstack "mov $0, sp", "=r"() #0
     %rspi = ptrtoint ptr %innerrsp to i64
     %slowpath_result = call ptr addrspace(1) @DioGC__malloc_slowpath(i64 %size, i8 %obj_type, i64 %rspi, ptr @gc_handle)
+    call void @llvm.memset.p1.i64(ptr addrspace(1) %slowpath_result, i8 0, i64 %size, i1 false)
     ; call void @printi64ln(i64 999)
     ; %slowpath_result_i = ptrtoint ptr addrspace(1) %slowpath_result to i64
     ; call void @printi64ln(i64 %slowpath_result_i)
@@ -94,11 +95,12 @@ fastpath_start:
     
 
     
-    ; Calculate alloc size = (size + 7) / 8 * 8
+    ; Calculate alloc size = (size + 7) / 8 * 8 + 8
     ; LINE_SIZE is 128
     %size_plus_7 = add i64 %size, 7
     %size_div_8 = lshr i64 %size_plus_7, 3
-    %alloc_size = shl i64 %size_div_8, 3
+    %alloc_size_body = shl i64 %size_div_8, 3
+    %alloc_size = add i64 %alloc_size_body, 8
 
 
 
@@ -107,44 +109,44 @@ fastpath_start:
     %hole_end_minus_cursor = sub i64 %hole_end_i64, %cursor_i64
     ; check if hole_end - cursor >= alloc_size
     %hole_end_minus_cursor_ge_alloc_size = icmp sge i64 %hole_end_minus_cursor, %alloc_size
-    br i1 %hole_end_minus_cursor_ge_alloc_size, label %check_current_line, label %call_slowpath
+    br i1 %hole_end_minus_cursor_ge_alloc_size, label %fast_path, label %call_slowpath
     
-check_current_line:
-    ; Check if alloc in current line is possible
-    ; let current_line_remains = self.cursor.align_offset(LINE_SIZE);
-    %current_line_occupied = and i64 %cursor_i64, 127
-    %current_line_remains = sub i64 128, %current_line_occupied
-
-    
-    ; call void @printi64ln(i64 %current_line_remains)
-    ; call void @printi64ln(i64 %alloc_size)
-    ; check if alloc_size <= current_line_remains && current_line_remains != 0
-    %alloc_size_le_remains = icmp ule i64 %alloc_size, %current_line_remains
-    %current_line_remains_ne_0 = icmp ne i64 %current_line_remains, 0
-    %alloc_size_le_remains_and_ne_0 = and i1 %alloc_size_le_remains, %current_line_remains_ne_0
-    br i1 %alloc_size_le_remains_and_ne_0, label %fast_path, label %check_remaining
-
-check_remaining:
-    ; Check if 128 <= hole_end - cursor
-    %hole_end_minus_cursor_ge_128 = icmp uge i64 %hole_end_minus_cursor, 128
-
-    ; self.cursor = self.cursor.add(current_line_remains);
-    %new_cursor_i = add i64 %cursor_i64, %current_line_remains
-    %new_cursor = inttoptr i64 %new_cursor_i to ptr addrspace(1)
-    br i1 %hole_end_minus_cursor_ge_128, label %fast_path, label %call_slowpath
-
 
 fast_path:
-    ; phi get cursor
-    %cursor_phi = phi ptr addrspace(1) [ %cursor, %check_current_line ], [ %new_cursor, %check_remaining ]
 
-    %cursor_phi_i = ptrtoint ptr addrspace(1) %cursor_phi to i64
+    ; set header
+    ; 1. store zero to first byte of %cursor
+    store i8 0, ptr addrspace(1) %cursor
+    ; 2. store obj_type to second byte of %cursor
+    %cursor_obj_type_ptr = getelementptr i8, ptr addrspace(1) %cursor, i64 1
+    store i8 %obj_type, ptr addrspace(1) %cursor_obj_type_ptr
+    ; 3. store size (trunc to i16) to third and fourth byte of %cursor
+    %cursor_size_ptr = getelementptr i16, ptr addrspace(1) %cursor, i64 1
+    %size_cast = trunc i64 %alloc_size to i16
+    store i16 %size_cast, ptr addrspace(1) %cursor_size_ptr
+    ; 4. store %cursor's lower 32 bits to fifth to eighth byte of %cursor
+    %cursor_i32 = trunc i64 %cursor_i64 to i32
+    %cursor_i32_ptr = getelementptr i32, ptr addrspace(1) %cursor, i64 1
+    store i32 %cursor_i32, ptr addrspace(1) %cursor_i32_ptr
+
+
+
+    %cursor_phi_i = add i64 %cursor_i64, 8
+    %cursor_phi = inttoptr i64 %cursor_phi_i to ptr addrspace(1)
     ; Update cursor
-    %new_cursor_after_alloc_i = add i64 %cursor_phi_i, %alloc_size
+    %new_cursor_after_alloc_i = add i64 %cursor_i64, %alloc_size
+    ; check if new_cursor_after_alloc_i is zero
+    %new_cursor_after_alloc_is_zero = icmp eq i64 %new_cursor_after_alloc_i, 0
+    br i1 %new_cursor_after_alloc_is_zero, label %unreachable_path, label %update_cursor
+unreachable_path:
+    unreachable
+
+update_cursor:
     %new_cursor_after_alloc = inttoptr i64 %new_cursor_after_alloc_i to ptr addrspace(1)
     store ptr addrspace(1) %new_cursor_after_alloc, ptr addrspace(1) %cursor_ptr, align 8
     ; call void @printi64ln(i64 4)
     ; call void @printi64ln(i64 %cursor_phi_i)
+    call void @llvm.memset.p1.i64(ptr addrspace(1) %cursor_phi, i8 0, i64 %size, i1 false)
     ret ptr addrspace(1) %cursor_phi
 }
 
diff --git a/alloc-jit-aarch64.ll b/alloc-jit-aarch64.ll
index f28a72a8..7f0b439b 100644
--- a/alloc-jit-aarch64.ll
+++ b/alloc-jit-aarch64.ll
@@ -45,9 +45,9 @@ define double @sqrt_64(double %Val) {
 ; define new DioGC__malloc
 define ptr addrspace(1) @DioGC__malloc(i64 %size, i8 %obj_type, i64 %rsp) noinline optnone allockind("alloc") {
 entry:
-    ; if size > 128, call slowpath
+    ; if size > 7936, call slowpath
     ; call void @printi64ln(i64 2222)
-    %size_gt_128 = icmp ugt i64 %size, 128
+    %size_gt_128 = icmp ugt i64 %size, 7936
     br i1 %size_gt_128, label %call_slowpath, label %check_collector
 check_collector:
     ; Load collector from gc_handle
@@ -62,6 +62,7 @@ call_slowpath:
     %innerrsp = tail call ptr asm alignstack "mov $0, sp", "=r"() #0
     %rspi = ptrtoint ptr %innerrsp to i64
     %slowpath_result = call ptr addrspace(1) @DioGC__malloc_slowpath_jit(i64 %size, i8 %obj_type, i64 %rspi)
+    call void @llvm.memset.p1.i64(ptr addrspace(1) %slowpath_result, i8 0, i64 %size, i1 false)
     ; call void @printi64ln(i64 999)
     ; %slowpath_result_i = ptrtoint ptr addrspace(1) %slowpath_result to i64
     ; call void @printi64ln(i64 %slowpath_result_i)
@@ -75,7 +76,7 @@ fastpath_start:
 
     ; Get thread_local_allocator (first field)
     %block = load ptr addrspace(1), ptr %thread_local_allocator_ptr, align 8
-
+    
     ; check block is null
     %block_is_null = icmp eq ptr addrspace(1) %block, null
     br i1 %block_is_null, label %call_slowpath, label %load_block_fields
@@ -98,11 +99,12 @@ load_block_fields:
     
 
     
-    ; Calculate alloc size = (size + 7) / 8 * 8
+    ; Calculate alloc size = (size + 7) / 8 * 8 + 8
     ; LINE_SIZE is 128
     %size_plus_7 = add i64 %size, 7
     %size_div_8 = lshr i64 %size_plus_7, 3
-    %alloc_size = shl i64 %size_div_8, 3
+    %alloc_size_body = shl i64 %size_div_8, 3
+    %alloc_size = add i64 %alloc_size_body, 8
 
 
 
@@ -111,44 +113,44 @@ load_block_fields:
     %hole_end_minus_cursor = sub i64 %hole_end_i64, %cursor_i64
     ; check if hole_end - cursor >= alloc_size
     %hole_end_minus_cursor_ge_alloc_size = icmp sge i64 %hole_end_minus_cursor, %alloc_size
-    br i1 %hole_end_minus_cursor_ge_alloc_size, label %check_current_line, label %call_slowpath
-    
-check_current_line:
-    ; Check if alloc in current line is possible
-    ; let current_line_remains = self.cursor.align_offset(LINE_SIZE);
-    %current_line_occupied = and i64 %cursor_i64, 127
-    %current_line_remains = sub i64 128, %current_line_occupied
-
+    br i1 %hole_end_minus_cursor_ge_alloc_size, label %fast_path, label %call_slowpath
     
-    ; call void @printi64ln(i64 %current_line_remains)
-    ; call void @printi64ln(i64 %alloc_size)
-    ; check if alloc_size <= current_line_remains && current_line_remains != 0
-    %alloc_size_le_remains = icmp ule i64 %alloc_size, %current_line_remains
-    %current_line_remains_ne_0 = icmp ne i64 %current_line_remains, 0
-    %alloc_size_le_remains_and_ne_0 = and i1 %alloc_size_le_remains, %current_line_remains_ne_0
-    br i1 %alloc_size_le_remains_and_ne_0, label %fast_path, label %check_remaining
-
-check_remaining:
-    ; Check if 128 <= hole_end - cursor
-    %hole_end_minus_cursor_ge_128 = icmp uge i64 %hole_end_minus_cursor, 128
-
-    ; self.cursor = self.cursor.add(current_line_remains);
-    %new_cursor_i = add i64 %cursor_i64, %current_line_remains
-    %new_cursor = inttoptr i64 %new_cursor_i to ptr addrspace(1)
-    br i1 %hole_end_minus_cursor_ge_128, label %fast_path, label %call_slowpath
-
 
 fast_path:
-    ; phi get cursor
-    %cursor_phi = phi ptr addrspace(1) [ %cursor, %check_current_line ], [ %new_cursor, %check_remaining ]
 
-    %cursor_phi_i = ptrtoint ptr addrspace(1) %cursor_phi to i64
+    ; set header
+    ; 1. store zero to first byte of %cursor
+    store i8 0, ptr addrspace(1) %cursor
+    ; 2. store obj_type to second byte of %cursor
+    %cursor_obj_type_ptr = getelementptr i8, ptr addrspace(1) %cursor, i64 1
+    store i8 %obj_type, ptr addrspace(1) %cursor_obj_type_ptr
+    ; 3. store size (trunc to i16) to third and fourth byte of %cursor
+    %cursor_size_ptr = getelementptr i16, ptr addrspace(1) %cursor, i64 1
+    %size_cast = trunc i64 %alloc_size to i16
+    store i16 %size_cast, ptr addrspace(1) %cursor_size_ptr
+    ; 4. store %cursor's lower 32 bits to fifth to eighth byte of %cursor
+    %cursor_i32 = trunc i64 %cursor_i64 to i32
+    %cursor_i32_ptr = getelementptr i32, ptr addrspace(1) %cursor, i64 1
+    store i32 %cursor_i32, ptr addrspace(1) %cursor_i32_ptr
+
+
+
+    %cursor_phi_i = add i64 %cursor_i64, 8
+    %cursor_phi = inttoptr i64 %cursor_phi_i to ptr addrspace(1)
     ; Update cursor
-    %new_cursor_after_alloc_i = add i64 %cursor_phi_i, %alloc_size
+    %new_cursor_after_alloc_i = add i64 %cursor_i64, %alloc_size
+    ; check if new_cursor_after_alloc_i is zero
+    %new_cursor_after_alloc_is_zero = icmp eq i64 %new_cursor_after_alloc_i, 0
+    br i1 %new_cursor_after_alloc_is_zero, label %unreachable_path, label %update_cursor
+unreachable_path:
+    unreachable
+
+update_cursor:
     %new_cursor_after_alloc = inttoptr i64 %new_cursor_after_alloc_i to ptr addrspace(1)
     store ptr addrspace(1) %new_cursor_after_alloc, ptr addrspace(1) %cursor_ptr, align 8
     ; call void @printi64ln(i64 4)
     ; call void @printi64ln(i64 %cursor_phi_i)
+    call void @llvm.memset.p1.i64(ptr addrspace(1) %cursor_phi, i8 0, i64 %size, i1 false)
     ret ptr addrspace(1) %cursor_phi
 }
 
diff --git a/alloc-jit-x64.ll b/alloc-jit-x64.ll
index 0d560917..f14594e8 100644
--- a/alloc-jit-x64.ll
+++ b/alloc-jit-x64.ll
@@ -45,9 +45,9 @@ define double @sqrt_64(double %Val) {
 ; define new DioGC__malloc
 define ptr addrspace(1) @DioGC__malloc(i64 %size, i8 %obj_type, i64 %rsp) noinline optnone allockind("alloc") {
 entry:
-    ; if size > 128, call slowpath
+    ; if size > 7936, call slowpath
     ; call void @printi64ln(i64 2222)
-    %size_gt_128 = icmp ugt i64 %size, 128
+    %size_gt_128 = icmp ugt i64 %size, 7936
     br i1 %size_gt_128, label %call_slowpath, label %check_collector
 check_collector:
     ; Load collector from gc_handle
@@ -62,6 +62,7 @@ call_slowpath:
     %innerrsp = tail call ptr asm alignstack "mov %rsp, $0", "=r"() #0
     %rspi = ptrtoint ptr %innerrsp to i64
     %slowpath_result = call ptr addrspace(1) @DioGC__malloc_slowpath_jit(i64 %size, i8 %obj_type, i64 %rspi)
+    call void @llvm.memset.p1.i64(ptr addrspace(1) %slowpath_result, i8 0, i64 %size, i1 false)
     ; call void @printi64ln(i64 999)
     ; %slowpath_result_i = ptrtoint ptr addrspace(1) %slowpath_result to i64
     ; call void @printi64ln(i64 %slowpath_result_i)
@@ -75,7 +76,7 @@ fastpath_start:
 
     ; Get thread_local_allocator (first field)
     %block = load ptr addrspace(1), ptr %thread_local_allocator_ptr, align 8
-
+    
     ; check block is null
     %block_is_null = icmp eq ptr addrspace(1) %block, null
     br i1 %block_is_null, label %call_slowpath, label %load_block_fields
@@ -98,11 +99,12 @@ load_block_fields:
     
 
     
-    ; Calculate alloc size = (size + 7) / 8 * 8
+    ; Calculate alloc size = (size + 7) / 8 * 8 + 8
     ; LINE_SIZE is 128
     %size_plus_7 = add i64 %size, 7
     %size_div_8 = lshr i64 %size_plus_7, 3
-    %alloc_size = shl i64 %size_div_8, 3
+    %alloc_size_body = shl i64 %size_div_8, 3
+    %alloc_size = add i64 %alloc_size_body, 8
 
 
 
@@ -111,44 +113,44 @@ load_block_fields:
     %hole_end_minus_cursor = sub i64 %hole_end_i64, %cursor_i64
     ; check if hole_end - cursor >= alloc_size
     %hole_end_minus_cursor_ge_alloc_size = icmp sge i64 %hole_end_minus_cursor, %alloc_size
-    br i1 %hole_end_minus_cursor_ge_alloc_size, label %check_current_line, label %call_slowpath
-    
-check_current_line:
-    ; Check if alloc in current line is possible
-    ; let current_line_remains = self.cursor.align_offset(LINE_SIZE);
-    %current_line_occupied = and i64 %cursor_i64, 127
-    %current_line_remains = sub i64 128, %current_line_occupied
-
+    br i1 %hole_end_minus_cursor_ge_alloc_size, label %fast_path, label %call_slowpath
     
-    ; call void @printi64ln(i64 %current_line_remains)
-    ; call void @printi64ln(i64 %alloc_size)
-    ; check if alloc_size <= current_line_remains && current_line_remains != 0
-    %alloc_size_le_remains = icmp ule i64 %alloc_size, %current_line_remains
-    %current_line_remains_ne_0 = icmp ne i64 %current_line_remains, 0
-    %alloc_size_le_remains_and_ne_0 = and i1 %alloc_size_le_remains, %current_line_remains_ne_0
-    br i1 %alloc_size_le_remains_and_ne_0, label %fast_path, label %check_remaining
-
-check_remaining:
-    ; Check if 128 <= hole_end - cursor
-    %hole_end_minus_cursor_ge_128 = icmp uge i64 %hole_end_minus_cursor, 128
-
-    ; self.cursor = self.cursor.add(current_line_remains);
-    %new_cursor_i = add i64 %cursor_i64, %current_line_remains
-    %new_cursor = inttoptr i64 %new_cursor_i to ptr addrspace(1)
-    br i1 %hole_end_minus_cursor_ge_128, label %fast_path, label %call_slowpath
-
 
 fast_path:
-    ; phi get cursor
-    %cursor_phi = phi ptr addrspace(1) [ %cursor, %check_current_line ], [ %new_cursor, %check_remaining ]
 
-    %cursor_phi_i = ptrtoint ptr addrspace(1) %cursor_phi to i64
+    ; set header
+    ; 1. store zero to first byte of %cursor
+    store i8 0, ptr addrspace(1) %cursor
+    ; 2. store obj_type to second byte of %cursor
+    %cursor_obj_type_ptr = getelementptr i8, ptr addrspace(1) %cursor, i64 1
+    store i8 %obj_type, ptr addrspace(1) %cursor_obj_type_ptr
+    ; 3. store size (trunc to i16) to third and fourth byte of %cursor
+    %cursor_size_ptr = getelementptr i16, ptr addrspace(1) %cursor, i64 1
+    %size_cast = trunc i64 %alloc_size to i16
+    store i16 %size_cast, ptr addrspace(1) %cursor_size_ptr
+    ; 4. store %cursor's lower 32 bits to fifth to eighth byte of %cursor
+    %cursor_i32 = trunc i64 %cursor_i64 to i32
+    %cursor_i32_ptr = getelementptr i32, ptr addrspace(1) %cursor, i64 1
+    store i32 %cursor_i32, ptr addrspace(1) %cursor_i32_ptr
+
+
+
+    %cursor_phi_i = add i64 %cursor_i64, 8
+    %cursor_phi = inttoptr i64 %cursor_phi_i to ptr addrspace(1)
     ; Update cursor
-    %new_cursor_after_alloc_i = add i64 %cursor_phi_i, %alloc_size
+    %new_cursor_after_alloc_i = add i64 %cursor_i64, %alloc_size
+    ; check if new_cursor_after_alloc_i is zero
+    %new_cursor_after_alloc_is_zero = icmp eq i64 %new_cursor_after_alloc_i, 0
+    br i1 %new_cursor_after_alloc_is_zero, label %unreachable_path, label %update_cursor
+unreachable_path:
+    unreachable
+
+update_cursor:
     %new_cursor_after_alloc = inttoptr i64 %new_cursor_after_alloc_i to ptr addrspace(1)
     store ptr addrspace(1) %new_cursor_after_alloc, ptr addrspace(1) %cursor_ptr, align 8
     ; call void @printi64ln(i64 4)
     ; call void @printi64ln(i64 %cursor_phi_i)
+    call void @llvm.memset.p1.i64(ptr addrspace(1) %cursor_phi, i8 0, i64 %size, i1 false)
     ret ptr addrspace(1) %cursor_phi
 }
 
diff --git a/alloc-x64.ll b/alloc-x64.ll
index a31ea5a1..173ec7a5 100644
--- a/alloc-x64.ll
+++ b/alloc-x64.ll
@@ -46,9 +46,9 @@ define double @sqrt_64(double %Val) {
 ; define new DioGC__malloc
 define ptr addrspace(1) @DioGC__malloc(i64 %size, i8 %obj_type, i64 %rsp) noinline optnone allockind("alloc") {
 entry:
-    ; if size > 128, call slowpath
+    ; if size > 7936, call slowpath
     ; call void @printi64ln(i64 2222)
-    %size_gt_128 = icmp ugt i64 %size, 128
+    %size_gt_128 = icmp ugt i64 %size, 7936
     br i1 %size_gt_128, label %call_slowpath, label %check_collector
 check_collector:
     ; Load collector from gc_handle
@@ -63,6 +63,7 @@ call_slowpath:
     %innerrsp = tail call ptr asm alignstack "mov %rsp, $0", "=r"() #0
     %rspi = ptrtoint ptr %innerrsp to i64
     %slowpath_result = call ptr addrspace(1) @DioGC__malloc_slowpath(i64 %size, i8 %obj_type, i64 %rspi, ptr @gc_handle)
+    call void @llvm.memset.p1.i64(ptr addrspace(1) %slowpath_result, i8 0, i64 %size, i1 false)
     ; call void @printi64ln(i64 999)
     ; %slowpath_result_i = ptrtoint ptr addrspace(1) %slowpath_result to i64
     ; call void @printi64ln(i64 %slowpath_result_i)
@@ -94,11 +95,12 @@ fastpath_start:
     
 
     
-    ; Calculate alloc size = (size + 7) / 8 * 8
+    ; Calculate alloc size = (size + 7) / 8 * 8 + 8
     ; LINE_SIZE is 128
     %size_plus_7 = add i64 %size, 7
     %size_div_8 = lshr i64 %size_plus_7, 3
-    %alloc_size = shl i64 %size_div_8, 3
+    %alloc_size_body = shl i64 %size_div_8, 3
+    %alloc_size = add i64 %alloc_size_body, 8
 
 
 
@@ -107,44 +109,44 @@ fastpath_start:
     %hole_end_minus_cursor = sub i64 %hole_end_i64, %cursor_i64
     ; check if hole_end - cursor >= alloc_size
     %hole_end_minus_cursor_ge_alloc_size = icmp sge i64 %hole_end_minus_cursor, %alloc_size
-    br i1 %hole_end_minus_cursor_ge_alloc_size, label %check_current_line, label %call_slowpath
+    br i1 %hole_end_minus_cursor_ge_alloc_size, label %fast_path, label %call_slowpath
     
-check_current_line:
-    ; Check if alloc in current line is possible
-    ; let current_line_remains = self.cursor.align_offset(LINE_SIZE);
-    %current_line_occupied = and i64 %cursor_i64, 127
-    %current_line_remains = sub i64 128, %current_line_occupied
-
-    
-    ; call void @printi64ln(i64 %current_line_remains)
-    ; call void @printi64ln(i64 %alloc_size)
-    ; check if alloc_size <= current_line_remains && current_line_remains != 0
-    %alloc_size_le_remains = icmp ule i64 %alloc_size, %current_line_remains
-    %current_line_remains_ne_0 = icmp ne i64 %current_line_remains, 0
-    %alloc_size_le_remains_and_ne_0 = and i1 %alloc_size_le_remains, %current_line_remains_ne_0
-    br i1 %alloc_size_le_remains_and_ne_0, label %fast_path, label %check_remaining
-
-check_remaining:
-    ; Check if 128 <= hole_end - cursor
-    %hole_end_minus_cursor_ge_128 = icmp uge i64 %hole_end_minus_cursor, 128
-
-    ; self.cursor = self.cursor.add(current_line_remains);
-    %new_cursor_i = add i64 %cursor_i64, %current_line_remains
-    %new_cursor = inttoptr i64 %new_cursor_i to ptr addrspace(1)
-    br i1 %hole_end_minus_cursor_ge_128, label %fast_path, label %call_slowpath
-
 
 fast_path:
-    ; phi get cursor
-    %cursor_phi = phi ptr addrspace(1) [ %cursor, %check_current_line ], [ %new_cursor, %check_remaining ]
 
-    %cursor_phi_i = ptrtoint ptr addrspace(1) %cursor_phi to i64
+    ; set header
+    ; 1. store zero to first byte of %cursor
+    store i8 0, ptr addrspace(1) %cursor
+    ; 2. store obj_type to second byte of %cursor
+    %cursor_obj_type_ptr = getelementptr i8, ptr addrspace(1) %cursor, i64 1
+    store i8 %obj_type, ptr addrspace(1) %cursor_obj_type_ptr
+    ; 3. store size (trunc to i16) to third and fourth byte of %cursor
+    %cursor_size_ptr = getelementptr i16, ptr addrspace(1) %cursor, i64 1
+    %size_cast = trunc i64 %alloc_size to i16
+    store i16 %size_cast, ptr addrspace(1) %cursor_size_ptr
+    ; 4. store %cursor's lower 32 bits to fifth to eighth byte of %cursor
+    %cursor_i32 = trunc i64 %cursor_i64 to i32
+    %cursor_i32_ptr = getelementptr i32, ptr addrspace(1) %cursor, i64 1
+    store i32 %cursor_i32, ptr addrspace(1) %cursor_i32_ptr
+
+
+
+    %cursor_phi_i = add i64 %cursor_i64, 8
+    %cursor_phi = inttoptr i64 %cursor_phi_i to ptr addrspace(1)
     ; Update cursor
-    %new_cursor_after_alloc_i = add i64 %cursor_phi_i, %alloc_size
+    %new_cursor_after_alloc_i = add i64 %cursor_i64, %alloc_size
+    ; check if new_cursor_after_alloc_i is zero
+    %new_cursor_after_alloc_is_zero = icmp eq i64 %new_cursor_after_alloc_i, 0
+    br i1 %new_cursor_after_alloc_is_zero, label %unreachable_path, label %update_cursor
+unreachable_path:
+    unreachable
+
+update_cursor:
     %new_cursor_after_alloc = inttoptr i64 %new_cursor_after_alloc_i to ptr addrspace(1)
     store ptr addrspace(1) %new_cursor_after_alloc, ptr addrspace(1) %cursor_ptr, align 8
     ; call void @printi64ln(i64 4)
     ; call void @printi64ln(i64 %cursor_phi_i)
+    call void @llvm.memset.p1.i64(ptr addrspace(1) %cursor_phi, i8 0, i64 %size, i1 false)
     ret ptr addrspace(1) %cursor_phi
 }
 
diff --git a/immix b/immix
index be145244..b8a15593 160000
--- a/immix
+++ b/immix
@@ -1 +1 @@
-Subproject commit be145244fd4d159b4b3cedd62e8a23b49800a30a
+Subproject commit b8a15593897d2afaa6898afbbf6b3f7e903b2daa
diff --git a/planglib/core/gc.pi b/planglib/core/gc.pi
index 53ed57f3..a388838b 100644
--- a/planglib/core/gc.pi
+++ b/planglib/core/gc.pi
@@ -44,7 +44,7 @@ fn DioGC__remove_coro_stack(stack:i64) void;
 
 pub fn DioGC__safepoint(sp:i64) void;
 
-pub fn DioGC__register_global(p:i64, tp:i32) void;
+pub fn DioGC__register_global(p:i64, tp:i8) void;
 
 pub fn DioGC__set_eva(ev:bool) void;
 
diff --git a/planglib/std/json/encode.pi b/planglib/std/json/encode.pi
index 93039424..06b89a47 100644
--- a/planglib/std/json/encode.pi
+++ b/planglib/std/json/encode.pi
@@ -8,7 +8,7 @@ pub trait JSONSerilizable {
 }
 
 pub fn encode<T>(t:T) string {
-    let sb = stringbuilder(10);
+    let sb = stringbuilder(100);
     let isarr = false;
     if_arr(t, { 
         let a = _arr;
diff --git a/src/ast/builder/llvmbuilder.rs b/src/ast/builder/llvmbuilder.rs
index bcc0f90d..217550bf 100644
--- a/src/ast/builder/llvmbuilder.rs
+++ b/src/ast/builder/llvmbuilder.rs
@@ -550,10 +550,7 @@ impl<'a, 'ctx> LLVMBuilder<'a, 'ctx> {
                                 arr_size.into(),
                                 self.context
                                     .i8_type()
-                                    .const_int(
-                                        arr.element_type.borrow().get_immix_type() as u64,
-                                        false,
-                                    )
+                                    .const_int(ObjectType::Trait as u64, false)
                                     .into(),
                                 rsp.into(),
                             ]
@@ -563,10 +560,7 @@ impl<'a, 'ctx> LLVMBuilder<'a, 'ctx> {
                                 arr_size.into(),
                                 self.context
                                     .i8_type()
-                                    .const_int(
-                                        arr.element_type.borrow().get_immix_type() as u64,
-                                        false,
-                                    )
+                                    .const_int(ObjectType::Trait as u64, false)
                                     .into(),
                             ]
                             .to_vec()
@@ -3024,18 +3018,14 @@ impl<'a, 'ctx> IRBuilder<'a, 'ctx> for LLVMBuilder<'a, 'ctx> {
             .builder
             .build_ptr_to_int(global.as_pointer_value(), self.context.i64_type(), "")
             .unwrap();
-        let td = self.targetmachine.get_target_data();
-        let byte_size_int = self
-            .get_llvm_value(self.int_value(&PriType::I32, td.get_store_size(&base_type), false))
+        let obj_tp = pltp.get_immix_type();
+        let obj_ty = self
+            .get_llvm_value(self.int_value(&PriType::I8, obj_tp.int_value() as _, false))
             .unwrap()
             .into_int_value();
 
         self.builder
-            .build_call(
-                f,
-                &[ptrtoint.into(), byte_size_int.into()],
-                "register_global",
-            )
+            .build_call(f, &[ptrtoint.into(), obj_ty.into()], "register_global")
             .unwrap();
         self.get_llvm_value_handle(&global.as_any_value_enum())
     }