@@ -45,9 +45,9 @@ define double @sqrt_64(double %Val) {
4545; define new DioGC__malloc
4646define ptr addrspace (1 ) @DioGC__malloc (i64 %size , i8 %obj_type , i64 %rsp ) noinline optnone allockind("alloc" ) {
4747entry:
48- ; if size > 128 , call slowpath
48+ ; if size > 7936 , call slowpath
4949 ; call void @printi64ln(i64 2222)
50- %size_gt_128 = icmp ugt i64 %size , 128
50+ %size_gt_128 = icmp ugt i64 %size , 7936
5151 br i1 %size_gt_128 , label %call_slowpath , label %check_collector
5252check_collector:
5353 ; Load collector from gc_handle
@@ -62,6 +62,7 @@ call_slowpath:
6262 %innerrsp = tail call ptr asm alignstack "mov %rsp, $0" , "=r" () #0
6363 %rspi = ptrtoint ptr %innerrsp to i64
6464 %slowpath_result = call ptr addrspace (1 ) @DioGC__malloc_slowpath_jit (i64 %size , i8 %obj_type , i64 %rspi )
65+ call void @llvm.memset.p1.i64 (ptr addrspace (1 ) %slowpath_result , i8 0 , i64 %size , i1 false )
6566 ; call void @printi64ln(i64 999)
6667 ; %slowpath_result_i = ptrtoint ptr addrspace(1) %slowpath_result to i64
6768 ; call void @printi64ln(i64 %slowpath_result_i)
@@ -75,7 +76,7 @@ fastpath_start:
7576
7677 ; Get thread_local_allocator (first field)
7778 %block = load ptr addrspace (1 ), ptr %thread_local_allocator_ptr , align 8
78-
79+
7980 ; check block is null
8081 %block_is_null = icmp eq ptr addrspace (1 ) %block , null
8182 br i1 %block_is_null , label %call_slowpath , label %load_block_fields
@@ -98,11 +99,12 @@ load_block_fields:
9899
99100
100101
101- ; Calculate alloc size = (size + 7) / 8 * 8
102+ ; Calculate alloc size = (size + 7) / 8 * 8 + 8 (body rounded up to a multiple of 8, plus the 8-byte header written in fast_path)
102103 ; LINE_SIZE is 128
103104 %size_plus_7 = add i64 %size , 7
104105 %size_div_8 = lshr i64 %size_plus_7 , 3
105- %alloc_size = shl i64 %size_div_8 , 3
106+ %alloc_size_body = shl i64 %size_div_8 , 3
107+ %alloc_size = add i64 %alloc_size_body , 8
106108
107109
108110
@@ -111,44 +113,44 @@ load_block_fields:
111113 %hole_end_minus_cursor = sub i64 %hole_end_i64 , %cursor_i64
112114 ; check if hole_end - cursor >= alloc_size
113115 %hole_end_minus_cursor_ge_alloc_size = icmp sge i64 %hole_end_minus_cursor , %alloc_size
114- br i1 %hole_end_minus_cursor_ge_alloc_size , label %check_current_line , label %call_slowpath
115-
116- check_current_line:
117- ; Check if alloc in current line is possible
118- ; let current_line_remains = self.cursor.align_offset(LINE_SIZE);
119- %current_line_occupied = and i64 %cursor_i64 , 127
120- %current_line_remains = sub i64 128 , %current_line_occupied
121-
116+ br i1 %hole_end_minus_cursor_ge_alloc_size , label %fast_path , label %call_slowpath
122117
123- ; call void @printi64ln(i64 %current_line_remains)
124- ; call void @printi64ln(i64 %alloc_size)
125- ; check if alloc_size <= current_line_remains && current_line_remains != 0
126- %alloc_size_le_remains = icmp ule i64 %alloc_size , %current_line_remains
127- %current_line_remains_ne_0 = icmp ne i64 %current_line_remains , 0
128- %alloc_size_le_remains_and_ne_0 = and i1 %alloc_size_le_remains , %current_line_remains_ne_0
129- br i1 %alloc_size_le_remains_and_ne_0 , label %fast_path , label %check_remaining
130-
131- check_remaining:
132- ; Check if 128 <= hole_end - cursor
133- %hole_end_minus_cursor_ge_128 = icmp uge i64 %hole_end_minus_cursor , 128
134-
135- ; self.cursor = self.cursor.add(current_line_remains);
136- %new_cursor_i = add i64 %cursor_i64 , %current_line_remains
137- %new_cursor = inttoptr i64 %new_cursor_i to ptr addrspace (1 )
138- br i1 %hole_end_minus_cursor_ge_128 , label %fast_path , label %call_slowpath
139-
140118
141119fast_path:
142- ; phi get cursor
143- %cursor_phi = phi ptr addrspace (1 ) [ %cursor , %check_current_line ], [ %new_cursor , %check_remaining ]
144120
145- %cursor_phi_i = ptrtoint ptr addrspace (1 ) %cursor_phi to i64
121+ ; set header
122+ ; 1. store zero to first byte of %cursor
123+ store i8 0 , ptr addrspace (1 ) %cursor
124+ ; 2. store obj_type to second byte of %cursor
125+ %cursor_obj_type_ptr = getelementptr i8 , ptr addrspace (1 ) %cursor , i64 1
126+ store i8 %obj_type , ptr addrspace (1 ) %cursor_obj_type_ptr
127+ ; 3. store size (trunc to i16) to third and fourth byte of %cursor
128+ %cursor_size_ptr = getelementptr i16 , ptr addrspace (1 ) %cursor , i64 1
129+ %size_cast = trunc i64 %alloc_size to i16
130+ store i16 %size_cast , ptr addrspace (1 ) %cursor_size_ptr
131+ ; 4. store %cursor's lower 32 bits to fifth to eighth byte of %cursor
132+ %cursor_i32 = trunc i64 %cursor_i64 to i32
133+ %cursor_i32_ptr = getelementptr i32 , ptr addrspace (1 ) %cursor , i64 1
134+ store i32 %cursor_i32 , ptr addrspace (1 ) %cursor_i32_ptr
135+
136+
137+
138+ %cursor_phi_i = add i64 %cursor_i64 , 8
139+ %cursor_phi = inttoptr i64 %cursor_phi_i to ptr addrspace (1 )
146140 ; Update cursor
147- %new_cursor_after_alloc_i = add i64 %cursor_phi_i , %alloc_size
141+ %new_cursor_after_alloc_i = add i64 %cursor_i64 , %alloc_size
142+ ; check if new_cursor_after_alloc_i is zero
143+ %new_cursor_after_alloc_is_zero = icmp eq i64 %new_cursor_after_alloc_i , 0
144+ br i1 %new_cursor_after_alloc_is_zero , label %unreachable_path , label %update_cursor
145+ unreachable_path:
146+ unreachable
147+
148+ update_cursor:
148149 %new_cursor_after_alloc = inttoptr i64 %new_cursor_after_alloc_i to ptr addrspace (1 )
149150 store ptr addrspace (1 ) %new_cursor_after_alloc , ptr addrspace (1 ) %cursor_ptr , align 8
150151 ; call void @printi64ln(i64 4)
151152 ; call void @printi64ln(i64 %cursor_phi_i)
153+ call void @llvm.memset.p1.i64 (ptr addrspace (1 ) %cursor_phi , i8 0 , i64 %size , i1 false )
152154 ret ptr addrspace (1 ) %cursor_phi
153155}
154156
0 commit comments