@@ -46,9 +46,10 @@ define double @sqrt_64(double %Val) {
4646; define new DioGC__malloc
4747define ptr addrspace (1 ) @DioGC__malloc (i64 %size , i8 %obj_type , i64 %rsp ) noinline optnone allockind("alloc" ) {
4848entry:
49- ; if size > 7936, call slowpath
50- %size_gt_7936 = icmp ugt i64 %size , 7936
51- br i1 %size_gt_7936 , label %call_slowpath , label %check_collector
49+ ; if size > 128, call slowpath
50+ ; call void @printi64ln(i64 2222)
51+ %size_gt_128 = icmp ugt i64 %size , 128
52+ br i1 %size_gt_128 , label %call_slowpath , label %check_collector
5253check_collector:
5354 ; Load collector from gc_handle
5455 %collector_ptr = load ptr , ptr @gc_handle , align 8 , !invariant.load !0
@@ -62,108 +63,89 @@ call_slowpath:
6263 %innerrsp = tail call ptr asm alignstack "mov $0, sp" , "=r" () #0
6364 %rspi = ptrtoint ptr %innerrsp to i64
6465 %slowpath_result = call ptr addrspace (1 ) @DioGC__malloc_slowpath (i64 %size , i8 %obj_type , i64 %rspi , ptr @gc_handle )
66+ ; call void @printi64ln(i64 999)
67+ ; %slowpath_result_i = ptrtoint ptr addrspace(1) %slowpath_result to i64
68+ ; call void @printi64ln(i64 %slowpath_result_i)
6569 ret ptr addrspace (1 ) %slowpath_result
6670fastpath_start:
71+ ; call void @printi64ln(i64 1)
6772 %thread_local_allocator_ptr = load ptr , ptr %collector_ptr , align 8
6873
69- ; get second field of collector, which is bytes_allocated_since_last_gc
70- %bytes_allocated_since_last_gc_ptr = getelementptr i64 , ptr %collector_ptr , i32 1
74+ ; ; get second field of collector, which is bytes_allocated_since_last_gc
75+ ; %bytes_allocated_since_last_gc_ptr = getelementptr i64, ptr %collector_ptr, i32 1
7176
7277 ; Get thread_local_allocator (first field)
7378 %block = load ptr addrspace (1 ), ptr %thread_local_allocator_ptr , align 8
7479
7580 ; Load block fields
7681 %cursor_ptr = getelementptr i64 , ptr addrspace (1 ) %block , i32 0
77- %cursor = load i64 , ptr addrspace (1 ) %cursor_ptr , align 8
78-
82+ %cursor = load ptr addrspace ( 1 ) , ptr addrspace (1 ) %cursor_ptr , align 8
83+ ; call void @printi64ln(i64 2)
7984; ; if cursor > 256, call slowpath
8085; %cursor_gt_256 = icmp ugt i64 %cursor, 255
8186; br i1 %cursor_gt_256, label %call_slowpath, label %load_block_fields_2
8287
8388; load_block_fields_2:
8489
85- %next_hole_size_ptr = getelementptr i64 , ptr addrspace (1 ) %block , i32 1
86- %next_hole_size = load i64 , ptr addrspace (1 ) %next_hole_size_ptr , align 8
87-
88- %available_line_num_ptr = getelementptr i64 , ptr addrspace (1 ) %block , i32 2
89- %available_line_num = load i64 , ptr addrspace (1 ) %available_line_num_ptr , align 8
90+ %hole_end_ptr = getelementptr i64 , ptr addrspace (1 ) %block , i32 1
91+ %hole_end = load ptr addrspace (1 ), ptr addrspace (1 ) %hole_end_ptr , align 8
92+ ; call void @printi64ln(i64 3)
9093
91- %hole_num_ptr = getelementptr i64 , ptr addrspace (1 ) %block , i32 3
92- %hole_num = load i64 , ptr addrspace (1 ) %hole_num_ptr , align 8
9394
95+
9496
95- ; Calculate line_size = (size - 1 ) / LINE_SIZE + 1
97+ ; Calculate alloc_size = size rounded up to the next multiple of 8: ((size + 7) / 8) * 8
9698 ; LINE_SIZE is 128
97- %size_minus_1 = sub i64 %size , 1
98- %div = udiv i64 %size_minus_1 , 128
99- %line_size = add i64 %div , 1
100-
101- ; Check if fast path is possible (next_hole_size >= line_size)
102- %fast_path_possible = icmp uge i64 %next_hole_size , %line_size
103- br i1 %fast_path_possible , label %fast_path , label %call_slowpath
99+ %size_plus_7 = add i64 %size , 7
100+ %size_div_8 = lshr i64 %size_plus_7 , 3
101+ %alloc_size = shl i64 %size_div_8 , 3
104102
105- fast_path:
106- ; Update available_line_num
107- %new_available = sub i64 %available_line_num , %line_size
108- store i64 %new_available , ptr addrspace (1 ) %available_line_num_ptr , align 8
109-
110- ; Get line map pointer (after the first three fields)
111- %line_map_ptr = getelementptr i64 , ptr addrspace (1 ) %block , i32 4
112-
113- ; Mark lines as used and set object type for first line
114- %first_line_ptr = getelementptr i8 , ptr addrspace (1 ) %line_map_ptr , i64 %cursor
115- ; Set object type and mark as used (obj_type << 2 | 0b10000001)
116- %shifted_type = shl i8 %obj_type , 2
117- %header_val = or i8 %shifted_type , 129 ; 129 = 0b10000001
118- store i8 %header_val , ptr addrspace (1 ) %first_line_ptr , align 1
103+
104+
105+ %cursor_i64 = ptrtoint ptr addrspace (1 ) %cursor to i64
106+ %hole_end_i64 = ptrtoint ptr addrspace (1 ) %hole_end to i64
107+ %hole_end_minus_cursor = sub i64 %hole_end_i64 , %cursor_i64
108+ ; check if hole_end - cursor >= alloc_size
109+ %hole_end_minus_cursor_ge_alloc_size = icmp sge i64 %hole_end_minus_cursor , %alloc_size
110+ br i1 %hole_end_minus_cursor_ge_alloc_size , label %check_current_line , label %call_slowpath
119111
120- ; Check if line_size is 1
121- %is_one_line = icmp eq i64 %line_size , 1
122- br i1 %is_one_line , label %finish_fast_path , label %mark_lines_start
123-
124- mark_lines_start:
125- ; Mark remaining lines as used (0b00000001)
126- %next_cursor = add i64 %cursor , 1
127- %end_cursor = add i64 %cursor , %line_size
128- br label %mark_lines
129-
130- mark_lines:
131- %current = phi i64 [ %next , %mark_lines ], [ %next_cursor , %mark_lines_start ]
132- %line_ptr = getelementptr i8 , ptr addrspace (1 ) %line_map_ptr , i64 %current
133- store i8 1 , ptr addrspace (1 ) %line_ptr , align 1
134- %next = add i64 %current , 1
135- %continue = icmp ult i64 %next , %end_cursor
136- br i1 %continue , label %mark_lines , label %finish_fast_path
137-
138- finish_fast_path:
139- ; Calculate return address (block + cursor * LINE_SIZE)
140- %base_offset = mul i64 %cursor , 128
141- %result_addr = getelementptr i8 , ptr addrspace (1 ) %block , i64 %base_offset
112+ check_current_line:
113+ ; Check whether the allocation fits in the remainder of the current line
114+ ; let current_line_remains = self.cursor.align_offset(LINE_SIZE);
115+ %current_line_occupied = and i64 %cursor_i64 , 127
116+ %current_line_remains = sub i64 128 , %current_line_occupied
117+
142118
119+ ; call void @printi64ln(i64 %current_line_remains)
120+ ; call void @printi64ln(i64 %alloc_size)
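121+ ; check if alloc_size <= current_line_remains && current_line_remains != 0 (current_line_remains = 128 - (cursor & 127) is always in 1..=128 here, so the != 0 test is always true)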
122+ %alloc_size_le_remains = icmp ule i64 %alloc_size , %current_line_remains
123+ %current_line_remains_ne_0 = icmp ne i64 %current_line_remains , 0
124+ %alloc_size_le_remains_and_ne_0 = and i1 %alloc_size_le_remains , %current_line_remains_ne_0
125+ br i1 %alloc_size_le_remains_and_ne_0 , label %fast_path , label %check_remaining
126+
127+ check_remaining:
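128+ ; Check if 128 <= hole_end - cursor (distance measured from the original cursor, before it is advanced by current_line_remains below; NOTE(review): this appears sufficient only if hole_end is 128-byte aligned - confirm)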
129+ %hole_end_minus_cursor_ge_128 = icmp uge i64 %hole_end_minus_cursor , 128
130+
131+ ; self.cursor = self.cursor.add(current_line_remains);
132+ %new_cursor_i = add i64 %cursor_i64 , %current_line_remains
133+ %new_cursor = inttoptr i64 %new_cursor_i to ptr addrspace (1 )
134+ br i1 %hole_end_minus_cursor_ge_128 , label %fast_path , label %call_slowpath
135+
136+
137+ fast_path:
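138+ ; Select the allocation address: the original cursor when the object fits in the current line, otherwise the cursor advanced to the next 128-byte line boundary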
139+ %cursor_phi = phi ptr addrspace (1 ) [ %cursor , %check_current_line ], [ %new_cursor , %check_remaining ]
140+
141+ %cursor_phi_i = ptrtoint ptr addrspace (1 ) %cursor_phi to i64
143142 ; Update cursor
144- %new_cursor = add i64 %cursor , %line_size
145- store i64 %new_cursor , ptr addrspace (1 ) %cursor_ptr , align 8
146-
147- ; Update next_hole_size
148- %new_hole_size = sub i64 %next_hole_size , %line_size
149- store i64 %new_hole_size , ptr addrspace (1 ) %next_hole_size_ptr , align 8
150-
151- ; if new_hole_size == 0, update hole_num to hole_num - 1
152- %hole_size_eq_0 = icmp eq i64 %new_hole_size , 0
153- br i1 %hole_size_eq_0 , label %update_hole_num , label %finish_fast_path_2
154-
155- update_hole_num:
156- %new_hole_num = sub i64 %hole_num , 1
157- store i64 %new_hole_num , ptr addrspace (1 ) %hole_num_ptr , align 8
158- br label %finish_fast_path_2
159-
160- finish_fast_path_2:
161- ; Update bytes_allocated_since_last_gc (the size should be line_size * 128)
162- %bytes_allocated_since_last_gc = load i64 , ptr %bytes_allocated_since_last_gc_ptr , align 8
163- %size_128 = mul i64 %line_size , 128
164- %new_bytes_allocated = add i64 %size_128 , %bytes_allocated_since_last_gc
165- store i64 %new_bytes_allocated , ptr %bytes_allocated_since_last_gc_ptr , align 8
166- ret ptr addrspace (1 ) %result_addr
143+ %new_cursor_after_alloc_i = add i64 %cursor_phi_i , %alloc_size
144+ %new_cursor_after_alloc = inttoptr i64 %new_cursor_after_alloc_i to ptr addrspace (1 )
145+ store ptr addrspace (1 ) %new_cursor_after_alloc , ptr addrspace (1 ) %cursor_ptr , align 8
146+ ; call void @printi64ln(i64 4)
147+ ; call void @printi64ln(i64 %cursor_phi_i)
148+ ret ptr addrspace (1 ) %cursor_phi
167149}
168150
169151
0 commit comments