
Commit d3086bf

hghimira authored and piotrkira committed
drm/xe/svm: Implement prefetch support for SVM ranges
This commit adds prefetch support for SVM ranges, utilizing the existing vm_bind ioctl functionality to achieve this.

v2:
- rebase

v3:
- use xa_for_each() instead of manual loop
- check range is valid and in preferred location before adding to xarray
- fix naming conventions
- fix return condition as -ENODATA instead of -EAGAIN (Matthew Brost)
- handle sparsely populated cpu vma range (Matthew Brost)

v4:
- fix end address to find next cpu vma in case of -ENOENT

v5:
- move find next vma logic to drm gpusvm layer
- avoid mixing declaration and logic

v6:
- use new function names
- move eviction logic to prefetch_ranges()

Cc: Matthew Brost <[email protected]>
Signed-off-by: Himal Prasad Ghimiray <[email protected]>
1 parent 9f8ae3b commit d3086bf
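For context, a minimal userspace sketch of how this path would be exercised through the existing vm_bind ioctl. The struct and flag names follow the xe uapi header (xe_drm.h); the fd, vm_id, address range and region instance are placeholders, and sync/error handling is elided.

#include <stdint.h>
#include <sys/ioctl.h>
#include "xe_drm.h"	/* struct drm_xe_vm_bind, DRM_XE_VM_BIND_OP_PREFETCH */

/* Sketch: ask the kernel to prefetch [addr, addr + size) of a VM into the
 * memory region identified by region_instance (0 means system memory). */
static int xe_prefetch_range(int fd, uint32_t vm_id, uint64_t addr,
			     uint64_t size, uint32_t region_instance)
{
	struct drm_xe_vm_bind bind = {
		.vm_id = vm_id,
		.num_binds = 1,
		.bind = {
			.op = DRM_XE_VM_BIND_OP_PREFETCH,
			.addr = addr,
			.range = size,
			.prefetch_mem_region_instance = region_instance,
		},
	};

	/* For SVM (CPU-address-mirror) VMAs the kernel now walks the covered
	 * SVM ranges and migrates/populates them before binding; see
	 * prefetch_ranges() in xe_vm.c below. */
	return ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
}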

File tree

2 files changed: +234, -21 lines


drivers/gpu/drm/xe/xe_pt.c
Lines changed: 45 additions & 13 deletions

@@ -1458,6 +1458,7 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
 	struct xe_vm *vm = pt_update->vops->vm;
 	struct xe_vma_ops *vops = pt_update->vops;
 	struct xe_vma_op *op;
+	unsigned long i;
 	int err;
 
 	err = xe_pt_pre_commit(pt_update);
@@ -1467,20 +1468,35 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
 	xe_svm_notifier_lock(vm);
 
 	list_for_each_entry(op, &vops->list, link) {
-		struct xe_svm_range *range = op->map_range.range;
+		struct xe_svm_range *range = NULL;
 
 		if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
 			continue;
 
-		xe_svm_range_debug(range, "PRE-COMMIT");
+		if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
+			xe_assert(vm->xe,
+				  xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)));
+			xa_for_each(&op->prefetch_range.range, i, range) {
+				xe_svm_range_debug(range, "PRE-COMMIT");
 
-		xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
-		xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
+				if (!xe_svm_range_pages_valid(range)) {
+					xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+					xe_svm_notifier_unlock(vm);
+					return -ENODATA;
+				}
+			}
+		} else {
+			xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
+			xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
+			range = op->map_range.range;
 
-		if (!xe_svm_range_pages_valid(range)) {
-			xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
-			xe_svm_notifier_unlock(vm);
-			return -EAGAIN;
+			xe_svm_range_debug(range, "PRE-COMMIT");
+
+			if (!xe_svm_range_pages_valid(range)) {
+				xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+				xe_svm_notifier_unlock(vm);
+				return -EAGAIN;
+			}
 		}
 	}
 
@@ -2065,11 +2081,20 @@ static int op_prepare(struct xe_vm *vm,
 	{
 		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
 
-		if (xe_vma_is_cpu_addr_mirror(vma))
-			break;
+		if (xe_vma_is_cpu_addr_mirror(vma)) {
+			struct xe_svm_range *range;
+			unsigned long i;
 
-		err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
-		pt_update_ops->wait_vm_kernel = true;
+			xa_for_each(&op->prefetch_range.range, i, range) {
+				err = bind_range_prepare(vm, tile, pt_update_ops,
+							 vma, range);
+				if (err)
+					return err;
+			}
+		} else {
+			err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
+			pt_update_ops->wait_vm_kernel = true;
+		}
 		break;
 	}
 	case DRM_GPUVA_OP_DRIVER:
@@ -2273,9 +2298,16 @@ static void op_commit(struct xe_vm *vm,
 	{
 		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
 
-		if (!xe_vma_is_cpu_addr_mirror(vma))
+		if (xe_vma_is_cpu_addr_mirror(vma)) {
+			struct xe_svm_range *range = NULL;
+			unsigned long i;
+
+			xa_for_each(&op->prefetch_range.range, i, range)
+				range_present_and_invalidated_tile(vm, range, tile->id);
+		} else {
 			bind_op_commit(vm, tile, pt_update_ops, vma, fence,
 				       fence2, false);
+		}
 		break;
 	}
 	case DRM_GPUVA_OP_DRIVER:
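The op->prefetch_range member used throughout these hunks is not defined in the two files shown here. Inferred from its usage (an XA_FLAGS_ALLOC xarray holding struct xe_svm_range pointers, a target region, and a range count), its shape is roughly the sketch below; the container struct name and its exact placement inside struct xe_vma_op (in xe_vm_types.h) are assumptions, not taken from the actual header.

/* Plausible shape of the per-op prefetch bookkeeping implied by this diff;
 * naming outside the diff is assumed. Kernel-side, so it would live next to
 * struct xe_vma_op and need <linux/types.h> and <linux/xarray.h>. */
struct xe_vma_op_prefetch_range {
	/** @range: xarray of struct xe_svm_range pointers covered by the prefetch */
	struct xarray range;
	/** @ranges_count: number of ranges stored in @range */
	u32 ranges_count;
	/** @region: memory region to prefetch to (0 means system memory) */
	u32 region;
};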

drivers/gpu/drm/xe/xe_vm.c
Lines changed: 189 additions & 8 deletions

@@ -798,10 +798,33 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
 }
 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
 
+static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
+{
+	struct xe_vma *vma;
+
+	vma = gpuva_to_vma(op->base.prefetch.va);
+
+	if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
+		xa_destroy(&op->prefetch_range.range);
+}
+
+static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
+{
+	struct xe_vma_op *op;
+
+	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
+		return;
+
+	list_for_each_entry(op, &vops->list, link)
+		xe_vma_svm_prefetch_op_fini(op);
+}
+
 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
 {
 	int i;
 
+	xe_vma_svm_prefetch_ops_fini(vops);
+
 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
 		kfree(vops->pt_update_ops[i].ops);
 }
@@ -2248,20 +2271,33 @@ static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
 	return true;
 }
 
+static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
+{
+	struct drm_gpuva_op *__op;
+
+	drm_gpuva_for_each_op(__op, ops) {
+		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+		xe_vma_svm_prefetch_op_fini(op);
+	}
+}
+
 /*
  * Create operations list from IOCTL arguments, setup operations fields so parse
  * and commit steps are decoupled from IOCTL arguments. This step can fail.
  */
 static struct drm_gpuva_ops *
-vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
-			 u64 bo_offset_or_userptr, u64 addr, u64 range,
+vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
+			 struct xe_bo *bo, u64 bo_offset_or_userptr,
+			 u64 addr, u64 range,
 			 u32 operation, u32 flags,
 			 u32 prefetch_region, u16 pat_index)
 {
 	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
 	struct drm_gpuva_ops *ops;
 	struct drm_gpuva_op *__op;
 	struct drm_gpuvm_bo *vm_bo;
+	u64 range_end = addr + range;
 	int err;
 
 	lockdep_assert_held_write(&vm->lock);
@@ -2323,14 +2359,77 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			op->map.invalidate_on_bind =
 				__xe_vm_needs_clear_scratch_pages(vm, flags);
 		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
-			op->prefetch.region = prefetch_region;
-		}
+			struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+			struct xe_svm_range *svm_range;
+			struct drm_gpusvm_ctx ctx;
+			struct xe_tile *tile;
+			u8 id, tile_mask = 0;
+			u32 i;
+
+			if (!xe_vma_is_cpu_addr_mirror(vma)) {
+				op->prefetch.region = prefetch_region;
+				break;
+			}
+
+			ctx.read_only = xe_vma_read_only(vma);
+			ctx.devmem_possible = IS_DGFX(vm->xe) &&
+					      IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
+
+			for_each_tile(tile, vm->xe, id)
+				tile_mask |= 0x1 << id;
+
+			xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
+			op->prefetch_range.region = prefetch_region;
+			op->prefetch_range.ranges_count = 0;
+alloc_next_range:
+			svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
+
+			if (PTR_ERR(svm_range) == -ENOENT) {
+				u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
+
+				addr = ret == ULONG_MAX ? 0 : ret;
+				if (addr)
+					goto alloc_next_range;
+				else
+					goto print_op_label;
+			}
+
+			if (IS_ERR(svm_range)) {
+				err = PTR_ERR(svm_range);
+				goto unwind_prefetch_ops;
+			}
+
+			if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region))
+				goto check_next_range;
+
+			err = xa_alloc(&op->prefetch_range.range,
+				       &i, svm_range, xa_limit_32b,
+				       GFP_KERNEL);
 
+			if (err)
+				goto unwind_prefetch_ops;
+
+			op->prefetch_range.ranges_count++;
+			vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
+check_next_range:
+			if (range_end > xe_svm_range_end(svm_range) &&
+			    xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
+				addr = xe_svm_range_end(svm_range);
+				goto alloc_next_range;
+			}
+		}
+print_op_label:
 		print_op(vm->xe, __op);
 	}
 
 	return ops;
+
+unwind_prefetch_ops:
+	xe_svm_prefetch_gpuva_ops_fini(ops);
+	drm_gpuva_ops_free(&vm->gpuvm, ops);
+	return ERR_PTR(err);
 }
+
 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
 
 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
@@ -2645,8 +2744,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
 			return err;
 		}
 
-		if (!xe_vma_is_cpu_addr_mirror(vma))
+		if (xe_vma_is_cpu_addr_mirror(vma))
+			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
+						      op->prefetch_range.ranges_count);
+		else
 			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
+
 		break;
 	default:
 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
@@ -2772,6 +2875,56 @@ static int check_ufence(struct xe_vma *vma)
 	return 0;
 }
 
+static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
+{
+	bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
+	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+	int err = 0;
+
+	struct xe_svm_range *svm_range;
+	struct drm_gpusvm_ctx ctx;
+	struct xe_tile *tile;
+	unsigned long i;
+	u32 region;
+
+	if (!xe_vma_is_cpu_addr_mirror(vma))
+		return 0;
+
+	region = op->prefetch_range.region;
+
+	ctx.read_only = xe_vma_read_only(vma);
+	ctx.devmem_possible = devmem_possible;
+	ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
+	ctx.devmem_only = region && devmem_possible;
+	ctx.timeslice_ms = 0;
+
+	/* TODO: Threading the migration */
+	xa_for_each(&op->prefetch_range.range, i, svm_range) {
+		if (!region)
+			xe_svm_range_migrate_to_smem(vm, svm_range);
+
+		if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
+			tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
+			err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx);
+			if (err) {
+				drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
+					vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+				return -ENODATA;
+			}
+		}
+
+		err = xe_svm_range_get_pages(vm, svm_range, &ctx);
+		if (err) {
+			if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
+				err = -ENODATA;
+			drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
+				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+			return err;
+		}
+	}
+	return err;
+}
+
 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 			    struct xe_vma_op *op)
 {
@@ -2809,7 +2962,12 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 	case DRM_GPUVA_OP_PREFETCH:
 	{
 		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
-		u32 region = op->prefetch.region;
+		u32 region;
+
+		if (xe_vma_is_cpu_addr_mirror(vma))
+			region = op->prefetch_range.region;
+		else
+			region = op->prefetch.region;
 
 		xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
 
@@ -2828,6 +2986,25 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
 	return err;
 }
 
+static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
+{
+	struct xe_vma_op *op;
+	int err;
+
+	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
+		return 0;
+
+	list_for_each_entry(op, &vops->list, link) {
+		if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
+			err = prefetch_ranges(vm, op);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
 					   struct xe_vm *vm,
 					   struct xe_vma_ops *vops)
@@ -3478,7 +3655,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
 		u16 pat_index = bind_ops[i].pat_index;
 
-		ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
+		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
						  addr, range, op, flags,
						  prefetch_region, pat_index);
 		if (IS_ERR(ops[i])) {
@@ -3511,6 +3688,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (err)
 		goto unwind_ops;
 
+	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
+	if (err)
+		goto unwind_ops;
+
 	fence = vm_bind_ioctl_ops_execute(vm, &vops);
 	if (IS_ERR(fence))
 		err = PTR_ERR(fence);
@@ -3580,7 +3761,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
 
 	xe_vma_ops_init(&vops, vm, q, NULL, 0);
 
-	ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
+	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, bo->size,
				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
				       vm->xe->pat.idx[cache_lvl]);
 	if (IS_ERR(ops)) {
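Because prefetch_ranges() converts transient VRAM-allocation and get-pages failures into -ENODATA with an explicit "retry from userspace" hint, a caller would typically retry the bind on that errno. A hedged sketch, reusing the hypothetical xe_prefetch_range() helper from above; the retry bound is arbitrary.

#include <errno.h>

/* Sketch: retry the prefetch bind while the kernel reports a transient
 * condition (ENODATA); give up after a few attempts. */
static int xe_prefetch_with_retry(int fd, uint32_t vm_id, uint64_t addr,
				  uint64_t size, uint32_t region_instance)
{
	int tries = 3, ret;

	do {
		ret = xe_prefetch_range(fd, vm_id, addr, size, region_instance);
	} while (ret < 0 && errno == ENODATA && --tries);

	return ret;
}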
