Skip to content

Commit 486bf54

Browse files
committed
NVMe/NVMeOF: Patch NVMe/NVMeOF driver to support GDS on Linux 5.19 Kernel
With this change, the NVMe and NVMeOF drivers are enabled to support GPUDirect Storage (GDS). The change is in the NVMe and NVMe RDMA map_data() and unmap_data() paths, where the I/O request is first intercepted to check for GDS pages. If the request contains GDS pages, it is served by the GDS driver component called nvidia-fs; otherwise it is served by the standard NVMe driver code. Signed-off-by: Sourab Gupta <[email protected]> Acked-by: Rebanta Mitra <[email protected]> Acked-by: Prashant Prabhu <[email protected]>
1 parent db596dd commit 486bf54

File tree

8 files changed

+472
-1
lines changed

8 files changed

+472
-1
lines changed

drivers/nvme/host/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: GPL-2.0
22

33
ccflags-y += -I$(src)
4-
4+
ccflags-y += -DCONFIG_NVFS
55
obj-$(CONFIG_NVME_CORE) += nvme-core.o
66
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
77
obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o
@@ -18,10 +18,12 @@ nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
1818
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
1919

2020
nvme-y += pci.o
21+
nvme-y += nvfs-dma.o
2122

2223
nvme-fabrics-y += fabrics.o
2324

2425
nvme-rdma-y += rdma.o
26+
nvme-rdma-y += nvfs-rdma.o
2527

2628
nvme-fc-y += fc.o
2729

drivers/nvme/host/nvfs-dma.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifdef CONFIG_NVFS
16+
#define MODULE_PREFIX nvme_v1
17+
#include "nvfs.h"
18+
19+
struct nvfs_dma_rw_ops *nvfs_ops;
20+
21+
atomic_t nvfs_shutdown = ATOMIC_INIT(1);
22+
23+
DEFINE_PER_CPU(long, nvfs_n_ops);
24+
25+
// must have for compatibility
26+
#define NVIDIA_FS_COMPAT_FT(ops) \
27+
(NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) && NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops))
28+
29+
// protected via nvfs_module_mutex
30+
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops)
31+
{
32+
if (NVIDIA_FS_COMPAT_FT(ops)) {
33+
nvfs_ops = ops;
34+
atomic_set(&nvfs_shutdown, 0);
35+
return 0;
36+
} else
37+
return -EOPNOTSUPP;
38+
39+
}
40+
EXPORT_SYMBOL(REGISTER_FUNC);
41+
42+
// protected via nvfs_module_mutex
43+
void UNREGISTER_FUNC(void)
44+
{
45+
(void) atomic_cmpxchg(&nvfs_shutdown, 0, 1);
46+
do {
47+
msleep(NVFS_HOLD_TIME_MS);
48+
} while (nvfs_count_ops());
49+
nvfs_ops = NULL;
50+
}
51+
EXPORT_SYMBOL(UNREGISTER_FUNC);
52+
#endif

drivers/nvme/host/nvfs-dma.h

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef NVFS_DMA_H
16+
#define NVFS_DMA_H
17+
18+
static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
19+
struct request *req, struct nvme_rw_command *cmnd);
20+
21+
static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
22+
struct request *req, struct nvme_rw_command *cmd, int entries);
23+
24+
static bool nvme_nvfs_unmap_data(struct nvme_dev *dev, struct request *req)
25+
{
26+
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
27+
enum dma_data_direction dma_dir = rq_dma_dir(req);
28+
29+
if (!iod || !iod->nents)
30+
return false;
31+
if (iod->sg && !is_pci_p2pdma_page(sg_page(iod->sg)) &&
32+
!blk_integrity_rq(req) &&
33+
!iod->dma_len &&
34+
nvfs_ops != NULL) {
35+
int count;
36+
37+
count = nvfs_ops->nvfs_dma_unmap_sg(dev->dev, iod->sg, iod->nents,
38+
dma_dir);
39+
40+
if (!count)
41+
return false;
42+
43+
nvfs_put_ops();
44+
return true;
45+
}
46+
return false;
47+
}
48+
49+
/*
 * Attempt to map a request's data through the registered nvfs ops.
 *
 * @dev:        NVMe PCI device the request is issued on.
 * @req:        block request to map.
 * @cmnd:       NVMe command whose rw part receives the PRP/SGL setup.
 * @is_nvfs_io: set to true once nvfs_blk_rq_map_sg() returned a non-zero
 *              result, i.e. the request was claimed by nvfs (also on the
 *              subsequent error paths); false otherwise.
 *
 * Returns:
 *   BLK_STS_RESOURCE  request has integrity data, nvfs_get_ops() failed,
 *                     or the scatterlist allocation failed
 *                     (*is_nvfs_io == false);
 *   BLK_STS_IOERR     nvfs mapping returned 0 (*is_nvfs_io == false) or
 *                     reported NVFS_IO_ERR / NVFS_CPU_REQ;
 *   otherwise the status of nvme_pci_setup_sgls()/nvme_pci_setup_prps().
 *
 * Every failure after nvfs_get_ops() succeeded releases iod->sg back to
 * the mempool and calls nvfs_put_ops().
 */
static blk_status_t nvme_nvfs_map_data(struct nvme_dev *dev, struct request *req,
		struct nvme_command *cmnd, bool *is_nvfs_io)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct request_queue *q = req->q;
	enum dma_data_direction dma_dir = rq_dma_dir(req);
	blk_status_t ret = BLK_STS_RESOURCE;
	int nr_mapped;

	*is_nvfs_io = false;

	if (blk_integrity_rq(req) || !nvfs_get_ops())
		return ret;

	iod->dma_len = 0;
	iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
	if (!iod->sg) {
		nvfs_put_ops();
		return BLK_STS_RESOURCE;
	}

	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));

	/* Associate the bio pages with the scatterlist. */
	iod->nents = nvfs_ops->nvfs_blk_rq_map_sg(q, req, iod->sg);
	if (!iod->nents)
		goto out_release;	/* *is_nvfs_io stays false */

	*is_nvfs_io = true;

	if (unlikely(iod->nents == NVFS_IO_ERR)) {
		pr_err("%s: failed to map sg_nents=:%d\n", __func__, iod->nents);
		goto out_release;
	}

	nr_mapped = nvfs_ops->nvfs_dma_map_sg_attrs(dev->dev,
						    iod->sg,
						    iod->nents,
						    dma_dir,
						    DMA_ATTR_NO_WARN);

	if (unlikely(nr_mapped == NVFS_IO_ERR)) {
		pr_err("%s: failed to dma map sglist=:%d\n", __func__, iod->nents);
		goto out_release;
	}

	if (unlikely(nr_mapped == NVFS_CPU_REQ)) {
		/*
		 * Must bail out here: the scatterlist is about to be freed,
		 * so continuing to the PRP/SGL setup below would use (and
		 * possibly free again) memory already returned to the pool.
		 */
		WARN_ON(1);
		goto out_release;
	}

	iod->use_sgl = nvme_pci_use_sgls(dev, req);
	if (iod->use_sgl) {
		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
	} else {
		/* Push the DMA addresses into the command as PRPs. */
		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
	}

	if (ret != BLK_STS_OK) {
		nvme_nvfs_unmap_data(dev, req);
		mempool_free(iod->sg, dev->iod_mempool);
	}
	return ret;

out_release:
	mempool_free(iod->sg, dev->iod_mempool);
	nvfs_put_ops();
	return BLK_STS_IOERR;
}
121+
122+
#endif /* NVFS_DMA_H */

drivers/nvme/host/nvfs-rdma.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifdef CONFIG_NVFS
16+
#define MODULE_PREFIX nvme_rdma_v1
17+
#include "nvfs.h"
18+
19+
struct nvfs_dma_rw_ops *nvfs_ops;
20+
21+
atomic_t nvfs_shutdown = ATOMIC_INIT(1);
22+
23+
DEFINE_PER_CPU(long, nvfs_n_ops);
24+
25+
// must have for compatibility
26+
#define NVIDIA_FS_COMPAT_FT(ops) \
27+
(NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) && NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops))
28+
29+
// protected via nvfs_module_mutex
30+
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops)
31+
{
32+
if (NVIDIA_FS_COMPAT_FT(ops)) {
33+
nvfs_ops = ops;
34+
atomic_set(&nvfs_shutdown, 0);
35+
return 0;
36+
} else
37+
return -EOPNOTSUPP;
38+
39+
}
40+
EXPORT_SYMBOL(REGISTER_FUNC);
41+
42+
// protected via nvfs_module_mutex
43+
void UNREGISTER_FUNC(void)
44+
{
45+
(void) atomic_cmpxchg(&nvfs_shutdown, 0, 1);
46+
do {
47+
msleep(NVFS_HOLD_TIME_MS);
48+
} while (nvfs_count_ops());
49+
nvfs_ops = NULL;
50+
}
51+
EXPORT_SYMBOL(UNREGISTER_FUNC);
52+
#endif

drivers/nvme/host/nvfs-rdma.h

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef NVFS_RDMA_H
16+
#define NVFS_RDMA_H
17+
18+
static bool nvme_rdma_nvfs_unmap_data(struct ib_device *ibdev,
19+
struct request *rq)
20+
21+
{
22+
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
23+
enum dma_data_direction dma_dir = rq_dma_dir(rq);
24+
int count;
25+
26+
if (!blk_integrity_rq(rq) && nvfs_ops != NULL) {
27+
count = nvfs_ops->nvfs_dma_unmap_sg(ibdev->dma_device, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
28+
dma_dir);
29+
if (count) {
30+
nvfs_put_ops();
31+
#ifdef HAVE_SG_ALLOC_TABLE_CHAINED_NENTS_FIRST_CHUNK_PARAM
32+
sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
33+
#else
34+
sg_free_table_chained(&req->data_sgl.sg_table, true);
35+
#endif
36+
37+
return true;
38+
}
39+
}
40+
return false;
41+
}
42+
43+
/*
 * Attempt to map an RDMA request's data through the registered nvfs ops.
 *
 * @ibdev:      IB device whose dma_device is used for the mapping.
 * @rq:         block request to map.
 * @is_nvfs_io: set to true once nvfs_blk_rq_map_sg() returned a non-zero
 *              result (also on the subsequent error paths); false when
 *              the caller should use the regular CPU path.
 * @count:      receives the result of the last nvfs mapping call made
 *              (0 when nvfs was not used).
 *
 * Returns 0 when the request was mapped by nvfs or should fall back to
 * the CPU path (distinguished by *is_nvfs_io), -EIO when nvfs reported
 * NVFS_IO_ERR or NVFS_CPU_REQ.  The ops reference taken by nvfs_get_ops()
 * is dropped on every path except a successful nvfs mapping.
 */
static int nvme_rdma_nvfs_map_data(struct ib_device *ibdev, struct request *rq,
		bool *is_nvfs_io, int *count)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	enum dma_data_direction dma_dir = rq_dma_dir(rq);

	*is_nvfs_io = false;
	*count = 0;

	/* Integrity requests and an unavailable nvfs go to the CPU path. */
	if (blk_integrity_rq(rq) || !nvfs_get_ops())
		return 0;

	/* Associate the bio pages with the scatterlist. */
	*count = nvfs_ops->nvfs_blk_rq_map_sg(rq->q, rq,
					      req->data_sgl.sg_table.sgl);
	if (!*count) {
		nvfs_put_ops();
		return 0;	/* fall back to the CPU path */
	}

	*is_nvfs_io = true;
	if (unlikely(*count == NVFS_IO_ERR)) {
		/*
		 * Log the failing result; req->data_sgl.nents has not been
		 * assigned from this mapping yet and would be stale here.
		 */
		pr_err("%s: failed to map sg_nents=:%d\n", __func__, *count);
		goto out_err;
	}
	req->data_sgl.nents = *count;

	*count = nvfs_ops->nvfs_dma_map_sg_attrs(ibdev->dma_device,
						 req->data_sgl.sg_table.sgl,
						 req->data_sgl.nents,
						 dma_dir,
						 DMA_ATTR_NO_WARN);

	if (unlikely(*count == NVFS_IO_ERR)) {
		pr_err("%s: failed to dma map sglist=:%d\n", __func__,
		       req->data_sgl.nents);
		goto out_err;
	}

	if (unlikely(*count == NVFS_CPU_REQ)) {
		/*
		 * Warn and fail the request instead of BUG(): this matches
		 * the PCI path and makes the -EIO return reachable.
		 */
		WARN_ON(1);
		goto out_err;
	}

	return 0;

out_err:
	nvfs_put_ops();
	return -EIO;
}
93+
94+
#endif

0 commit comments

Comments
 (0)