Skip to content

Commit 715914e

Browse files
committed
NVMe/NVMeOF: Patch NVMe/NVMeOF driver to support GDS on Linux 6.2 Kernel
With this change, the NVMe and NVMeOF drivers are enabled to support GPUDirect Storage (GDS). The change is around the nvme and nvme-rdma map_data() and unmap_data() paths, where each IO request is first intercepted to check for GDS pages: if the request carries GDS pages, it is served by the GDS driver component called nvidia-fs; otherwise it is served by the standard NVMe driver code.
1 parent 3d28f6c commit 715914e

File tree

8 files changed

+472
-1
lines changed

8 files changed

+472
-1
lines changed

drivers/nvme/host/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: GPL-2.0
22

33
ccflags-y += -I$(src)
4-
4+
ccflags-y += -DCONFIG_NVFS
55
obj-$(CONFIG_NVME_CORE) += nvme-core.o
66
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
77
obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o
@@ -20,10 +20,12 @@ nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
2020
nvme-core-$(CONFIG_NVME_AUTH) += auth.o
2121

2222
nvme-y += pci.o
23+
nvme-y += nvfs-dma.o
2324

2425
nvme-fabrics-y += fabrics.o
2526

2627
nvme-rdma-y += rdma.o
28+
nvme-rdma-y += nvfs-rdma.o
2729

2830
nvme-fc-y += fc.o
2931

drivers/nvme/host/nvfs-dma.c

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifdef CONFIG_NVFS
16+
#define MODULE_PREFIX nvme_v1
17+
#include "nvfs.h"
18+
19+
/* Callback table installed by REGISTER_FUNC(); reset to NULL by UNREGISTER_FUNC(). */
struct nvfs_dma_rw_ops *nvfs_ops;
20+
21+
/* Starts at 1; REGISTER_FUNC() sets it to 0 and UNREGISTER_FUNC() flips it back to 1. */
atomic_t nvfs_shutdown = ATOMIC_INIT(1);
22+
23+
/* Per-CPU long counter; presumably the tally read by nvfs_count_ops() -- confirm in nvfs.h. */
DEFINE_PER_CPU(long, nvfs_n_ops);
24+
25+
// features that must be present for compatibility
26+
#define NVIDIA_FS_COMPAT_FT(ops) \
27+
(NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) && NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops))
28+
29+
// protected via nvfs_module_mutex
30+
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops)
31+
{
32+
if (NVIDIA_FS_COMPAT_FT(ops)) {
33+
nvfs_ops = ops;
34+
atomic_set(&nvfs_shutdown, 0);
35+
return 0;
36+
} else
37+
return -EOPNOTSUPP;
38+
39+
40+
}
41+
EXPORT_SYMBOL(REGISTER_FUNC);
42+
43+
// protected via nvfs_module_mutex
44+
void UNREGISTER_FUNC(void)
45+
{
46+
(void) atomic_cmpxchg(&nvfs_shutdown, 0, 1);
47+
do{
48+
msleep(NVFS_HOLD_TIME_MS);
49+
} while (nvfs_count_ops());
50+
nvfs_ops = NULL;
51+
}
52+
EXPORT_SYMBOL(UNREGISTER_FUNC);
53+
#endif

drivers/nvme/host/nvfs-dma.h

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef NVFS_DMA_H
16+
#define NVFS_DMA_H
17+
18+
static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
19+
struct request *req, struct nvme_rw_command *cmnd);
20+
21+
static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
22+
struct request *req, struct nvme_rw_command *cmnd);
23+
24+
static bool nvme_nvfs_unmap_data(struct nvme_dev *dev, struct request *req)
25+
{
26+
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
27+
enum dma_data_direction dma_dir = rq_dma_dir(req);
28+
29+
if (!iod || !iod->sgt.nents)
30+
return false;
31+
32+
if (iod->sgt.sgl && !is_pci_p2pdma_page(sg_page(iod->sgt.sgl)) &&
33+
!blk_integrity_rq(req) &&
34+
!iod->dma_len &&
35+
nvfs_ops != NULL) {
36+
int count;
37+
count = nvfs_ops->nvfs_dma_unmap_sg(dev->dev, iod->sgt.sgl, iod->sgt.nents, dma_dir);
38+
if (!count)
39+
return false;
40+
41+
nvfs_put_ops();
42+
return true;
43+
}
44+
45+
return false;
46+
}
47+
48+
/*
 * Try to map a request's data through the registered nvfs callbacks.
 *
 * @dev:        NVMe device the request is issued on.
 * @req:        block request to map.
 * @cmnd:       NVMe command whose rw descriptor is filled in on success.
 * @is_nvfs_io: set to true once nvfs_blk_rq_map_sg() returns a non-zero
 *              value for the request; left false otherwise.
 *
 * Returns BLK_STS_RESOURCE when the nvfs path is not taken (integrity
 * request, no ops reference available, or scatterlist allocation failure),
 * BLK_STS_IOERR when a callback reports zero entries or NVFS_IO_ERR, and
 * otherwise the status of the PRP/SGL setup.
 *
 * On every failure after the scatterlist was allocated, the scatterlist is
 * returned to dev->iod_mempool. The ops reference is dropped on all of
 * those paths except PRP/SGL setup failure, where it is dropped only if
 * nvme_nvfs_unmap_data() reports success.
 */
static blk_status_t nvme_nvfs_map_data(struct nvme_dev *dev, struct request *req,
		struct nvme_command *cmnd, bool *is_nvfs_io)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct request_queue *q = req->q;
	enum dma_data_direction dma_dir = rq_dma_dir(req);
	blk_status_t ret;
	int nr_mapped;

	*is_nvfs_io = false;

	if (blk_integrity_rq(req) || !nvfs_get_ops())
		return BLK_STS_RESOURCE;

	iod->dma_len = 0;
	iod->sgt.sgl = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
	if (!iod->sgt.sgl) {
		nvfs_put_ops();
		return BLK_STS_RESOURCE;
	}

	sg_init_table(iod->sgt.sgl, blk_rq_nr_phys_segments(req));

	/* Associate the bio pages with the scatterlist. */
	iod->sgt.orig_nents = nvfs_ops->nvfs_blk_rq_map_sg(q, req, iod->sgt.sgl);
	if (!iod->sgt.orig_nents)
		goto out_release;	/* *is_nvfs_io stays false */

	*is_nvfs_io = true;

	if (unlikely(iod->sgt.orig_nents == NVFS_IO_ERR)) {
		/*
		 * iod->sgt.nents has not been assigned for this request yet,
		 * so report the number of segments we attempted to map.
		 */
		pr_err("%s: failed to map sg_nents=:%d\n", __func__,
		       blk_rq_nr_phys_segments(req));
		goto out_release;
	}

	nr_mapped = nvfs_ops->nvfs_dma_map_sg_attrs(dev->dev,
						    iod->sgt.sgl,
						    iod->sgt.orig_nents,
						    dma_dir,
						    DMA_ATTR_NO_WARN);

	if (unlikely(nr_mapped == NVFS_IO_ERR)) {
		/* Log the entry count handed to the DMA mapper, not stale nents. */
		pr_err("%s: failed to dma map sglist=:%u\n", __func__,
		       iod->sgt.orig_nents);
		goto out_release;
	}

	if (unlikely(nr_mapped == NVFS_CPU_REQ)) {
		mempool_free(iod->sgt.sgl, dev->iod_mempool);
		nvfs_put_ops();
		BUG();
	}

	iod->sgt.nents = nr_mapped;

	/* TBD: not tested on SGL mode supporting drive */
	if (nvme_pci_use_sgls(dev, req, iod->sgt.nents))
		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
	else
		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);

	if (ret != BLK_STS_OK) {
		nvme_nvfs_unmap_data(dev, req);
		mempool_free(iod->sgt.sgl, dev->iod_mempool);
	}
	return ret;

out_release:
	mempool_free(iod->sgt.sgl, dev->iod_mempool);
	nvfs_put_ops();
	return BLK_STS_IOERR;
}
122+
123+
#endif /* NVFS_DMA_H */

drivers/nvme/host/nvfs-rdma.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifdef CONFIG_NVFS
16+
#define MODULE_PREFIX nvme_rdma_v1
17+
#include "nvfs.h"
18+
19+
/* Callback table installed by REGISTER_FUNC(); reset to NULL by UNREGISTER_FUNC(). */
struct nvfs_dma_rw_ops *nvfs_ops;
20+
21+
/* Starts at 1; REGISTER_FUNC() sets it to 0 and UNREGISTER_FUNC() flips it back to 1. */
atomic_t nvfs_shutdown = ATOMIC_INIT(1);
22+
23+
/* Per-CPU long counter; presumably the tally read by nvfs_count_ops() -- confirm in nvfs.h. */
DEFINE_PER_CPU(long, nvfs_n_ops);
24+
25+
// features that must be present for compatibility
26+
#define NVIDIA_FS_COMPAT_FT(ops) \
27+
(NVIDIA_FS_CHECK_FT_SGLIST_PREP(ops) && NVIDIA_FS_CHECK_FT_SGLIST_DMA(ops))
28+
29+
// protected via nvfs_module_mutex
30+
int REGISTER_FUNC(struct nvfs_dma_rw_ops *ops)
31+
{
32+
if (NVIDIA_FS_COMPAT_FT(ops)) {
33+
nvfs_ops = ops;
34+
atomic_set(&nvfs_shutdown, 0);
35+
return 0;
36+
} else
37+
return -EOPNOTSUPP;
38+
39+
}
40+
EXPORT_SYMBOL(REGISTER_FUNC);
41+
42+
// protected via nvfs_module_mutex
43+
void UNREGISTER_FUNC(void)
44+
{
45+
(void) atomic_cmpxchg(&nvfs_shutdown, 0, 1);
46+
do{
47+
msleep(NVFS_HOLD_TIME_MS);
48+
} while (nvfs_count_ops());
49+
nvfs_ops = NULL;
50+
}
51+
EXPORT_SYMBOL(UNREGISTER_FUNC);
52+
#endif

drivers/nvme/host/nvfs-rdma.h

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
*
5+
* This program is free software; you can redistribute it and/or modify it
6+
* under the terms and conditions of the GNU General Public License,
7+
* version 2, as published by the Free Software Foundation.
8+
*
9+
* This program is distributed in the hope it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12+
* more details.
13+
*/
14+
15+
#ifndef NVFS_RDMA_H
16+
#define NVFS_RDMA_H
17+
18+
static bool nvme_rdma_nvfs_unmap_data(struct ib_device *ibdev,
19+
struct request *rq)
20+
21+
{
22+
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
23+
enum dma_data_direction dma_dir = rq_dma_dir(rq);
24+
int count;
25+
26+
if (!blk_integrity_rq(rq) && nvfs_ops != NULL) {
27+
count = nvfs_ops->nvfs_dma_unmap_sg(ibdev->dma_device, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
28+
dma_dir);
29+
if (count) {
30+
nvfs_put_ops();
31+
#ifdef HAVE_SG_ALLOC_TABLE_CHAINED_NENTS_FIRST_CHUNK_PARAM
32+
sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
33+
#else
34+
sg_free_table_chained(&req->data_sgl.sg_table, true);
35+
#endif
36+
37+
return true;
38+
}
39+
}
40+
return false;
41+
}
42+
43+
/*
 * Try to map a request's data through the registered nvfs callbacks.
 *
 * @ibdev:      IB device whose dma_device is used for the mapping.
 * @rq:         block request to map.
 * @is_nvfs_io: set to true once nvfs_blk_rq_map_sg() returns a non-zero
 *              value for the request; left false otherwise.
 *
 * Returns 0 when the request was mapped, and also when the nvfs path is
 * not taken (integrity request, no ops reference available, or the
 * callback reports zero entries); the caller tells these apart through
 * @is_nvfs_io. Returns -EIO when a callback reports NVFS_IO_ERR. The ops
 * reference is dropped on every path except a successful mapping.
 *
 * NOTE(review): the entry count returned by nvfs_dma_map_sg_attrs() is not
 * passed back to the caller -- confirm the caller does not need it.
 */
static int nvme_rdma_nvfs_map_data(struct ib_device *ibdev, struct request *rq, bool *is_nvfs_io)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	enum dma_data_direction dma_dir = rq_dma_dir(rq);
	int count;

	*is_nvfs_io = false;

	/* Fall to CPU path. */
	if (blk_integrity_rq(rq) || !nvfs_get_ops())
		return 0;

	/* Associate the bio pages with the scatterlist. */
	count = nvfs_ops->nvfs_blk_rq_map_sg(rq->q, rq, req->data_sgl.sg_table.sgl);
	if (!count) {
		nvfs_put_ops();
		return 0;	/* fall to CPU path */
	}

	*is_nvfs_io = true;
	if (unlikely(count == NVFS_IO_ERR)) {
		nvfs_put_ops();
		/*
		 * req->data_sgl.nents has not been assigned for this request
		 * yet, so report the number of segments we attempted to map.
		 */
		pr_err("%s: failed to map sg_nents=:%d\n", __func__,
		       blk_rq_nr_phys_segments(rq));
		return -EIO;
	}
	req->data_sgl.nents = count;

	count = nvfs_ops->nvfs_dma_map_sg_attrs(ibdev->dma_device,
						req->data_sgl.sg_table.sgl,
						req->data_sgl.nents,
						dma_dir,
						DMA_ATTR_NO_WARN);

	if (unlikely(count == NVFS_IO_ERR)) {
		nvfs_put_ops();
		/* Log the failure, as the PCI mapping path does. */
		pr_err("%s: failed to dma map sglist=:%d\n", __func__,
		       req->data_sgl.nents);
		return -EIO;
	}

	if (unlikely(count == NVFS_CPU_REQ)) {
		nvfs_put_ops();
		BUG();
		return -EIO;
	}

	return 0;
}
93+
94+
#endif

0 commit comments

Comments
 (0)