Skip to content

Commit d838f6a

Browse files
authored
Merge pull request #325 from abergeron/switch
Switch gs and ls
2 parents dc5508f + 7c1b198 commit d838f6a

17 files changed

+54
-54
lines changed

pygpu/gpuarray.pxd

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,9 @@ cdef extern from "gpuarray/kernel.h":
127127
unsigned int argcount, const int *types, int flags, char **err_str)
128128
void GpuKernel_clear(_GpuKernel *k)
129129
gpucontext *GpuKernel_context(_GpuKernel *k)
130-
int GpuKernel_sched(_GpuKernel *k, size_t n, size_t *ls, size_t *gs)
130+
int GpuKernel_sched(_GpuKernel *k, size_t n, size_t *gs, size_t *ls)
131131
int GpuKernel_call(_GpuKernel *k, unsigned int n,
132-
const size_t *ls, const size_t *gs,
132+
const size_t *gs, const size_t *ls,
133133
size_t shared, void **args)
134134
int GpuKernel_binary(_GpuKernel *, size_t *, void **)
135135

@@ -265,9 +265,9 @@ cdef int kernel_init(GpuKernel k, gpucontext *ctx,
265265
int flags) except -1
266266
cdef int kernel_clear(GpuKernel k) except -1
267267
cdef gpucontext *kernel_context(GpuKernel k) except NULL
268-
cdef int kernel_sched(GpuKernel k, size_t n, size_t *ls, size_t *gs) except -1
268+
cdef int kernel_sched(GpuKernel k, size_t n, size_t *gs, size_t *ls) except -1
269269
cdef int kernel_call(GpuKernel k, unsigned int n,
270-
const size_t *ls, const size_t *gs,
270+
const size_t *gs, const size_t *ls,
271271
size_t shared, void **args) except -1
272272
cdef int kernel_binary(GpuKernel k, size_t *, void **) except -1
273273
cdef int kernel_property(GpuKernel k, int prop_id, void *res) except -1
@@ -346,5 +346,5 @@ cdef api class GpuKernel [type PyGpuKernelType, object PyGpuKernelObject]:
346346
cdef void **callbuf
347347
cdef object __weakref__
348348

349-
cdef do_call(self, py_n, py_ls, py_gs, py_args, size_t shared)
349+
cdef do_call(self, py_n, py_gs, py_ls, py_args, size_t shared)
350350
cdef _setarg(self, unsigned int index, int typecode, object o)

pygpu/gpuarray.pyx

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -446,16 +446,16 @@ cdef gpucontext *kernel_context(GpuKernel k) except NULL:
446446
raise GpuArrayException, "Invalid kernel or destroyed context"
447447
return res
448448

449-
cdef int kernel_sched(GpuKernel k, size_t n, size_t *ls, size_t *gs) except -1:
449+
cdef int kernel_sched(GpuKernel k, size_t n, size_t *gs, size_t *ls) except -1:
450450
cdef int err
451-
err = GpuKernel_sched(&k.k, n, ls, gs)
451+
err = GpuKernel_sched(&k.k, n, gs, ls)
452452
if err != GA_NO_ERROR:
453453
raise get_exc(err), kernel_error(k, err)
454454

455-
cdef int kernel_call(GpuKernel k, unsigned int n, const size_t *ls,
456-
const size_t *gs, size_t shared, void **args) except -1:
455+
cdef int kernel_call(GpuKernel k, unsigned int n, const size_t *gs,
456+
const size_t *ls, size_t shared, void **args) except -1:
457457
cdef int err
458-
err = GpuKernel_call(&k.k, n, ls, gs, shared, args)
458+
err = GpuKernel_call(&k.k, n, gs, ls, shared, args)
459459
if err != GA_NO_ERROR:
460460
raise get_exc(err), kernel_error(k, err)
461461

@@ -2113,10 +2113,10 @@ cdef class GpuKernel:
21132113
sure to test against the size of your data.
21142114
21152115
If you want more control over thread allocation you can use the
2116-
`ls` and `gs` parameters like so::
2116+
`gs` and `ls` parameters like so::
21172117
21182118
k = GpuKernel(...)
2119-
k(param1, param2, ls=ls, gs=gs)
2119+
k(param1, param2, gs=gs, ls=ls)
21202120
21212121
If you choose to use this interface, make sure to stay within the
21222122
limits of `k.maxlsize` and `ctx.maxgsize` or the call will fail.
@@ -2200,12 +2200,12 @@ cdef class GpuKernel:
22002200
finally:
22012201
free(_types)
22022202

2203-
def __call__(self, *args, n=None, ls=None, gs=None, shared=0):
2203+
def __call__(self, *args, n=None, gs=None, ls=None, shared=0):
22042204
if n == None and (ls == None or gs == None):
22052205
raise ValueError, "Must specify size (n) or both gs and ls"
2206-
self.do_call(n, ls, gs, args, shared)
2206+
self.do_call(n, gs, ls, args, shared)
22072207

2208-
cdef do_call(self, py_n, py_ls, py_gs, py_args, size_t shared):
2208+
cdef do_call(self, py_n, py_gs, py_ls, py_args, size_t shared):
22092209
cdef size_t n
22102210
cdef size_t gs[3]
22112211
cdef size_t ls[3]
@@ -2272,8 +2272,8 @@ cdef class GpuKernel:
22722272
if nd != 1:
22732273
raise ValueError, "n is specified and nd != 1"
22742274
n = py_n
2275-
kernel_sched(self, n, &ls[0], &gs[0])
2276-
kernel_call(self, nd, ls, gs, shared, self.callbuf)
2275+
kernel_sched(self, n, &gs[0], &ls[0])
2276+
kernel_call(self, nd, gs, ls, shared, self.callbuf)
22772277

22782278
cdef _setarg(self, unsigned int index, int typecode, object o):
22792279
if typecode == GA_BUFFER:

pygpu/reduction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ def __call__(self, *args, **kwargs):
282282
kargs.append(offsets[i])
283283
kargs.extend(strs[i])
284284

285-
k(*kargs, ls=ls, gs=gs)
285+
k(*kargs, gs=gs, ls=ls)
286286

287287
return out
288288

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
MAJOR = 0
77
MINOR = 6
88
PATCH = 0
9-
SUFFIX = 'rc1'
9+
SUFFIX = 'rc2'
1010
FULLVERSION = '%d.%d.%d%s' % (MAJOR, MINOR, PATCH, SUFFIX)
1111

1212
try:

src/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ set_target_properties(gpuarray PROPERTIES
8888
INSTALL_NAME_DIR ${CMAKE_INSTALL_PREFIX}/lib
8989
MACOSX_RPATH OFF
9090
# This is the shared library version
91-
VERSION 1.0
91+
VERSION 2.0
9292
)
9393

9494
add_library(gpuarray-static STATIC ${GPUARRAY_SRC})

src/gpuarray/buffer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -482,15 +482,15 @@ GPUARRAY_PUBLIC int gpukernel_setarg(gpukernel *k, unsigned int i, void *a);
482482
*
483483
* \param k kernel
484484
* \param n number of dimensions of grid/block
485-
* \param bs block sizes for this call (also known as local size)
486485
* \param gs grid sizes for this call (also known as global size)
486+
* \param ls block sizes for this call (also known as local size)
487487
* \param shared amount of dynamic shared memory to reserve
488488
* \param args table of pointers to each argument (optional).
489489
*
490490
* \returns GA_NO_ERROR or an error code if an error occurred.
491491
*/
492492
GPUARRAY_PUBLIC int gpukernel_call(gpukernel *k, unsigned int n,
493-
const size_t *ls, const size_t *gs,
493+
const size_t *gs, const size_t *ls,
494494
size_t shared, void **args);
495495

496496
/**

src/gpuarray/config.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
/* The following included file should have been generated by CMake. */
55
#include <gpuarray/abi_version.h>
6-
#define GPUARRAY_API_VERSION 0
6+
#define GPUARRAY_API_VERSION 1
77

88
#ifdef GPUARRAY_SHARED
99
#ifdef _WIN32

src/gpuarray/kernel.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,24 +87,24 @@ GPUARRAY_PUBLIC int GpuKernel_setarg(GpuKernel *k, unsigned int i, void *val);
8787
*
8888
* \param k the kernel to schedule for
8989
* \param n number of elements to handle
90-
* \param ls local size (in/out)
9190
* \param gs grid size (in/out)
91+
* \param ls local size (in/out)
9292
*/
9393
GPUARRAY_PUBLIC int GpuKernel_sched(GpuKernel *k, size_t n,
94-
size_t *ls, size_t *gs);
94+
size_t *gs, size_t *ls);
9595

9696
/**
9797
* Launch the execution of a kernel.
9898
*
9999
* \param k the kernel to launch
100100
* \param n dimensionality of the grid/blocks
101-
* \param ls sizes of launch blocks
102101
* \param gs sizes of launch grid
102+
* \param ls sizes of launch blocks
103103
* \param shared amount of dynamic shared memory to allocate
104104
* \param args table of pointers to arguments
105105
*/
106106
GPUARRAY_PUBLIC int GpuKernel_call(GpuKernel *k, unsigned int n,
107-
const size_t *ls, const size_t *gs,
107+
const size_t *gs, const size_t *ls,
108108
size_t shared, void **args);
109109

110110
GPUARRAY_PUBLIC int GpuKernel_binary(const GpuKernel *k, size_t *sz,

src/gpuarray_array.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ int GpuArray_take1(GpuArray *a, const GpuArray *v, const GpuArray *i,
487487
if (err != GA_NO_ERROR)
488488
return err;
489489

490-
err = GpuKernel_sched(&k, n[0]*n[1], &ls[1], &gs[1]);
490+
err = GpuKernel_sched(&k, n[0]*n[1], &gs[1], &ls[1]);
491491
if (err != GA_NO_ERROR)
492492
goto out;
493493

@@ -521,7 +521,7 @@ int GpuArray_take1(GpuArray *a, const GpuArray *v, const GpuArray *i,
521521
GpuKernel_setarg(&k, argp++, &n[1]);
522522
GpuKernel_setarg(&k, argp++, errbuf);
523523

524-
err = GpuKernel_call(&k, 2, ls, gs, 0, NULL);
524+
err = GpuKernel_call(&k, 2, gs, ls, 0, NULL);
525525
if (check_error && err == GA_NO_ERROR) {
526526
err = gpudata_read(&kerr, errbuf, 0, sizeof(int));
527527
if (err == GA_NO_ERROR && kerr != 0) {

src/gpuarray_blas_cuda_cublas.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,9 +1099,9 @@ static int sgemvBatch(cb_order order, cb_transpose transA,
10991099
args[8] = &N;
11001100

11011101
if (transA == cb_no_trans) {
1102-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgemvBH_N_a1_b1_small, 2, ls, gs, 0, args);
1102+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgemvBH_N_a1_b1_small, 2, gs, ls, 0, args);
11031103
} else {
1104-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgemvBH_T_a1_b1_small, 2, ls, gs, 0, args);
1104+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgemvBH_T_a1_b1_small, 2, gs, ls, 0, args);
11051105
}
11061106

11071107
cuda_ops.buffer_release(Aa);
@@ -1223,9 +1223,9 @@ static int dgemvBatch(cb_order order, cb_transpose transA,
12231223
args[8] = &N;
12241224

12251225
if (transA == cb_no_trans) {
1226-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->dgemvBH_N_a1_b1_small, 2, ls, gs, 0, args);
1226+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->dgemvBH_N_a1_b1_small, 2, gs, ls, 0, args);
12271227
} else {
1228-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->dgemvBH_T_a1_b1_small, 2, ls, gs, 0, args);
1228+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->dgemvBH_T_a1_b1_small, 2, gs, ls, 0, args);
12291229
}
12301230

12311231
cuda_ops.buffer_release(Aa);
@@ -1486,7 +1486,7 @@ static int sgerBatch(cb_order order, size_t M, size_t N, float alpha,
14861486
args[8] = &M;
14871487
args[9] = &N;
14881488

1489-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgerBH_gen_small, 3, ls, gs, 0, args);
1489+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgerBH_gen_small, 3, gs, ls, 0, args);
14901490

14911491
cuda_ops.buffer_release(Aa);
14921492
cuda_ops.buffer_release(xa);
@@ -1618,7 +1618,7 @@ static int dgerBatch(cb_order order, size_t M, size_t N, double alpha,
16181618
args[8] = &M;
16191619
args[9] = &N;
16201620

1621-
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgerBH_gen_small, 3, ls, gs, 0, args);
1621+
err = GpuKernel_call(&((blas_handle *)ctx->blas_handle)->sgerBH_gen_small, 3, gs, ls, 0, args);
16221622

16231623
cuda_ops.buffer_release(Aa);
16241624
cuda_ops.buffer_release(xa);

0 commit comments

Comments
 (0)