Skip to content

Commit 296364d

Browse files
committed
Moved the reduction API to reduction.h.
1 parent 026f8c1 commit 296364d

File tree

5 files changed

+284
-151
lines changed

5 files changed

+284
-151
lines changed

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ set(headers
123123
gpuarray/extension.h
124124
gpuarray/ext_cuda.h
125125
gpuarray/kernel.h
126+
gpuarray/reduction.h
126127
gpuarray/types.h
127128
gpuarray/util.h
128129
)

src/gpuarray/array.h

Lines changed: 0 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -123,27 +123,6 @@ typedef enum _ga_order {
123123
GA_F_ORDER=1
124124
} ga_order;
125125

126-
/**
127-
* Supported array reduction operations.
128-
*/
129-
130-
typedef enum _ga_reduce_op {
131-
GA_REDUCE_SUM, /* + */
132-
GA_REDUCE_PROD, /* * */
133-
GA_REDUCE_PRODNZ, /* * (!=0) */
134-
GA_REDUCE_MIN, /* min() */
135-
GA_REDUCE_MAX, /* max() */
136-
GA_REDUCE_ARGMIN, /* argmin() */
137-
GA_REDUCE_ARGMAX, /* argmax() */
138-
GA_REDUCE_MINANDARGMIN, /* min(), argmin() */
139-
GA_REDUCE_MAXANDARGMAX, /* max(), argmax() */
140-
GA_REDUCE_AND, /* & */
141-
GA_REDUCE_OR, /* | */
142-
GA_REDUCE_XOR, /* ^ */
143-
GA_REDUCE_ALL, /* &&/all() */
144-
GA_REDUCE_ANY, /* ||/any() */
145-
} ga_reduce_op;
146-
147126
/**
148127
* Checks if all the specified flags are set.
149128
*
@@ -636,110 +615,6 @@ GPUARRAY_PUBLIC void GpuArray_fprintf(FILE *fd, const GpuArray *a);
636615
GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a);
637616

638617

639-
/**
640-
* @brief Compute a reduction sum (+), product (*), non-zero product (* != 0),
641-
* min, max, argmin, argmax, min-and-argmin, max-and-argmax, and (&),
642-
* or (|), xor (^), all (&&) or any (||) over a list of axes to reduce.
643-
*
644-
* Returns one (in the case of min-and-argmin/max-and-argmax, two) destination
645-
* tensors. The destination tensor(s)' axes are a strict subset of the axes of the
646-
* source tensor. The axes to be reduced are specified by the caller, and the
647-
* reduction is performed over these axes, which are then removed in the
648-
* destination.
649-
*
650-
* @param [out] dst The destination tensor. Has the same type as the source.
651-
* @param [out] dstArg For argument of minima/maxima operations. Has type int64.
652-
* @param [in] src The source tensor.
653-
* @param [in] reduxLen The number of axes reduced. Must be >= 1 and
654-
* <= src->nd.
655-
* @param [in] reduxList A list of integers of length reduxLen, indicating
656-
* the axes to be reduced. The order of the axes
657-
* matters for dstArg index calculations (GpuArray_argmin,
658-
* GpuArray_argmax, GpuArray_minandargmin,
659-
* GpuArray_maxandargmax). All entries in the list must be
660-
* unique, >= 0 and < src->nd.
661-
*
662-
* For example, if a 5D-tensor is max-reduced with an axis
663-
* list of [3,4,1], then reduxLen shall be 3, and the
664-
* index calculation in every point shall take the form
665-
*
666-
* dstArgmax[i0,i2] = i3 * src.shape[4] * src.shape[1] +
667-
* i4 * src.shape[1] +
668-
* i1
669-
*
670-
* where (i3,i4,i1) are the coordinates of the maximum-
671-
* valued element within subtensor [i0,:,i2,:,:] of src.
672-
* @return GA_NO_ERROR if the operation was successful, or a non-zero error
673-
* code otherwise.
674-
*/
675-
676-
GPUARRAY_PUBLIC int GpuArray_sum (GpuArray* dst,
677-
const GpuArray* src,
678-
unsigned reduxLen,
679-
const unsigned* reduxList);
680-
GPUARRAY_PUBLIC int GpuArray_prod (GpuArray* dst,
681-
const GpuArray* src,
682-
unsigned reduxLen,
683-
const unsigned* reduxList);
684-
GPUARRAY_PUBLIC int GpuArray_prodnz (GpuArray* dst,
685-
const GpuArray* src,
686-
unsigned reduxLen,
687-
const unsigned* reduxList);
688-
GPUARRAY_PUBLIC int GpuArray_min (GpuArray* dst,
689-
const GpuArray* src,
690-
unsigned reduxLen,
691-
const unsigned* reduxList);
692-
GPUARRAY_PUBLIC int GpuArray_max (GpuArray* dst,
693-
const GpuArray* src,
694-
unsigned reduxLen,
695-
const unsigned* reduxList);
696-
GPUARRAY_PUBLIC int GpuArray_argmin (GpuArray* dstArg,
697-
const GpuArray* src,
698-
unsigned reduxLen,
699-
const unsigned* reduxList);
700-
GPUARRAY_PUBLIC int GpuArray_argmax (GpuArray* dstArg,
701-
const GpuArray* src,
702-
unsigned reduxLen,
703-
const unsigned* reduxList);
704-
GPUARRAY_PUBLIC int GpuArray_minandargmin(GpuArray* dst,
705-
GpuArray* dstArg,
706-
const GpuArray* src,
707-
unsigned reduxLen,
708-
const unsigned* reduxList);
709-
GPUARRAY_PUBLIC int GpuArray_maxandargmax(GpuArray* dst,
710-
GpuArray* dstArg,
711-
const GpuArray* src,
712-
unsigned reduxLen,
713-
const unsigned* reduxList);
714-
GPUARRAY_PUBLIC int GpuArray_and (GpuArray* dst,
715-
const GpuArray* src,
716-
unsigned reduxLen,
717-
const unsigned* reduxList);
718-
GPUARRAY_PUBLIC int GpuArray_or (GpuArray* dst,
719-
const GpuArray* src,
720-
unsigned reduxLen,
721-
const unsigned* reduxList);
722-
GPUARRAY_PUBLIC int GpuArray_xor (GpuArray* dst,
723-
const GpuArray* src,
724-
unsigned reduxLen,
725-
const unsigned* reduxList);
726-
GPUARRAY_PUBLIC int GpuArray_all (GpuArray* dst,
727-
const GpuArray* src,
728-
unsigned reduxLen,
729-
const unsigned* reduxList);
730-
GPUARRAY_PUBLIC int GpuArray_any (GpuArray* dst,
731-
const GpuArray* src,
732-
unsigned reduxLen,
733-
const unsigned* reduxList);
734-
GPUARRAY_PUBLIC int GpuArray_reduction (ga_reduce_op op,
735-
GpuArray* dst,
736-
GpuArray* dstArg,
737-
const GpuArray* src,
738-
unsigned reduxLen,
739-
const unsigned* reduxList);
740-
741-
742-
743618

744619

745620
#ifdef __cplusplus

src/gpuarray/reduction.h

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#ifndef GPUARRAY_REDUCTION_H
2+
#define GPUARRAY_REDUCTION_H
3+
/**
4+
* \file reduction.h
5+
* \brief Reduction functions.
6+
*/
7+
8+
#include <gpuarray/array.h>
9+
10+
#ifdef _MSC_VER
11+
#ifndef inline
12+
#define inline __inline
13+
#endif
14+
#endif
15+
16+
#ifdef __cplusplus
17+
extern "C" {
18+
#endif
19+
#ifdef CONFUSE_EMACS
20+
}
21+
#endif
22+
23+
24+
/**
25+
* Supported array reduction operations.
26+
*/
27+
28+
typedef enum _ga_reduce_op {
29+
GA_REDUCE_SUM, /* + */
30+
GA_REDUCE_PROD, /* * */
31+
GA_REDUCE_PRODNZ, /* * (!=0) */
32+
GA_REDUCE_MIN, /* min() */
33+
GA_REDUCE_MAX, /* max() */
34+
GA_REDUCE_ARGMIN, /* argmin() */
35+
GA_REDUCE_ARGMAX, /* argmax() */
36+
GA_REDUCE_MINANDARGMIN, /* min(), argmin() */
37+
GA_REDUCE_MAXANDARGMAX, /* max(), argmax() */
38+
GA_REDUCE_AND, /* & */
39+
GA_REDUCE_OR, /* | */
40+
GA_REDUCE_XOR, /* ^ */
41+
GA_REDUCE_ALL, /* &&/all() */
42+
GA_REDUCE_ANY, /* ||/any() */
43+
} ga_reduce_op;
44+
45+
46+
47+
/**
48+
* @brief Compute a reduction sum (+), product (*), non-zero product (* != 0),
49+
* min, max, argmin, argmax, min-and-argmin, max-and-argmax, and (&),
50+
* or (|), xor (^), all (&&) or any (||) over a list of axes to reduce.
51+
*
52+
* Writes one destination tensor (two for min-and-argmin/max-and-argmax: dst and
53+
* dstArg). The destination tensor(s)' axes are a strict subset of the axes of the
54+
* source tensor. The axes to be reduced are specified by the caller, and the
55+
* reduction is performed over these axes, which are then removed in the
56+
* destination.
57+
*
58+
* @param [out] dst The destination tensor. Has the same type as the source.
59+
* @param [out] dstArg Receives the index (argument) of each minimum/maximum for the argmin/argmax operations. Has type int64.
60+
* @param [in] src The source tensor.
61+
* @param [in] reduxLen The number of axes reduced. Must be >= 1 and
62+
* <= src->nd.
63+
* @param [in] reduxList A list of integers of length reduxLen, indicating
64+
* the axes to be reduced. The order of the axes
65+
* matters for dstArg index calculations (GpuArray_argmin,
66+
* GpuArray_argmax, GpuArray_minandargmin,
67+
* GpuArray_maxandargmax). All entries in the list must be
68+
* unique, >= 0 and < src->nd.
69+
*
70+
* For example, if a 5D-tensor is max-reduced with an axis
71+
* list of [3,4,1], then reduxLen shall be 3, and the
72+
* index calculation in every point shall take the form
73+
*
74+
* dstArgmax[i0,i2] = i3 * src.shape[4] * src.shape[1] +
75+
* i4 * src.shape[1] +
76+
* i1
77+
*
78+
* where (i3,i4,i1) are the coordinates of the maximum-
79+
* valued element within subtensor [i0,:,i2,:,:] of src.
80+
* @return GA_NO_ERROR if the operation was successful, or a non-zero error
81+
* code otherwise.
82+
*/
83+
84+
GPUARRAY_PUBLIC int GpuArray_sum (GpuArray* dst,
85+
const GpuArray* src,
86+
unsigned reduxLen,
87+
const unsigned* reduxList);
88+
GPUARRAY_PUBLIC int GpuArray_prod (GpuArray* dst,
89+
const GpuArray* src,
90+
unsigned reduxLen,
91+
const unsigned* reduxList);
92+
GPUARRAY_PUBLIC int GpuArray_prodnz (GpuArray* dst,
93+
const GpuArray* src,
94+
unsigned reduxLen,
95+
const unsigned* reduxList);
96+
GPUARRAY_PUBLIC int GpuArray_min (GpuArray* dst,
97+
const GpuArray* src,
98+
unsigned reduxLen,
99+
const unsigned* reduxList);
100+
GPUARRAY_PUBLIC int GpuArray_max (GpuArray* dst,
101+
const GpuArray* src,
102+
unsigned reduxLen,
103+
const unsigned* reduxList);
104+
GPUARRAY_PUBLIC int GpuArray_argmin (GpuArray* dstArg,
105+
const GpuArray* src,
106+
unsigned reduxLen,
107+
const unsigned* reduxList);
108+
GPUARRAY_PUBLIC int GpuArray_argmax (GpuArray* dstArg,
109+
const GpuArray* src,
110+
unsigned reduxLen,
111+
const unsigned* reduxList);
112+
GPUARRAY_PUBLIC int GpuArray_minandargmin(GpuArray* dst,
113+
GpuArray* dstArg,
114+
const GpuArray* src,
115+
unsigned reduxLen,
116+
const unsigned* reduxList);
117+
GPUARRAY_PUBLIC int GpuArray_maxandargmax(GpuArray* dst,
118+
GpuArray* dstArg,
119+
const GpuArray* src,
120+
unsigned reduxLen,
121+
const unsigned* reduxList);
122+
GPUARRAY_PUBLIC int GpuArray_and (GpuArray* dst,
123+
const GpuArray* src,
124+
unsigned reduxLen,
125+
const unsigned* reduxList);
126+
GPUARRAY_PUBLIC int GpuArray_or (GpuArray* dst,
127+
const GpuArray* src,
128+
unsigned reduxLen,
129+
const unsigned* reduxList);
130+
GPUARRAY_PUBLIC int GpuArray_xor (GpuArray* dst,
131+
const GpuArray* src,
132+
unsigned reduxLen,
133+
const unsigned* reduxList);
134+
GPUARRAY_PUBLIC int GpuArray_all (GpuArray* dst,
135+
const GpuArray* src,
136+
unsigned reduxLen,
137+
const unsigned* reduxList);
138+
GPUARRAY_PUBLIC int GpuArray_any (GpuArray* dst,
139+
const GpuArray* src,
140+
unsigned reduxLen,
141+
const unsigned* reduxList);
142+
GPUARRAY_PUBLIC int GpuArray_reduction (ga_reduce_op op,
143+
GpuArray* dst,
144+
GpuArray* dstArg,
145+
const GpuArray* src,
146+
unsigned reduxLen,
147+
const unsigned* reduxList);
148+
149+
150+
151+
152+
153+
#ifdef __cplusplus
154+
}
155+
#endif
156+
157+
#endif

0 commit comments

Comments
 (0)