@@ -123,6 +123,27 @@ typedef enum _ga_order {
123123 GA_F_ORDER = 1
124124} ga_order ;
125125
126+ /**
127+ * Supported array reduction operations (the op argument of GpuArray_reduction()).
128+ */
129+
130+ typedef enum _ga_reduce_op {
131+ GA_REDUCE_SUM , /* sum: + */
132+ GA_REDUCE_PROD , /* product: * */
133+ GA_REDUCE_PRODNZ , /* non-zero product: * (!=0) */
134+ GA_REDUCE_MIN , /* minimum: min() */
135+ GA_REDUCE_MAX , /* maximum: max() */
136+ GA_REDUCE_ARGMIN , /* argument of the minimum: argmin() */
137+ GA_REDUCE_ARGMAX , /* argument of the maximum: argmax() */
138+ GA_REDUCE_MINANDARGMIN , /* minimum and its argument: min(), argmin() */
139+ GA_REDUCE_MAXANDARGMAX , /* maximum and its argument: max(), argmax() */
140+ GA_REDUCE_AND , /* and: & */
141+ GA_REDUCE_OR , /* or: | */
142+ GA_REDUCE_XOR , /* xor: ^ */
143+ GA_REDUCE_ALL , /* all: &&/all() */
144+ GA_REDUCE_ANY , /* any: ||/any() */
145+ } ga_reduce_op ;
146+
126147/**
127148 * Checks if all the specified flags are set.
128149 *
@@ -614,26 +635,31 @@ GPUARRAY_PUBLIC void GpuArray_fprintf(FILE *fd, const GpuArray *a);
614635
615636GPUARRAY_PUBLIC int GpuArray_fdump (FILE * fd , const GpuArray * a );
616637
638+
617639/**
618- * @brief Computes simultaneously the maxima and the arguments of maxima over
619- * specified axes of the tensor.
640+ * @brief Computes a reduction -- sum (+), product (*), non-zero product (* != 0),
641+ * min, max, argmin, argmax, min-and-argmin, max-and-argmax, and (&),
642+ * or (|), xor (^), all (&&) or any (||) -- over a list of axes to reduce.
620643 *
621- * Returns two tensors of identical shape. Both tensors' axes are a subset of
622- * the axes of the original tensor. The axes to be reduced are specified by
623- * the caller, and the maxima and arguments of maxima are computed over them.
644+ * Returns one (in the case of min-and-argmin/max-and-argmax, two) destination
645+ * tensors. The destination tensor(s)' axes are a strict subset of the axes of the
646+ * source tensor. The axes to be reduced are specified by the caller, and the
647+ * reduction is performed over these axes, which are then removed in the
648+ * destination.
624649 *
625- * @param [out] dstMax The resulting tensor of maxima
626- * @param [out] dstArgmax the resulting tensor of arguments at maxima
650+ * @param [out] dst The destination tensor. Has the same type as the source.
651+ * @param [out] dstArg The destination tensor for arguments of minima/maxima. Has type int64.
627652 * @param [in] src The source tensor.
628653 * @param [in] reduxLen The number of axes reduced. Must be >= 1 and
629654 * <= src->nd.
630655 * @param [in] reduxList A list of integers of length reduxLen, indicating
631656 * the axes to be reduced. The order of the axes
632- * matters for dstArgmax index calculations. All
633- * entries in the list must be unique, >= 0 and
634- * < src->nd.
657+ * matters for dstArg index calculations (GpuArray_argmin,
658+ * GpuArray_argmax, GpuArray_minandargmin,
659+ * GpuArray_maxandargmax). All entries in the list must be
660+ * unique, >= 0 and < src->nd.
635661 *
636- * For example, if a 5D-tensor is reduced with an axis
662+ * For example, if a 5D-tensor is max-reduced with an axis
637663 * list of [3,4,1], then reduxLen shall be 3, and the
638664 * index calculation in every point shall take the form
639665 *
@@ -647,11 +673,74 @@ GPUARRAY_PUBLIC int GpuArray_fdump(FILE *fd, const GpuArray *a);
647673 * code otherwise.
648674 */
649675
650- GPUARRAY_PUBLIC int GpuArray_maxandargmax (GpuArray * dstMax ,
651- GpuArray * dstArgmax ,
676+ GPUARRAY_PUBLIC int GpuArray_sum (GpuArray * dst , /* sum (+) reduction of src over the listed axes, into dst */
652677 const GpuArray * src ,
653678 unsigned reduxLen ,
654679 const unsigned * reduxList );
680+ GPUARRAY_PUBLIC int GpuArray_prod (GpuArray * dst , /* product (*) reduction of src over the listed axes, into dst */
681+ const GpuArray * src ,
682+ unsigned reduxLen ,
683+ const unsigned * reduxList );
684+ GPUARRAY_PUBLIC int GpuArray_prodnz (GpuArray * dst , /* non-zero product (* != 0) reduction of src over the listed axes, into dst */
685+ const GpuArray * src ,
686+ unsigned reduxLen ,
687+ const unsigned * reduxList );
688+ GPUARRAY_PUBLIC int GpuArray_min (GpuArray * dst , /* min reduction of src over the listed axes, into dst */
689+ const GpuArray * src ,
690+ unsigned reduxLen ,
691+ const unsigned * reduxList );
692+ GPUARRAY_PUBLIC int GpuArray_max (GpuArray * dst , /* max reduction of src over the listed axes, into dst */
693+ const GpuArray * src ,
694+ unsigned reduxLen ,
695+ const unsigned * reduxList );
696+ GPUARRAY_PUBLIC int GpuArray_argmin (GpuArray * dstArg , /* argmin reduction of src over the listed axes; arguments of minima go to dstArg */
697+ const GpuArray * src ,
698+ unsigned reduxLen ,
699+ const unsigned * reduxList ); /* axis order matters for the dstArg index calculation */
700+ GPUARRAY_PUBLIC int GpuArray_argmax (GpuArray * dstArg , /* argmax reduction of src over the listed axes; arguments of maxima go to dstArg */
701+ const GpuArray * src ,
702+ unsigned reduxLen ,
703+ const unsigned * reduxList ); /* axis order matters for the dstArg index calculation */
704+ GPUARRAY_PUBLIC int GpuArray_minandargmin (GpuArray * dst , /* min-and-argmin reduction: the two-destination form; dst has the source's type */
705+ GpuArray * dstArg , /* destination for the arguments of minima (type int64) */
706+ const GpuArray * src ,
707+ unsigned reduxLen ,
708+ const unsigned * reduxList ); /* axis order matters for the dstArg index calculation */
709+ GPUARRAY_PUBLIC int GpuArray_maxandargmax (GpuArray * dst , /* max-and-argmax reduction: the two-destination form; dst has the source's type */
710+ GpuArray * dstArg , /* destination for the arguments of maxima (type int64) */
711+ const GpuArray * src ,
712+ unsigned reduxLen ,
713+ const unsigned * reduxList ); /* axis order matters for the dstArg index calculation */
714+ GPUARRAY_PUBLIC int GpuArray_and (GpuArray * dst , /* and (&) reduction of src over the listed axes, into dst */
715+ const GpuArray * src ,
716+ unsigned reduxLen ,
717+ const unsigned * reduxList );
718+ GPUARRAY_PUBLIC int GpuArray_or (GpuArray * dst , /* or (|) reduction of src over the listed axes, into dst */
719+ const GpuArray * src ,
720+ unsigned reduxLen ,
721+ const unsigned * reduxList );
722+ GPUARRAY_PUBLIC int GpuArray_xor (GpuArray * dst , /* xor (^) reduction of src over the listed axes, into dst */
723+ const GpuArray * src ,
724+ unsigned reduxLen ,
725+ const unsigned * reduxList );
726+ GPUARRAY_PUBLIC int GpuArray_all (GpuArray * dst , /* all (&&) reduction of src over the listed axes, into dst */
727+ const GpuArray * src ,
728+ unsigned reduxLen ,
729+ const unsigned * reduxList );
730+ GPUARRAY_PUBLIC int GpuArray_any (GpuArray * dst , /* any (||) reduction of src over the listed axes, into dst */
731+ const GpuArray * src ,
732+ unsigned reduxLen ,
733+ const unsigned * reduxList );
734+ GPUARRAY_PUBLIC int GpuArray_reduction (ga_reduce_op op , /* generic entry point: op selects the reduction (one of the GA_REDUCE_* values) */
735+ GpuArray * dst , /* destination tensor; has the same type as the source */
736+ GpuArray * dstArg , /* destination for arguments of minima/maxima (int64); NOTE(review): presumably unused when op yields no argument -- confirm */
737+ const GpuArray * src , /* the source tensor */
738+ unsigned reduxLen , /* number of axes reduced */
739+ const unsigned * reduxList ); /* the axes to reduce; order matters for dstArg index calculations */
740+
741+
742+
743+
655744
656745#ifdef __cplusplus
657746}
0 commit comments