Skip to content

Commit 975dfe1

Browse files
author
Jenkins
committed
arm_compute v19.08
1 parent 4ba87db commit 975dfe1

File tree

10,361 files changed

+334440
-156417
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

10,361 files changed

+334440
-156417
lines changed

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,26 @@ Please report issues here: https://github.com/ARM-software/ComputeLibrary/issues
1212

1313
News:
1414

15+
- [Gian Marco's talk on Performance Analysis for Optimizing Embedded Deep Learning Inference Software](https://www.embedded-vision.com/platinum-members/arm/embedded-vision-training/videos/pages/may-2019-embedded-vision-summit)
1516
- [Gian Marco's talk on optimizing CNNs with Winograd algorithms at the EVS](https://www.embedded-vision.com/platinum-members/arm/embedded-vision-training/videos/pages/may-2018-embedded-vision-summit-iodice)
1617
- [Gian Marco's talk on using SGEMM and FFTs to Accelerate Deep Learning](https://www.embedded-vision.com/platinum-members/arm/embedded-vision-training/videos/pages/may-2016-embedded-vision-summit-iodice)
1718

1819
Related projects:
1920

2021
- [Arm NN SDK](https://github.com/arm-software/armnn)
22+
23+
Tutorials:
24+
2125
- [Tutorial: Cartoonifying Images on Raspberry Pi with the Compute Library](https://community.arm.com/graphics/b/blog/posts/cartoonifying-images-on-raspberry-pi-with-the-compute-library)
2226
- [Tutorial: Running AlexNet on Raspberry Pi with Compute Library](https://community.arm.com/processors/b/blog/posts/running-alexnet-on-raspberry-pi-with-compute-library)
2327

28+
Blogs:
29+
30+
- [Happy Birthday ACL!](https://community.arm.com/developer/tools-software/graphics/b/blog/posts/arm-compute-library-19-05-is-coming)
31+
2432
Documentation available here:
2533

34+
- [v19.08](https://arm-software.github.io/ComputeLibrary/v19.08/)
2635
- [v19.05](https://arm-software.github.io/ComputeLibrary/v19.05/)
2736
- [v19.02](https://arm-software.github.io/ComputeLibrary/v19.02/)
2837
- [v18.11](https://arm-software.github.io/ComputeLibrary/v18.11/index.xhtml)
@@ -41,6 +50,8 @@ Documentation available here:
4150

4251
Binaries available here:
4352

53+
- [v19.08-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.08/arm_compute-v19.08-bin-linux.tar.gz)
54+
- [v19.08-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.08/arm_compute-v19.08-bin-android.tar.gz)
4455
- [v19.05-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.05/arm_compute-v19.05-bin-linux.tar.gz)
4556
- [v19.05-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.05/arm_compute-v19.05-bin-android.tar.gz)
4657
- [v19.02-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.02/arm_compute-v19.02-bin-linux.tar.gz)

SConscript

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ import os.path
2424
import re
2525
import subprocess
2626

27-
VERSION = "v19.05"
28-
SONAME_VERSION="15.0.0"
27+
VERSION = "v19.08"
28+
SONAME_VERSION="16.0.0"
2929

3030
Import('env')
3131
Import('vars')
@@ -164,6 +164,7 @@ core_files += Glob('src/core/CPP/kernels/*.cpp')
164164
core_files += Glob('src/core/utils/helpers/*.cpp')
165165
core_files += Glob('src/core/utils/io/*.cpp')
166166
core_files += Glob('src/core/utils/quantization/*.cpp')
167+
core_files += Glob('src/core/utils/misc/*.cpp')
167168
if env["logging"]:
168169
core_files += Glob('src/core/utils/logging/*.cpp')
169170

@@ -187,6 +188,7 @@ if env['opencl']:
187188
core_files += Glob('src/core/CL/*.cpp')
188189
core_files += Glob('src/core/CL/kernels/*.cpp')
189190
core_files += Glob('src/core/CL/gemm/*.cpp')
191+
core_files += Glob('src/core/CL/gemm/native/*.cpp')
190192
core_files += Glob('src/core/CL/gemm/reshaped/*.cpp')
191193
core_files += Glob('src/core/CL/gemm/reshaped_only_rhs/*.cpp')
192194

@@ -204,10 +206,13 @@ if env['neon']:
204206

205207
core_files += Glob('src/core/NEON/kernels/arm_gemm/*.cpp')
206208

207-
# build winograd sources for either v7a / v8a
209+
# build winograd/depthwise sources for either v7a / v8a
208210
core_files += Glob('src/core/NEON/kernels/convolution/*/*.cpp')
209211
core_files += Glob('src/core/NEON/kernels/convolution/winograd/*/*.cpp')
210-
arm_compute_env.Append(CPPPATH = ["arm_compute/core/NEON/kernels/convolution/winograd/","arm_compute/core/NEON/kernels/convolution/common/" , "arm_compute/core/NEON/kernels/assembly/"])
212+
arm_compute_env.Append(CPPPATH = ["arm_compute/core/NEON/kernels/convolution/common/",
213+
"arm_compute/core/NEON/kernels/convolution/winograd/",
214+
"arm_compute/core/NEON/kernels/convolution/depthwise/",
215+
"arm_compute/core/NEON/kernels/assembly/"])
211216

212217
graph_files += Glob('src/graph/backends/NEON/*.cpp')
213218

SConstruct

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ if env['os'] == 'android' and ( 'clang++' not in cpp_compiler or 'clang' not in
145145
if 'clang++' in cpp_compiler:
146146
env.Append(CXXFLAGS = ['-Wno-format-nonliteral','-Wno-deprecated-increment-bool','-Wno-vla-extension','-Wno-mismatched-tags'])
147147
else:
148-
env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel','-Wno-implicit-fallthrough'])
148+
env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel', '-Wno-redundant-move'])
149149

150150
if env['cppthreads']:
151151
env.Append(CPPDEFINES = [('ARM_COMPUTE_CPP_SCHEDULER', 1)])
@@ -185,18 +185,15 @@ elif env['arch'] == 'arm64-v8a':
185185
env.Append(CXXFLAGS = ['-no-integrated-as'])
186186
elif 'arm64-v8.2-a' in env['arch']:
187187
if env['arch'] == 'arm64-v8.2-a-sve':
188-
if env['os'] != 'bare_metal':
189-
print("Only bare metal SVE is supported at the moment")
190-
Exit(1)
191188
env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod'])
192189
else:
193190
env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16']) # explicitly enable fp16 extension otherwise __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is undefined
194-
if env['os'] == 'linux':
195-
prefix = "aarch64-linux-gnu-"
196-
elif env['os'] == 'bare_metal':
197-
prefix = "aarch64-elf-"
198-
elif env['os'] == 'android':
199-
prefix = "aarch64-linux-android-"
191+
if env['os'] == 'linux':
192+
prefix = "aarch64-linux-gnu-"
193+
elif env['os'] == 'bare_metal':
194+
prefix = "aarch64-elf-"
195+
elif env['os'] == 'android':
196+
prefix = "aarch64-linux-android-"
200197
env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8_2','NO_DOT_IN_TOOLCHAIN'])
201198
if 'clang++' in cpp_compiler:
202199
env.Append(CXXFLAGS = ['-no-integrated-as'])
@@ -282,7 +279,7 @@ if env['debug']:
282279
env.Append(CXXFLAGS = ['-O0','-g','-gdwarf-2'])
283280
env.Append(CPPDEFINES = ['ARM_COMPUTE_DEBUG_ENABLED'])
284281
else:
285-
env.Append(CXXFLAGS = ['-O3','-ftree-vectorize'])
282+
env.Append(CXXFLAGS = ['-O3'])
286283

287284
if env['asserts']:
288285
env.Append(CPPDEFINES = ['ARM_COMPUTE_ASSERTS_ENABLED'])

arm_compute/core/CL/CLKernelLibrary.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,11 @@ class CLKernelLibrary
297297
* @return The content of CL_DEVICE_VERSION
298298
*/
299299
std::string get_device_version();
300+
/** Return the maximum number of compute units in the device
301+
*
302+
* @return The content of CL_DEVICE_MAX_COMPUTE_UNITS
303+
*/
304+
cl_uint get_num_compute_units();
300305
/** Creates a kernel from the kernel library.
301306
*
302307
* @param[in] kernel_name Kernel name.

arm_compute/core/CL/CLKernels.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
2929
#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
3030
#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
31+
#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
3132
#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
3233
#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
3334
#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
@@ -51,6 +52,7 @@
5152
#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
5253
#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
5354
#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
55+
#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
5456
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
5557
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
5658
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h"
@@ -73,25 +75,26 @@
7375
#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
7476
#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
7577
#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
76-
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
7778
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
79+
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
7880
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
7981
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
8082
#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
8183
#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
84+
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
8285
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
8386
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.h"
8487
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.h"
8588
#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
8689
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
8790
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
8891
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
92+
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
8993
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
9094
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
9195
#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
9296
#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
9397
#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
94-
#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
9598
#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
9699
#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
97100
#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
@@ -109,6 +112,7 @@
109112
#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
110113
#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
111114
#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
115+
#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
112116
#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
113117
#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
114118
#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h"
@@ -138,6 +142,7 @@
138142
#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
139143
#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
140144
#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
145+
#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
141146
#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h"
142147
#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
143148
#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"

arm_compute/core/CL/CLTypes.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2018 ARM Limited.
2+
* Copyright (c) 2017-2019 ARM Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -24,6 +24,7 @@
2424
#ifndef __ARM_COMPUTE_CL_TYPES_H__
2525
#define __ARM_COMPUTE_CL_TYPES_H__
2626

27+
#include "arm_compute/core/CL/ICLArray.h"
2728
#include "arm_compute/core/GPUTarget.h"
2829

2930
#include <string>
@@ -53,5 +54,23 @@ struct CLDeviceOptions
5354
size_t num_cores; /**< Number of cores */
5455
size_t cache_size; /**< Cache size */
5556
};
57+
58+
/** OpenCL quantization data */
59+
struct CLQuantization
60+
{
61+
/** Default Constructor */
62+
CLQuantization()
63+
: scale(nullptr), offset(nullptr) {};
64+
/** Constructor
65+
*
66+
* @param[in] scale OpenCL scale array
67+
* @param[in] offset OpenCL offset array
68+
*/
69+
CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset)
70+
: scale(scale), offset(offset) {};
71+
72+
const ICLFloatArray *scale; /**< Quantization scale array */
73+
const ICLInt32Array *offset; /**< Quantization offset array */
74+
};
5675
} // namespace arm_compute
5776
#endif /* __ARM_COMPUTE_CL_TYPES_H__ */

arm_compute/core/CL/ICLKernel.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,20 @@ class ICLKernel : public IKernel
111111
{
112112
add_tensor_argument<1>(idx, tensor, window);
113113
}
114+
/** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
115+
*
116+
* @param[in] cond Condition to check
117+
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
118+
* @param[in] tensor Tensor to set as an argument of the object's kernel.
119+
* @param[in] window Window the kernel will be executed on.
120+
*/
121+
void add_1D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
122+
{
123+
if(cond)
124+
{
125+
add_1D_tensor_argument(idx, tensor, window);
126+
}
127+
}
114128
/** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
115129
*
116130
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
@@ -121,6 +135,20 @@ class ICLKernel : public IKernel
121135
{
122136
add_tensor_argument<2>(idx, tensor, window);
123137
}
138+
/** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
139+
*
140+
* @param[in] cond Condition to check
141+
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
142+
* @param[in] tensor Tensor to set as an argument of the object's kernel.
143+
* @param[in] window Window the kernel will be executed on.
144+
*/
145+
void add_2D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
146+
{
147+
if(cond)
148+
{
149+
add_2D_tensor_argument(idx, tensor, window);
150+
}
151+
}
124152
/** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
125153
*
126154
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.

arm_compute/core/CL/ICLTensor.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2018 ARM Limited.
2+
* Copyright (c) 2016-2019 ARM Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -26,6 +26,8 @@
2626

2727
#include "arm_compute/core/ITensor.h"
2828

29+
#include "arm_compute/core/CL/CLTypes.h"
30+
2931
#include <cstdint>
3032

3133
namespace cl
@@ -53,6 +55,11 @@ class ICLTensor : public ITensor
5355
/** Default virtual destructor. */
5456
virtual ~ICLTensor() = default;
5557

58+
/** Interface to be implemented by the child class to return the wrapped quantization info data
59+
*
60+
* @return A wrapped quantization info object.
61+
*/
62+
virtual CLQuantization quantization() const = 0;
5663
/** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the image's data.
5764
*
5865
* @return A reference to an OpenCL buffer containing the image's data.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
* Copyright (c) 2019 ARM Limited.
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to
8+
* deal in the Software without restriction, including without limitation the
9+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
* sell copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
#ifndef __ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H__
25+
#define __ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H__
26+
27+
#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
28+
#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h"
29+
30+
#include <memory>
31+
32+
namespace arm_compute
33+
{
34+
namespace cl_gemm
35+
{
36+
/** CLGEMMNative factory class */
37+
class CLGEMMNativeKernelConfigurationFactory final
38+
{
39+
public:
40+
/** Static method to construct a CLGEMMNative kernel configuration object according to the GPU architecture
41+
*
42+
* @param[in] arch GPU target
43+
*
44+
* @return CLGEMMNative kernel configuration class
45+
*/
46+
static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget arch)
47+
{
48+
// Note: At the moment we only support the Bifrost architecture. However, we should have a dedicated path for each GPU architecture
49+
// using get_arch_from_target(arch)
50+
return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationBifrost>(arch);
51+
}
52+
};
53+
} // namespace cl_gemm
54+
} // namespace arm_compute
55+
#endif /*__ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H__ */

0 commit comments

Comments
 (0)