Skip to content

Commit c3f34a4

Browse files
JenkinsAnthonyBarbier
Jenkins
authored and committed
arm_compute v18.03
Change-Id: I8f9a2a9d32a6cab019b8504d313216f28671f9f5
1 parent 06ea048 commit c3f34a4

File tree

5,024 files changed

+22978
-21544
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

5,024 files changed

+22978
-21544
lines changed

Diff for: .github/issue_template.md

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<!--
2+
Please fill the fields below in order to help us diagnose the issue. If you have a
3+
general question or a problem with the scripts, you can ignore these fields.
4+
-->
5+
6+
**Output of 'strings libarm_compute.so | grep arm_compute_version':**
7+
8+
**Platform:**
9+
10+
**Operating System:**
11+
12+
13+
<!--
14+
Please describe the issue (error, expected behaviour etc) and steps to reproduce it. If possible,
15+
share the shortest code necessary that reproduces the issue.
16+
-->
17+
18+
**Problem description:**

Diff for: README.md

+11-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11

2-
:warning: **Deprecation notice: QS8 and QS16 data types will be removed in the next release** (As far as we know nobody uses these data types, if you do or think they are useful please open an Issue or send us an email):warning:
2+
:warning: **Deprecation notice: QS8 and QS16 data types will be removed in the 18.05 release** (As far as we know nobody uses these data types, if you do or think they are useful please open an Issue or send us an email):warning:
33

44
Please report issues here: https://github.com/ARM-software/ComputeLibrary/issues
55
**Make sure you are using the latest version of the library before opening an issue. Thanks**
66

7+
News:
8+
9+
- We're hiring! [Senior Machine Learning C++ Software Engineer](https://careers.peopleclick.com/careerscp/client_arm/external/jobDetails.do?functionName=getJobDetail&jobPostId=36246&localeCode=en-us)
10+
- Come talk to us: [Gian Marco will be presenting his work at the EVS](https://www.embedded-vision.com/summit/even-faster-cnns-exploring-new-class-winograd-algorithms)
11+
712
Related projects:
813

914
- [Caffe on Compute Library](https://github.com/OAID/Caffe-HRT)
@@ -12,6 +17,7 @@ Related projects:
1217

1318
Documentation available here:
1419

20+
- [v18.03](https://arm-software.github.io/ComputeLibrary/v18.03/)
1521
- [v18.02](https://arm-software.github.io/ComputeLibrary/v18.02/)
1622
- [v18.01](https://arm-software.github.io/ComputeLibrary/v18.01/)
1723
- [v17.12](https://arm-software.github.io/ComputeLibrary/v17.12/)
@@ -24,8 +30,10 @@ Documentation available here:
2430

2531
Binaries available here:
2632

27-
- [v18.02-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.01/arm_compute-v18.02-bin-linux.tar.gz)
28-
- [v18.02-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.01/arm_compute-v18.02-bin-android.tar.gz)
33+
- [v18.03-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.03/arm_compute-v18.03-bin-linux.tar.gz)
34+
- [v18.03-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.03/arm_compute-v18.03-bin-android.tar.gz)
35+
- [v18.02-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.02/arm_compute-v18.02-bin-linux.tar.gz)
36+
- [v18.02-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.02/arm_compute-v18.02-bin-android.tar.gz)
2937
- [v18.01](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.01/arm_compute-v18.01-bin.tar.gz)
3038
- [v17.12](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.12/arm_compute-v17.12-bin.tar.gz)
3139
- [v17.10](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.10/arm_compute-v17.10-bin.tar.gz)

Diff for: SConscript

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ import os.path
2424
import re
2525
import subprocess
2626

27-
VERSION = "v18.02"
28-
SONAME_VERSION="9.0.0"
27+
VERSION = "v18.03"
28+
SONAME_VERSION="10.0.0"
2929

3030
Import('env')
3131
Import('vars')

Diff for: arm_compute/core/Dimensions.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class Dimensions
5050
*/
5151
template <typename... Ts>
5252
explicit Dimensions(Ts... dims)
53-
: _id{ { dims... } }, _num_dimensions{ sizeof...(dims) }
53+
: _id{ { static_cast<T>(dims)... } }, _num_dimensions{ sizeof...(dims) }
5454
{
5555
}
5656

Diff for: arm_compute/core/NEON/kernels/convolution/winograd/gemm.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,11 @@ inline void BlockedGemm(
6565
const int c_row_stride
6666
) {
6767
// Array access methods
68-
const auto A = [a, M, K, a_row_stride] (const int i, const int j) -> TIn {
68+
const auto A = [a, a_row_stride] (const int i, const int j) -> TIn {
6969
return a[i*a_row_stride + j];
7070
};
7171

72-
const auto B = [b, K, N, b_row_stride] (const int i, const int j) -> TIn {
72+
const auto B = [b, b_row_stride] (const int i, const int j) -> TIn {
7373
return b[i*b_row_stride + j];
7474
};
7575

Diff for: arm_compute/core/utils/logging/Macros.h

+11-9
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017 ARM Limited.
2+
* Copyright (c) 2017-2018 ARM Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -50,14 +50,16 @@
5050
} \
5151
} while(false)
5252

53-
#define ARM_COMPUTE_LOG_STREAM(logger_name, log_level, stream) \
54-
do \
55-
{ \
56-
auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \
57-
if(__logger != nullptr) \
58-
{ \
59-
__logger->log(log_level, static_cast<std::ostringstream &>(std::ostringstream() << stream).str()); \
60-
} \
53+
#define ARM_COMPUTE_LOG_STREAM(logger_name, log_level, stream) \
54+
do \
55+
{ \
56+
auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \
57+
if(__logger != nullptr) \
58+
{ \
59+
std::ostringstream s; \
60+
s << stream; \
61+
__logger->log(log_level, s.str()); \
62+
} \
6163
} while(false)
6264

6365
#else /* ARM_COMPUTE_LOGGING_ENABLED */

Diff for: arm_compute/runtime/CL/CLTuner.h

+47-35
Original file line numberDiff line numberDiff line change
@@ -37,26 +37,43 @@ class ICLKernel;
3737
class CLTuner : public ICLTuner
3838
{
3939
public:
40-
/** Constructor */
41-
CLTuner();
40+
/** Constructor
41+
*
42+
* @param[in] tune_new_kernels Whether to find the optimal local workgroup size for kernels which are not present in the table.
43+
*
44+
*/
45+
CLTuner(bool tune_new_kernels = true);
4246

4347
/** Destructor */
4448
~CLTuner() = default;
4549

50+
/** Setter for tune_new_kernels option
51+
*
52+
* @param[in] tune_new_kernels Whether to find the optimal local workgroup size for kernels which are not present in the table.
53+
*/
54+
void set_tune_new_kernels(bool tune_new_kernels);
55+
/** Tune kernels that are not in the LWS table
56+
*
57+
* @return True if tuning of new kernels is enabled.
58+
*/
59+
bool tune_new_kernels() const;
60+
/** Manually add a LWS for a kernel
61+
*
62+
* @param[in] kernel_id Unique identifier of the kernel
63+
* @param[in] optimal_lws Optimal local workgroup size to use for the given kernel
64+
*/
65+
void add_lws_to_table(const std::string &kernel_id, cl::NDRange optimal_lws);
4666
/** Import LWS table
4767
*
4868
* @param[in] lws_table The unordered_map container to import
4969
*/
5070
void import_lws_table(const std::unordered_map<std::string, cl::NDRange> &lws_table);
5171

52-
/** Export LWS table
72+
/** Give read access to the LWS table
5373
*
5474
* @return The LWS table as unordered_map container
5575
*/
56-
const std::unordered_map<std::string, cl::NDRange> &export_lws_table();
57-
58-
// Inherited methods overridden:
59-
void tune_kernel(ICLKernel &kernel) override;
76+
const std::unordered_map<std::string, cl::NDRange> &lws_table() const;
6077

6178
/** Set the OpenCL kernel event
6279
*
@@ -66,7 +83,28 @@ class CLTuner : public ICLTuner
6683
*/
6784
void set_cl_kernel_event(cl_event kernel_event);
6885

69-
std::function<decltype(clEnqueueNDRangeKernel)> real_function;
86+
std::function<decltype(clEnqueueNDRangeKernel)> real_clEnqueueNDRangeKernel;
87+
88+
/** Load the LWS table from file
89+
*
90+
* @param[in] filename Load the LWS table from this file. (Must exist)
91+
*/
92+
void load_from_file(const std::string &filename);
93+
94+
/** Save the content of the LWS table to file
95+
*
96+
* @param[in] filename Save the LWS table to this file. (Content will be overwritten)
97+
*/
98+
void save_to_file(const std::string &filename) const;
99+
100+
// Inherited methods overridden:
101+
void tune_kernel(ICLKernel &kernel) override;
102+
103+
/** Is the kernel_event set ?
104+
*
105+
* @return true if the kernel_event is set.
106+
*/
107+
bool kernel_event_is_set() const;
70108

71109
private:
72110
/** Find optimal LWS using brute-force approach
@@ -81,33 +119,7 @@ class CLTuner : public ICLTuner
81119
cl::CommandQueue _queue;
82120
cl::CommandQueue _queue_profiler;
83121
cl::Event _kernel_event;
84-
};
85-
86-
/* Function to be used to intercept kernel enqueues and store their OpenCL Event */
87-
class Interceptor
88-
{
89-
public:
90-
explicit Interceptor(CLTuner &tuner);
91-
92-
/** clEnqueueNDRangeKernel interface
93-
*
94-
* @param[in] command_queue A valid command-queue. The kernel will be queued for execution on the device associated with command_queue.
95-
* @param[in] kernel A valid kernel object. The OpenCL context associated with kernel and command_queue must be the same.
96-
* @param[in] work_dim The number of dimensions used to specify the global work-items and work-items in the work-group. work_dim must be greater than zero and less than or equal to CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.
97-
* @param[in] gwo Global-Workgroup-Offset. It can be used to specify an array of work_dim unsigned values that describe the offset used to calculate the global ID of a work-item. If global_work_offset is NULL, the global IDs start at offset (0, 0, ... 0).
98-
* @param[in] gws Global-Workgroup-Size. Points to an array of work_dim unsigned values that describe the number of global work-items in work_dim dimensions that will execute the kernel function.
99-
* @param[in] lws Local-Workgroup-Size. Points to an array of work_dim unsigned values that describe the number of work-items that make up a work-group
100-
* @param[in] num_events_in_wait_list Number of events in the waiting list
101-
* @param[in] event_wait_list Event waiting list
102-
* @param[in] event OpenCL kernel event
103-
*
104-
* @return the OpenCL status
105-
*/
106-
cl_int operator()(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo, const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list,
107-
const cl_event *event_wait_list, cl_event *event);
108-
109-
private:
110-
CLTuner &_tuner;
122+
bool _tune_new_kernels;
111123
};
112124
}
113125
#endif /*__ARM_COMPUTE_CLTUNER_H__ */

0 commit comments

Comments
 (0)