diff --git a/README.md b/README.md index 8fd40b4a..f3d7bf04 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ ### How to Install: ```sh -python3 -m pip install https://github.com/microsoft/antares/releases/download/v0.3.0/antares-0.3.0-py3-none-linux_x86_64.whl +python3 -m pip install antares ``` ### Quick Test: @@ -20,6 +20,9 @@ BACKEND=c-scpu antares # Quickly generate a multi-threaded CPU code: BACKEND=c-mcpu antares +# Search an efficient multi-threaded CPU code: +STEP=100 BACKEND=c-mcpu antares + # Quickly generate a SHADER code for Windows 10/11's DirectX12: BACKEND=c-hlsl_win64 antares diff --git a/antares/antares_compiler.py b/antares/antares_compiler.py index 20c49c72..839f07f0 100644 --- a/antares/antares_compiler.py +++ b/antares/antares_compiler.py @@ -5,7 +5,6 @@ import random import hashlib import traceback -import numpy as np import math import re import json @@ -324,14 +323,14 @@ def compute_mem_ratio(tpr): global_arg_props = get_global_arg_props() access_bytes = 0 for buf in global_arg_props['_in']: - access_bytes += np.product(buf['shape']) * get_type_size(buf['dtype']) + access_bytes += product(buf['shape']) * get_type_size(buf['dtype']) for buf in global_arg_props['_out']: - access_bytes += np.product(buf['shape']) * get_type_size(buf['dtype']) + access_bytes += product(buf['shape']) * get_type_size(buf['dtype']) access_bytes = int(access_bytes) if access_bytes <= 0: return -1 - ratio = np.ceil(access_bytes * 1e-7 / tpr / device_properties().mem_bandwith) + ratio = math.ceil(access_bytes * 1e-7 / tpr / device_properties().mem_bandwith) return min(int(ratio), 100) def run_config_entity(target_source, config_str, dir_sid, expected_timecost='inf', dev_id=0): diff --git a/antares/common.py b/antares/common.py index 0d58f7c5..caddfdde 100644 --- a/antares/common.py +++ b/antares/common.py @@ -4,7 +4,7 @@ import os import subprocess import math -import numpy as np +from functools import reduce class Mock(object): pass @@ -12,6 +12,9 @@ class Mock(object): backend = os.environ['BACKEND'] AntaresGlobal = Mock() +def product(arrlist): + return reduce((lambda x, y: x * y), arrlist) + def wait_for(func, timeout=None, args=[]): if not timeout: return func(*args) diff --git a/backends/c-mcpu/schedule/standard/default.py b/backends/c-mcpu/schedule/standard/default.py index 40dfef69..e90f984b 100644 --- a/backends/c-mcpu/schedule/standard/default.py +++ b/backends/c-mcpu/schedule/standard/default.py @@ -3,7 +3,6 @@ from tvm import te import numpy as np -import psutil def schedule(attrs): cfg, s = attrs.auto_config, attrs.scheduler diff --git a/docker/Dockerfile.c-base b/docker/Dockerfile.c-base index da425714..a48c3fb6 100644 --- a/docker/Dockerfile.c-base +++ b/docker/Dockerfile.c-base @@ -21,8 +21,8 @@ RUN bash -c 'rm -rf ~/.local/antares/3rdparty/tvm/build/{CMake*,Makefile,cmake_i RUN bash -c 'rm -rf ~/.local/antares/3rdparty/tvm/{src,include,golang,tests,3rdparty,device-stub,apps,.??*}' RUN echo '' > ~/.local/antares/3rdparty/tvm/python/tvm/relay/__init__.py -ENV ANTARES_VERSION 0.3.0_0 +ENV ANTARES_VERSION 0.3.1 RUN cd ~ && git clone https://github.com/microsoft/antares --single-branch --depth 1 antares_core && mv ~/.local/antares/3rdparty antares_core RUN cd ~ && sed -i "s/@VERSION@/${ANTARES_VERSION}/g" /antares/engine/dist-info/METADATA && cp -r /antares/engine/dist-info ~/antares-${ANTARES_VERSION}.dist-info -RUN cd ~ && rm -rf antares_core/.??* && zip -r /antares-${ANTARES_VERSION}-py3-none-linux_x86_64.whl antares* >/dev/null +RUN cd ~ && rm -rf antares_core/.??* && zip -r /antares-${ANTARES_VERSION}-py3-none-manylinux1_x86_64.whl antares* >/dev/null diff --git a/engine/device-stub/tvm_extra.patch b/engine/device-stub/tvm_extra.patch new file mode 100644 index 00000000..b4c94f62 --- /dev/null +++ b/engine/device-stub/tvm_extra.patch @@ -0,0 +1,123 @@ +diff --git a/src/runtime/dso_library.cc b/src/runtime/dso_library.cc +index 81eb30ee1..785fb48ac 100644 +--- a/src/runtime/dso_library.cc ++++ b/src/runtime/dso_library.cc +@@ -115,15 +115,16 @@ void DSOLibrary::Unload() { + #else + + void DSOLibrary::Load(const std::string& name) { ++ abort(); /* + lib_handle_ = dlopen(name.c_str(), RTLD_LAZY | RTLD_LOCAL); + ICHECK(lib_handle_ != nullptr) << "Failed to load dynamic shared library " << name << " " +- << dlerror(); ++ << dlerror(); */ + } + +-void* DSOLibrary::GetSymbol_(const char* name) { return dlsym(lib_handle_, name); } ++void* DSOLibrary::GetSymbol_(const char* name) { abort(); /* return dlsym(lib_handle_, name); */ } + + void DSOLibrary::Unload() { +- dlclose(lib_handle_); ++ abort(); // dlclose(lib_handle_); + lib_handle_ = nullptr; + } + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 7293abb60..d741ce2a2 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -1,6 +1,10 @@ + cmake_minimum_required(VERSION 3.2) + project(tvm C CXX) + ++set(CMAKE_C_FLAGS ${CMAKE_C_FLAGS} "-static-libgcc -static-libstdc++") ++set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "-static-libgcc -static-libstdc++") ++set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++") ++ + # Utility functions + include(cmake/utils/Utils.cmake) + include(cmake/utils/FindCUDA.cmake) +@@ -50,7 +54,7 @@ tvm_option(USE_FALLBACK_STL_MAP "Use TVM's POD compatible Map" OFF) + tvm_option(USE_ETHOSN "Build with Arm Ethos-N" OFF) + tvm_option(USE_CMSISNN "Build with Arm CMSIS-NN" OFF) + tvm_option(INDEX_DEFAULT_I64 "Defaults the index datatype to int64" ON) +-tvm_option(USE_LIBBACKTRACE "Build libbacktrace to supply linenumbers on stack traces" AUTO) ++# tvm_option(USE_LIBBACKTRACE "Build libbacktrace to supply linenumbers on stack traces" AUTO) + tvm_option(BUILD_STATIC_RUNTIME "Build static version of libtvm_runtime" OFF) + tvm_option(USE_PAPI "Use Performance Application Programming Interface (PAPI) to read performance counters" OFF) + tvm_option(USE_GTEST "Use GoogleTest for C++ sanity tests" AUTO) +@@ -497,7 +501,7 @@ target_compile_definitions(tvm PUBLIC DMLC_USE_LOGGING_LIBRARY=) + + # logging option for libbacktrace +-include(cmake/modules/Logging.cmake) ++# include(cmake/modules/Logging.cmake) + + include(cmake/modules/contrib/PAPI.cmake) + +diff --git a/src/runtime/threading_backend.cc b/src/runtime/threading_backend.cc +index 5b3093ac8..ce0d33fa1 100644 +--- a/src/runtime/threading_backend.cc ++++ b/src/runtime/threading_backend.cc +@@ -127,7 +127,7 @@ class ThreadGroup::Impl { + #if defined(__ANDROID__) + sched_setaffinity(threads_[i].native_handle(), sizeof(cpu_set_t), &cpuset); + #else +- pthread_setaffinity_np(threads_[i].native_handle(), sizeof(cpu_set_t), &cpuset); ++ abort(); // pthread_setaffinity_np(threads_[i].native_handle(), sizeof(cpu_set_t), &cpuset); + #endif + } + if (exclude_worker0) { // main thread run task +@@ -167,7 +167,7 @@ class ThreadGroup::Impl { + #if defined(__ANDROID__) + sched_setaffinity(pthread_self(), sizeof(cpu_set_t), &cpuset); + #else +- pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); ++ abort(); // pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + #endif + #endif + } +@@ -227,7 +227,7 @@ class ThreadGroup::Impl { + + ThreadGroup::ThreadGroup(int num_workers, std::function worker_callback, + bool exclude_worker0) +- : impl_(new ThreadGroup::Impl(num_workers, worker_callback, exclude_worker0)) {} ++ : impl_((abort(), nullptr) /* new ThreadGroup::Impl(num_workers, worker_callback, exclude_worker0) */) {} + ThreadGroup::~ThreadGroup() { delete impl_; } + void ThreadGroup::Join() { impl_->Join(); } + +diff --git a/src/support/parallel_for.cc b/src/support/parallel_for.cc +index e90967562..e55ed2b25 100644 +--- a/src/support/parallel_for.cc ++++ b/src/support/parallel_for.cc +@@ -49,6 +49,8 @@ std::vector> rr_partitioner(int begin, int end, int step, int n + + void parallel_for(int begin, int end, const std::function& f, int step, + const PartitionerFuncType partitioner) { ++ abort(); ++#if 0 + static bool GLOBAL_PARALLEL_FOR_FLAG{false}; + static std::mutex M_GLOBAL_PARALLEL_FOR_FLAG; + { +@@ -91,10 +93,13 @@ void parallel_for(int begin, int end, const std::function& f, int ste + } catch (const std::exception& e) { + LOG(FATAL) << "Parallel_for error with " << e.what(); + } ++#endif + } + + void parallel_for_dynamic(int begin, int end, int num_threads, + const std::function& f) { ++ abort(); ++#if 0 + // Step 1. Sanity checks + if (begin == end) { + return; +@@ -138,6 +143,7 @@ void parallel_for_dynamic(int begin, int end, int num_threads, + } catch (const std::exception& e) { + LOG(FATAL) << "RuntimeError: parallel_for_dynamic error with " << e.what(); + } ++#endif + } + + } // namespace support diff --git a/engine/dist-info/METADATA b/engine/dist-info/METADATA index fabfee2c..7d665c45 100644 --- a/engine/dist-info/METADATA +++ b/engine/dist-info/METADATA @@ -8,13 +8,8 @@ Keywords: antares dnn Platform: UNKNOWN Requires-Dist: wheel (>=0.26) ; python_version >= "3" Requires-Dist: tornado ; python_version >= "3" -Requires-Dist: psutil ; python_version >= "3" Requires-Dist: numpy ; python_version >= "3" Requires-Dist: decorator ; python_version >= "3" -Requires-Dist: attrs ; python_version >= "3" -Requires-Dist: pytest ; python_version >= "3" -Requires-Dist: typed_ast ; python_version >= "3" -Requires-Dist: cloudpickle ; python_version >= "3" Antares is an engine to automatically generate optimized kernels for multi-platform diff --git a/graph_evaluator/client.py b/graph_evaluator/client.py index 49dce9eb..ff324004 100644 --- a/graph_evaluator/client.py +++ b/graph_evaluator/client.py @@ -67,7 +67,10 @@ def eval(kernel_path, **kwargs): return eval_client.eval(kernel_path, **kwargs) is_wsl = 1 if (os.environ.get('IS_WSL', '0') == '1') else 0 - if is_wsl == os.system(f'file {evaluator_path} | grep "MS Windows" >/dev/null 2>&1'): + with open(evaluator_path, 'rb') as fp: + exec_magic = fp.read(2) + + if is_wsl == 0 and exec_magic == b'MZ': print(f"Antares should run under WSL-1/2 for this backend({backend}), otherwise, evaluation would be skipped.") exit(1) diff --git a/lang/generic.py b/lang/generic.py index cb2e3131..7c34d47b 100644 --- a/lang/generic.py +++ b/lang/generic.py @@ -150,7 +150,8 @@ def select_plan(plan_name): try: return select_plan(plan) except ModuleNotFoundError: - setattr(AntaresGlobal, 'mode', 'antares') + traceback.print_exc() + # setattr(AntaresGlobal, 'mode', 'antares') return None