forked from PaddlePaddle/PaddleNLP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
CMakeLists.txt
215 lines (183 loc) · 7.76 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Project declaration and top-level build switches for fast_tokenizer.
cmake_minimum_required(VERSION 3.10)
project(tokenizers
  VERSION 1.0
  LANGUAGES CXX C)

# User-facing switches: unit tests are off by default, Python packaging is on.
option(WITH_TESTING "Compile PaddleNLP fast_tokenizer with unit testing" OFF)
option(WITH_PYTHON "Compile PaddleNLP fast_tokenizer with python interpreter" ON)

# Directory-scoped definition: applies to every target declared from here on.
add_definitions(-DFASTTOKENIZER_LIB)

# Let include() resolve helper modules from the local cmake/ folder.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

# Starts out empty; the non-Windows platform branch of this file reassigns it.
set(PUBLIC_DEPEND_LIBS "")
# Fall back to a Release build when no build type was requested.
# FORCE is needed so the value replaces the empty cache entry.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE
      "Release"
      CACHE
        STRING
        "Choose the type of build, options are: Debug Release
RelWithDebInfo MinSizeRel."
      FORCE)
endif()
# Select C++11: through CMAKE_CXX_STANDARD on Windows, through an explicit
# compiler flag everywhere else.
if(WIN32)
  set(CMAKE_CXX_STANDARD 11)
else()
  string(APPEND CMAKE_CXX_FLAGS " --std=c++11")
  # Shared libraries built on an aarch64 host are linked with --no-as-needed.
  # NOTE(review): this tests the *host* processor, not the target one —
  # confirm that is what cross-compiled builds expect.
  if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -Wl,--no-as-needed")
  endif()
endif()
# Platform-specific compiler and linker flags.
if(WIN32)
  # Compiler flag variables (global and per configuration) patched below.
  set(msvc_compile_flag_vars
      CMAKE_CXX_FLAGS
      CMAKE_CXX_FLAGS_DEBUG
      CMAKE_CXX_FLAGS_RELEASE
      CMAKE_CXX_FLAGS_MINSIZEREL
      CMAKE_CXX_FLAGS_RELWITHDEBINFO
      CMAKE_C_FLAGS
      CMAKE_C_FLAGS_DEBUG
      CMAKE_C_FLAGS_RELEASE
      CMAKE_C_FLAGS_MINSIZEREL
      CMAKE_C_FLAGS_RELWITHDEBINFO)

  # Swap /MD for /MT (appending /MT when neither is present) and force the
  # result into the cache so later configure runs start from it.
  foreach(flag_var ${msvc_compile_flag_vars})
    if(${flag_var} MATCHES "/MD")
      string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
    elseif(NOT (${flag_var} MATCHES "/MT"))
      set(${flag_var} "${${flag_var}} /MT")
    endif()
    set(${flag_var} "${${flag_var}}" CACHE STRING "msvc compiler flags" FORCE)
  endforeach()

  add_definitions("-DNOMINMAX")

  # Turn compiler warnings off: rewrite any /W1../W4 to /W0, then add /w to
  # the configuration-independent flags.
  foreach(flag_var ${msvc_compile_flag_vars})
    string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}")
  endforeach()
  foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
    string(APPEND ${flag_var} " /w")
  endforeach()

  # Remove /Zi and /ZI from the global, Release and MinSizeRel flags.
  foreach(flag_var
          CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL
          CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL)
    if(${flag_var} MATCHES "/Z[iI]")
      string(REGEX REPLACE "/Z[iI]" "" ${flag_var} "${${flag_var}}")
    endif()
  endforeach()

  set(CMAKE_SUPPRESS_REGENERATION ON)
  set(CMAKE_STATIC_LIBRARY_PREFIX lib)

  # /bigobj (and /Zc:inline under the Ninja generator) are applied to every
  # configuration, so RelWithDebInfo and MinSizeRel builds are compiled with
  # the same options as Debug and Release.
  foreach(flag_lang C CXX)
    foreach(flag_config DEBUG RELEASE MINSIZEREL RELWITHDEBINFO)
      string(APPEND CMAKE_${flag_lang}_FLAGS_${flag_config} " /bigobj")
      if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
        string(APPEND CMAKE_${flag_lang}_FLAGS_${flag_config} " /Zc:inline")
      endif()
    endforeach()
  endforeach()

  # Silence selected linker warnings and keep MSVCRT.LIB out of the default
  # libraries for every kind of link step.
  foreach(flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
                   CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS)
    string(APPEND ${flag_var}
           " /ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221")
    string(APPEND ${flag_var} " /NODEFAULTLIB:MSVCRT.LIB")
  endforeach()
else()
  string(APPEND CMAKE_CXX_FLAGS " -O3 -fPIC")
  # NOTE(review): -ldl / -lpthread are linker inputs placed in the compile
  # flags; kept as-is because other parts of the project may rely on
  # CMAKE_CXX_FLAGS carrying them — verify before moving them to link rules.
  if(NOT APPLE)
    string(APPEND CMAKE_CXX_FLAGS " -ldl")
    if(NOT ANDROID)
      string(APPEND CMAKE_CXX_FLAGS " -lpthread")
    else()
      # Appended after -O3 on Android.
      string(APPEND CMAKE_CXX_FLAGS " -Os")
    endif()
  endif()
  set(PUBLIC_DEPEND_LIBS ${CMAKE_DL_LIBS})
endif()
# Use the project source root both as install prefix and as include root.
# NOTE(review): the plain assignment below shadows any CMAKE_INSTALL_PREFIX
# given on the command line — confirm that overriding it is intended.
set(CMAKE_INSTALL_PREFIX "${PROJECT_SOURCE_DIR}")
set(TOKENIZERS_INSTALL_INCLUDE_DIR "${PROJECT_SOURCE_DIR}")

# Echo the effective build settings while configuring.
message("CMAKE_BUILD_TYPE = ${CMAKE_BUILD_TYPE}")
message("CMAKE_CXX_FLAGS = ${CMAKE_CXX_FLAGS}")
message("CMAKE_EXE_LINKER_FLAGS = ${CMAKE_EXE_LINKER_FLAGS}")
# Base URL used when cloning dependent repositories. Pass
# -DGIT_URL=<mirror> to clone from a GitHub mirror and speed the clones up.
# Declared as a STRING cache entry: option() is meant for ON/OFF switches
# and is not a reliable way to carry a URL.
set(GIT_URL "https://github.com" CACHE STRING "Git URL to clone dependent repos")
if(NOT GIT_URL)
  # Covers an empty value as well as build trees whose cache still holds the
  # OFF written by the former boolean option.
  set(GIT_URL "https://github.com")
endif()

# Headers are included relative to the project source root.
include_directories(${TOKENIZERS_INSTALL_INCLUDE_DIR})

# Helper modules are looked up through CMAKE_MODULE_PATH.
# presumably third_party is what consumes GIT_URL — verify in that module.
include(generic)
include(third_party)

add_subdirectory(fast_tokenizer)
# Packaging: a Python wheel when WITH_PYTHON is ON, otherwise a standalone
# C++ package directory under the build tree.
if(WITH_PYTHON)
  add_subdirectory(python)
  if(NOT APPLE AND NOT WIN32) # Linux
    # The manylinux1_x86_64 tag is only correct for x86_64 binaries. On any
    # other processor (e.g. aarch64) no --plat-name is passed, so bdist_wheel
    # picks the platform tag of the machine it runs on.
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
      set(wheel_plat_name_arg --plat-name=manylinux1_x86_64)
    else()
      set(wheel_plat_name_arg "")
    endif()
    add_custom_target(build_tokenizers_bdist_wheel ALL
      COMMAND ${PYTHON_EXECUTABLE} setup.py bdist_wheel ${wheel_plat_name_arg}
      COMMENT "Packing whl packages------>>>"
      DEPENDS copy_python_tokenizers)
  else()
    # py_env is not set in this file; presumably provided by an included
    # module — verify.
    add_custom_target(build_tokenizers_bdist_wheel ALL
      COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
      COMMENT "Packing whl packages------>>>"
      DEPENDS copy_python_tokenizers)
  endif()
else() # Pack fast_tokenizer cpp lib
  set(CPP_PACKAGE_DIR ${CMAKE_BINARY_DIR}/cpp/fast_tokenizer)

  # Create the package layout: lib/, include/ and third_party/{include,lib}.
  add_custom_target(build_cpp_package_dir ALL
    COMMAND ${CMAKE_COMMAND} -E make_directory
            ${CPP_PACKAGE_DIR}/lib
            ${CPP_PACKAGE_DIR}/include
            ${CPP_PACKAGE_DIR}/third_party/include
            ${CPP_PACKAGE_DIR}/third_party/lib
    DEPENDS core_tokenizers)

  # copy cmake (performed at configure time)
  file(COPY ${PROJECT_SOURCE_DIR}/FastTokenizer.cmake DESTINATION ${CPP_PACKAGE_DIR}/)

  # copy headers (performed at configure time); test, demo and pybind
  # directories are left out.
  file(COPY ${PROJECT_SOURCE_DIR}/fast_tokenizer/
       DESTINATION ${CPP_PACKAGE_DIR}/include/fast_tokenizer/
       FILES_MATCHING PATTERN "*.h"
       PATTERN "test" EXCLUDE
       PATTERN "demo" EXCLUDE
       PATTERN "pybind" EXCLUDE)

  # Merge the third-party include directories into the package.
  add_custom_target(copy_third_party_headers ALL
    COMMAND ${CMAKE_COMMAND} -E copy_directory
            ${GFLAGS_INCLUDE_DIR} ${ICU_INCLUDE_DIR}
            ${GLOG_INCLUDE_DIR} ${JSON_INCLUDE_DIR} ${RE2_INCLUDE_DIR}
            ${CPP_PACKAGE_DIR}/third_party/include
    DEPENDS build_cpp_package_dir)

  # copy library
  set(TOKENIZER_CORE_NAME "core_tokenizers")
  set(TOKENIZER_CORE_PATH ${CMAKE_BINARY_DIR}/fast_tokenizer)
  if(WIN32)
    # On Windows the ICU DLLs and import libraries are shipped as well.
    set(ICU_DLL_DIR ${CMAKE_BINARY_DIR}/third_party/icu/src/extern_icu/icu4c/bin64)
    set(ICU_LIB_DIR ${CMAKE_BINARY_DIR}/third_party/icu/src/extern_icu/icu4c/lib64)
    add_custom_target(copy_shared_library ALL
      COMMAND ${CMAKE_COMMAND} -E copy
              ${TOKENIZER_CORE_PATH}/${TOKENIZER_CORE_NAME}.dll
              ${TOKENIZER_CORE_PATH}/${TOKENIZER_CORE_NAME}.lib
              ${CPP_PACKAGE_DIR}/lib
      COMMAND ${CMAKE_COMMAND} -E copy
              ${ICU_DLL_DIR}/icudt70.dll ${ICU_DLL_DIR}/icuuc70.dll
              ${ICU_LIB_DIR}/icudt.lib ${ICU_LIB_DIR}/icuuc.lib
              ${CPP_PACKAGE_DIR}/third_party/lib
      DEPENDS build_cpp_package_dir core_tokenizers)
  elseif(APPLE)
    set(TOKENIZER_CORE_LIBS_PATH "${TOKENIZER_CORE_PATH}/lib${TOKENIZER_CORE_NAME}.dylib")
    add_custom_target(copy_shared_library ALL
      COMMAND ${CMAKE_COMMAND} -E copy ${TOKENIZER_CORE_LIBS_PATH} ${CPP_PACKAGE_DIR}/lib
      DEPENDS build_cpp_package_dir core_tokenizers)
  else()
    set(TOKENIZER_CORE_LIBS_PATH "${TOKENIZER_CORE_PATH}/lib${TOKENIZER_CORE_NAME}.so")
    add_custom_target(copy_shared_library ALL
      COMMAND ${CMAKE_COMMAND} -E copy ${TOKENIZER_CORE_LIBS_PATH} ${CPP_PACKAGE_DIR}/lib
      DEPENDS build_cpp_package_dir core_tokenizers)
  endif()

  # Record the commit hash of the sources in the package. git runs from the
  # source tree so the lookup also works when the build directory lies
  # outside the repository; the output path is absolute.
  # GIT_EXECUTABLE is not set in this file; presumably provided by an
  # included module — verify.
  add_custom_target(create_commit_id_file ALL
    COMMAND ${GIT_EXECUTABLE} log -1 --format=%H > ${CPP_PACKAGE_DIR}/commit.log
    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
    DEPENDS copy_shared_library)
endif()