Skip to content

Commit e2d9b1a

Browse files
iindyk authored and tfx-copybara committed
Updates struct2tensor's bundled arrow version to 6.0.1.
Additionally: (1) uses `parquet_types` pre-generated by arrow instead of bundling it; (2) uses `arrow_format` headers pre-generated by arrow instead of generating them with flatbuffers. PiperOrigin-RevId: 473794356
1 parent d244528 commit e2d9b1a

File tree

8 files changed

+107
-10062
lines changed

8 files changed

+107
-10062
lines changed

.bazelrc

+1-1
Original file line number | Diff line number | Diff line change
@@ -46,4 +46,4 @@ build:macos --host_cxxopt=-std=c++17
4646
# Suppress all warning messages.
4747
build:short_logs --output_filter=DONT_MATCH_ANYTHING
4848

49-
build:macos --macos_minimum_os=10.9
49+
build:macos --macos_minimum_os=10.12

RELEASE.md

+1
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
## Bug Fixes and Other Changes
88

9+
* Updates bundled `arrow` version to 6.0.1.
910
* Depends on `tensorflow>=2.10.0,<2.11`.
1011

1112
## Deprecations

struct2tensor/workspace.bzl

+7-6
Original file line number | Diff line number | Diff line change
@@ -40,17 +40,18 @@ def struct2tensor_workspace():
4040
)
4141
# LINT.ThenChange(third_party/thrift.BUILD:thrift_gen_version)
4242

43+
# Use the last commit on the relevant release branch to update.
4344
# LINT.IfChange(arrow_archive_version)
45+
ARROW_COMMIT = "347a88ff9d20e2a4061eec0b455b8ea1aa8335dc" # 6.0.1
46+
# LINT.ThenChange(third_party/arrow.BUILD:arrow_gen_version)
47+
4448
http_archive(
4549
name = "arrow",
4650
build_file = "//third_party:arrow.BUILD",
47-
sha256 = "d7b3838758a365c8c47d55ab0df1006a70db951c6964440ba354f81f518b8d8d",
48-
strip_prefix = "arrow-apache-arrow-0.16.0",
49-
urls = [
50-
"https://github.com/apache/arrow/archive/apache-arrow-0.16.0.tar.gz",
51-
],
51+
sha256 = "55fc466d0043c4cce0756bc18e1e62b3233be74c9afe8dc0d18420b9a5fd9714",
52+
strip_prefix = "arrow-%s" % ARROW_COMMIT,
53+
urls = ["https://github.com/apache/arrow/archive/%s.zip" % ARROW_COMMIT],
5254
)
53-
# LINT.ThenChange(third_party/arrow.BUILD:parquet_gen_version)
5455

5556
_TFMD_COMMIT_HASH = "6703ba1095f8a2fe3567f1a2209c819b530c64de" # 1.10.0
5657
http_archive(

third_party/BUILD

+2-16
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2020 Google LLC
1+
# Copyright 2022 Google LLC
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -12,18 +12,4 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
# Description:
16-
# librarys for exposing vendored files in third_party.
17-
18-
licenses(["notice"]) # Apache 2.0
19-
20-
exports_files([
21-
"parquet/parquet_types.cpp",
22-
])
23-
24-
cc_library(
25-
name = "parquet_types_h",
26-
hdrs = ["parquet/parquet_types.h"],
27-
includes = ["."],
28-
visibility = ["//visibility:public"],
29-
)
15+
licenses(["notice"])

third_party/arrow.BUILD

+96-48
Original file line number | Diff line number | Diff line change
@@ -15,63 +15,43 @@
1515
# Description:
1616
# Apache arrow library
1717

18-
load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library")
19-
20-
flatbuffer_cc_library(
21-
name = "arrow_format",
22-
srcs = [
23-
"cpp/src/arrow/ipc/feather.fbs",
24-
"format/File.fbs",
25-
"format/Message.fbs",
26-
"format/Schema.fbs",
27-
"format/SparseTensor.fbs",
28-
"format/Tensor.fbs",
29-
],
30-
flatc_args = [
31-
"--scoped-enums",
32-
"--gen-object-api",
33-
],
34-
out_prefix = "cpp/src/generated/",
35-
)
36-
3718
package(default_visibility = ["//visibility:public"])
3819

20+
# LINT.IfChange(arrow_gen_version)
21+
arrow_version_replace_cmd = ("sed " +
22+
"-e 's/@ARROW_VERSION_MAJOR@/6/g' " +
23+
"-e 's/@ARROW_VERSION_MINOR@/0/g' " +
24+
"-e 's/@ARROW_VERSION_PATCH@/1/g' ")
25+
# LINT.ThenChange(../workspace.bzl:arrow_archive_version)
26+
3927
genrule(
4028
name = "arrow_util_config",
4129
srcs = ["cpp/src/arrow/util/config.h.cmake"],
4230
outs = ["cpp/src/arrow/util/config.h"],
43-
cmd = ("sed " +
44-
"-e 's/@ARROW_VERSION_MAJOR@/0/g' " +
45-
"-e 's/@ARROW_VERSION_MINOR@/16/g' " +
46-
"-e 's/@ARROW_VERSION_PATCH@/0/g' " +
31+
cmd = (arrow_version_replace_cmd +
4732
"-e 's/cmakedefine/define/g' " +
4833
"$< >$@"),
4934
)
5035

51-
# LINT.IfChange(parquet_gen_version)
5236
genrule(
5337
name = "parquet_version_h",
5438
srcs = ["cpp/src/parquet/parquet_version.h.in"],
5539
outs = ["cpp/src/parquet/parquet_version.h"],
56-
cmd = ("sed " +
57-
"-e 's/@PARQUET_VERSION_MAJOR@/1/g' " +
58-
"-e 's/@PARQUET_VERSION_MINOR@/5/g' " +
59-
"-e 's/@PARQUET_VERSION_PATCH@/1/g' " +
40+
# Parquet packaged with arrow uses arrow versioning.
41+
cmd = (arrow_version_replace_cmd +
6042
"$< >$@"),
6143
)
62-
# LINT.ThenChange(../workspace.bzl:arrow_archive_version)
6344

64-
cc_library(
65-
name = "xxhash",
66-
srcs = [],
67-
hdrs = [
68-
"cpp/src/arrow/vendored/xxhash/xxh3.h",
69-
"cpp/src/arrow/vendored/xxhash/xxhash.c",
70-
"cpp/src/arrow/vendored/xxhash/xxhash.h",
71-
],
72-
copts = ["-Wno-implicit-fallthrough"],
73-
visibility = ["//visibility:private"],
74-
)
45+
# These are originally generated by flatbuffers. We use them instead of
46+
# re-generating from fbs files under "format/" and "cpp/src/arrow/ipc/".
47+
fbs_headers = [
48+
"cpp/src/generated/feather_generated.h",
49+
"cpp/src/generated/File_generated.h",
50+
"cpp/src/generated/Message_generated.h",
51+
"cpp/src/generated/Schema_generated.h",
52+
"cpp/src/generated/SparseTensor_generated.h",
53+
"cpp/src/generated/Tensor_generated.h",
54+
]
7555

7656
cc_library(
7757
name = "arrow",
@@ -97,8 +77,16 @@ cc_library(
9777
"cpp/src/arrow/**/*.h",
9878
"cpp/src/parquet/**/*.h",
9979
"cpp/src/parquet/**/*.cc",
80+
"cpp/src/arrow/tensor/*.cc",
81+
"cpp/src/generated/parquet_types.cpp",
10082
],
10183
exclude = [
84+
# Exclude files that we don't depend on but that need
85+
# additional dependencies like boost, snappy etc.
86+
"cpp/src/arrow/util/bpacking_avx2.*",
87+
"cpp/src/arrow/util/bpacking_avx512.*",
88+
"cpp/src/arrow/util/bpacking_neon*",
89+
"cpp/src/arrow/util/bpacking_simd*",
10290
"cpp/src/arrow/util/compression_brotli*",
10391
"cpp/src/arrow/util/compression_bz2*",
10492
"cpp/src/arrow/util/compression_lz4*",
@@ -115,27 +103,87 @@ cc_library(
115103
"cpp/src/**/stream_to_file.cc",
116104
"cpp/src/arrow/ipc/json*.cc",
117105
"cpp/src/arrow/vendored/xxhash/**",
118-
"cpp/src/parquet/encryption_internal.cc",
106+
#"cpp/src/arrow/vendored/datetime/**",
107+
"cpp/src/parquet/encryption/encryption_internal.cc",
119108
],
120-
) + [
121-
"@struct2tensor//third_party:parquet/parquet_types.cpp",
122-
],
109+
),
123110
hdrs = [
124-
# declare header from above genrule
111+
# Headers from above genrules.
125112
"cpp/src/arrow/util/config.h",
126113
"cpp/src/parquet/parquet_version.h",
127-
],
114+
# This is originally generated by thrift compiler. We include the
115+
# files generated by arrow to avoid requiring the compiler
116+
# (thrift library is still needed though).
117+
"cpp/src/generated/parquet_types.h",
118+
] + fbs_headers,
128119
includes = [
129120
"cpp/src",
130121
"cpp/src/arrow/vendored/xxhash",
122+
"cpp/thirdparty/flatbuffers/include",
131123
],
132124
textual_hdrs = [
133125
"cpp/src/arrow/vendored/xxhash/xxhash.c",
134126
],
135127
deps = [
136-
":arrow_format",
128+
":datetime",
129+
":flatbuffers",
137130
":xxhash",
138-
"@struct2tensor//third_party:parquet_types_h",
139131
"@thrift",
140132
],
141133
)
134+
135+
cc_library(
136+
name = "xxhash",
137+
srcs = [],
138+
hdrs = [
139+
"cpp/src/arrow/vendored/xxhash/xxhash.c",
140+
"cpp/src/arrow/vendored/xxhash/xxhash.h",
141+
],
142+
copts = ["-Wno-implicit-fallthrough"],
143+
includes = ["."],
144+
visibility = ["//visibility:private"],
145+
)
146+
147+
config_setting(
148+
name = "windows",
149+
constraint_values = [
150+
"@bazel_tools//platforms:windows",
151+
],
152+
)
153+
154+
cc_library(
155+
name = "datetime",
156+
srcs = [
157+
"cpp/src/arrow/vendored/datetime/tz.cpp",
158+
],
159+
hdrs = [
160+
"cpp/src/arrow/vendored/datetime/date.h",
161+
"cpp/src/arrow/vendored/datetime/ios.h",
162+
"cpp/src/arrow/vendored/datetime/tz.h",
163+
"cpp/src/arrow/vendored/datetime/tz_private.h",
164+
"cpp/src/arrow/vendored/datetime/visibility.h",
165+
],
166+
includes = ["."],
167+
linkopts = select({
168+
":windows": [
169+
"ole32.lib", # for CoTaskMemFree
170+
"shell32.lib", # for SHGetKnownFolderPath
171+
],
172+
"//conditions:default": [
173+
],
174+
}),
175+
visibility = ["//visibility:private"],
176+
)
177+
178+
cc_library(
179+
name = "flatbuffers",
180+
srcs = [],
181+
hdrs = [
182+
"cpp/thirdparty/flatbuffers/include/flatbuffers/base.h",
183+
"cpp/thirdparty/flatbuffers/include/flatbuffers/flatbuffers.h",
184+
"cpp/thirdparty/flatbuffers/include/flatbuffers/stl_emulation.h",
185+
],
186+
copts = ["-Wno-implicit-fallthrough"],
187+
includes = ["."],
188+
visibility = ["//visibility:private"],
189+
)

third_party/parquet/README.md

-8
This file was deleted.

0 commit comments

Comments
 (0)