Skip to content

Commit b792ff7

Browse files
authored
Make JSON support optional via a feature flag (#2300) (#2601)
* Add json feature (#2300) * Fix parquet * Add json feature to wasm build * Update README
1 parent 20282d1 commit b792ff7

File tree

12 files changed

+83
-51
lines changed

12 files changed

+83
-51
lines changed

.github/workflows/arrow.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ jobs:
155155
- name: Build
156156
run: |
157157
cd arrow
158-
cargo build --no-default-features --features=csv,ipc,simd,ffi --target wasm32-unknown-unknown
159-
cargo build --no-default-features --features=csv,ipc,simd,ffi --target wasm32-wasi
158+
cargo build --no-default-features --features=json,csv,ipc,simd,ffi --target wasm32-unknown-unknown
159+
cargo build --no-default-features --features=json,csv,ipc,simd,ffi --target wasm32-wasi
160160
161161
clippy:
162162
name: Clippy

arrow/Cargo.toml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,8 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r
4444
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
4545

4646
[dependencies]
47-
serde = { version = "1.0", default-features = false }
48-
serde_derive = { version = "1.0", default-features = false }
49-
serde_json = { version = "1.0", default-features = false, features = ["std"] }
47+
serde = { version = "1.0", default-features = false, features = ["derive"], optional = true }
48+
serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true }
5049
indexmap = { version = "1.9", default-features = false, features = ["std"] }
5150
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true }
5251
num = { version = "0.4", default-features = false, features = ["std"] }
@@ -69,10 +68,11 @@ bitflags = { version = "1.2.1", default-features = false }
6968
zstd = { version = "0.11.1", default-features = false, optional = true }
7069

7170
[features]
72-
default = ["csv", "ipc"]
71+
default = ["csv", "ipc", "json"]
7372
ipc_compression = ["ipc", "zstd", "lz4"]
7473
csv = ["csv_crate"]
7574
ipc = ["flatbuffers"]
75+
json = ["serde", "serde_json"]
7676
simd = ["packed_simd"]
7777
prettyprint = ["comfy-table"]
7878
# The test utils feature enables code used in benchmarks and tests but
@@ -192,6 +192,7 @@ required-features = ["csv"]
192192
[[bench]]
193193
name = "json_reader"
194194
harness = false
195+
required-features = ["json"]
195196

196197
[[bench]]
197198
name = "equal"

arrow/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ However, for historical reasons, this crate uses versions with major numbers gre
4242
The `arrow` crate provides the following features which may be enabled in your `Cargo.toml`:
4343

4444
- `csv` (default) - support for reading and writing Arrow arrays to/from csv files
45+
- `json` (default) - support for reading and writing Arrow arrays to/from json files
4546
- `ipc` (default) - support for reading [Arrow IPC Format](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc), also used as the wire protocol in [arrow-flight](https://crates.io/crates/arrow-flight)
4647
- `ipc_compression` - Enables reading and writing compressed IPC streams (also enables `ipc`)
4748
- `prettyprint` - support for formatting record batches as textual columns

arrow/src/array/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,11 +190,12 @@ use crate::datatypes::*;
190190
pub use self::array::Array;
191191
pub use self::array::ArrayAccessor;
192192
pub use self::array::ArrayRef;
193-
pub(crate) use self::data::layout;
194193
pub use self::data::ArrayData;
195194
pub use self::data::ArrayDataBuilder;
196195
pub use self::data::ArrayDataRef;
197-
pub(crate) use self::data::BufferSpec;
196+
197+
#[cfg(feature = "ipc")]
198+
pub(crate) use self::data::{layout, BufferSpec};
198199

199200
pub use self::array_binary::BinaryArray;
200201
pub use self::array_binary::LargeBinaryArray;

arrow/src/datatypes/datatype.rs

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@ use num::BigInt;
1919
use std::cmp::Ordering;
2020
use std::fmt;
2121

22-
use serde_derive::{Deserialize, Serialize};
23-
use serde_json::{json, Value, Value::String as VString};
24-
2522
use crate::error::{ArrowError, Result};
2623
use crate::util::decimal::singed_cmp_le_bytes;
2724

@@ -42,7 +39,8 @@ use super::Field;
4239
/// Nested types can themselves be nested within other arrays.
4340
/// For more information on these types please see
4441
/// [the physical memory layout of Apache Arrow](https://arrow.apache.org/docs/format/Columnar.html#physical-memory-layout).
45-
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
42+
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
43+
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
4644
pub enum DataType {
4745
/// Null type
4846
Null,
@@ -222,7 +220,8 @@ pub enum DataType {
222220
}
223221

224222
/// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds.
225-
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
223+
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
224+
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
226225
pub enum TimeUnit {
227226
/// Time in seconds.
228227
Second,
@@ -235,7 +234,8 @@ pub enum TimeUnit {
235234
}
236235

237236
/// YEAR_MONTH, DAY_TIME, MONTH_DAY_NANO interval in SQL style.
238-
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
237+
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
238+
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
239239
pub enum IntervalUnit {
240240
/// Indicates the number of elapsed whole months, stored as 4-byte integers.
241241
YearMonth,
@@ -253,7 +253,8 @@ pub enum IntervalUnit {
253253
}
254254

255255
// Sparse or Dense union layouts
256-
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
256+
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
257+
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
257258
pub enum UnionMode {
258259
Sparse,
259260
Dense,
@@ -1052,7 +1053,9 @@ pub(crate) fn validate_decimal256_precision_with_lt_bytes(
10521053

10531054
impl DataType {
10541055
/// Parse a data type from a JSON representation.
1055-
pub(crate) fn from(json: &Value) -> Result<DataType> {
1056+
#[cfg(feature = "json")]
1057+
pub(crate) fn from(json: &serde_json::Value) -> Result<DataType> {
1058+
use serde_json::Value;
10561059
let default_field = Field::new("", DataType::Boolean, true);
10571060
match *json {
10581061
Value::Object(ref map) => match map.get("name") {
@@ -1121,7 +1124,7 @@ impl DataType {
11211124
};
11221125
let tz = match map.get("timezone") {
11231126
None => Ok(None),
1124-
Some(VString(tz)) => Ok(Some(tz.clone())),
1127+
Some(serde_json::Value::String(tz)) => Ok(Some(tz.clone())),
11251128
_ => Err(ArrowError::ParseError(
11261129
"timezone must be a string".to_string(),
11271130
)),
@@ -1300,7 +1303,9 @@ impl DataType {
13001303
}
13011304

13021305
/// Generate a JSON representation of the data type.
1303-
pub fn to_json(&self) -> Value {
1306+
#[cfg(feature = "json")]
1307+
pub fn to_json(&self) -> serde_json::Value {
1308+
use serde_json::json;
13041309
match self {
13051310
DataType::Null => json!({"name": "null"}),
13061311
DataType::Boolean => json!({"name": "bool"}),

arrow/src/datatypes/field.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,30 +15,27 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use crate::error::{ArrowError, Result};
1819
use std::cmp::Ordering;
1920
use std::collections::BTreeMap;
2021
use std::hash::{Hash, Hasher};
2122

22-
use serde_derive::{Deserialize, Serialize};
23-
use serde_json::{json, Value};
24-
25-
use crate::error::{ArrowError, Result};
26-
2723
use super::DataType;
2824

2925
/// Describes a single column in a [`Schema`](super::Schema).
3026
///
3127
/// A [`Schema`](super::Schema) is an ordered collection of
3228
/// [`Field`] objects.
33-
#[derive(Serialize, Deserialize, Debug, Clone)]
29+
#[derive(Debug, Clone)]
30+
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3431
pub struct Field {
3532
name: String,
3633
data_type: DataType,
3734
nullable: bool,
3835
dict_id: i64,
3936
dict_is_ordered: bool,
4037
/// A map of key-value pairs containing additional custom meta data.
41-
#[serde(skip_serializing_if = "Option::is_none")]
38+
#[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
4239
metadata: Option<BTreeMap<String, String>>,
4340
}
4441

@@ -254,7 +251,9 @@ impl Field {
254251
}
255252

256253
/// Parse a `Field` definition from a JSON representation.
257-
pub fn from(json: &Value) -> Result<Self> {
254+
#[cfg(feature = "json")]
255+
pub fn from(json: &serde_json::Value) -> Result<Self> {
256+
use serde_json::Value;
258257
match *json {
259258
Value::Object(ref map) => {
260259
let name = match map.get("name") {
@@ -497,8 +496,9 @@ impl Field {
497496
}
498497

499498
/// Generate a JSON representation of the `Field`.
500-
pub fn to_json(&self) -> Value {
501-
let children: Vec<Value> = match self.data_type() {
499+
#[cfg(feature = "json")]
500+
pub fn to_json(&self) -> serde_json::Value {
501+
let children: Vec<serde_json::Value> = match self.data_type() {
502502
DataType::Struct(fields) => fields.iter().map(|f| f.to_json()).collect(),
503503
DataType::List(field)
504504
| DataType::LargeList(field)
@@ -507,7 +507,7 @@ impl Field {
507507
_ => vec![],
508508
};
509509
match self.data_type() {
510-
DataType::Dictionary(ref index_type, ref value_type) => json!({
510+
DataType::Dictionary(ref index_type, ref value_type) => serde_json::json!({
511511
"name": self.name,
512512
"nullable": self.nullable,
513513
"type": value_type.to_json(),
@@ -518,7 +518,7 @@ impl Field {
518518
"isOrdered": self.dict_is_ordered
519519
}
520520
}),
521-
_ => json!({
521+
_ => serde_json::json!({
522522
"name": self.name,
523523
"nullable": self.nullable,
524524
"type": self.data_type.to_json(),

arrow/src/datatypes/mod.rs

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,15 @@ pub type SchemaRef = Arc<Schema>;
5050
mod tests {
5151
use super::*;
5252
use crate::error::Result;
53+
use std::collections::{BTreeMap, HashMap};
54+
55+
#[cfg(feature = "json")]
5356
use crate::json::JsonSerializable;
54-
use serde_json::Value::{Bool, Number as VNumber, String as VString};
55-
use serde_json::{Number, Value};
56-
use std::{
57-
collections::{BTreeMap, HashMap},
58-
f32::NAN,
57+
58+
#[cfg(feature = "json")]
59+
use serde_json::{
60+
Number, Value,
61+
Value::{Bool, Number as VNumber, String as VString},
5962
};
6063

6164
#[test]
@@ -107,6 +110,7 @@ mod tests {
107110
}
108111

109112
#[test]
113+
#[cfg(feature = "json")]
110114
fn create_struct_type() {
111115
let _person = DataType::Struct(vec![
112116
Field::new("first_name", DataType::Utf8, false),
@@ -123,6 +127,7 @@ mod tests {
123127
}
124128

125129
#[test]
130+
#[cfg(feature = "json")]
126131
fn serde_struct_type() {
127132
let kv_array = [("k".to_string(), "v".to_string())];
128133
let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();
@@ -170,6 +175,7 @@ mod tests {
170175
}
171176

172177
#[test]
178+
#[cfg(feature = "json")]
173179
fn struct_field_to_json() {
174180
let f = Field::new(
175181
"address",
@@ -213,6 +219,7 @@ mod tests {
213219
}
214220

215221
#[test]
222+
#[cfg(feature = "json")]
216223
fn map_field_to_json() {
217224
let f = Field::new(
218225
"my_map",
@@ -273,6 +280,7 @@ mod tests {
273280
}
274281

275282
#[test]
283+
#[cfg(feature = "json")]
276284
fn primitive_field_to_json() {
277285
let f = Field::new("first_name", DataType::Utf8, false);
278286
let value: Value = serde_json::from_str(
@@ -289,6 +297,7 @@ mod tests {
289297
assert_eq!(value, f.to_json());
290298
}
291299
#[test]
300+
#[cfg(feature = "json")]
292301
fn parse_struct_from_json() {
293302
let json = r#"
294303
{
@@ -335,6 +344,7 @@ mod tests {
335344
}
336345

337346
#[test]
347+
#[cfg(feature = "json")]
338348
fn parse_map_from_json() {
339349
let json = r#"
340350
{
@@ -398,6 +408,7 @@ mod tests {
398408
}
399409

400410
#[test]
411+
#[cfg(feature = "json")]
401412
fn parse_union_from_json() {
402413
let json = r#"
403414
{
@@ -453,6 +464,7 @@ mod tests {
453464
}
454465

455466
#[test]
467+
#[cfg(feature = "json")]
456468
fn parse_utf8_from_json() {
457469
let json = "{\"name\":\"utf8\"}";
458470
let value: Value = serde_json::from_str(json).unwrap();
@@ -461,6 +473,7 @@ mod tests {
461473
}
462474

463475
#[test]
476+
#[cfg(feature = "json")]
464477
fn parse_int32_from_json() {
465478
let json = "{\"name\": \"int\", \"isSigned\": true, \"bitWidth\": 32}";
466479
let value: Value = serde_json::from_str(json).unwrap();
@@ -469,6 +482,7 @@ mod tests {
469482
}
470483

471484
#[test]
485+
#[cfg(feature = "json")]
472486
fn schema_json() {
473487
// Add some custom metadata
474488
let metadata: HashMap<String, String> =
@@ -1229,6 +1243,7 @@ mod tests {
12291243
}
12301244

12311245
#[test]
1246+
#[cfg(feature = "json")]
12321247
fn test_arrow_native_type_to_json() {
12331248
assert_eq!(Some(Bool(true)), true.into_json_value());
12341249
assert_eq!(Some(VNumber(Number::from(1))), 1i8.into_json_value());
@@ -1248,7 +1263,7 @@ mod tests {
12481263
Some(VNumber(Number::from_f64(0.01f64).unwrap())),
12491264
0.01f64.into_json_value()
12501265
);
1251-
assert_eq!(None, NAN.into_json_value());
1266+
assert_eq!(None, f32::NAN.into_json_value());
12521267
}
12531268

12541269
fn person_schema() -> Schema {

0 commit comments

Comments
 (0)