Skip to content

Commit

Permalink
fixed empty attributes bug
Browse files Browse the repository at this point in the history
  • Loading branch information
soldni committed Aug 10, 2024
1 parent 4322ad0 commit 8ab1d1d
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "dolma"
version = "1.0.3"
version = "1.0.9"
edition = "2021"
license = "Apache-2.0"

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dolma"
version = "1.0.8"
version = "1.0.9"
description = "Data filters"
license = { text = "Apache-2.0" }
readme = "README.md"
Expand All @@ -24,7 +24,7 @@ dependencies = [
"rich",
"s3fs>=2023.6.0",
"smart-open",
"tokenizers>=0.19.1,<1.0.0",
"tokenizers>=0.15.0,<=0.19.1",
"tqdm",
"uniseg",
"numpy",
Expand Down
8 changes: 6 additions & 2 deletions src/shard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ impl Shard {
line_number += 1;
let line = line?;
let mut data: Value = serde_json::from_str(&line)?;
let mut attrs = serde_json::Map::new();
let mut attrs: serde_json::Map<String, Value> = serde_json::Map::new();
for (attr_reader_index, (_, attr_reader)) in
local_attr_readers.iter_mut().enumerate()
{
Expand Down Expand Up @@ -309,7 +309,11 @@ impl Shard {
}
}

if !attrs.is_empty() {
// If this mixer has any attribute readers, always insert the `attributes` key into
// the mixer data, even when no attributes were actually read for this line.
// The `attributes` key is left out only when the mixer is configured with no
// attribute readers at all.
if local_attr_readers.len() > 0 {
// Add to existing attributes if they exist, otherwise create them.
if let Value::Object(ref mut existing_attrs) = data["attributes"] {
for (k, v) in attrs.iter() {
Expand Down

0 comments on commit 8ab1d1d

Please sign in to comment.