Skip to content

Commit

Permalink
Refactor convert command (#880)
Browse files (browse the repository at this point in the history)
Signed-off-by: Nico Wagner <[email protected]>
  • Loading branch information
nwagner84 authored Dec 9, 2024
1 parent 91bbf65 commit df9cc29
Show file tree
Hide file tree
Showing 190 changed files with 701 additions and 13,890 deletions.
8 changes: 3 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,11 @@ jobs:
strategy:
matrix:
item:
- { name: pica-record-matcher-v1, fuzz-dir: crates/pica-matcher/fuzz, target: fuzz-record-matcher, max-total-time: 120 }
- { name: pica-record-ref-v1, fuzz-dir: crates/pica-record-v1/fuzz, target: fuzz-record-ref, max-total-time: 120 }
- { name: pica-record-ref, fuzz-dir: fuzz, target: fuzz-record-ref, max-total-time: 120 }
- { name: pica-record-matcher, fuzz-dir: fuzz, target: fuzz-record-matcher, max-total-time: 120 }
- { name: pica-path, fuzz-dir: fuzz, target: fuzz-path, max-total-time: 240 }
- { name: pica-format, fuzz-dir: fuzz, target: fuzz-format, max-total-time: 240 }
- { name: pica-query, fuzz-dir: fuzz, target: fuzz-query, max-total-time: 240 }
- { name: pica-path, fuzz-dir: fuzz, target: fuzz-path, max-total-time: 120 }
- { name: pica-format, fuzz-dir: fuzz, target: fuzz-format, max-total-time: 120 }
- { name: pica-query, fuzz-dir: fuzz, target: fuzz-query, max-total-time: 120 }
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@nightly
Expand Down
15 changes: 2 additions & 13 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,10 @@ path = "tests/lib.rs"
name = "integration"

[workspace]
default-members = ["crates/pica-cli"]
resolver = "2"
default-members = ["crates/pica-toolkit"]

members = [
"pica-cli",
"crates/pica-lint",
"crates/pica-matcher",
"crates/pica-path",
"crates/pica-record-v1",
"crates/pica-select",
"crates/pica-toolkit",
"crates/pica-utils",
]
members = ["crates/pica-cli"]

[workspace.package]
version = "0.25.0"
Expand All @@ -65,8 +56,6 @@ pica-matcher = { version = "0.25", path = "./crates/pica-matcher" }
pica-path = { version = "0.25", path = "./crates/pica-path" }
pica-record-v1 = { version = "0.25", path = "./crates/pica-record-v1" }
pica-record = { version = "0.1", path = "." }
pica-select = { version = "0.25", path = "./crates/pica-select" }
pica-utils = { version = "0.25", path = "./crates/pica-utils" }

anyhow = "1.0"
bstr = "1.10"
Expand Down
6 changes: 4 additions & 2 deletions pica-cli/Cargo.toml → crates/pica-cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pica-cli"
version = "0.1.0"
version = "0.25.0"
authors.workspace = true
license.workspace = true
readme.workspace = true
Expand All @@ -19,13 +19,15 @@ hashbrown = { version = "0.15" }
indicatif = { version = "0.17.9", features = ["improved_unicode"] }
pica-record = { workspace = true, features = ["serde"] }
polars = { version = "0.45", features = ["ipc", "decompress", "lazy"] }
quick-xml = { version = "0.37" }
rand = { version = "0.8.5" }
serde = { workspace = true, features = ["derive"] }
serde_json = { version = "1.0" }
thiserror = { workspace = true }
toml = { version = "0.8.19", features = ["preserve_order"] }

[dev-dependencies]
anyhow = { version = "1.0.93" }
anyhow = { workspace = true }
assert_cmd = { version = "2.0.16" }
assert_fs = { version = "1.1.2" }
predicates = { version = "3.1.2" }
Expand Down
1 change: 1 addition & 0 deletions pica-cli/src/cli.rs → crates/pica-cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub(crate) enum Command {
Concat(Concat),
#[cfg(feature = "unstable")]
Config(Config),
Convert(Convert),
Count(Count),
Explode(Explode),
Filter(Filter),
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ use std::ffi::OsString;
use std::fs::File;
use std::io::{self, stdout, BufWriter, Write};

use pica_record_v1::io::ByteRecordWrite;
use pica_record_v1::ByteRecord;
use pica_record::prelude::*;

pub(crate) struct BinaryWriter {
writer: BufWriter<Box<dyn Write>>,
Expand All @@ -27,8 +26,8 @@ impl ByteRecordWrite for BinaryWriter {
&mut self,
record: &ByteRecord,
) -> std::io::Result<()> {
for field in record.iter() {
self.writer.write_all(field.tag())?;
for field in record.fields() {
field.tag().write_to(&mut self.writer)?;
if let Some(occurrence) = field.occurrence() {
occurrence.write_to(&mut self.writer)?;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ use std::ffi::OsString;
use std::fs::File;
use std::io::{self, stdout, BufWriter, Write};

use pica_record_v1::io::ByteRecordWrite;
use pica_record_v1::ByteRecord;
use pica_record::prelude::*;

pub(crate) struct ImportWriter {
writer: BufWriter<Box<dyn Write>>,
Expand All @@ -29,10 +28,10 @@ impl ByteRecordWrite for ImportWriter {
) -> std::io::Result<()> {
self.writer.write_all(b"'\x1d\x0a")?;

for field in record.iter() {
for field in record.fields() {
self.writer.write_all(b"\x1e")?;

self.writer.write_all(field.tag())?;
field.tag().write_to(&mut self.writer)?;
if let Some(occurrence) = field.occurrence() {
occurrence.write_to(&mut self.writer)?;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ use std::ffi::OsString;
use std::fs::File;
use std::io::{self, stdout, BufWriter, Write};

use pica_record_v1::io::ByteRecordWrite;
use pica_record_v1::ByteRecord;
use pica_record::prelude::*;
use serde_json::Value;

pub(crate) struct JsonWriter {
Expand Down Expand Up @@ -32,7 +31,7 @@ impl ByteRecordWrite for JsonWriter {
) -> std::io::Result<()> {
let mut fields: Vec<Value> = Vec::new();

for field in record.iter() {
for field in record.fields() {
let mut data: Vec<serde_json::Value> = Vec::new();
data.push(serde_json::Value::String(
field.tag().to_string(),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,32 +1,21 @@
mod binary;
mod import;
mod json;
mod plain;
mod xml;

use std::ffi::OsString;
use std::process::ExitCode;

use clap::{Parser, ValueEnum};
use pica_record_v1::io::{
ByteRecordWrite, ReaderBuilder, RecordsIterator, WriterBuilder,
};
use serde::{Deserialize, Serialize};
use pica_record::prelude::*;

use self::binary::BinaryWriter;
use self::import::ImportWriter;
use self::json::JsonWriter;
use self::plain::PlainWriter;
use self::xml::XmlWriter;
use crate::error::CliError;
use crate::progress::Progress;
use crate::{skip_invalid_flag, CliResult, Config};

#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub(crate) struct ConvertConfig {
/// Skip invalid records that can't be decoded.
pub(crate) skip_invalid: Option<bool>,
}
use crate::prelude::*;

mod binary;
mod import;
mod json;
mod plain;
mod xml;

#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)]
enum Format {
Expand Down Expand Up @@ -81,12 +70,9 @@ pub(crate) struct Convert {
}

impl Convert {
pub(crate) fn run(self, config: &Config) -> CliResult<()> {
let skip_invalid = skip_invalid_flag!(
self.skip_invalid,
config.convert,
config.global
);
pub(crate) fn execute(self, config: &Config) -> CliResult {
let skip_invalid = self.skip_invalid || config.skip_invalid;
let mut progress = Progress::new(self.progress);

if self.from != Format::Plus {
return Err(CliError::Other(format!(
Expand All @@ -106,25 +92,20 @@ impl Convert {
Format::Xml => Box::new(XmlWriter::new(self.output)?),
};

let mut progress = Progress::new(self.progress);

for filename in self.filenames {
let mut reader =
ReaderBuilder::new().from_path(filename)?;

while let Some(result) = reader.next() {
while let Some(result) = reader.next_byte_record() {
match result {
Err(e) => {
if e.is_invalid_record() && skip_invalid {
progress.invalid();
continue;
} else {
return Err(e.into());
}
Err(e) if e.skip_parse_err(skip_invalid) => {
progress.update(true);
continue;
}
Ok(record) => {
progress.record();
writer.write_byte_record(&record)?;
Err(e) => return Err(e.into()),
Ok(ref record) => {
writer.write_byte_record(record)?;
progress.update(false);
}
}
}
Expand All @@ -133,6 +114,6 @@ impl Convert {
progress.finish();
writer.finish()?;

Ok(())
Ok(ExitCode::SUCCESS)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ use std::fs::File;
use std::io::{self, stdout, BufWriter, Write};

use bstr::ByteSlice;
use pica_record_v1::io::ByteRecordWrite;
use pica_record_v1::ByteRecord;
use pica_record::prelude::*;

pub(crate) struct PlainWriter {
writer: BufWriter<Box<dyn Write>>,
Expand All @@ -28,8 +27,8 @@ impl ByteRecordWrite for PlainWriter {
&mut self,
record: &ByteRecord,
) -> std::io::Result<()> {
for field in record.iter() {
self.writer.write_all(field.tag())?;
for field in record.fields() {
field.tag().write_to(&mut self.writer)?;
if let Some(occurrence) = field.occurrence() {
occurrence.write_to(&mut self.writer)?;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ use std::fs::File;
use std::io::{self, stdout, BufWriter, Write};

use bstr::ByteSlice;
use pica_record_v1::io::ByteRecordWrite;
use pica_record_v1::ByteRecord;
use pica_record::prelude::*;
use quick_xml::events::{
BytesDecl, BytesEnd, BytesStart, BytesText, Event,
};
Expand Down Expand Up @@ -57,11 +56,11 @@ impl ByteRecordWrite for XmlWriter {
self.writer
.create_element("record")
.write_inner_content(|r| {
for field in record.iter() {
for field in record.fields() {
r.create_element("datafield")
.with_attribute((
"tag",
field.tag().to_str().unwrap(),
field.tag().to_string().as_str(),
))
.write_inner_content(|f| {
for subfield in field.subfields() {
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pub(crate) use completions::Completions;
pub(crate) use concat::Concat;
#[cfg(feature = "unstable")]
pub(crate) use config::Config;
pub(crate) use convert::Convert;
pub(crate) use count::Count;
pub(crate) use explode::Explode;
pub(crate) use filter::Filter;
Expand All @@ -19,6 +20,7 @@ mod completions;
mod concat;
#[cfg(feature = "unstable")]
mod config;
mod convert;
mod count;
mod explode;
mod filter;
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
1 change: 0 additions & 1 deletion pica-cli/src/error.rs → crates/pica-cli/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ pub(crate) enum CliError {
Polars(#[from] polars::error::PolarsError),
#[error(transparent)]
IO(#[from] std::io::Error),
#[cfg(feature = "unstable")]
#[error("{0}")]
Other(String),
}
1 change: 1 addition & 0 deletions pica-cli/src/main.rs → crates/pica-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ fn run() -> CliResult {
Command::Concat(cmd) => cmd.execute(&config),
#[cfg(feature = "unstable")]
Command::Config(cmd) => cmd.execute(&mut config),
Command::Convert(cmd) => cmd.execute(&config),
Command::Count(cmd) => cmd.execute(&config),
Command::Explode(cmd) => cmd.execute(&config),
Command::Filter(cmd) => cmd.execute(&config),
Expand Down
2 changes: 1 addition & 1 deletion pica-cli/src/prelude.rs → crates/pica-cli/src/prelude.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pub(crate) use crate::config::Config;
pub(crate) use crate::error::CliResult;
pub(crate) use crate::error::{CliError, CliResult};
pub(crate) use crate::progress::Progress;
pub(crate) use crate::utils::{parse_predicates, FilterSet};
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit df9cc29

Please sign in to comment.