diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7b39634f7..eee422216 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -82,13 +82,11 @@ jobs: strategy: matrix: item: - - { name: pica-record-matcher-v1, fuzz-dir: crates/pica-matcher/fuzz, target: fuzz-record-matcher, max-total-time: 120 } - - { name: pica-record-ref-v1, fuzz-dir: crates/pica-record-v1/fuzz, target: fuzz-record-ref, max-total-time: 120 } - { name: pica-record-ref, fuzz-dir: fuzz, target: fuzz-record-ref, max-total-time: 120 } - { name: pica-record-matcher, fuzz-dir: fuzz, target: fuzz-record-matcher, max-total-time: 120 } - - { name: pica-path, fuzz-dir: fuzz, target: fuzz-path, max-total-time: 240 } - - { name: pica-format, fuzz-dir: fuzz, target: fuzz-format, max-total-time: 240 } - - { name: pica-query, fuzz-dir: fuzz, target: fuzz-query, max-total-time: 240 } + - { name: pica-path, fuzz-dir: fuzz, target: fuzz-path, max-total-time: 120 } + - { name: pica-format, fuzz-dir: fuzz, target: fuzz-format, max-total-time: 120 } + - { name: pica-query, fuzz-dir: fuzz, target: fuzz-query, max-total-time: 120 } steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly diff --git a/Cargo.toml b/Cargo.toml index a0d6ff3e3..74e1708c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,19 +37,10 @@ path = "tests/lib.rs" name = "integration" [workspace] +default-members = ["crates/pica-cli"] resolver = "2" -default-members = ["crates/pica-toolkit"] -members = [ - "pica-cli", - "crates/pica-lint", - "crates/pica-matcher", - "crates/pica-path", - "crates/pica-record-v1", - "crates/pica-select", - "crates/pica-toolkit", - "crates/pica-utils", -] +members = ["crates/pica-cli"] [workspace.package] version = "0.25.0" @@ -65,8 +56,6 @@ pica-matcher = { version = "0.25", path = "./crates/pica-matcher" } pica-path = { version = "0.25", path = "./crates/pica-path" } pica-record-v1 = { version = "0.25", path = "./crates/pica-record-v1" } pica-record = { version = "0.1", path = "." } -pica-select = { version = "0.25", path = "./crates/pica-select" } -pica-utils = { version = "0.25", path = "./crates/pica-utils" } anyhow = "1.0" bstr = "1.10" diff --git a/pica-cli/Cargo.toml b/crates/pica-cli/Cargo.toml similarity index 90% rename from pica-cli/Cargo.toml rename to crates/pica-cli/Cargo.toml index ffd1c8a38..74a3f4bd0 100644 --- a/pica-cli/Cargo.toml +++ b/crates/pica-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pica-cli" -version = "0.1.0" +version = "0.25.0" authors.workspace = true license.workspace = true readme.workspace = true @@ -19,13 +19,15 @@ hashbrown = { version = "0.15" } indicatif = { version = "0.17.9", features = ["improved_unicode"] } pica-record = { workspace = true, features = ["serde"] } polars = { version = "0.45", features = ["ipc", "decompress", "lazy"] } +quick-xml = { version = "0.37" } rand = { version = "0.8.5" } serde = { workspace = true, features = ["derive"] } +serde_json = { version = "1.0" } thiserror = { workspace = true } toml = { version = "0.8.19", features = ["preserve_order"] } [dev-dependencies] -anyhow = { version = "1.0.93" } +anyhow = { workspace = true } assert_cmd = { version = "2.0.16" } assert_fs = { version = "1.1.2" } predicates = { version = "3.1.2" } diff --git a/pica-cli/src/cli.rs b/crates/pica-cli/src/cli.rs similarity index 97% rename from pica-cli/src/cli.rs rename to crates/pica-cli/src/cli.rs index ca613e09c..352d69eb8 100644 --- a/pica-cli/src/cli.rs +++ b/crates/pica-cli/src/cli.rs @@ -24,6 +24,7 @@ pub(crate) enum Command { Concat(Concat), #[cfg(feature = "unstable")] Config(Config), + Convert(Convert), Count(Count), Explode(Explode), Filter(Filter), diff --git a/pica-cli/src/commands/completions.rs b/crates/pica-cli/src/commands/completions.rs similarity index 100% rename from pica-cli/src/commands/completions.rs rename to crates/pica-cli/src/commands/completions.rs diff --git a/pica-cli/src/commands/concat.rs b/crates/pica-cli/src/commands/concat.rs similarity index 100% rename from pica-cli/src/commands/concat.rs rename to crates/pica-cli/src/commands/concat.rs diff --git a/pica-cli/src/commands/config.rs b/crates/pica-cli/src/commands/config.rs similarity index 100% rename from pica-cli/src/commands/config.rs rename to crates/pica-cli/src/commands/config.rs diff --git a/crates/pica-toolkit/src/commands/convert/binary.rs b/crates/pica-cli/src/commands/convert/binary.rs similarity index 88% rename from crates/pica-toolkit/src/commands/convert/binary.rs rename to crates/pica-cli/src/commands/convert/binary.rs index 09c9c45cc..6b7b23168 100644 --- a/crates/pica-toolkit/src/commands/convert/binary.rs +++ b/crates/pica-cli/src/commands/convert/binary.rs @@ -2,8 +2,7 @@ use std::ffi::OsString; use std::fs::File; use std::io::{self, stdout, BufWriter, Write}; -use pica_record_v1::io::ByteRecordWrite; -use pica_record_v1::ByteRecord; +use pica_record::prelude::*; pub(crate) struct BinaryWriter { writer: BufWriter>, @@ -27,8 +26,8 @@ impl ByteRecordWrite for BinaryWriter { &mut self, record: &ByteRecord, ) -> std::io::Result<()> { - for field in record.iter() { - self.writer.write_all(field.tag())?; + for field in record.fields() { + field.tag().write_to(&mut self.writer)?; if let Some(occurrence) = field.occurrence() { occurrence.write_to(&mut self.writer)?; } diff --git a/crates/pica-toolkit/src/commands/convert/import.rs b/crates/pica-cli/src/commands/convert/import.rs similarity index 88% rename from crates/pica-toolkit/src/commands/convert/import.rs rename to crates/pica-cli/src/commands/convert/import.rs index 3e6e2fcb0..70deb0b16 100644 --- a/crates/pica-toolkit/src/commands/convert/import.rs +++ b/crates/pica-cli/src/commands/convert/import.rs @@ -2,8 +2,7 @@ use std::ffi::OsString; use std::fs::File; use std::io::{self, stdout, BufWriter, Write}; -use pica_record_v1::io::ByteRecordWrite; -use pica_record_v1::ByteRecord; +use pica_record::prelude::*; pub(crate) struct ImportWriter { writer: BufWriter>, @@ -29,10 +28,10 @@ impl ByteRecordWrite for ImportWriter { ) -> std::io::Result<()> { self.writer.write_all(b"'\x1d\x0a")?; - for field in record.iter() { + for field in record.fields() { self.writer.write_all(b"\x1e")?; - self.writer.write_all(field.tag())?; + field.tag().write_to(&mut self.writer)?; if let Some(occurrence) = field.occurrence() { occurrence.write_to(&mut self.writer)?; } diff --git a/crates/pica-toolkit/src/commands/convert/json.rs b/crates/pica-cli/src/commands/convert/json.rs similarity index 94% rename from crates/pica-toolkit/src/commands/convert/json.rs rename to crates/pica-cli/src/commands/convert/json.rs index 129e33dfb..3e0c7056d 100644 --- a/crates/pica-toolkit/src/commands/convert/json.rs +++ b/crates/pica-cli/src/commands/convert/json.rs @@ -2,8 +2,7 @@ use std::ffi::OsString; use std::fs::File; use std::io::{self, stdout, BufWriter, Write}; -use pica_record_v1::io::ByteRecordWrite; -use pica_record_v1::ByteRecord; +use pica_record::prelude::*; use serde_json::Value; pub(crate) struct JsonWriter { @@ -32,7 +31,7 @@ impl ByteRecordWrite for JsonWriter { ) -> std::io::Result<()> { let mut fields: Vec = Vec::new(); - for field in record.iter() { + for field in record.fields() { let mut data: Vec = Vec::new(); data.push(serde_json::Value::String( field.tag().to_string(), diff --git a/crates/pica-toolkit/src/commands/convert/mod.rs b/crates/pica-cli/src/commands/convert/mod.rs similarity index 68% rename from crates/pica-toolkit/src/commands/convert/mod.rs rename to crates/pica-cli/src/commands/convert/mod.rs index e70af0348..7d246ef73 100644 --- a/crates/pica-toolkit/src/commands/convert/mod.rs +++ b/crates/pica-cli/src/commands/convert/mod.rs @@ -1,32 +1,21 @@ -mod binary; -mod import; -mod json; -mod plain; -mod xml; - use std::ffi::OsString; +use std::process::ExitCode; use clap::{Parser, ValueEnum}; -use pica_record_v1::io::{ - ByteRecordWrite, ReaderBuilder, RecordsIterator, WriterBuilder, -}; -use serde::{Deserialize, Serialize}; +use pica_record::prelude::*; use self::binary::BinaryWriter; use self::import::ImportWriter; use self::json::JsonWriter; use self::plain::PlainWriter; use self::xml::XmlWriter; -use crate::error::CliError; -use crate::progress::Progress; -use crate::{skip_invalid_flag, CliResult, Config}; - -#[derive(Debug, Deserialize, Serialize)] -#[serde(rename_all = "kebab-case")] -pub(crate) struct ConvertConfig { - /// Skip invalid records that can't be decoded. - pub(crate) skip_invalid: Option, -} +use crate::prelude::*; + +mod binary; +mod import; +mod json; +mod plain; +mod xml; #[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)] enum Format { @@ -81,12 +70,9 @@ pub(crate) struct Convert { } impl Convert { - pub(crate) fn run(self, config: &Config) -> CliResult<()> { - let skip_invalid = skip_invalid_flag!( - self.skip_invalid, - config.convert, - config.global - ); + pub(crate) fn execute(self, config: &Config) -> CliResult { + let skip_invalid = self.skip_invalid || config.skip_invalid; + let mut progress = Progress::new(self.progress); if self.from != Format::Plus { return Err(CliError::Other(format!( @@ -106,25 +92,20 @@ impl Convert { Format::Xml => Box::new(XmlWriter::new(self.output)?), }; - let mut progress = Progress::new(self.progress); - for filename in self.filenames { let mut reader = ReaderBuilder::new().from_path(filename)?; - while let Some(result) = reader.next() { + while let Some(result) = reader.next_byte_record() { match result { - Err(e) => { - if e.is_invalid_record() && skip_invalid { - progress.invalid(); - continue; - } else { - return Err(e.into()); - } + Err(e) if e.skip_parse_err(skip_invalid) => { + progress.update(true); + continue; } - Ok(record) => { - progress.record(); - writer.write_byte_record(&record)?; + Err(e) => return Err(e.into()), + Ok(ref record) => { + writer.write_byte_record(record)?; + progress.update(false); } } } @@ -133,6 +114,6 @@ impl Convert { progress.finish(); writer.finish()?; - Ok(()) + Ok(ExitCode::SUCCESS) } } diff --git a/crates/pica-toolkit/src/commands/convert/plain.rs b/crates/pica-cli/src/commands/convert/plain.rs similarity index 89% rename from crates/pica-toolkit/src/commands/convert/plain.rs rename to crates/pica-cli/src/commands/convert/plain.rs index 362036501..2d7d99e87 100644 --- a/crates/pica-toolkit/src/commands/convert/plain.rs +++ b/crates/pica-cli/src/commands/convert/plain.rs @@ -3,8 +3,7 @@ use std::fs::File; use std::io::{self, stdout, BufWriter, Write}; use bstr::ByteSlice; -use pica_record_v1::io::ByteRecordWrite; -use pica_record_v1::ByteRecord; +use pica_record::prelude::*; pub(crate) struct PlainWriter { writer: BufWriter>, @@ -28,8 +27,8 @@ impl ByteRecordWrite for PlainWriter { &mut self, record: &ByteRecord, ) -> std::io::Result<()> { - for field in record.iter() { - self.writer.write_all(field.tag())?; + for field in record.fields() { + field.tag().write_to(&mut self.writer)?; if let Some(occurrence) = field.occurrence() { occurrence.write_to(&mut self.writer)?; } diff --git a/crates/pica-toolkit/src/commands/convert/xml.rs b/crates/pica-cli/src/commands/convert/xml.rs similarity index 94% rename from crates/pica-toolkit/src/commands/convert/xml.rs rename to crates/pica-cli/src/commands/convert/xml.rs index b3795b5fe..dee5b12a5 100644 --- a/crates/pica-toolkit/src/commands/convert/xml.rs +++ b/crates/pica-cli/src/commands/convert/xml.rs @@ -3,8 +3,7 @@ use std::fs::File; use std::io::{self, stdout, BufWriter, Write}; use bstr::ByteSlice; -use pica_record_v1::io::ByteRecordWrite; -use pica_record_v1::ByteRecord; +use pica_record::prelude::*; use quick_xml::events::{ BytesDecl, BytesEnd, BytesStart, BytesText, Event, }; @@ -57,11 +56,11 @@ impl ByteRecordWrite for XmlWriter { self.writer .create_element("record") .write_inner_content(|r| { - for field in record.iter() { + for field in record.fields() { r.create_element("datafield") .with_attribute(( "tag", - field.tag().to_str().unwrap(), + field.tag().to_string().as_str(), )) .write_inner_content(|f| { for subfield in field.subfields() { diff --git a/pica-cli/src/commands/count.rs b/crates/pica-cli/src/commands/count.rs similarity index 100% rename from pica-cli/src/commands/count.rs rename to crates/pica-cli/src/commands/count.rs diff --git a/pica-cli/src/commands/explode.rs b/crates/pica-cli/src/commands/explode.rs similarity index 100% rename from pica-cli/src/commands/explode.rs rename to crates/pica-cli/src/commands/explode.rs diff --git a/pica-cli/src/commands/filter.rs b/crates/pica-cli/src/commands/filter.rs similarity index 100% rename from pica-cli/src/commands/filter.rs rename to crates/pica-cli/src/commands/filter.rs diff --git a/pica-cli/src/commands/frequency.rs b/crates/pica-cli/src/commands/frequency.rs similarity index 100% rename from pica-cli/src/commands/frequency.rs rename to crates/pica-cli/src/commands/frequency.rs diff --git a/pica-cli/src/commands/hash.rs b/crates/pica-cli/src/commands/hash.rs similarity index 100% rename from pica-cli/src/commands/hash.rs rename to crates/pica-cli/src/commands/hash.rs diff --git a/pica-cli/src/commands/invalid.rs b/crates/pica-cli/src/commands/invalid.rs similarity index 100% rename from pica-cli/src/commands/invalid.rs rename to crates/pica-cli/src/commands/invalid.rs diff --git a/pica-cli/src/commands/mod.rs b/crates/pica-cli/src/commands/mod.rs similarity index 94% rename from pica-cli/src/commands/mod.rs rename to crates/pica-cli/src/commands/mod.rs index 4313b85de..dfada5c00 100644 --- a/pica-cli/src/commands/mod.rs +++ b/crates/pica-cli/src/commands/mod.rs @@ -2,6 +2,7 @@ pub(crate) use completions::Completions; pub(crate) use concat::Concat; #[cfg(feature = "unstable")] pub(crate) use config::Config; +pub(crate) use convert::Convert; pub(crate) use count::Count; pub(crate) use explode::Explode; pub(crate) use filter::Filter; @@ -19,6 +20,7 @@ mod completions; mod concat; #[cfg(feature = "unstable")] mod config; +mod convert; mod count; mod explode; mod filter; diff --git a/pica-cli/src/commands/partition.rs b/crates/pica-cli/src/commands/partition.rs similarity index 100% rename from pica-cli/src/commands/partition.rs rename to crates/pica-cli/src/commands/partition.rs diff --git a/pica-cli/src/commands/print.rs b/crates/pica-cli/src/commands/print.rs similarity index 100% rename from pica-cli/src/commands/print.rs rename to crates/pica-cli/src/commands/print.rs diff --git a/pica-cli/src/commands/sample.rs b/crates/pica-cli/src/commands/sample.rs similarity index 100% rename from pica-cli/src/commands/sample.rs rename to crates/pica-cli/src/commands/sample.rs diff --git a/pica-cli/src/commands/select.rs b/crates/pica-cli/src/commands/select.rs similarity index 100% rename from pica-cli/src/commands/select.rs rename to crates/pica-cli/src/commands/select.rs diff --git a/pica-cli/src/commands/slice.rs b/crates/pica-cli/src/commands/slice.rs similarity index 100% rename from pica-cli/src/commands/slice.rs rename to crates/pica-cli/src/commands/slice.rs diff --git a/pica-cli/src/commands/split.rs b/crates/pica-cli/src/commands/split.rs similarity index 100% rename from pica-cli/src/commands/split.rs rename to crates/pica-cli/src/commands/split.rs diff --git a/pica-cli/src/config.rs b/crates/pica-cli/src/config.rs similarity index 100% rename from pica-cli/src/config.rs rename to crates/pica-cli/src/config.rs diff --git a/pica-cli/src/error.rs b/crates/pica-cli/src/error.rs similarity index 97% rename from pica-cli/src/error.rs rename to crates/pica-cli/src/error.rs index 5391c0b12..7280eb27e 100644 --- a/pica-cli/src/error.rs +++ b/crates/pica-cli/src/error.rs @@ -38,7 +38,6 @@ pub(crate) enum CliError { Polars(#[from] polars::error::PolarsError), #[error(transparent)] IO(#[from] std::io::Error), - #[cfg(feature = "unstable")] #[error("{0}")] Other(String), } diff --git a/pica-cli/src/main.rs b/crates/pica-cli/src/main.rs similarity index 96% rename from pica-cli/src/main.rs rename to crates/pica-cli/src/main.rs index a754a8657..e90426f34 100644 --- a/pica-cli/src/main.rs +++ b/crates/pica-cli/src/main.rs @@ -29,6 +29,7 @@ fn run() -> CliResult { Command::Concat(cmd) => cmd.execute(&config), #[cfg(feature = "unstable")] Command::Config(cmd) => cmd.execute(&mut config), + Command::Convert(cmd) => cmd.execute(&config), Command::Count(cmd) => cmd.execute(&config), Command::Explode(cmd) => cmd.execute(&config), Command::Filter(cmd) => cmd.execute(&config), diff --git a/pica-cli/src/prelude.rs b/crates/pica-cli/src/prelude.rs similarity index 72% rename from pica-cli/src/prelude.rs rename to crates/pica-cli/src/prelude.rs index 3e6e60c88..efc19b233 100644 --- a/pica-cli/src/prelude.rs +++ b/crates/pica-cli/src/prelude.rs @@ -1,4 +1,4 @@ pub(crate) use crate::config::Config; -pub(crate) use crate::error::CliResult; +pub(crate) use crate::error::{CliError, CliResult}; pub(crate) use crate::progress::Progress; pub(crate) use crate::utils::{parse_predicates, FilterSet}; diff --git a/pica-cli/src/progress.rs b/crates/pica-cli/src/progress.rs similarity index 100% rename from pica-cli/src/progress.rs rename to crates/pica-cli/src/progress.rs diff --git a/pica-cli/src/utils.rs b/crates/pica-cli/src/utils.rs similarity index 100% rename from pica-cli/src/utils.rs rename to crates/pica-cli/src/utils.rs diff --git a/pica-cli/tests/completions/mod.rs b/crates/pica-cli/tests/completions/mod.rs similarity index 100% rename from pica-cli/tests/completions/mod.rs rename to crates/pica-cli/tests/completions/mod.rs diff --git a/pica-cli/tests/concat/mod.rs b/crates/pica-cli/tests/concat/mod.rs similarity index 100% rename from pica-cli/tests/concat/mod.rs rename to crates/pica-cli/tests/concat/mod.rs diff --git a/pica-cli/tests/config/mod.rs b/crates/pica-cli/tests/config/mod.rs similarity index 100% rename from pica-cli/tests/config/mod.rs rename to crates/pica-cli/tests/config/mod.rs diff --git a/crates/pica-cli/tests/convert/mod.rs b/crates/pica-cli/tests/convert/mod.rs new file mode 100644 index 000000000..d333fee1f --- /dev/null +++ b/crates/pica-cli/tests/convert/mod.rs @@ -0,0 +1,211 @@ +use std::fs::read_to_string; + +use assert_cmd::Command; +use assert_fs::prelude::*; +use assert_fs::TempDir; +use predicates::prelude::*; + +use super::prelude::*; + +#[test] +fn convert_from_plus_to_xml() -> TestResult { + let mut cmd = Command::cargo_bin("pica")?; + let temp_dir = TempDir::new().unwrap(); + let out = temp_dir.child("ada.xml"); + + let assert = cmd + .arg("convert") + .args(["--from", "plus", "--to", "xml"]) + .args(["-o", out.to_str().unwrap()]) + .arg(data_dir().join("ada.dat")) + .assert(); + + assert + .success() + .code(0) + .stdout(predicates::str::is_empty()) + .stderr(predicates::str::is_empty()); + + let mut expected = read_to_string(data_dir().join("ada.xml"))?; + if cfg!(windows) { + expected = expected.replace('\r', ""); + } + + assert_eq!(expected, read_to_string(out.path())?); + + temp_dir.close().unwrap(); + Ok(()) +} + +#[test] +fn convert_from_plus_to_json() -> TestResult { + let mut cmd = Command::cargo_bin("pica")?; + let temp_dir = TempDir::new().unwrap(); + let out = temp_dir.child("ada.json"); + + let assert = cmd + .arg("convert") + .args(["--from", "plus", "--to", "json"]) + .args(["-o", out.to_str().unwrap()]) + .arg(data_dir().join("ada.dat")) + .assert(); + + assert + .success() + .code(0) + .stdout(predicates::str::is_empty()) + .stderr(predicates::str::is_empty()); + + let expected = read_to_string(data_dir().join("ada.json"))?; + assert_eq!(expected, read_to_string(out.path())?); + + temp_dir.close().unwrap(); + Ok(()) +} + +#[test] +fn convert_from_plus_to_plus() -> TestResult { + let mut cmd = Command::cargo_bin("pica")?; + let temp_dir = TempDir::new().unwrap(); + let out = temp_dir.child("ada.dat"); + + let assert = cmd + .arg("convert") + .args(["--from", "plus", "--to", "plus"]) + .args(["-o", out.to_str().unwrap()]) + .arg(data_dir().join("ada.dat")) + .assert(); + + assert + .success() + .code(0) + .stdout(predicates::str::is_empty()) + .stderr(predicates::str::is_empty()); + + assert_eq!( + read_to_string(data_dir().join("ada.dat"))?, + read_to_string(out.path())? + ); + + temp_dir.close().unwrap(); + Ok(()) +} + +#[test] +fn convert_from_plus_to_plain() -> TestResult { + let mut cmd = Command::cargo_bin("pica")?; + let temp_dir = TempDir::new().unwrap(); + let out = temp_dir.child("ada.plain"); + + let assert = cmd + .arg("convert") + .args(["--from", "plus", "--to", "plain"]) + .args(["-o", out.to_str().unwrap()]) + .arg(data_dir().join("ada.dat")) + .assert(); + + assert + .success() + .code(0) + .stdout(predicates::str::is_empty()) + .stderr(predicates::str::is_empty()); + + let mut expected = read_to_string(data_dir().join("ada.plain"))?; + if cfg!(windows) { + expected = expected.replace('\r', ""); + } + + assert_eq!(expected, read_to_string(out.path())?); + + temp_dir.close().unwrap(); + Ok(()) +} + +#[test] +fn convert_from_plus_to_import() -> TestResult { + let mut cmd = Command::cargo_bin("pica")?; + let temp_dir = TempDir::new().unwrap(); + let out = temp_dir.child("ada.import"); + + let assert = cmd + .arg("convert") + .args(["--from", "plus", "--to", "import"]) + .args(["-o", out.to_str().unwrap()]) + .arg(data_dir().join("ada.dat")) + .assert(); + + assert + .success() + .code(0) + .stdout(predicates::str::is_empty()) + .stderr(predicates::str::is_empty()); + + assert_eq!( + read_to_string(data_dir().join("ada.import"))?, + read_to_string(out.path())? + ); + + temp_dir.close().unwrap(); + Ok(()) +} + +#[test] +fn convert_from_plus_to_binary() -> TestResult { + let mut cmd = Command::cargo_bin("pica")?; + let temp_dir = TempDir::new().unwrap(); + let out = temp_dir.child("ada.bin"); + + let assert = cmd + .arg("convert") + .args(["--from", "plus", "--to", "binary"]) + .args(["-o", out.to_str().unwrap()]) + .arg(data_dir().join("ada.dat")) + .assert(); + + assert + .success() + .code(0) + .stdout(predicates::str::is_empty()) + .stderr(predicates::str::is_empty()); + + assert_eq!( + read_to_string(data_dir().join("ada.bin"))?, + read_to_string(out.path())? + ); + + temp_dir.close().unwrap(); + Ok(()) +} + +#[test] +fn convert_skip_invalid() -> TestResult { + let mut cmd = Command::cargo_bin("pica")?; + let assert = cmd + .args(["convert", "-s"]) + .args(["--from", "plus", "--to", "json"]) + .arg(data_dir().join("invalid.dat")) + .assert(); + + assert + .success() + .code(0) + .stdout(predicates::ord::eq("[]")) + .stderr(predicates::str::is_empty()); + + let mut cmd = Command::cargo_bin("pica")?; + let assert = cmd + .args(["convert"]) + .args(["--from", "plus", "--to", "json"]) + .arg(data_dir().join("invalid.dat")) + .assert(); + + assert + .failure() + .code(2) + .stdout(predicates::str::is_empty().not()) + .stderr(predicates::str::contains( + "parse erorr: invalid record on line 1", + )); + + Ok(()) +} diff --git a/pica-cli/tests/count/mod.rs b/crates/pica-cli/tests/count/mod.rs similarity index 100% rename from pica-cli/tests/count/mod.rs rename to crates/pica-cli/tests/count/mod.rs diff --git a/pica-cli/tests/explode/mod.rs b/crates/pica-cli/tests/explode/mod.rs similarity index 100% rename from pica-cli/tests/explode/mod.rs rename to crates/pica-cli/tests/explode/mod.rs diff --git a/pica-cli/tests/filter/cardinality.rs b/crates/pica-cli/tests/filter/cardinality.rs similarity index 100% rename from pica-cli/tests/filter/cardinality.rs rename to crates/pica-cli/tests/filter/cardinality.rs diff --git a/pica-cli/tests/filter/connectives.rs b/crates/pica-cli/tests/filter/connectives.rs similarity index 100% rename from pica-cli/tests/filter/connectives.rs rename to crates/pica-cli/tests/filter/connectives.rs diff --git a/pica-cli/tests/filter/exists.rs b/crates/pica-cli/tests/filter/exists.rs similarity index 100% rename from pica-cli/tests/filter/exists.rs rename to crates/pica-cli/tests/filter/exists.rs diff --git a/pica-cli/tests/filter/in.rs b/crates/pica-cli/tests/filter/in.rs similarity index 100% rename from pica-cli/tests/filter/in.rs rename to crates/pica-cli/tests/filter/in.rs diff --git a/pica-cli/tests/filter/mod.rs b/crates/pica-cli/tests/filter/mod.rs similarity index 100% rename from pica-cli/tests/filter/mod.rs rename to crates/pica-cli/tests/filter/mod.rs diff --git a/pica-cli/tests/filter/regex.rs b/crates/pica-cli/tests/filter/regex.rs similarity index 100% rename from pica-cli/tests/filter/regex.rs rename to crates/pica-cli/tests/filter/regex.rs diff --git a/pica-cli/tests/filter/regex_set.rs b/crates/pica-cli/tests/filter/regex_set.rs similarity index 100% rename from pica-cli/tests/filter/regex_set.rs rename to crates/pica-cli/tests/filter/regex_set.rs diff --git a/pica-cli/tests/filter/relation.rs b/crates/pica-cli/tests/filter/relation.rs similarity index 100% rename from pica-cli/tests/filter/relation.rs rename to crates/pica-cli/tests/filter/relation.rs diff --git a/pica-cli/tests/frequency/mod.rs b/crates/pica-cli/tests/frequency/mod.rs similarity index 100% rename from pica-cli/tests/frequency/mod.rs rename to crates/pica-cli/tests/frequency/mod.rs diff --git a/pica-cli/tests/hash/mod.rs b/crates/pica-cli/tests/hash/mod.rs similarity index 100% rename from pica-cli/tests/hash/mod.rs rename to crates/pica-cli/tests/hash/mod.rs diff --git a/pica-cli/tests/integration.rs b/crates/pica-cli/tests/integration.rs similarity index 94% rename from pica-cli/tests/integration.rs rename to crates/pica-cli/tests/integration.rs index 2791c838e..449f9b52d 100644 --- a/pica-cli/tests/integration.rs +++ b/crates/pica-cli/tests/integration.rs @@ -4,6 +4,7 @@ mod completions; mod concat; #[cfg(feature = "unstable")] mod config; +mod convert; mod count; mod explode; mod filter; diff --git a/pica-cli/tests/invalid/mod.rs b/crates/pica-cli/tests/invalid/mod.rs similarity index 100% rename from pica-cli/tests/invalid/mod.rs rename to crates/pica-cli/tests/invalid/mod.rs diff --git a/pica-cli/tests/partition/mod.rs b/crates/pica-cli/tests/partition/mod.rs similarity index 100% rename from pica-cli/tests/partition/mod.rs rename to crates/pica-cli/tests/partition/mod.rs diff --git a/pica-cli/tests/prelude/mod.rs b/crates/pica-cli/tests/prelude/mod.rs similarity index 90% rename from pica-cli/tests/prelude/mod.rs rename to crates/pica-cli/tests/prelude/mod.rs index 2e5dc7e13..b0f0ea246 100644 --- a/pica-cli/tests/prelude/mod.rs +++ b/crates/pica-cli/tests/prelude/mod.rs @@ -8,7 +8,7 @@ pub(crate) fn data_dir() -> &'static PathBuf { static DATA_DIR: LazyLock = LazyLock::new(|| { current_dir() .unwrap() - .join("../tests/data") + .join("../../tests/data") .canonicalize() .unwrap() .to_path_buf() diff --git a/pica-cli/tests/print/mod.rs b/crates/pica-cli/tests/print/mod.rs similarity index 100% rename from pica-cli/tests/print/mod.rs rename to crates/pica-cli/tests/print/mod.rs diff --git a/pica-cli/tests/sample/mod.rs b/crates/pica-cli/tests/sample/mod.rs similarity index 100% rename from pica-cli/tests/sample/mod.rs rename to crates/pica-cli/tests/sample/mod.rs diff --git a/pica-cli/tests/select/mod.rs b/crates/pica-cli/tests/select/mod.rs similarity index 100% rename from pica-cli/tests/select/mod.rs rename to crates/pica-cli/tests/select/mod.rs diff --git a/pica-cli/tests/slice/mod.rs b/crates/pica-cli/tests/slice/mod.rs similarity index 100% rename from pica-cli/tests/slice/mod.rs rename to crates/pica-cli/tests/slice/mod.rs diff --git a/pica-cli/tests/split/mod.rs b/crates/pica-cli/tests/split/mod.rs similarity index 100% rename from pica-cli/tests/split/mod.rs rename to crates/pica-cli/tests/split/mod.rs diff --git a/crates/pica-matcher/Cargo.toml b/crates/pica-matcher/Cargo.toml deleted file mode 100644 index 9ecee3dcc..000000000 --- a/crates/pica-matcher/Cargo.toml +++ /dev/null @@ -1,26 +0,0 @@ -[package] -name = "pica-matcher" -version.workspace = true -authors.workspace = true -license.workspace = true -readme.workspace = true -keywords.workspace = true -edition.workspace = true -rust-version.workspace = true - -[dependencies] -bstr = { workspace = true } -pica-record-v1 = { workspace = true } -pica-utils = { workspace = true } -regex = { workspace = true } -serde = { workspace = true, optional = true } -strsim = { workspace = true } -thiserror = { workspace = true } -winnow = { workspace = true, features = ["simd"] } - -[dev-dependencies] -anyhow = "1.0" -criterion = { version = "0.5", features = ["html_reports"] } - -[features] -serde = ["dep:serde"] diff --git a/crates/pica-matcher/fuzz/.gitignore b/crates/pica-matcher/fuzz/.gitignore deleted file mode 100644 index 1a45eee77..000000000 --- a/crates/pica-matcher/fuzz/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -target -corpus -artifacts -coverage diff --git a/crates/pica-matcher/fuzz/Cargo.toml b/crates/pica-matcher/fuzz/Cargo.toml deleted file mode 100644 index 64ee64899..000000000 --- a/crates/pica-matcher/fuzz/Cargo.toml +++ /dev/null @@ -1,27 +0,0 @@ -[package] -name = "pica-matcher-fuzz" -version = "0.0.0" -publish = false -edition = "2021" - -[package.metadata] -cargo-fuzz = true - -[dependencies] -libfuzzer-sys = "0.4" - -[dependencies.pica-matcher] -path = ".." - -# Prevent this from interfering with workspaces -[workspace] -members = ["."] - -[profile.release] -debug = 1 - -[[bin]] -name = "fuzz-record-matcher" -path = "fuzz_targets/fuzz_record_matcher.rs" -test = false -doc = false diff --git a/crates/pica-matcher/fuzz/fuzz_targets/fuzz_record_matcher.rs b/crates/pica-matcher/fuzz/fuzz_targets/fuzz_record_matcher.rs deleted file mode 100644 index 7c355b551..000000000 --- a/crates/pica-matcher/fuzz/fuzz_targets/fuzz_record_matcher.rs +++ /dev/null @@ -1,10 +0,0 @@ -#![no_main] - -extern crate libfuzzer_sys; - -use libfuzzer_sys::fuzz_target; -use pica_matcher::RecordMatcher; - -fuzz_target!(|data: &[u8]| { - let _result = RecordMatcher::try_from(data); -}); diff --git a/crates/pica-matcher/src/common.rs b/crates/pica-matcher/src/common.rs deleted file mode 100644 index befc3cc87..000000000 --- a/crates/pica-matcher/src/common.rs +++ /dev/null @@ -1,363 +0,0 @@ -use std::fmt::{self, Display}; - -use winnow::ascii::{multispace0, multispace1}; -use winnow::combinator::{ - alt, delimited, preceded, repeat, terminated, -}; -use winnow::error::{ContextError, ParserError}; -use winnow::stream::{AsChar, Compare, Stream, StreamIsPartial}; -use winnow::token::take_till; -use winnow::{PResult, Parser}; - -/// Boolean Operators. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum BooleanOp { - And, // and, "&&" - Or, // or, "||" - Xor, // xor, "^" -} - -/// Strip whitespaces from the beginning and end. -pub(crate) fn ws, F>( - mut inner: F, -) -> impl Parser -where - I: Stream + StreamIsPartial, - ::Token: AsChar + Clone, - F: Parser, -{ - move |i: &mut I| { - let _ = multispace0.parse_next(i)?; - let o = inner.parse_next(i); - let _ = multispace0.parse_next(i)?; - o - } -} - -/// Relational Operator -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum RelationalOp { - Eq, // equal, "==" - Ne, // not equal, "!=" - Gt, // greater than, ">" - Ge, // greater than or equal, ">=" - Lt, // less than, "<" - Le, // less than or equal, "<=" - StartsWith, // starts with, "=^" - StartsNotWith, // starts not with, "!^" - EndsWith, // ends with, "=$" - EndsNotWith, // ends not with, "!$" - Similar, // similar, "=*" - Contains, // contains, "=?" -} - -impl RelationalOp { - /// Returns true of the operator can be used in combination with the - /// `usize` type, otherwise false. - pub fn is_usize_applicable(&self) -> bool { - matches!( - self, - RelationalOp::Eq - | RelationalOp::Ne - | RelationalOp::Ge - | RelationalOp::Gt - | RelationalOp::Lt - | RelationalOp::Le - ) - } - - /// Returns true of the operator can be used in combination with - /// `str` or byte slices, otherwise false. - pub fn is_str_applicable(&self) -> bool { - !matches!( - self, - RelationalOp::Ge - | RelationalOp::Gt - | RelationalOp::Lt - | RelationalOp::Le - ) - } -} - -impl Display for RelationalOp { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { - RelationalOp::Eq => write!(f, "=="), - RelationalOp::Ne => write!(f, "!="), - RelationalOp::Gt => write!(f, ">"), - RelationalOp::Ge => write!(f, ">="), - RelationalOp::Lt => write!(f, "<"), - RelationalOp::Le => write!(f, "<="), - RelationalOp::StartsWith => write!(f, "=^"), - RelationalOp::StartsNotWith => write!(f, "!^"), - RelationalOp::EndsWith => write!(f, "=$"), - RelationalOp::EndsNotWith => write!(f, "!$"), - RelationalOp::Similar => write!(f, "=*"), - RelationalOp::Contains => write!(f, "=?"), - } - } -} - -/// Parse RelationalOp which can be used for string comparisons. -#[inline] -pub(crate) fn parse_relational_op_str( - i: &mut &[u8], -) -> PResult { - alt(( - "==".value(RelationalOp::Eq), - "!=".value(RelationalOp::Ne), - "=^".value(RelationalOp::StartsWith), - "!^".value(RelationalOp::StartsNotWith), - "=$".value(RelationalOp::EndsWith), - "!$".value(RelationalOp::EndsNotWith), - "=*".value(RelationalOp::Similar), - "=?".value(RelationalOp::Contains), - )) - .parse_next(i) -} - -/// Parse RelationalOp which can be used for usize comparisons. -#[inline] -pub(crate) fn parse_relational_op_usize( - i: &mut &[u8], -) -> PResult { - alt(( - "==".value(RelationalOp::Eq), - "!=".value(RelationalOp::Ne), - ">=".value(RelationalOp::Ge), - ">".value(RelationalOp::Gt), - "<=".value(RelationalOp::Le), - "<".value(RelationalOp::Lt), - )) - .parse_next(i) -} - -#[derive(Debug, Clone, PartialEq, Eq, Default)] -pub enum Quantifier { - All, - #[default] - Any, -} - -#[inline] -pub(crate) fn parse_quantifier(i: &mut &[u8]) -> PResult { - alt(( - terminated("ALL".value(Quantifier::All), multispace1), - terminated("ANY".value(Quantifier::Any), multispace1), - "∀".value(Quantifier::All), - "∃".value(Quantifier::Any), - )) - .parse_next(i) -} - -#[derive(Debug, Copy, Clone)] -enum Quotes { - Single, - Double, -} - -fn parse_literal( - quotes: Quotes, -) -> impl Parser::Slice, E> -where - I: Stream + StreamIsPartial, - ::Token: AsChar, - E: ParserError, -{ - match quotes { - Quotes::Single => take_till(1.., ['\'', '\\']), - Quotes::Double => take_till(1.., ['"', '\\']), - } -} - -fn parse_escaped_char(quotes: Quotes) -> impl Parser -where - I: Stream + StreamIsPartial + Compare, - ::Token: AsChar + Clone, - E: ParserError, -{ - let v = match quotes { - Quotes::Single => '\'', - Quotes::Double => '"', - }; - - preceded( - '\\', - alt(( - 'n'.value('\n'), - 'r'.value('\r'), - 't'.value('\t'), - 'b'.value('\u{08}'), - 'f'.value('\u{0C}'), - '\\'.value('\\'), - '/'.value('/'), - v.value(v), - )), - ) -} - -#[derive(Debug, Clone)] -enum StringFragment<'a> { - Literal(&'a [u8]), - EscapedChar(char), - EscapedWs, -} - -fn parse_quoted_fragment<'a, E: ParserError<&'a [u8]>>( - quotes: Quotes, -) -> impl Parser<&'a [u8], StringFragment<'a>, E> { - use StringFragment::*; - - alt(( - parse_literal::<&'a [u8], E>(quotes).map(Literal), - parse_escaped_char::<&'a [u8], E>(quotes).map(EscapedChar), - preceded('\\', multispace1).value(EscapedWs), - )) -} - -fn parse_quoted_string<'a, E>( - quotes: Quotes, -) -> impl Parser<&'a [u8], Vec, E> -where - E: ParserError<&'a [u8]>, -{ - use StringFragment::*; - - let string_builder = repeat( - 0.., - parse_quoted_fragment::(quotes), - ) - .fold(Vec::new, |mut acc, fragment| { - match fragment { - Literal(s) => acc.extend_from_slice(s), - EscapedChar(c) => acc.push(c as u8), - EscapedWs => {} - } - acc - }); - - match quotes { - Quotes::Single => delimited('\'', string_builder, '\''), - Quotes::Double => delimited('"', string_builder, '"'), - } -} - -#[inline] -fn parse_string_single_quoted(i: &mut &[u8]) -> PResult> { - parse_quoted_string::(Quotes::Single).parse_next(i) -} - -#[inline] -fn parse_string_double_quoted(i: &mut &[u8]) -> PResult> { - parse_quoted_string::(Quotes::Double).parse_next(i) -} - -pub(crate) fn parse_string(i: &mut &[u8]) -> PResult> { - alt((parse_string_single_quoted, parse_string_double_quoted)) - .parse_next(i) -} - -#[cfg(test)] -mod tests { - - use super::*; - - #[test] - fn parse_relational_op_str() { - use super::parse_relational_op_str; - - macro_rules! parse_success { - ($input:expr, $expected:expr) => { - assert_eq!( - parse_relational_op_str - .parse($input.as_bytes()) - .unwrap(), - $expected - ); - }; - } - - parse_success!("==", RelationalOp::Eq); - parse_success!("!=", RelationalOp::Ne); - parse_success!("=^", RelationalOp::StartsWith); - parse_success!("!^", RelationalOp::StartsNotWith); - parse_success!("=$", RelationalOp::EndsWith); - parse_success!("!$", RelationalOp::EndsNotWith); - parse_success!("=*", RelationalOp::Similar); - parse_success!("=?", RelationalOp::Contains); - - assert!(parse_relational_op_str.parse(b"=>").is_err()); - assert!(parse_relational_op_str.parse(b">").is_err()); - assert!(parse_relational_op_str.parse(b"<").is_err()); - assert!(parse_relational_op_str.parse(b"<=").is_err()); - } - - #[test] - fn parse_relational_op_usize() { - use super::parse_relational_op_usize; - - macro_rules! parse_success { - ($input:expr, $expected:expr) => { - assert_eq!( - parse_relational_op_usize - .parse($input.as_bytes()) - .unwrap(), - $expected - ); - }; - } - - parse_success!("==", RelationalOp::Eq); - parse_success!("!=", RelationalOp::Ne); - parse_success!(">=", RelationalOp::Ge); - parse_success!(">", RelationalOp::Gt); - parse_success!("<=", RelationalOp::Le); - parse_success!("<", RelationalOp::Lt); - - assert!(parse_relational_op_usize.parse(b"=*").is_err()); - assert!(parse_relational_op_usize.parse(b"=~").is_err()); - assert!(parse_relational_op_usize.parse(b"=^").is_err()); - assert!(parse_relational_op_usize.parse(b"!^").is_err()); - assert!(parse_relational_op_usize.parse(b"=$").is_err()); - assert!(parse_relational_op_usize.parse(b"!$").is_err()); - assert!(parse_relational_op_usize.parse(b"=?").is_err()); - } - - #[test] - fn parse_string_single_quoted() { - use super::parse_string_single_quoted; - - macro_rules! parse_success { - ($input:expr, $expected:expr) => { - assert_eq!( - parse_string_single_quoted.parse($input).unwrap(), - $expected - ); - }; - } - - parse_success!(b"'abc'", b"abc"); - parse_success!(b"'a\"bc'", b"a\"bc"); - parse_success!(b"'a\\'bc'", b"a'bc"); - parse_success!(b"''", b""); - } - - #[test] - fn parse_string_double_quoted() { - use super::parse_string_double_quoted; - - macro_rules! parse_success { - ($input:expr, $expected:expr) => { - assert_eq!( - parse_string_double_quoted.parse($input).unwrap(), - $expected - ); - }; - } - - parse_success!(b"\"abc\"", b"abc"); - parse_success!(b"\"a\\\"bc\"", b"a\"bc"); - parse_success!(b"\"a\'bc\"", b"a'bc"); - parse_success!(b"\"\"", b""); - } -} diff --git a/crates/pica-matcher/src/error.rs b/crates/pica-matcher/src/error.rs deleted file mode 100644 index 337eeb88c..000000000 --- a/crates/pica-matcher/src/error.rs +++ /dev/null @@ -1,16 +0,0 @@ -use thiserror::Error; - -/// An error that can occur when parsing matcher expressions. -#[derive(Error, PartialEq, Eq, Debug)] -pub enum ParseMatcherError { - #[error("invalid tag matcher")] - InvalidTagMatcher, - #[error("invalid occurrence matcher (got `{0}`)")] - InvalidOccurrenceMatcher(String), - #[error("invalid subfield matcher (got `{0}`)")] - InvalidSubfieldMatcher(String), - #[error("invalid field matcher (got `{0}`)")] - InvalidFieldMatcher(String), - #[error("invalid record matcher (got `{0}`)")] - InvalidRecordMatcher(String), -} diff --git a/crates/pica-matcher/src/field_matcher.rs b/crates/pica-matcher/src/field_matcher.rs deleted file mode 100644 index 52ae750b1..000000000 --- a/crates/pica-matcher/src/field_matcher.rs +++ /dev/null @@ -1,817 +0,0 @@ -//! Matcher that works on PICA+ [Fields](pica_record_v1::Field). - -use std::cell::RefCell; -use std::ops::{BitAnd, BitOr, BitXor, Not}; -use std::str::FromStr; - -use bstr::ByteSlice; -use pica_record_v1::FieldRef; -use winnow::ascii::digit1; -use winnow::combinator::{ - alt, delimited, opt, preceded, repeat, terminated, -}; -use winnow::error::ParserError; -use winnow::prelude::*; - -use crate::common::{ - parse_quantifier, parse_relational_op_usize, ws, BooleanOp, - RelationalOp, -}; -use crate::occurrence_matcher::parse_occurrence_matcher; -use crate::subfield_matcher::{ - self, parse_subfield_matcher, parse_subfield_singleton_matcher, -}; -use crate::tag_matcher::parse_tag_matcher; -use crate::{ - MatcherOptions, OccurrenceMatcher, ParseMatcherError, Quantifier, - SubfieldMatcher, TagMatcher, -}; - -/// A field matcher that checks if a field exists. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ExistsMatcher { - tag_matcher: TagMatcher, - occurrence_matcher: OccurrenceMatcher, -} - -impl ExistsMatcher { - /// Create a new exists matcher from a string slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::field_matcher::ExistsMatcher; - /// use pica_record_v1::FieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = ExistsMatcher::new("003@?"); - /// let options = Default::default(); - /// - /// assert!(matcher.is_match( - /// &FieldRef::new("003@", None, vec![('0', "123456789X")]), - /// &options - /// )); - /// - /// assert!(!matcher.is_match( - /// &FieldRef::new("002@", None, vec![('0', "123456789X")]), - /// &options - /// )); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>(data: &T) -> Self { - Self::try_from(data.as_ref()).expect("exists matcher") - } - - /// Returns `true` if the matcher matches against the given - /// subfield(s). - pub fn is_match<'a>( - &self, - fields: impl IntoIterator> + Clone, - _options: &MatcherOptions, - ) -> bool { - fields.into_iter().any(|field| { - self.tag_matcher == field.tag() - && self.occurrence_matcher == field.occurrence() - }) - } -} - -/// Parse a exists matcher expression. -fn parse_exists_matcher(i: &mut &[u8]) -> PResult { - terminated(ws((parse_tag_matcher, parse_occurrence_matcher)), '?') - .map(|(t, o)| ExistsMatcher { - tag_matcher: t, - occurrence_matcher: o, - }) - .parse_next(i) -} - -impl TryFrom<&[u8]> for ExistsMatcher { - type Error = ParseMatcherError; - - #[inline] - fn try_from(value: &[u8]) -> Result { - parse_exists_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidFieldMatcher(value) - }) - } -} - -impl FromStr for ExistsMatcher { - type Err = ParseMatcherError; - - #[inline] - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -/// A field matcher that checks for fields satisfies subfield -/// criterion. -#[derive(Debug, Clone)] -pub struct SubfieldsMatcher { - quantifier: Quantifier, - tag_matcher: TagMatcher, - occurrence_matcher: OccurrenceMatcher, - subfield_matcher: SubfieldMatcher, -} - -impl SubfieldsMatcher { - /// Create a new subfields matcher from a string slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::field_matcher::SubfieldsMatcher; - /// use pica_record_v1::FieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = SubfieldsMatcher::new("002@.0 == 'Olfo'"); - /// let options = Default::default(); - /// - /// assert!(matcher.is_match( - /// &FieldRef::new("002@", None, vec![('0', "Olfo")]), - /// &options - /// )); - /// - /// assert!(!matcher.is_match( - /// &FieldRef::new("002@", None, vec![('0', "Oaf")]), - /// &options - /// )); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>(data: &T) -> Self { - Self::try_from(data.as_ref()).expect("subfields matcher") - } - - /// Returns `true` if at least one field exists with a matching tag - /// and occurrence and a subfield matching the subfield - /// matcher's criteria. - pub fn is_match<'a>( - &self, - fields: impl IntoIterator>, - options: &MatcherOptions, - ) -> bool { - let mut fields = fields.into_iter().filter(|field| { - self.tag_matcher == field.tag() - && self.occurrence_matcher == field.occurrence() - }); - - let check_fn = |field: &FieldRef| -> bool { - self.subfield_matcher.is_match(field.subfields(), options) - }; - - match self.quantifier { - Quantifier::All => fields.all(check_fn), - Quantifier::Any => fields.any(check_fn), - } - } -} - -fn parse_subfields_matcher_dot( - i: &mut &[u8], -) -> PResult { - ( - opt(ws(parse_quantifier)).map(Option::unwrap_or_default), - parse_tag_matcher, - parse_occurrence_matcher, - preceded('.', parse_subfield_singleton_matcher), - ) - .map(|(q, t, o, s)| SubfieldsMatcher { - quantifier: q, - tag_matcher: t, - occurrence_matcher: o, - subfield_matcher: s, - }) - .parse_next(i) -} - -fn parse_subfields_matcher_bracket( - i: &mut &[u8], -) -> PResult { - ( - opt(ws(parse_quantifier)).map(Option::unwrap_or_default), - parse_tag_matcher, - parse_occurrence_matcher, - delimited(ws('{'), parse_subfield_matcher, ws('}')), - ) - .map(|(q, t, o, s)| SubfieldsMatcher { - quantifier: q, - tag_matcher: t, - occurrence_matcher: o, - subfield_matcher: s, - }) - .parse_next(i) -} - -fn parse_subfields_matcher(i: &mut &[u8]) -> PResult { - alt((parse_subfields_matcher_dot, parse_subfields_matcher_bracket)) - .parse_next(i) -} - -impl TryFrom<&[u8]> for SubfieldsMatcher { - type Error = ParseMatcherError; - - #[inline] - fn try_from(value: &[u8]) -> Result { - parse_subfields_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidFieldMatcher(value) - }) - } -} - -impl FromStr for SubfieldsMatcher { - type Err = ParseMatcherError; - - #[inline] - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -/// A field matcher that checks for the singleton matcher. -#[derive(Debug, Clone)] -pub enum SingletonMatcher { - Exists(ExistsMatcher), - Subfields(SubfieldsMatcher), -} - -impl SingletonMatcher { - /// Create a new singleton matcher from a string slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::field_matcher::SingletonMatcher; - /// use pica_record_v1::FieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = SingletonMatcher::new("003@?"); - /// let options = Default::default(); - /// - /// assert!(matcher.is_match( - /// &FieldRef::new("003@", None, vec![('0', "123456789X")]), - /// &options - /// )); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>(data: &T) -> Self { - Self::try_from(data.as_ref()).expect("singleton macher") - } - - /// Returns `true` if the given field matches against the field - /// matcher. - pub fn is_match<'a>( - &self, - fields: impl IntoIterator> + Clone, - options: &MatcherOptions, - ) -> bool { - match self { - Self::Subfields(m) => m.is_match(fields, options), - Self::Exists(m) => m.is_match(fields, options), - } - } -} - -/// Parse a singleton matcher expression. -fn parse_singleton_matcher(i: &mut &[u8]) -> PResult { - alt(( - parse_exists_matcher.map(SingletonMatcher::Exists), - parse_subfields_matcher.map(SingletonMatcher::Subfields), - )) - .parse_next(i) -} - -impl TryFrom<&[u8]> for SingletonMatcher { - type Error = ParseMatcherError; - - #[inline] - fn try_from(value: &[u8]) -> Result { - parse_singleton_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidFieldMatcher(value) - }) - } -} - -impl FromStr for SingletonMatcher { - type Err = ParseMatcherError; - - #[inline] - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -/// A field matcher that checks the number of occurrences of a field. -#[derive(Debug, Clone)] -pub struct CardinalityMatcher { - tag_matcher: TagMatcher, - occurrence_matcher: OccurrenceMatcher, - subfield_matcher: Option, - op: RelationalOp, - value: usize, -} - -impl CardinalityMatcher { - /// Create a new cardinality matcher from a string slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::field_matcher::CardinalityMatcher; - /// use pica_record_v1::FieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = - /// CardinalityMatcher::new("#003@{0 == '123456789X'} >= 1"); - /// - /// assert!(matcher.is_match( - /// &FieldRef::new("003@", None, vec![('0', "123456789X")]), - /// &Default::default() - /// )); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>(data: &T) -> Self { - Self::try_from(data.as_ref()).expect("cardinality matcher") - } - - /// Returns `true` if the given field matches against the field - /// matcher. - pub fn is_match<'a>( - &self, - fields: impl IntoIterator>, - options: &MatcherOptions, - ) -> bool { - let count = fields - .into_iter() - .filter(|field| { - self.tag_matcher == field.tag() - && self.occurrence_matcher == field.occurrence() - }) - .filter(|field| { - if let Some(ref matcher) = self.subfield_matcher { - matcher.is_match(field.subfields(), options) - } else { - true - } - }) - .count(); - - match self.op { - RelationalOp::Eq => count == self.value, - RelationalOp::Ne => count != self.value, - RelationalOp::Ge => count >= self.value, - RelationalOp::Gt => count > self.value, - RelationalOp::Le => count <= self.value, - RelationalOp::Lt => count < self.value, - _ => unreachable!(), - } - } -} - -/// Parse a cardinality matcher expressions. -fn parse_cardinality_matcher( - i: &mut &[u8], -) -> PResult { - preceded( - ws('#'), - ( - ws(parse_tag_matcher), - ws(parse_occurrence_matcher), - opt(delimited('{', parse_subfield_matcher, ws('}'))), - ws(parse_relational_op_usize), - digit1 - .verify_map(|value| std::str::from_utf8(value).ok()) - .verify_map(|value| value.parse::().ok()), - ), - ) - .map(|(t, o, s, op, value)| CardinalityMatcher { - tag_matcher: t, - occurrence_matcher: o, - subfield_matcher: s, - op, - value, - }) - .parse_next(i) -} - -impl TryFrom<&[u8]> for CardinalityMatcher { - type Error = ParseMatcherError; - - #[inline] - fn try_from(value: &[u8]) -> Result { - parse_cardinality_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidFieldMatcher(value) - }) - } -} - -impl FromStr for CardinalityMatcher { - type Err = ParseMatcherError; - - #[inline] - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -/// A field matcher that allows grouping, negation and connecting of -/// singleton matcher. -#[derive(Debug, Clone)] -pub enum FieldMatcher { - Singleton(SingletonMatcher), - Cardinality(CardinalityMatcher), - Group(Box), - Not(Box), - Composite { - lhs: Box, - op: BooleanOp, - rhs: Box, - }, -} - -impl FieldMatcher { - /// Create a new field matcher from a string slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::FieldMatcher; - /// use pica_record_v1::FieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = FieldMatcher::new("003@?"); - /// let options = Default::default(); - /// - /// assert!(matcher.is_match( - /// &FieldRef::new("003@", None, vec![('0', "123456789X")]), - /// &options - /// )); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>(data: &T) -> Self { - Self::try_from(data.as_ref()).expect("field matcher") - } - - /// Returns `true` if the given field matches against the field - /// matcher. - pub fn is_match<'a>( - &self, - fields: impl IntoIterator> + Clone, - options: &MatcherOptions, - ) -> bool { - match self { - Self::Singleton(m) => m.is_match(fields, options), - Self::Group(m) => m.is_match(fields, options), - Self::Not(m) => !m.is_match(fields, options), - Self::Cardinality(m) => m.is_match(fields, options), - Self::Composite { lhs, op, rhs } => match op { - BooleanOp::And => { - lhs.is_match(fields.clone(), options) - && rhs.is_match(fields, options) - } - BooleanOp::Or => { - lhs.is_match(fields.clone(), options) - || rhs.is_match(fields, options) - } - BooleanOp::Xor => { - lhs.is_match(fields.clone(), options) - != rhs.is_match(fields, options) - } - }, - } - } -} - -/// Parse a singleton matcher expression (curly bracket notation). -#[inline] -fn parse_singleton_matcher_bracket( - i: &mut &[u8], -) -> PResult { - parse_subfields_matcher_bracket - .map(SingletonMatcher::Subfields) - .parse_next(i) -} - -/// Parse field matcher singleton expression. -#[inline] -fn parse_field_matcher_singleton( - i: &mut &[u8], -) -> PResult { - parse_singleton_matcher - .map(FieldMatcher::Singleton) - .parse_next(i) -} - -/// Parse field matcher expression (curly bracket notation). -#[inline] -fn parse_field_matcher_singleton_bracket( - i: &mut &[u8], -) -> PResult { - parse_singleton_matcher_bracket - .map(FieldMatcher::Singleton) - .parse_next(i) -} - -/// Parse field matcher exists expression. -#[inline] -fn parse_field_matcher_exists(i: &mut &[u8]) -> PResult { - alt(( - parse_exists_matcher.map(|matcher| { - FieldMatcher::Singleton(SingletonMatcher::Exists(matcher)) - }), - ( - opt(parse_quantifier).map(Option::unwrap_or_default), - parse_tag_matcher, - parse_occurrence_matcher, - preceded(ws('.'), subfield_matcher::parse_exists_matcher), - ) - .map(|(q, t, o, s)| { - FieldMatcher::Singleton(SingletonMatcher::Subfields( - SubfieldsMatcher { - quantifier: q, - tag_matcher: t, - occurrence_matcher: o, - subfield_matcher: SubfieldMatcher::Singleton( - subfield_matcher::SingletonMatcher::Exists( - s, - ), - ), - }, - )) - }), - )) - .parse_next(i) -} - -/// Parse field matcher cardinality expression. -#[inline] -fn parse_field_matcher_cardinality( - i: &mut &[u8], -) -> PResult { - parse_cardinality_matcher - .map(FieldMatcher::Cardinality) - .parse_next(i) -} - -thread_local! { - pub static GROUP_LEVEL: RefCell = const { RefCell::new(0) }; -} - -fn increment_group_level(i: &mut &[u8]) -> PResult<()> { - GROUP_LEVEL.with(|level| { - *level.borrow_mut() += 1; - if *level.borrow() >= 32 { - Err(winnow::error::ErrMode::from_error_kind( - i, - winnow::error::ErrorKind::Many, - )) - } else { - Ok(()) - } - }) -} - -fn decrement_group_level() { - GROUP_LEVEL.with(|level| { - *level.borrow_mut() -= 1; - }) -} - -#[inline] -fn parse_field_matcher_group(i: &mut &[u8]) -> PResult { - delimited( - terminated(ws('('), increment_group_level), - alt(( - parse_field_matcher_composite, - parse_field_matcher_singleton, - parse_field_matcher_not, - parse_field_matcher_cardinality, - parse_field_matcher_group, - )), - ws(')').map(|_| decrement_group_level()), - ) - .map(|matcher| FieldMatcher::Group(Box::new(matcher))) - .parse_next(i) -} - -#[inline] -fn parse_field_matcher_not(i: &mut &[u8]) -> PResult { - preceded( - ws('!'), - alt(( - parse_field_matcher_group, - parse_field_matcher_singleton_bracket, - parse_field_matcher_exists, - parse_field_matcher_not, - )), - ) - .map(|matcher| FieldMatcher::Not(Box::new(matcher))) - .parse_next(i) -} - -#[inline] -fn parse_field_matcher_xor(i: &mut &[u8]) -> PResult { - ( - ws(alt(( - parse_field_matcher_group, - parse_field_matcher_and, - parse_field_matcher_cardinality, - parse_field_matcher_singleton, - parse_field_matcher_not, - parse_field_matcher_exists, - ))), - repeat( - 1.., - preceded( - ws(alt(("^", "XOR"))), - ws(alt(( - parse_field_matcher_group, - parse_field_matcher_and, - parse_field_matcher_cardinality, - parse_field_matcher_singleton, - parse_field_matcher_not, - parse_field_matcher_exists, - ))), - ), - ), - ) - .map(|(head, remainder): (_, Vec<_>)| { - remainder.into_iter().fold(head, |prev, next| prev ^ next) - }) - .parse_next(i) -} - -#[inline] -fn parse_field_matcher_and(i: &mut &[u8]) -> PResult { - ( - ws(alt(( - parse_field_matcher_group, - parse_field_matcher_cardinality, - parse_field_matcher_singleton, - parse_field_matcher_not, - parse_field_matcher_exists, - ))), - repeat( - 1.., - preceded( - ws("&&"), - ws(alt(( - parse_field_matcher_group, - parse_field_matcher_cardinality, - parse_field_matcher_singleton, - parse_field_matcher_not, - parse_field_matcher_exists, - ))), - ), - ), - ) - .map(|(head, remainder): (_, Vec<_>)| { - remainder.into_iter().fold(head, |prev, next| prev & next) - }) - .parse_next(i) -} - -#[inline] -fn parse_field_matcher_or(i: &mut &[u8]) -> PResult { - ( - ws(alt(( - parse_field_matcher_group, - parse_field_matcher_and, - parse_field_matcher_xor, - parse_field_matcher_cardinality, - parse_field_matcher_singleton, - parse_field_matcher_not, - parse_field_matcher_exists, - ))), - repeat( - 1.., - preceded( - ws("||"), - ws(alt(( - parse_field_matcher_group, - parse_field_matcher_and, - parse_field_matcher_xor, - parse_field_matcher_cardinality, - parse_field_matcher_singleton, - parse_field_matcher_not, - parse_field_matcher_exists, - ))), - ), - ), - ) - .map(|(head, remainder): (_, Vec<_>)| { - remainder.into_iter().fold(head, |prev, next| prev | next) - }) - .parse_next(i) -} - -fn parse_field_matcher_composite( - i: &mut &[u8], -) -> PResult { - alt(( - parse_field_matcher_or, - parse_field_matcher_xor, - parse_field_matcher_and, - )) - .parse_next(i) -} - -pub fn parse_field_matcher(i: &mut &[u8]) -> PResult { - ws(alt(( - parse_field_matcher_composite, - parse_field_matcher_group, - parse_field_matcher_not, - parse_field_matcher_singleton, - parse_field_matcher_cardinality, - ))) - .parse_next(i) -} - -impl TryFrom<&[u8]> for FieldMatcher { - type Error = ParseMatcherError; - - #[inline] - fn try_from(value: &[u8]) -> Result { - parse_field_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidFieldMatcher(value) - }) - } -} - -impl FromStr for FieldMatcher { - type Err = ParseMatcherError; - - #[inline] - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -impl BitAnd for FieldMatcher { - type Output = Self; - - #[inline] - fn bitand(self, rhs: Self) -> Self::Output { - Self::Composite { - lhs: Box::new(self), - op: BooleanOp::And, - rhs: Box::new(rhs), - } - } -} - -impl BitOr for FieldMatcher { - type Output = Self; - - #[inline] - fn bitor(self, rhs: Self) -> Self::Output { - Self::Composite { - lhs: Box::new(self), - op: BooleanOp::Or, - rhs: Box::new(rhs), - } - } -} - -impl BitXor for FieldMatcher { - type Output = Self; - - #[inline] - fn bitxor(self, rhs: Self) -> Self::Output { - Self::Composite { - lhs: Box::new(self), - op: BooleanOp::Xor, - rhs: Box::new(rhs), - } - } -} - -impl Not for FieldMatcher { - type Output = Self; - - #[inline] - fn not(self) -> Self::Output { - Self::Not(Box::new(self)) - } -} diff --git a/crates/pica-matcher/src/lib.rs b/crates/pica-matcher/src/lib.rs deleted file mode 100644 index 5e2703c93..000000000 --- a/crates/pica-matcher/src/lib.rs +++ /dev/null @@ -1,30 +0,0 @@ -//! This crate provides various matcher to filter PICA+ records, fields -//! or subfields. - -mod common; -mod error; -pub mod field_matcher; -mod matcher_builder; -mod occurrence_matcher; -mod options; -mod record_matcher; -pub mod subfield_matcher; -mod tag_matcher; - -pub use common::{Quantifier, RelationalOp}; -pub use error::ParseMatcherError; -pub use field_matcher::FieldMatcher; -pub use matcher_builder::MatcherBuilder; -pub use occurrence_matcher::OccurrenceMatcher; -pub use options::MatcherOptions; -pub use record_matcher::RecordMatcher; -pub use subfield_matcher::SubfieldMatcher; -pub use tag_matcher::TagMatcher; - -/// Parsers recognizing matcher for PICA+ primitives. -pub mod parser { - pub use super::field_matcher::parse_field_matcher; - pub use super::occurrence_matcher::parse_occurrence_matcher; - pub use super::subfield_matcher::parse_subfield_matcher; - pub use super::tag_matcher::parse_tag_matcher; -} diff --git a/crates/pica-matcher/src/matcher_builder.rs b/crates/pica-matcher/src/matcher_builder.rs deleted file mode 100644 index 9e7f61e15..000000000 --- a/crates/pica-matcher/src/matcher_builder.rs +++ /dev/null @@ -1,60 +0,0 @@ -use pica_utils::NormalizationForm; - -use crate::{ParseMatcherError, RecordMatcher}; - -pub struct MatcherBuilder { - matcher: RecordMatcher, - nf: Option, -} - -type Result = std::result::Result; - -impl MatcherBuilder { - pub fn new( - matcher: String, - nf: Option, - ) -> Result { - let matcher = RecordMatcher::try_from( - NormalizationForm::translit_opt(matcher, nf).as_bytes(), - )?; - - Ok(Self { matcher, nf }) - } - - pub fn and(mut self, matcher: Vec) -> Result { - for predicate in matcher.iter() { - self.matcher &= RecordMatcher::try_from( - NormalizationForm::translit_opt(predicate, self.nf) - .as_bytes(), - )?; - } - - Ok(self) - } - - pub fn or(mut self, matcher: Vec) -> Result { - for predicate in matcher.iter() { - self.matcher |= RecordMatcher::try_from( - NormalizationForm::translit_opt(predicate, self.nf) - .as_bytes(), - )?; - } - - Ok(self) - } - - pub fn not(mut self, matcher: Vec) -> Result { - for predicate in matcher.iter() { - self.matcher &= !RecordMatcher::try_from( - NormalizationForm::translit_opt(predicate, self.nf) - .as_bytes(), - )?; - } - - Ok(self) - } - - pub fn build(self) -> RecordMatcher { - self.matcher - } -} diff --git a/crates/pica-matcher/src/occurrence_matcher.rs b/crates/pica-matcher/src/occurrence_matcher.rs deleted file mode 100644 index f1d1a37b3..000000000 --- a/crates/pica-matcher/src/occurrence_matcher.rs +++ /dev/null @@ -1,217 +0,0 @@ -use std::str::FromStr; - -use bstr::{BStr, ByteSlice}; -use pica_record_v1::parser::parse_occurrence_digits; -use pica_record_v1::{Occurrence, OccurrenceRef}; -use winnow::combinator::{alt, empty, preceded, separated_pair}; -use winnow::{PResult, Parser}; - -use crate::ParseMatcherError; - -/// A matcher that matches against PICA+ -/// [Occurrence](`pica_record_v1::Occurrence`). -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum OccurrenceMatcher { - Any, - Exact(Occurrence), - Range(Occurrence, Occurrence), - None, -} - -impl OccurrenceMatcher { - /// Create a new occurrence matcher. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::OccurrenceMatcher; - /// use pica_record_v1::Occurrence; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = OccurrenceMatcher::new("/01-02"); - /// assert!(matches!(matcher, OccurrenceMatcher::Range(_, _))); - /// - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn new>(value: &T) -> Self { - Self::try_from(value.as_ref()).expect("occurrence matcher") - } - - /// Returns `true` if the given occurrence matches against the - /// matcher. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::OccurrenceMatcher; - /// use pica_record_v1::OccurrenceRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = OccurrenceMatcher::new("/01-03"); - /// assert!(matcher.is_match(&OccurrenceRef::new("02"))); - /// assert!(!matcher.is_match(&OccurrenceRef::new("04"))); - /// - /// Ok(()) - /// } - /// ``` - pub fn is_match(&self, other: &OccurrenceRef) -> bool { - match self { - Self::Any => true, - Self::None => other == b"00", - Self::Exact(rhs) => other == rhs, - Self::Range(min, max) => { - (other.as_bytes() >= min.as_bytes()) - && (other.as_bytes() <= max.as_bytes()) - } - } - } - - #[cfg(test)] - fn exact>(value: &T) -> Self { - Self::Exact(OccurrenceRef::new(value).into()) - } - - #[cfg(test)] - fn range>(min: &T, max: &T) -> Self { - Self::Range( - OccurrenceRef::new(min).into(), - OccurrenceRef::new(max).into(), - ) - } -} - -#[inline] -fn parse_occurrence_range(i: &mut &[u8]) -> PResult { - separated_pair( - parse_occurrence_digits, - '-', - parse_occurrence_digits, - ) - .verify(|(min, max)| min.len() == max.len() && min < max) - .map(|(min, max)| { - OccurrenceMatcher::Range( - OccurrenceRef::from_unchecked(min).into(), - OccurrenceRef::from_unchecked(max).into(), - ) - }) - .parse_next(i) -} - -#[inline] -fn parse_occurrence_exact(i: &mut &[u8]) -> PResult { - parse_occurrence_digits - .verify(|x: &BStr| x != "00") - .map(|value| { - OccurrenceMatcher::Exact( - OccurrenceRef::from_unchecked(value).into(), - ) - }) - .parse_next(i) -} - -pub fn parse_occurrence_matcher( - i: &mut &[u8], -) -> PResult { - alt(( - preceded( - '/', - alt(( - parse_occurrence_range, - parse_occurrence_exact, - "00".value(OccurrenceMatcher::None), - '*'.value(OccurrenceMatcher::Any), - )), - ), - empty.value(OccurrenceMatcher::None), - )) - .parse_next(i) -} - -impl TryFrom<&[u8]> for OccurrenceMatcher { - type Error = ParseMatcherError; - - fn try_from(value: &[u8]) -> Result { - parse_occurrence_matcher.parse(value).map_err(|_| { - ParseMatcherError::InvalidOccurrenceMatcher( - value.to_str_lossy().to_string(), - ) - }) - } -} - -impl FromStr for OccurrenceMatcher { - type Err = ParseMatcherError; - - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -impl From> for OccurrenceMatcher { - fn from(value: OccurrenceRef) -> Self { - OccurrenceMatcher::Exact(value.into()) - } -} - -impl PartialEq> for OccurrenceMatcher { - fn eq(&self, other: &OccurrenceRef) -> bool { - self.is_match(other) - } -} - -impl PartialEq for OccurrenceRef<'_> { - fn eq(&self, matcher: &OccurrenceMatcher) -> bool { - matcher.is_match(self) - } -} - -impl PartialEq>> for OccurrenceMatcher { - fn eq(&self, other: &Option<&OccurrenceRef>) -> bool { - match other { - Some(occurrence) => self.is_match(occurrence), - None => matches!(self, Self::Any | Self::None), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_occurrence_matcher() { - macro_rules! parse_success { - ($input:expr, $expected:expr) => { - assert_eq!( - super::parse_occurrence_matcher - .parse($input) - .unwrap(), - $expected - ); - }; - } - - parse_success!(b"/*", OccurrenceMatcher::Any); - parse_success!(b"/00", OccurrenceMatcher::None); - parse_success!(b"/01", OccurrenceMatcher::exact("01")); - parse_success!(b"/01-03", OccurrenceMatcher::range("01", "03")); - parse_success!(b"", OccurrenceMatcher::None); - - macro_rules! parse_error { - ($input:expr) => { - assert!(super::parse_occurrence_matcher - .parse($input) - .is_err()); - }; - } - - parse_error!(b"/03-01"); - parse_error!(b"/0001"); - parse_error!(b"/0A"); - parse_error!(b"/A"); - } -} diff --git a/crates/pica-matcher/src/options.rs b/crates/pica-matcher/src/options.rs deleted file mode 100644 index 590574761..000000000 --- a/crates/pica-matcher/src/options.rs +++ /dev/null @@ -1,34 +0,0 @@ -/// Options and flags which can be used to configure a matcher. -#[derive(Debug)] -pub struct MatcherOptions { - pub case_ignore: bool, - pub strsim_threshold: f64, -} - -impl Default for MatcherOptions { - fn default() -> Self { - Self { - case_ignore: false, - strsim_threshold: 0.8, - } - } -} - -impl MatcherOptions { - /// Create new matcher flags. - pub fn new() -> Self { - Self::default() - } - - /// Whether to ignore case when comparing strings or not. - pub fn case_ignore(mut self, yes: bool) -> Self { - self.case_ignore = yes; - self - } - - /// Set the similarity threshold for the similar operator (`=*`). - pub fn strsim_threshold(mut self, threshold: f64) -> Self { - self.strsim_threshold = threshold; - self - } -} diff --git a/crates/pica-matcher/src/record_matcher.rs b/crates/pica-matcher/src/record_matcher.rs deleted file mode 100644 index b32d6a1d9..000000000 --- a/crates/pica-matcher/src/record_matcher.rs +++ /dev/null @@ -1,182 +0,0 @@ -use std::ops::{ - BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not, -}; -use std::str::FromStr; - -use bstr::ByteSlice; -use pica_record_v1::RecordRef; -#[cfg(feature = "serde")] -use serde::Deserialize; -use winnow::Parser; - -use crate::common::BooleanOp; -use crate::field_matcher::parse_field_matcher; -use crate::{FieldMatcher, MatcherOptions, ParseMatcherError}; - -/// A Matcher that works on PICA+ [Records](pica_record_v1::Record). -#[derive(Debug)] -pub struct RecordMatcher { - pub(crate) field_matcher: FieldMatcher, -} - -impl RecordMatcher { - /// Create a new field matcher from a string slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::RecordMatcher; - /// use pica_record_v1::RecordRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = RecordMatcher::new("003@?"); - /// let record = - /// RecordRef::new(vec![("003@", None, vec![('0', "abc")])]); - /// - /// assert!(matcher.is_match(&record, &Default::default())); - /// Ok(()) - /// } - /// ``` - pub fn new>(data: &T) -> Self { - Self::try_from(data.as_ref()).expect("record matcher") - } - - /// Returns `true` if the given record matches against the record - /// matcher. - pub fn is_match( - &self, - record: &RecordRef, - options: &MatcherOptions, - ) -> bool { - self.field_matcher.is_match(record.iter(), options) - } -} - -impl TryFrom<&[u8]> for RecordMatcher { - type Error = ParseMatcherError; - - fn try_from(value: &[u8]) -> Result { - let matcher_str = value.to_str_lossy().to_string(); - - parse_field_matcher - .parse(value) - .map(|field_matcher| RecordMatcher { field_matcher }) - .map_err(|_| { - ParseMatcherError::InvalidRecordMatcher(matcher_str) - }) - } -} - -impl FromStr for RecordMatcher { - type Err = ParseMatcherError; - - #[inline] - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -impl TryFrom<&String> for RecordMatcher { - type Error = ParseMatcherError; - - fn try_from(value: &String) -> Result { - Self::try_from(value.as_bytes()) - } -} - -impl BitAnd for RecordMatcher { - type Output = Self; - - fn bitand(self, rhs: Self) -> Self::Output { - RecordMatcher { - field_matcher: FieldMatcher::Composite { - lhs: Box::new(self.field_matcher), - op: BooleanOp::And, - rhs: Box::new(rhs.field_matcher), - }, - } - } -} - -impl BitAndAssign for RecordMatcher { - fn bitand_assign(&mut self, rhs: Self) { - self.field_matcher = FieldMatcher::Composite { - lhs: Box::new(self.field_matcher.clone()), - op: BooleanOp::And, - rhs: Box::new(rhs.field_matcher), - }; - } -} - -impl BitOr for RecordMatcher { - type Output = Self; - - fn bitor(self, rhs: Self) -> Self::Output { - RecordMatcher { - field_matcher: FieldMatcher::Composite { - lhs: Box::new(self.field_matcher), - op: BooleanOp::Or, - rhs: Box::new(rhs.field_matcher), - }, - } - } -} - -impl BitOrAssign for RecordMatcher { - fn bitor_assign(&mut self, rhs: Self) { - self.field_matcher = FieldMatcher::Composite { - lhs: Box::new(self.field_matcher.clone()), - op: BooleanOp::Or, - rhs: Box::new(rhs.field_matcher), - }; - } -} - -impl BitXor for RecordMatcher { - type Output = Self; - - fn bitxor(self, rhs: Self) -> Self::Output { - RecordMatcher { - field_matcher: FieldMatcher::Composite { - lhs: Box::new(self.field_matcher), - op: BooleanOp::Xor, - rhs: Box::new(rhs.field_matcher), - }, - } - } -} - -impl BitXorAssign for RecordMatcher { - fn bitxor_assign(&mut self, rhs: Self) { - self.field_matcher = FieldMatcher::Composite { - lhs: Box::new(self.field_matcher.clone()), - op: BooleanOp::Xor, - rhs: Box::new(rhs.field_matcher), - }; - } -} - -impl Not for RecordMatcher { - type Output = Self; - - fn not(self) -> Self::Output { - RecordMatcher { - field_matcher: FieldMatcher::Not(Box::new( - self.field_matcher, - )), - } - } -} - -#[cfg(feature = "serde")] -impl<'de> Deserialize<'de> for RecordMatcher { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let s: String = Deserialize::deserialize(deserializer)?; - RecordMatcher::try_from(s.as_bytes()) - .map_err(serde::de::Error::custom) - } -} diff --git a/crates/pica-matcher/src/subfield_matcher.rs b/crates/pica-matcher/src/subfield_matcher.rs deleted file mode 100644 index 3a988c57a..000000000 --- a/crates/pica-matcher/src/subfield_matcher.rs +++ /dev/null @@ -1,1417 +0,0 @@ -//! Matcher that works on PICA+ [Subfields](pica_record_v1::Subfield). - -use std::cell::RefCell; -use std::ops::{BitAnd, BitOr, BitXor}; -use std::str::FromStr; - -use bstr::ByteSlice; -use pica_record_v1::parser::parse_subfield_code; -use pica_record_v1::{SubfieldCode, SubfieldRef}; -use regex::bytes::{Regex, RegexBuilder}; -use strsim::normalized_levenshtein; -use winnow::ascii::digit1; -use winnow::combinator::{ - alt, delimited, opt, preceded, repeat, separated, separated_pair, - terminated, -}; -use winnow::error::ParserError; -use winnow::{PResult, Parser}; - -use crate::common::{ - parse_quantifier, parse_relational_op_str, - parse_relational_op_usize, parse_string, ws, BooleanOp, Quantifier, - RelationalOp, -}; -use crate::{MatcherOptions, ParseMatcherError}; - -/// A matcher that checks if a subfield exists. -/// -/// This matcher can be used to determine if a single subfield or a -/// list of subfields contains at least one subfield with a code, that -/// is contained in the matcher's code list. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct ExistsMatcher { - codes: Vec, -} - -const SUBFIELD_CODES: &str = - "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; - -#[inline] -fn parse_subfield_code_range( - i: &mut &[u8], -) -> PResult> { - separated_pair(parse_subfield_code, '-', parse_subfield_code) - .verify(|(min, max)| min < max) - .map(|(min, max)| { - (min.as_byte()..=max.as_byte()) - .map(SubfieldCode::from_unchecked) - .collect() - }) - .parse_next(i) -} - -#[inline] -fn parse_subfield_code_single( - i: &mut &[u8], -) -> PResult> { - parse_subfield_code.map(|code| vec![code]).parse_next(i) -} - -#[inline] -fn parse_subfield_code_list( - i: &mut &[u8], -) -> PResult> { - delimited( - '[', - repeat( - 1.., - alt(( - parse_subfield_code_range, - parse_subfield_code_single, - )), - ) - .fold(Vec::new, |mut acc: Vec<_>, item| { - acc.extend_from_slice(&item); - acc - }), - ']', - ) - .parse_next(i) -} - -#[inline] -fn parse_subfield_code_wildcard( - i: &mut &[u8], -) -> PResult> { - '*'.value( - SUBFIELD_CODES - .chars() - .map(|code| SubfieldCode::new(code).unwrap()) - .collect(), - ) - .parse_next(i) -} - -/// Parse a list of subfield codes -fn parse_subfield_codes(i: &mut &[u8]) -> PResult> { - alt(( - parse_subfield_code_list, - parse_subfield_code_single, - parse_subfield_code_wildcard, - )) - .parse_next(i) -} - -/// Parse the matcher expression from a byte slice. -pub(crate) fn parse_exists_matcher( - i: &mut &[u8], -) -> PResult { - terminated(parse_subfield_codes, '?') - .map(|codes| ExistsMatcher { codes }) - .parse_next(i) -} - -impl ExistsMatcher { - /// Create a new exists matcher from a string slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::subfield_matcher::ExistsMatcher; - /// use pica_record_v1::SubfieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = ExistsMatcher::new(vec!['0']); - /// let options = Default::default(); - /// - /// assert!(matcher - /// .is_match(&SubfieldRef::new('0', "123456789X"), &options)); - /// - /// assert!( - /// !matcher.is_match(&SubfieldRef::new('a', "abc"), &options) - /// ); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>>(codes: T) -> Self { - let codes = codes - .into() - .into_iter() - .map(|code| SubfieldCode::new(code).unwrap()) - .collect(); - - Self { codes } - } - - /// Returns `true` if at least one subfield is found with a code - /// which is in the matcher's code list. - /// - /// # Example - /// - /// ```rust - /// use std::str::FromStr; - /// - /// use pica_matcher::subfield_matcher::ExistsMatcher; - /// use pica_record_v1::SubfieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = ExistsMatcher::from_str("[103]?")?; - /// let options = Default::default(); - /// assert!( - /// matcher.is_match(&SubfieldRef::new('0', "123"), &options) - /// ); - /// - /// let matcher = ExistsMatcher::from_str("*?")?; - /// let options = Default::default(); - /// assert!( - /// matcher.is_match(&SubfieldRef::new('a', "abc"), &options) - /// ); - /// - /// Ok(()) - /// } - /// ``` - pub fn is_match<'a>( - &self, - subfields: impl IntoIterator>, - _options: &MatcherOptions, - ) -> bool { - subfields - .into_iter() - .any(|subfield| self.codes.contains(subfield.code())) - } -} - -impl TryFrom<&[u8]> for ExistsMatcher { - type Error = ParseMatcherError; - - #[inline] - fn try_from(value: &[u8]) -> Result { - parse_exists_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidSubfieldMatcher(value) - }) - } -} - -impl FromStr for ExistsMatcher { - type Err = ParseMatcherError; - - #[inline] - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -/// A matcher that checks relations between (string) values. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct RelationMatcher { - quantifier: Quantifier, - codes: Vec, - op: RelationalOp, - value: Vec, -} - -impl RelationMatcher { - /// Create a new relation matcher from a string slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::subfield_matcher::RelationMatcher; - /// use pica_record_v1::SubfieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = RelationMatcher::new("0 == '123456789X'"); - /// let options = Default::default(); - /// - /// assert!(matcher - /// .is_match(&SubfieldRef::new('0', "123456789X"), &options)); - /// - /// assert!(!matcher - /// .is_match(&SubfieldRef::new('0', "123456789!"), &options)); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>(value: T) -> Self { - Self::try_from(value.as_ref()).expect("relation matcher") - } - - /// Returns true if at least one subfield is found, when the - /// subfield's value and the matcher value are related. The two - /// values are related iff the relation defined by the operator - /// exists. - pub fn is_match<'a>( - &self, - subfields: impl IntoIterator>, - options: &MatcherOptions, - ) -> bool { - use RelationalOp::*; - - let mut subfields = subfields - .into_iter() - .filter(|s| self.codes.contains(s.code())); - - let check = |subfield: &SubfieldRef| -> bool { - let value = subfield.value().as_ref(); - match self.op { - Eq => self.compare(value, options), - Ne => !self.compare(value, options), - StartsWith => self.starts_with(value, options, false), - StartsNotWith => self.starts_with(value, options, true), - EndsWith => self.ends_with(value, options, false), - EndsNotWith => self.ends_with(value, options, true), - Similar => self.is_similar(value, options), - Contains => self.contains(value, options), - _ => unreachable!(), - } - }; - - match self.quantifier { - Quantifier::All => subfields.all(check), - Quantifier::Any => subfields.any(check), - } - } - - /// Returns `true` if the given value is equal to the matcher's - /// value. If the `case_ignore` flag is set, both strings will be - /// converted to lowercase first. - fn compare(&self, value: &[u8], options: &MatcherOptions) -> bool { - if options.case_ignore { - self.value.to_lowercase() == value.to_lowercase() - } else { - self.value == value - } - } - - /// Returns `true` if the given values is a prefix of the matcher's - /// value, otherwise `false`. If the `case_ignore` flag is set, - /// both strings will be converted to lowercase first. - fn starts_with( - &self, - value: &[u8], - options: &MatcherOptions, - invert: bool, - ) -> bool { - let mut result = if options.case_ignore { - value.to_lowercase().starts_with(&self.value.to_lowercase()) - } else { - value.starts_with(&self.value) - }; - - if invert { - result = !result - } - - result - } - - /// Returns `true` if the given values is a suffix of the matcher's - /// value, otherwise `false`. If the `case_ignore` flag is set, - /// both strings will be converted to lowercase first. - fn ends_with( - &self, - value: &[u8], - options: &MatcherOptions, - invert: bool, - ) -> bool { - let mut result = if options.case_ignore { - value.to_lowercase().ends_with(&self.value.to_lowercase()) - } else { - value.ends_with(&self.value) - }; - - if invert { - result = !result; - } - - result - } - - /// Returns `true` if the given value is similar to the matcher's - /// value. The similarity score is determined by calculating the - /// normalized levenshtein distance between both strings. If the - /// `case_ignore` flag is set, both strings will be converted to - /// lowercase first. - fn is_similar(&self, rhs: &[u8], options: &MatcherOptions) -> bool { - let lhs = self.value.to_str_lossy(); - let rhs = rhs.to_str_lossy(); - - let score = if options.case_ignore { - normalized_levenshtein( - &lhs.to_lowercase(), - &rhs.to_lowercase(), - ) - } else { - normalized_levenshtein(&lhs, &rhs) - }; - - score > options.strsim_threshold - } - - /// Returns `true` if the given value is a substring of the value. - /// If the `case_ignore` flag is set, both strings will be - /// converted to lowercase first. - fn contains(&self, value: &[u8], options: &MatcherOptions) -> bool { - if options.case_ignore { - value - .to_lowercase() - .find(self.value.to_lowercase()) - .is_some() - } else { - value.find(&self.value).is_some() - } - } -} - -/// Parse a relational expression -#[inline] -fn parse_relation_matcher(i: &mut &[u8]) -> PResult { - ( - opt(ws(parse_quantifier)).map(Option::unwrap_or_default), - ws(parse_subfield_codes), - ws(parse_relational_op_str), - ws(parse_string), - ) - .map(|(quantifier, codes, op, value)| RelationMatcher { - quantifier, - codes, - op, - value, - }) - .parse_next(i) -} - -impl TryFrom<&[u8]> for RelationMatcher { - type Error = ParseMatcherError; - - #[inline] - fn try_from(value: &[u8]) -> Result { - parse_relation_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidSubfieldMatcher(value) - }) - } -} - -impl FromStr for RelationMatcher { - type Err = ParseMatcherError; - - #[inline] - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -/// A matcher that checks a subfield value against a regex. -#[derive(PartialEq, Clone, Debug)] -pub struct RegexMatcher { - quantifier: Quantifier, - codes: Vec, - re: String, - invert: bool, -} - -impl RegexMatcher { - /// Create a new regex matcher from a string slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::subfield_matcher::RegexMatcher; - /// use pica_matcher::Quantifier; - /// use pica_record_v1::SubfieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let options = Default::default(); - /// - /// let subfield = SubfieldRef::new('0', "Oa"); - /// let matcher = - /// RegexMatcher::new(vec!['0'], "^Oa", Quantifier::Any, false); - /// assert!(matcher.is_match(&subfield, &options)); - /// - /// let subfield = SubfieldRef::new('0', "Ob"); - /// let matcher = - /// RegexMatcher::new(vec!['0'], "^Oa", Quantifier::Any, true); - /// assert!(matcher.is_match(&subfield, &options)); - /// - /// Ok(()) - /// } - /// ``` - pub fn new( - codes: T, - re: S, - quantifier: Quantifier, - invert: bool, - ) -> Self - where - S: Into, - T: Into>, - { - let codes = codes - .into() - .into_iter() - .map(|code| SubfieldCode::new(code).unwrap()) - .collect(); - - let re = re.into(); - assert!(RegexBuilder::new(&re).build().is_ok()); - - RegexMatcher { - quantifier, - codes, - re, - invert, - } - } - - /// Returns true if at least one subfield value is found, that - /// matches against the regular expression. - pub fn is_match<'a>( - &self, - subfields: impl IntoIterator>, - options: &MatcherOptions, - ) -> bool { - let re = RegexBuilder::new(&self.re) - .case_insensitive(options.case_ignore) - .build() - .unwrap(); - - let mut subfields = subfields - .into_iter() - .filter(|s| self.codes.contains(s.code())); - - let check_fn = |subfield: &SubfieldRef| -> bool { - let mut result = re.is_match(subfield.value().as_ref()); - if self.invert { - result = !result; - } - - result - }; - - match self.quantifier { - Quantifier::All => subfields.all(check_fn), - Quantifier::Any => subfields.any(check_fn), - } - } -} - -/// Parse a regex matcher expression -fn parse_regex_matcher(i: &mut &[u8]) -> PResult { - ( - opt(ws(parse_quantifier)).map(Option::unwrap_or_default), - ws(parse_subfield_codes), - ws(alt(("=~".value(false), "!~".value(true)))), - parse_string - .verify_map(|re| String::from_utf8(re).ok()) - .verify(|re| Regex::new(re).is_ok()), - ) - .map(|(quantifier, codes, invert, re)| RegexMatcher { - quantifier, - codes, - invert, - re, - }) - .parse_next(i) -} - -impl TryFrom<&[u8]> for RegexMatcher { - type Error = ParseMatcherError; - - #[inline] - fn try_from(value: &[u8]) -> Result { - parse_regex_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidSubfieldMatcher(value) - }) - } -} - -impl FromStr for RegexMatcher { - type Err = ParseMatcherError; - - #[inline] - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -/// A matcher that checks if a subfield value is in a predefined list. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct InMatcher { - quantifier: Quantifier, - codes: Vec, - values: Vec>, - invert: bool, -} - -impl InMatcher { - /// Create a new matcher from a string slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::subfield_matcher::InMatcher; - /// use pica_matcher::Quantifier; - /// use pica_record_v1::SubfieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = InMatcher::new( - /// vec!['0'], - /// vec!["abc", "def"], - /// Quantifier::Any, - /// false, - /// ); - /// let options = Default::default(); - /// assert!( - /// matcher.is_match(&SubfieldRef::new('0', "def"), &options) - /// ); - /// - /// let matcher = InMatcher::new( - /// vec!['0'], - /// vec!["abc", "def"], - /// Quantifier::Any, - /// true, - /// ); - /// let options = Default::default(); - /// assert!( - /// matcher.is_match(&SubfieldRef::new('0', "hij"), &options) - /// ); - /// - /// Ok(()) - /// } - /// ``` - pub fn new( - codes: T, - values: U, - quantifier: Quantifier, - invert: bool, - ) -> Self - where - T: Into>, - U: Into>, - V: AsRef<[u8]>, - { - let codes = codes.into(); - let values = values - .into() - .into_iter() - .map(|s| s.as_ref().to_vec()) - .collect::>(); - - let codes = codes - .into_iter() - .map(|code| SubfieldCode::new(code).unwrap()) - .collect(); - - Self { - quantifier, - codes, - values, - invert, - } - } - - /// Returns `true` if at least one subfield is found, where the - /// value is contained in the matcher list. - pub fn is_match<'a>( - &self, - subfields: impl IntoIterator>, - options: &MatcherOptions, - ) -> bool { - let mut subfields = subfields - .into_iter() - .filter(|s| self.codes.contains(s.code())); - - let check_fn = |subfield: &SubfieldRef| -> bool { - let mut result = self.values.iter().any(|rhs| { - if options.case_ignore { - subfield.value().to_lowercase() - == rhs.to_lowercase() - } else { - subfield.value() == rhs - } - }); - - if self.invert { - result = !result; - } - - result - }; - - match self.quantifier { - Quantifier::All => subfields.all(check_fn), - Quantifier::Any => subfields.any(check_fn), - } - } -} - -/// Parse a in matcher expression. -fn parse_in_matcher(i: &mut &[u8]) -> PResult { - ( - opt(ws(parse_quantifier)).map(Option::unwrap_or_default), - ws(parse_subfield_codes), - opt(ws("not")).map(|x| x.is_some()), - preceded( - ws("in"), - delimited( - ws('['), - separated(1.., parse_string, ws(',')), - ws(']'), - ), - ), - ) - .map(|(quantifier, codes, invert, values)| InMatcher { - quantifier, - codes, - invert, - values, - }) - .parse_next(i) -} - -impl TryFrom<&[u8]> for InMatcher { - type Error = ParseMatcherError; - - fn try_from(value: &[u8]) -> Result { - parse_in_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidSubfieldMatcher(value) - }) - } -} - -impl FromStr for InMatcher { - type Err = ParseMatcherError; - - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -/// A matcher that checks the number of occurrences of a subfield. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct CardinalityMatcher { - code: SubfieldCode, - op: RelationalOp, - value: usize, -} - -impl CardinalityMatcher { - /// Create a new matcher. - /// - /// # Panics - /// - /// This function panics on ∀ invalid input. The cardinality - /// matcher uses only a subset of all relational operators; the - /// caller must ensure that the operator is applicable on - /// `usize`. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::subfield_matcher::CardinalityMatcher; - /// use pica_matcher::RelationalOp; - /// use pica_record_v1::SubfieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = CardinalityMatcher::new('0', RelationalOp::Gt, 1); - /// let options = Default::default(); - /// - /// assert!(matcher.is_match( - /// vec![ - /// &SubfieldRef::new('0', "def")?, - /// &SubfieldRef::new('0', "abc")? - /// ], - /// &options - /// )); - /// - /// assert!( - /// !matcher.is_match(&SubfieldRef::new('0', "def"), &options) - /// ); - /// - /// Ok(()) - /// } - /// ``` - pub fn new(code: T, op: RelationalOp, value: usize) -> Self - where - T: Into, - { - let code = code.into(); - - assert!(code.is_ascii_alphanumeric()); - assert!(op.is_usize_applicable()); - - Self { - code: SubfieldCode::new(code).unwrap(), - op, - value, - } - } - - /// Returns true of number of fields with a code equal to the - /// matcher's code is `==`, `!=`, `>=`, `>`, `<=`, or `<` than the - /// matcher's value. - pub fn is_match<'a>( - &self, - subfields: impl IntoIterator>, - _options: &MatcherOptions, - ) -> bool { - let count = subfields - .into_iter() - .filter(|&s| self.code == *s.code()) - .count(); - - match self.op { - RelationalOp::Eq => count == self.value, - RelationalOp::Ne => count != self.value, - RelationalOp::Ge => count >= self.value, - RelationalOp::Gt => count > self.value, - RelationalOp::Le => count <= self.value, - RelationalOp::Lt => count < self.value, - _ => unreachable!(), - } - } -} - -/// Parse a cardinality matcher expression. -fn parse_cardinality_matcher( - i: &mut &[u8], -) -> PResult { - preceded( - ws('#'), - ( - ws(parse_subfield_code), - ws(parse_relational_op_usize), - digit1 - .verify_map(|value| std::str::from_utf8(value).ok()) - .verify_map(|value| value.parse::().ok()), - ), - ) - .map(|(code, op, value)| CardinalityMatcher { code, op, value }) - .parse_next(i) -} - -impl TryFrom<&[u8]> for CardinalityMatcher { - type Error = ParseMatcherError; - - #[inline] - fn try_from(value: &[u8]) -> Result { - parse_cardinality_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidSubfieldMatcher(value) - }) - } -} - -impl FromStr for CardinalityMatcher { - type Err = ParseMatcherError; - - #[inline] - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -/// A matcher that checks for the singleton matcher. -/// -/// This matcher combines all atomic, singleton matcher into a new -/// matcher. -#[derive(Clone, Debug, PartialEq)] -pub enum SingletonMatcher { - Cardinality(CardinalityMatcher), - Exists(ExistsMatcher), - In(InMatcher), - Regex(RegexMatcher), - Relation(RelationMatcher), -} - -/// Parse a singleton matcher expression. -fn parse_singleton_matcher(i: &mut &[u8]) -> PResult { - alt(( - parse_cardinality_matcher.map(SingletonMatcher::Cardinality), - parse_exists_matcher.map(SingletonMatcher::Exists), - parse_in_matcher.map(SingletonMatcher::In), - parse_regex_matcher.map(SingletonMatcher::Regex), - parse_relation_matcher.map(SingletonMatcher::Relation), - )) - .parse_next(i) -} - -impl SingletonMatcher { - /// Create a new singleton matcher from a byte slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::subfield_matcher::SingletonMatcher; - /// use pica_record_v1::SubfieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = SingletonMatcher::new("0 != '123456789X'"); - /// let options = Default::default(); - /// - /// assert!(matcher - /// .is_match(&SubfieldRef::new('0', "2345678901"), &options)); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>(data: T) -> Self { - Self::try_from(data.as_ref()).expect("singleton matcher") - } - - /// Returns `true` if the underlying matcher returns `true`. - pub fn is_match<'a>( - &self, - subfields: impl IntoIterator>, - options: &MatcherOptions, - ) -> bool { - match self { - Self::Cardinality(m) => m.is_match(subfields, options), - Self::Exists(m) => m.is_match(subfields, options), - Self::In(m) => m.is_match(subfields, options), - Self::Regex(m) => m.is_match(subfields, options), - Self::Relation(m) => m.is_match(subfields, options), - } - } -} - -impl TryFrom<&[u8]> for SingletonMatcher { - type Error = ParseMatcherError; - - #[inline] - fn try_from(value: &[u8]) -> Result { - parse_singleton_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidSubfieldMatcher(value) - }) - } -} - -/// A matcher that allows grouping, negation and connecting of -/// singleton matcher. -#[derive(Clone, Debug, PartialEq)] -pub enum SubfieldMatcher { - Singleton(SingletonMatcher), - Group(Box), - Not(Box), - Composite { - lhs: Box, - op: BooleanOp, - rhs: Box, - }, -} - -impl SubfieldMatcher { - /// Create a new matcher from a string slice. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::subfield_matcher::SubfieldMatcher; - /// use pica_record_v1::SubfieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = - /// SubfieldMatcher::new("0 != '123456789X' && 0 =^ '234'"); - /// let options = Default::default(); - /// - /// assert!(matcher - /// .is_match(&SubfieldRef::new('0', "2345678901"), &options)); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>(data: T) -> Self { - Self::try_from(data.as_ref()).expect("subfield matcher") - } - - pub fn is_match<'a>( - &self, - subfields: impl IntoIterator> + Clone, - options: &MatcherOptions, - ) -> bool { - match self { - Self::Singleton(m) => m.is_match(subfields, options), - Self::Group(m) => m.is_match(subfields, options), - Self::Not(m) => !m.is_match(subfields, options), - Self::Composite { lhs, op, rhs } => match op { - BooleanOp::And => { - lhs.is_match(subfields.clone(), options) - && rhs.is_match(subfields, options) - } - BooleanOp::Or => { - lhs.is_match(subfields.clone(), options) - || rhs.is_match(subfields, options) - } - BooleanOp::Xor => { - lhs.is_match(subfields.clone(), options) - != rhs.is_match(subfields, options) - } - }, - } - } -} - -#[inline] -fn parse_subfield_exists_matcher( - i: &mut &[u8], -) -> PResult { - parse_exists_matcher - .map(SingletonMatcher::Exists) - .map(SubfieldMatcher::Singleton) - .parse_next(i) -} - -#[inline] -pub(crate) fn parse_subfield_singleton_matcher( - i: &mut &[u8], -) -> PResult { - parse_singleton_matcher - .map(SubfieldMatcher::Singleton) - .parse_next(i) -} - -#[inline] -fn parse_not_matcher(i: &mut &[u8]) -> PResult { - preceded( - ws('!'), - alt(( - parse_group_matcher, - parse_subfield_exists_matcher, - parse_not_matcher, - )), - ) - .map(|matcher| SubfieldMatcher::Not(Box::new(matcher))) - .parse_next(i) -} - -thread_local! { - pub static GROUP_LEVEL: RefCell = const { RefCell::new(0) }; -} - -fn increment_group_level(i: &mut &[u8]) -> PResult<()> { - GROUP_LEVEL.with(|level| { - *level.borrow_mut() += 1; - if *level.borrow() >= 32 { - Err(winnow::error::ErrMode::from_error_kind( - i, - winnow::error::ErrorKind::Many, - )) - } else { - Ok(()) - } - }) -} - -fn decrement_group_level() { - GROUP_LEVEL.with(|level| { - *level.borrow_mut() -= 1; - }) -} - -#[inline] -fn parse_group_matcher(i: &mut &[u8]) -> PResult { - delimited( - terminated(ws('('), increment_group_level), - alt(( - parse_composite_matcher, - parse_subfield_singleton_matcher, - parse_not_matcher, - parse_group_matcher, - )), - ws(')').map(|_| decrement_group_level()), - ) - .map(|matcher| SubfieldMatcher::Group(Box::new(matcher))) - .parse_next(i) -} - -#[inline] -fn parse_or_matcher(i: &mut &[u8]) -> PResult { - ( - alt(( - ws(parse_group_matcher), - ws(parse_xor_matcher), - ws(parse_and_matcher), - ws(parse_subfield_singleton_matcher), - ws(parse_not_matcher), - )), - repeat( - 1.., - preceded( - ws("||"), - alt(( - ws(parse_group_matcher), - ws(parse_xor_matcher), - ws(parse_and_matcher), - ws(parse_subfield_singleton_matcher), - ws(parse_not_matcher), - )), - ), - ), - ) - .map(|(head, remainder): (_, Vec<_>)| { - remainder.into_iter().fold(head, |prev, next| prev | next) - }) - .parse_next(i) -} - -#[inline] -fn parse_and_matcher(i: &mut &[u8]) -> PResult { - ( - ws(alt(( - parse_group_matcher, - parse_singleton_matcher.map(SubfieldMatcher::Singleton), - parse_not_matcher, - ))), - repeat( - 1.., - preceded( - ws("&&"), - ws(alt(( - parse_group_matcher, - parse_singleton_matcher - .map(SubfieldMatcher::Singleton), - parse_not_matcher, - ))), - ), - ), - ) - .map(|(head, remainder): (_, Vec<_>)| { - remainder.into_iter().fold(head, |prev, next| prev & next) - }) - .parse_next(i) -} - -#[inline] -fn parse_xor_matcher(i: &mut &[u8]) -> PResult { - ( - ws(alt(( - parse_group_matcher, - parse_and_matcher, - parse_singleton_matcher.map(SubfieldMatcher::Singleton), - parse_not_matcher, - ))), - repeat( - 1.., - preceded( - ws(alt(("^", "XOR"))), - ws(alt(( - parse_group_matcher, - parse_and_matcher, - parse_singleton_matcher - .map(SubfieldMatcher::Singleton), - parse_not_matcher, - ))), - ), - ), - ) - .map(|(head, remainder): (_, Vec<_>)| { - remainder.into_iter().fold(head, |prev, next| prev ^ next) - }) - .parse_next(i) -} - -#[inline] -fn parse_composite_matcher(i: &mut &[u8]) -> PResult { - alt((parse_or_matcher, parse_xor_matcher, parse_and_matcher)) - .parse_next(i) -} - -pub fn parse_subfield_matcher( - i: &mut &[u8], -) -> PResult { - alt(( - parse_composite_matcher, - parse_group_matcher, - parse_not_matcher, - parse_singleton_matcher.map(SubfieldMatcher::Singleton), - )) - .parse_next(i) -} - -impl TryFrom<&[u8]> for SubfieldMatcher { - type Error = ParseMatcherError; - - #[inline] - fn try_from(value: &[u8]) -> Result { - parse_subfield_matcher.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseMatcherError::InvalidSubfieldMatcher(value) - }) - } -} - -impl FromStr for SubfieldMatcher { - type Err = ParseMatcherError; - - #[inline] - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -impl BitAnd for SubfieldMatcher { - type Output = Self; - - fn bitand(self, rhs: Self) -> Self::Output { - Self::Composite { - lhs: Box::new(self), - op: BooleanOp::And, - rhs: Box::new(rhs), - } - } -} - -impl BitOr for SubfieldMatcher { - type Output = Self; - - fn bitor(self, rhs: Self) -> Self::Output { - Self::Composite { - lhs: Box::new(self), - op: BooleanOp::Or, - rhs: Box::new(rhs), - } - } -} - -impl BitXor for SubfieldMatcher { - type Output = Self; - - fn bitxor(self, rhs: Self) -> Self::Output { - Self::Composite { - lhs: Box::new(self), - op: BooleanOp::Xor, - rhs: Box::new(rhs), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - type TestResult = anyhow::Result<()>; - - #[test] - fn parse_subfield_codes() { - let codes = SUBFIELD_CODES.chars().collect::>(); - - macro_rules! parse_success { - ($input:expr, $expected:expr) => { - assert_eq!( - super::parse_subfield_codes.parse($input).unwrap(), - $expected - ); - }; - } - - for code in codes.iter() { - parse_success!(code.to_string().as_bytes(), vec![*code]); - } - - parse_success!(b"*", codes); - parse_success!(b"[12]", vec!['1', '2']); - parse_success!(b"[1-3]", vec!['1', '2', '3']); - parse_success!( - b"[1-3a-cx]", - vec!['1', '2', '3', 'a', 'b', 'c', 'x'] - ); - - assert!(super::parse_subfield_codes.parse(b"!").is_err()); - assert!(super::parse_subfield_codes.parse(b"12").is_err()); - assert!(super::parse_subfield_codes.parse(b"[a1!]").is_err()); - assert!(super::parse_subfield_codes.parse(b"[2-2]").is_err()); - } - - #[test] - fn parse_exists_matcher() -> TestResult { - macro_rules! parse_success { - ($input:expr, $codes:expr) => { - assert_eq!( - super::parse_exists_matcher.parse($input).unwrap(), - ExistsMatcher { codes: $codes } - ); - }; - } - - parse_success!( - b"*?", - SUBFIELD_CODES - .chars() - .map(|code| SubfieldCode::new(code).unwrap()) - .collect() - ); - parse_success!( - b"[a-f]?", - vec![ - 'a'.try_into()?, - 'b'.try_into()?, - 'c'.try_into()?, - 'd'.try_into()?, - 'e'.try_into()?, - 'f'.try_into()? - ] - ); - parse_success!( - b"[a-cf]?", - vec![ - 'a'.try_into()?, - 'b'.try_into()?, - 'c'.try_into()?, - 'f'.try_into()? - ] - ); - parse_success!( - b"[ab]?", - vec!['a'.try_into()?, 'b'.try_into()?] - ); - parse_success!(b"a?", vec!['a'.try_into()?]); - - assert!(super::parse_exists_matcher.parse(b"a ?").is_err()); - - Ok(()) - } - - #[test] - fn parse_relation_matcher() -> TestResult { - use Quantifier::*; - use RelationalOp::*; - - use super::parse_relation_matcher; - - macro_rules! parse_success { - ($input:expr, $quantifier:expr, $codes:expr, $op:expr, $value:expr) => { - assert_eq!( - parse_relation_matcher.parse($input).unwrap(), - RelationMatcher { - quantifier: $quantifier, - codes: $codes, - op: $op, - value: $value.to_vec() - } - ); - }; - } - - parse_success!( - b"0 == 'abc'", - Any, - vec!['0'.try_into()?], - Eq, - b"abc" - ); - parse_success!( - b"0 != 'abc'", - Any, - vec!['0'.try_into()?], - Ne, - b"abc" - ); - parse_success!( - b"0 =^ 'abc'", - Any, - vec!['0'.try_into()?], - StartsWith, - b"abc" - ); - parse_success!( - b"0 !^ 'abc'", - Any, - vec!['0'.try_into()?], - StartsNotWith, - b"abc" - ); - parse_success!( - b"0 =$ 'abc'", - Any, - vec!['0'.try_into()?], - EndsWith, - b"abc" - ); - parse_success!( - b"0 !$ 'abc'", - Any, - vec!['0'.try_into()?], - EndsNotWith, - b"abc" - ); - parse_success!( - b"0 =* 'abc'", - Any, - vec!['0'.try_into()?], - Similar, - b"abc" - ); - parse_success!( - b"0 =? 'abc'", - Any, - vec!['0'.try_into()?], - Contains, - b"abc" - ); - - assert!(parse_relation_matcher.parse(b"0 >= 'abc'").is_err()); - assert!(parse_relation_matcher.parse(b"0 > 'abc'").is_err()); - assert!(parse_relation_matcher.parse(b"0 <= 'abc'").is_err()); - assert!(parse_relation_matcher.parse(b"0 < 'abc'").is_err()); - - Ok(()) - } - - #[test] - fn parse_regex_matcher() -> TestResult { - use super::parse_regex_matcher; - - macro_rules! parse_success { - ($input:expr, $codes:expr, $re:expr, $invert:expr) => { - assert_eq!( - parse_regex_matcher.parse($input).unwrap(), - RegexMatcher { - quantifier: Quantifier::Any, - codes: $codes, - invert: $invert, - re: $re.to_string() - } - ); - }; - } - - parse_success!( - b"0 =~ '^Tp'", - vec!['0'.try_into()?], - "^Tp", - false - ); - parse_success!( - b"0 !~ '^Tp'", - vec!['0'.try_into()?], - "^Tp", - true - ); - parse_success!( - b"[ab] =~ 'foo'", - vec!['a'.try_into()?, 'b'.try_into()?], - "foo", - false - ); - - assert!(parse_regex_matcher.parse(b"0 =~ '[[ab]'").is_err()); - assert!(parse_regex_matcher.parse(b"0 !~ '[[ab]'").is_err()); - - Ok(()) - } -} diff --git a/crates/pica-matcher/src/tag_matcher.rs b/crates/pica-matcher/src/tag_matcher.rs deleted file mode 100644 index 6b012570a..000000000 --- a/crates/pica-matcher/src/tag_matcher.rs +++ /dev/null @@ -1,251 +0,0 @@ -use std::str::FromStr; - -use pica_record_v1::parser::parse_tag; -use pica_record_v1::{Tag, TagRef}; -use winnow::combinator::{alt, delimited, repeat, separated_pair}; -use winnow::token::one_of; -use winnow::{PResult, Parser}; - -use crate::ParseMatcherError; - -/// A matcher that matches against PICA+ [Tags](`pica_record_v1::Tag`). -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum TagMatcher { - Simple(Tag), - Pattern([Vec; 4]), -} - -fn parse_fragment(allowed: &[u8], i: &mut &[u8]) -> PResult> { - alt(( - one_of(|c: u8| allowed.contains(&c)).map(|c| vec![c]), - '.'.value(allowed.to_vec()), - delimited( - '[', - repeat( - 1.., - alt(( - separated_pair( - one_of(|c| allowed.contains(&c)), - '-', - one_of(|c| allowed.contains(&c)), - ) - .verify(|(min, max)| min < max) - .map(|(min, max)| (min..=max).collect()), - one_of(|c| allowed.contains(&c)).map(|c| vec![c]), - )), - ) - .fold(Vec::new, |mut acc, item| { - acc.extend(&item); - acc - }), - ']', - ), - )) - .parse_next(i) -} - -#[inline] -fn parse_pattern(i: &mut &[u8]) -> PResult { - let p0 = parse_fragment(b"012", i)?; - let p1 = parse_fragment(b"0123456789", i)?; - let p2 = parse_fragment(b"0123456789", i)?; - let p3 = parse_fragment(b"ABCDEFGHIJKLMNOPQRSTUVWXYZ@", i)?; - - Ok(TagMatcher::Pattern([p0, p1, p2, p3])) -} - -#[inline] -fn parse_simple(i: &mut &[u8]) -> PResult { - parse_tag - .map(|tag| TagMatcher::Simple(Tag::from(tag))) - .parse_next(i) -} - -#[inline] -pub fn parse_tag_matcher(i: &mut &[u8]) -> PResult { - alt((parse_simple, parse_pattern)).parse_next(i) -} - -impl TagMatcher { - /// Create a new tag matcher. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::TagMatcher; - /// use pica_record_v1::TagRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = TagMatcher::new("003@"); - /// assert_eq!(matcher, TagRef::new("003@")); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>(value: &B) -> Self { - parse_tag_matcher - .parse(value.as_ref()) - .expect("tag matcher") - } - - /// Returns `true` if the given tag matches against the matcher. - /// - /// # Example - /// - /// ```rust - /// use pica_matcher::TagMatcher; - /// use pica_record_v1::TagRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let matcher = TagMatcher::new("00[3-5]@"); - /// assert!(matcher.is_match(&TagRef::new("003@"))); - /// assert!(!matcher.is_match(&TagRef::new("002@"))); - /// - /// Ok(()) - /// } - /// ``` - pub fn is_match(&self, tag: &TagRef) -> bool { - match self { - Self::Simple(lhs) => lhs == tag, - Self::Pattern(pattern) => { - pattern[0].contains(&tag[0]) - && pattern[1].contains(&tag[1]) - && pattern[2].contains(&tag[2]) - && pattern[3].contains(&tag[3]) - } - } - } -} - -impl PartialEq for TagRef<'_> { - #[inline] - fn eq(&self, matcher: &TagMatcher) -> bool { - matcher.is_match(self) - } -} - -impl PartialEq> for TagMatcher { - #[inline] - fn eq(&self, other: &TagRef<'_>) -> bool { - self.is_match(other) - } -} - -impl PartialEq for &TagRef<'_> { - #[inline] - fn eq(&self, matcher: &TagMatcher) -> bool { - matcher.is_match(self) - } -} - -impl PartialEq<&TagRef<'_>> for TagMatcher { - #[inline] - fn eq(&self, other: &&TagRef<'_>) -> bool { - self.is_match(other) - } -} - -impl FromStr for TagMatcher { - type Err = ParseMatcherError; - - fn from_str(s: &str) -> Result { - parse_tag_matcher - .parse(s.as_bytes()) - .map_err(|_| ParseMatcherError::InvalidTagMatcher) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - macro_rules! pattern { - ($p0:expr, $p1:expr, $p2:expr, $p3:expr) => { - TagMatcher::Pattern([ - $p0.as_bytes().to_vec(), - $p1.as_bytes().to_vec(), - $p2.as_bytes().to_vec(), - $p3.as_bytes().to_vec(), - ]) - }; - } - - #[test] - fn parse_simple() { - macro_rules! parse_success { - ($input:expr, $expected:expr) => { - assert_eq!( - super::parse_simple.parse($input).unwrap(), - $expected - ); - }; - } - - parse_success!(b"003@", TagMatcher::Simple(Tag::new("003@"))); - parse_success!(b"101@", TagMatcher::Simple(Tag::new("101@"))); - parse_success!(b"203@", TagMatcher::Simple(Tag::new("203@"))); - - assert!(super::parse_simple.parse(b"003@.0").is_err()); - assert!(super::parse_simple.parse(b"!03@").is_err()); - } - - #[test] - fn parse_pattern() { - macro_rules! parse_success { - ($input:expr, $expected:expr) => { - assert_eq!( - super::parse_pattern.parse($input).unwrap(), - $expected - ); - }; - } - - parse_success!(b"003@", pattern!("0", "0", "3", "@")); - parse_success!(b"[02]03@", pattern!("02", "0", "3", "@")); - parse_success!(b".03@", pattern!("012", "0", "3", "@")); - parse_success!(b"0.3@", pattern!("0", "0123456789", "3", "@")); - parse_success!(b"00.@", pattern!("0", "0", "0123456789", "@")); - parse_success!(b"0[2-4]1A", pattern!("0", "234", "1", "A")); - parse_success!(b"0[2-46]1A", pattern!("0", "2346", "1", "A")); - - parse_success!( - b"003.", - pattern!("0", "0", "3", "ABCDEFGHIJKLMNOPQRSTUVWXYZ@") - ); - - parse_success!( - b"0[2-456-8]1A", - pattern!("0", "2345678", "1", "A") - ); - - parse_success!( - b"....", - pattern!( - "012", - "0123456789", - "0123456789", - "ABCDEFGHIJKLMNOPQRSTUVWXYZ@" - ) - ); - - assert!(super::parse_pattern.parse(b"00[3-1]@").is_err()); - assert!(super::parse_pattern.parse(b"00[3-3]@").is_err()); - } - - #[test] - fn parse_tag_matcher() { - macro_rules! parse_success { - ($input:expr, $expected:expr) => { - assert_eq!( - super::parse_tag_matcher.parse($input).unwrap(), - $expected - ); - }; - } - - parse_success!(b"003@", TagMatcher::Simple(Tag::new("003@"))); - parse_success!(b"0[2-46]1A", pattern!("0", "2346", "1", "A")); - } -} diff --git a/crates/pica-matcher/tests/field_matcher/mod.rs b/crates/pica-matcher/tests/field_matcher/mod.rs deleted file mode 100644 index 05d9a51a3..000000000 --- a/crates/pica-matcher/tests/field_matcher/mod.rs +++ /dev/null @@ -1,971 +0,0 @@ -use std::str::FromStr; - -use bstr::B; -use pica_matcher::field_matcher::{ - CardinalityMatcher, ExistsMatcher, SingletonMatcher, - SubfieldsMatcher, -}; -use pica_matcher::{FieldMatcher, MatcherOptions, ParseMatcherError}; -use pica_record_v1::FieldRef; - -use crate::TestResult; - -macro_rules! field { - ($tag:expr, $code:expr, $value:expr) => { - FieldRef::new($tag, None, vec![($code, $value)]) - }; - - ($tag:expr, $occurrence:expr, $code:expr, $value:expr) => { - FieldRef::new($tag, Some($occurrence), vec![($code, $value)]) - }; -} - -#[test] -fn exists_matcher_new() { - let matcher = ExistsMatcher::new("003@?"); - let options = MatcherOptions::default(); - - assert!( - matcher.is_match(&field!("003@", '0', "123456789X"), &options) - ); -} - -#[test] -#[should_panic] -fn exists_matcher_new_panic() { - let _ = ExistsMatcher::new("303@?"); -} - -#[test] -fn exists_matcher_try_from() -> TestResult { - let matcher = ExistsMatcher::try_from(B("003@?"))?; - let options = MatcherOptions::default(); - - assert!( - matcher.is_match(&field!("003@", '0', "123456789X"), &options) - ); - - assert!(matches!( - ExistsMatcher::try_from(B("303@?")).unwrap_err(), - ParseMatcherError::InvalidFieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn exists_matcher_from_str() -> TestResult { - let matcher = ExistsMatcher::from_str("003@?")?; - let options = MatcherOptions::default(); - - assert!( - matcher.is_match(&field!("003@", '0', "123456789X"), &options) - ); - - assert!(matches!( - ExistsMatcher::from_str("303@?").unwrap_err(), - ParseMatcherError::InvalidFieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn exists_matcher_is_match() -> TestResult { - let matcher = ExistsMatcher::new("003@?"); - let options = MatcherOptions::default(); - - assert!( - matcher.is_match(&field!("003@", '0', "123456789X"), &options) - ); - - assert!(!matcher.is_match(&field!("002@", '0', "Olfo"), &options)); - - let fields = [ - &field!("002@", '0', "Olfo"), - &field!("003@", '0', "123456789X"), - ]; - assert!(matcher.is_match(fields, &options)); - - let matcher = ExistsMatcher::new("00[23]@?"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("002@", '0', "Olfo"), &options)); - assert!( - matcher.is_match(&field!("003@", '0', "123456789X"), &options) - ); - - // occurrence - let matcher = ExistsMatcher::new("041A/02?"); - let options = MatcherOptions::default(); - - let field = field!("041A", "01", 'a', "abc"); - assert!(!matcher.is_match(&field, &options)); - - let field = field!("041A", "02", 'a', "abc"); - assert!(matcher.is_match(&field, &options)); - - Ok(()) -} - -#[test] -fn subfields_matcher_new() { - let matcher = SubfieldsMatcher::new("003@.0 == '123456789X'"); - let options = MatcherOptions::default(); - - assert!( - matcher.is_match(&field!("003@", '0', "123456789X"), &options) - ); -} - -#[test] -#[should_panic] -fn subfields_matcher_new_panic() { - let _ = SubfieldsMatcher::new("003!.0 == '123456789X'"); -} - -#[test] -fn subfields_matcher_try_from() -> TestResult { - let matcher = - SubfieldsMatcher::try_from(B("003@.0 == '123456789X'"))?; - let options = MatcherOptions::default(); - - assert!( - matcher.is_match(&field!("003@", '0', "123456789X"), &options) - ); - - assert!(matches!( - SubfieldsMatcher::try_from(B("003@.! == '123456789X'")) - .unwrap_err(), - ParseMatcherError::InvalidFieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn subfields_matcher_from_str() -> TestResult { - let matcher = SubfieldsMatcher::from_str("003@.0 == '123456789X'")?; - let options = MatcherOptions::default(); - - assert!( - matcher.is_match(&field!("003@", '0', "123456789X"), &options) - ); - - assert!(matches!( - SubfieldsMatcher::from_str("003@.! == '123456789X'") - .unwrap_err(), - ParseMatcherError::InvalidFieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn subfields_matcher_is_match() -> TestResult { - // simple - let matcher = SubfieldsMatcher::new("003@.0 == '123456789X'"); - let options = MatcherOptions::default(); - - let field = field!("003@", '0', "123456789X"); - assert!(matcher.is_match(&field, &options)); - - let field = field!("002@", '0', "Olfo"); - assert!(!matcher.is_match(&field, &options)); - - // complex - let matcher = - SubfieldsMatcher::new("003@{0? && 0 == '123456789X'}"); - let options = MatcherOptions::default(); - - let field = field!("003@", '0', "123456789X"); - assert!(matcher.is_match(&field, &options)); - - let field = field!("003@", '0', "34567"); - assert!(!matcher.is_match(&field, &options)); - - let field = field!("002@", '0', "Olfo"); - assert!(!matcher.is_match(&field, &options)); - - Ok(()) -} - -#[test] -fn singleton_matcher_new() { - let matcher = SingletonMatcher::new("041A/03.9?"); - let options = MatcherOptions::default(); - let field = field!("041A", "03", '9', "1234"); - assert!(matcher.is_match(&field, &options)); -} - -#[test] -fn singleton_matcher_try_from() -> TestResult { - let matcher = SingletonMatcher::try_from(B("041A/03.9?"))?; - let options = MatcherOptions::default(); - let field = field!("041A", "03", '9', "1234"); - assert!(matcher.is_match(&field, &options)); - - assert!(matches!( - SingletonMatcher::try_from(B("041A/03.!?")).unwrap_err(), - ParseMatcherError::InvalidFieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn singleton_matcher_from_str() -> TestResult { - let matcher = SingletonMatcher::from_str("041A/03.9?")?; - let options = MatcherOptions::default(); - let field = field!("041A", "03", '9', "1234"); - assert!(matcher.is_match(&field, &options)); - - assert!(matches!( - SingletonMatcher::from_str("041A/03.!?").unwrap_err(), - ParseMatcherError::InvalidFieldMatcher(_) - )); - - Ok(()) -} - -#[test] -#[should_panic] -fn singleton_matcher_new_panic() { - let _ = SingletonMatcher::new("041!/*?"); -} - -#[test] -fn singleton_matcher_is_match() { - let matcher = SingletonMatcher::new("041A/*?"); - let options = MatcherOptions::default(); - let field = field!("041A", "09", '9', "1234"); - assert!(matcher.is_match(&field, &options)); - - let matcher = SingletonMatcher::new("041A/*.9 == '1234'"); - let options = MatcherOptions::default(); - let field = field!("041A", "09", '9', "1234"); - assert!(matcher.is_match(&field, &options)); -} - -#[test] -fn cardinality_matcher_new() { - let matcher = CardinalityMatcher::new("#012A == 1"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); -} - -#[test] -#[should_panic] -fn cardinality_matcher_new_panic() { - let _ = CardinalityMatcher::new("#012A == -1"); -} - -#[test] -fn cardinality_matcher_try_from() -> TestResult { - let matcher = CardinalityMatcher::try_from(B("#012A == 1"))?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - - assert!(matches!( - CardinalityMatcher::try_from(B("#012A == -1")).unwrap_err(), - ParseMatcherError::InvalidFieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn cardinality_matcher_from_str() -> TestResult { - let matcher = CardinalityMatcher::from_str("#012A == 1")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - - assert!(matches!( - CardinalityMatcher::from_str("#012A == -1").unwrap_err(), - ParseMatcherError::InvalidFieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn cardinality_matcher_equal() -> TestResult { - let matcher = CardinalityMatcher::new("#012A == 1"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(!matcher.is_match( - vec![&field!("012A", '0', "abc"), &field!("012A", '0', "def"),], - &options - )); - - Ok(()) -} - -#[test] -fn cardinality_matcher_not_equal() -> TestResult { - let matcher = CardinalityMatcher::new("#012A{0 =^ 'ab'} != 1"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(matcher.is_match( - vec![&field!("012A", '0', "abc"), &field!("012A", '0', "abd")], - &options - )); - - Ok(()) -} - -#[test] -fn cardinality_matcher_greater_than_or_equal() -> TestResult { - let matcher = CardinalityMatcher::new("#012A >= 2"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(matcher.is_match( - vec![&field!("012A", '0', "abc"), &field!("012A", '0', "def")], - &options - )); - - Ok(()) -} - -#[test] -fn cardinality_matcher_greater_than() -> TestResult { - let matcher = CardinalityMatcher::new("#012A{ 0 =^ 'ab' } > 1"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(!matcher.is_match( - vec![&field!("012A", '0', "abc"), &field!("012A", '0', "def")], - &options - )); - - assert!(matcher.is_match( - vec![ - &field!("012A", '0', "abc"), - &field!("012A", 'X', "def"), - &field!("012A", '0', "abd"), - ], - &options - )); - - Ok(()) -} - -#[test] -fn cardinality_matcher_less_than_or_equal() -> TestResult { - let matcher = CardinalityMatcher::new("#012A <= 2"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(matcher.is_match( - vec![&field!("012A", '0', "abc"), &field!("012A", '0', "def")], - &options - )); - - assert!(!matcher.is_match( - vec![ - &field!("012A", '0', "abc"), - &field!("012A", '0', "def"), - &field!("012A", '0', "hij"), - ], - &options - )); - - Ok(()) -} - -#[test] -fn cardinality_matcher_less_than() -> TestResult { - let matcher = CardinalityMatcher::new("#012A{ 0 =^ 'ab' } < 2"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(matcher.is_match( - vec![&field!("012A", '0', "abc"), &field!("012A", '0', "def")], - &options - )); - - assert!(!matcher.is_match( - vec![ - &field!("012A", '0', "abc"), - &field!("012A", 'X', "def"), - &field!("012A", '0', "abd") - ], - &options - )); - - Ok(()) -} - -#[test] -fn field_matcher_new() { - let matcher = FieldMatcher::new("003@.0?"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("003@", '0', "abc"), &options)); -} - -#[test] -#[should_panic] -fn field_matcher_new_panic() { - let _ = FieldMatcher::new("003@.!?"); -} - -#[test] -fn field_matcher_try_from() -> TestResult { - let matcher = FieldMatcher::try_from(B("003@.0?"))?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("003@", '0', "abc"), &options)); - assert!(matches!( - FieldMatcher::try_from(B("003@.!?")).unwrap_err(), - ParseMatcherError::InvalidFieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn field_matcher_from_str() -> TestResult { - let matcher = FieldMatcher::from_str("003@.0?")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("003@", '0', "abc"), &options)); - assert!(matches!( - FieldMatcher::from_str("003@.!?").unwrap_err(), - ParseMatcherError::InvalidFieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn field_matcher_bit_and() { - let lhs = FieldMatcher::new("044H.9?"); - let rhs = FieldMatcher::new("044H.b == 'gnd'"); - let matcher = lhs & rhs; - - let field = - FieldRef::new("044H", None, vec![('9', "123"), ('b', "gnd")]); - assert!(matcher.is_match(&field, &MatcherOptions::default())); -} - -#[test] -fn field_matcher_bit_or() { - let lhs = FieldMatcher::new("044H.9?"); - let rhs = FieldMatcher::new("044K.9?"); - let matcher = lhs | rhs; - - let field = - FieldRef::new("044K", None, vec![('9', "123"), ('b', "kasw")]); - assert!(matcher.is_match(&field, &MatcherOptions::default())); -} - -#[test] -fn field_matcher_bit_xor() { - let lhs = FieldMatcher::new("044K.b?"); - let rhs = FieldMatcher::new("044K.9?"); - let matcher = lhs ^ rhs; - - let field = - FieldRef::new("044K", None, vec![('9', "123"), ('b', "kasw")]); - assert!(!matcher.is_match(&field, &MatcherOptions::default())); -} - -#[test] -fn field_matcher_negate() { - let inner = FieldMatcher::new("044H.9?"); - let matcher = !inner; - - let field = - FieldRef::new("044K", None, vec![('9', "123"), ('b', "gnd")]); - assert!(matcher.is_match(&field, &MatcherOptions::default())); -} - -#[test] -fn field_matcher_singleton() { - let matcher = FieldMatcher::new("041A/03.9?"); - assert!(matcher.is_match( - &field!("041A", "03", '9', "1234"), - &MatcherOptions::default() - )); -} - -#[test] -fn field_matcher_cardinality() { - let matcher = FieldMatcher::new("#012A == 1"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); -} - -#[test] -fn field_matcher_group() { - // singleton - let matcher = FieldMatcher::new("(012A?)"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(!matcher.is_match(&field!("013A", '0', "abc"), &options)); - - // not - let matcher = FieldMatcher::new("(!012A?)"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(matcher.is_match(&field!("013A", '0', "abc"), &options)); - - // cardinality - let matcher = FieldMatcher::new("(#012A <= 1)"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(matcher.is_match(&field!("013A", '0', "abc"), &options)); - - // group - let matcher = FieldMatcher::new("((012A?))"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(!matcher.is_match(&field!("013A", '0', "abc"), &options)); - - // and - let matcher = FieldMatcher::new("(012A? && 012A.0 == 'abc')"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(!matcher.is_match(&field!("012A", '0', "def"), &options)); - - // or - let matcher = FieldMatcher::new("(012A? || 013A.0 == 'abc')"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(matcher.is_match(&field!("013A", '0', "abc"), &options)); - assert!(!matcher.is_match(&field!("013A", '0', "def"), &options)); -} - -#[test] -fn field_matcher_not() { - // Group - let matcher = FieldMatcher::new("!(012A?)"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(matcher.is_match(&field!("013A", '0', "abc"), &options)); - - // exists - let matcher = FieldMatcher::new("!012A?"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(matcher.is_match(&field!("013A", '0', "abc"), &options)); - - // exists - let matcher = FieldMatcher::new("!!012A?"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(!matcher.is_match(&field!("013A", '0', "abc"), &options)); -} - -#[test] -fn field_matcher_and() { - let options = MatcherOptions::default(); - let matcher = FieldMatcher::new( - "012A? && #014A == 0 && 013A{#a == 1 && a == '123'}", - ); - - assert!(matcher.is_match( - vec![&field!("012A", '0', "abc"), &field!("013A", 'a', "123")], - &options - )); - - assert!(!matcher.is_match( - vec![ - &field!("012A", '0', "abc"), - &field!("013A", 'a', "123"), - &field!("014A", '0', "hij"), - ], - &options - )); -} - -#[test] -fn field_matcher_composite_or() { - let matcher = - FieldMatcher::new("012A? || 013A{#a == 1 && a == '1'}"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match( - vec![ - &field!("012A", '0', "abc"), - &FieldRef::new("013A", None, vec![('a', "1"), ('a', "2")]), - ], - &options - )); - - assert!(matcher.is_match( - vec![&field!("013A", 'a', "1"), &field!("014A", '0', "abc")], - &options - )); - - assert!(!matcher.is_match( - vec![ - &FieldRef::new("013A", None, vec![('a', "1"), ('a', "2")]), - &FieldRef::new("014A", None, vec![('0', "abc")]), - ], - &options - )); - - let matcher = - FieldMatcher::new("!014A.x? || 013A{#a == 2 && a == '1'}"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match( - vec![ - &FieldRef::new("012A", None, vec![('0', "abc")]), - &FieldRef::new("013A", None, vec![('a', "1"), ('a', "2")]), - ], - &options - )); -} - -#[test] -fn field_matcher_composite_xor() { - let matcher = FieldMatcher::new("012A? ^ 012B?"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(!matcher.is_match( - vec![&field!("012C", '0', ""), &field!("012D", '0', ""),], - &options - )); - assert!(matcher.is_match( - vec![&field!("012C", '0', ""), &field!("012A", '0', ""),], - &options - )); - assert!(matcher.is_match( - vec![&field!("012A", '0', ""), &field!("012C", '0', ""),], - &options - )); - assert!(!matcher.is_match( - vec![&field!("012A", '0', ""), &field!("012B", '0', ""),], - &options - )); - - // XOR - let matcher = FieldMatcher::new("012A? XOR 012B?"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(!matcher.is_match( - vec![&field!("012C", '0', ""), &field!("012D", '0', ""),], - &options - )); - assert!(matcher.is_match( - vec![&field!("012C", '0', ""), &field!("012A", '0', ""),], - &options - )); - assert!(matcher.is_match( - vec![&field!("012A", '0', ""), &field!("012C", '0', ""),], - &options - )); - assert!(!matcher.is_match( - vec![&field!("012A", '0', ""), &field!("012B", '0', ""),], - &options - )); - - // list - let matcher = FieldMatcher::new("012A? ^ 012B? ^ 012C?"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&field!("012A", '0', "abc"), &options)); - assert!(matcher.is_match( - vec![ - &field!("012C", '0', ""), - &field!("012D", '0', ""), - &field!("013D", '0', "") - ], - &options - )); - - // precedence OR - let matcher = FieldMatcher::new("012A? ^ 012B? || 012C?"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("013B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("013B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("012B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("012B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("013B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("013B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("012B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("012B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - - let matcher = FieldMatcher::new("012A? ^ (012B? || 012C?)"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("013B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("013B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("012B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("012B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("013B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("013B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("012B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("012B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - - // precedence AND - let matcher = FieldMatcher::new("012A? ^ 012B? && 012C?"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("013B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("013B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("012B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("012B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("013B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("013B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("012B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("012B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - - let matcher = FieldMatcher::new("(012A? ^ 012B?) && 012C?"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("013B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("013B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("012B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("013A", '0', ""), - &field!("012B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("013B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("013B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("012B", '0', ""), - &field!("013C", '0', "") - ], - &options - )); - assert!(!matcher.is_match( - vec![ - &field!("012A", '0', ""), - &field!("012B", '0', ""), - &field!("012C", '0', "") - ], - &options - )); -} diff --git a/crates/pica-matcher/tests/integration.rs b/crates/pica-matcher/tests/integration.rs deleted file mode 100644 index 875bd03d2..000000000 --- a/crates/pica-matcher/tests/integration.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod field_matcher; -mod occurrence_matcher; -mod record_matcher; -mod subfield_matcher; -mod tag_matcher; - -pub(crate) type TestResult = anyhow::Result<()>; diff --git a/crates/pica-matcher/tests/occurrence_matcher/mod.rs b/crates/pica-matcher/tests/occurrence_matcher/mod.rs deleted file mode 100644 index b97cde98d..000000000 --- a/crates/pica-matcher/tests/occurrence_matcher/mod.rs +++ /dev/null @@ -1,82 +0,0 @@ -use pica_matcher::OccurrenceMatcher; -use pica_record_v1::OccurrenceRef; - -#[test] -fn test_occurrence_matcher_eq() { - let matcher = OccurrenceMatcher::new("/02"); - - assert!(!matcher.is_match(&OccurrenceRef::new("01"))); - assert!(matcher.is_match(&OccurrenceRef::new("02"))); - assert!(!matcher.is_match(&OccurrenceRef::new("03"))); -} - -#[test] -fn test_occurrence_matcher_range() { - let matcher = OccurrenceMatcher::new("/01-03"); - - assert!(matcher.is_match(&OccurrenceRef::new("01"))); - assert!(matcher.is_match(&OccurrenceRef::new("02"))); - assert!(matcher.is_match(&OccurrenceRef::new("03"))); - - assert!(!matcher.is_match(&OccurrenceRef::new("00"))); - assert!(!matcher.is_match(&OccurrenceRef::new("001"))); - assert!(!matcher.is_match(&OccurrenceRef::new("04"))); -} - -#[test] -fn test_occurrence_matcher_any() { - let matcher = OccurrenceMatcher::new("/*"); - assert!(matcher.is_match(&OccurrenceRef::new("01"))); - assert!(matcher.is_match(&OccurrenceRef::new("00"))); - assert!(matcher.is_match(&OccurrenceRef::new("001"))); -} - -#[test] -fn is_match() { - let matcher = OccurrenceMatcher::new("/01"); - assert!(!matcher.is_match(&OccurrenceRef::new("00"))); - assert!(matcher.is_match(&OccurrenceRef::new("01"))); - - let matcher = OccurrenceMatcher::new("/01-03"); - assert!(!matcher.is_match(&OccurrenceRef::new("00"))); - assert!(matcher.is_match(&OccurrenceRef::new("01"))); - assert!(matcher.is_match(&OccurrenceRef::new("02"))); - assert!(matcher.is_match(&OccurrenceRef::new("03"))); - assert!(!matcher.is_match(&OccurrenceRef::new("04"))); - - let matcher = OccurrenceMatcher::new("/*"); - assert!(matcher.is_match(&OccurrenceRef::new("00"))); - assert!(matcher.is_match(&OccurrenceRef::new("01"))); - - let matcher = OccurrenceMatcher::new("/00"); - assert!(matcher.is_match(&OccurrenceRef::new("00"))); - assert!(!matcher.is_match(&OccurrenceRef::new("01"))); -} - -#[test] -fn test_partial_eq() { - let matcher = OccurrenceMatcher::new("/01"); - assert_ne!(matcher, OccurrenceRef::new("00")); - assert_eq!(matcher, OccurrenceRef::new("01")); - assert_ne!(matcher, Option::::None.as_ref()); - - let matcher = OccurrenceMatcher::new("/01-03"); - assert_ne!(matcher, OccurrenceRef::new("00")); - assert_eq!(matcher, OccurrenceRef::new("01")); - assert_eq!(matcher, OccurrenceRef::new("02")); - assert_eq!(matcher, OccurrenceRef::new("03")); - assert_ne!(matcher, OccurrenceRef::new("04")); - assert_ne!(matcher, Option::::None.as_ref()); - - let matcher = OccurrenceMatcher::new("/*"); - assert_eq!(matcher, OccurrenceRef::new("000")); - assert_eq!(matcher, OccurrenceRef::new("00")); - assert_eq!(matcher, OccurrenceRef::new("001")); - assert_eq!(matcher, OccurrenceRef::new("01")); - assert_eq!(matcher, Option::::None.as_ref()); - - let matcher = OccurrenceMatcher::new("/00"); - assert_eq!(matcher, OccurrenceRef::new("00")); - assert_ne!(matcher, OccurrenceRef::new("01")); - assert_eq!(matcher, Option::::None.as_ref()); -} diff --git a/crates/pica-matcher/tests/record_matcher/mod.rs b/crates/pica-matcher/tests/record_matcher/mod.rs deleted file mode 100644 index 378477827..000000000 --- a/crates/pica-matcher/tests/record_matcher/mod.rs +++ /dev/null @@ -1,194 +0,0 @@ -use std::path::Path; -use std::str::FromStr; -use std::sync::OnceLock; -use std::{env, fs}; - -use bstr::B; -use pica_matcher::{MatcherOptions, ParseMatcherError, RecordMatcher}; -use pica_record_v1::RecordRef; - -use crate::TestResult; - -fn ada_lovelace() -> &'static [u8] { - static DATA: OnceLock> = OnceLock::new(); - DATA.get_or_init(|| { - let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); - let path = Path::new(&manifest_dir) - .join("../pica-toolkit/tests/data/119232022.dat"); - eprintln!("{:?}", path); - fs::read_to_string(&path).unwrap().as_bytes().to_vec() - }) -} - -#[test] -fn record_matcher_new() -> TestResult { - let matcher = RecordMatcher::new("003@.0?"); - - assert!(matcher.is_match( - &RecordRef::from_bytes(ada_lovelace())?, - &MatcherOptions::default() - )); - - Ok(()) -} - -#[test] -#[should_panic] -fn record_matcher_new_panic() { - let _ = RecordMatcher::new("003@.!?"); -} - -#[test] -fn record_matcher_try_from() -> TestResult { - let matcher = RecordMatcher::try_from(B("003@.0?"))?; - - assert!(matcher.is_match( - &RecordRef::from_bytes(ada_lovelace())?, - &MatcherOptions::default() - )); - - assert!(matches!( - RecordMatcher::try_from(B("003@.!?")).unwrap_err(), - ParseMatcherError::InvalidRecordMatcher(_) - )); - - Ok(()) -} - -#[test] -fn record_matcher_from_str() -> TestResult { - let matcher = RecordMatcher::from_str("003@.0?")?; - - assert!(matcher.is_match( - &RecordRef::from_bytes(ada_lovelace())?, - &MatcherOptions::default() - )); - - assert!(matches!( - RecordMatcher::from_str("003@.!?").unwrap_err(), - ParseMatcherError::InvalidRecordMatcher(_) - )); - - Ok(()) -} - -#[test] -fn record_matcher_exists() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - - let matcher = RecordMatcher::new("004B?"); - assert!(matcher.is_match(&record, &Default::default())); - - let matcher = RecordMatcher::new("028A.a?"); - assert!(matcher.is_match(&record, &Default::default())); - - Ok(()) -} - -#[test] -fn record_matcher_cardinality() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let matcher = RecordMatcher::new( - "#028[A@]{d =^ 'Ada' && a == 'Lovelace'} == 5", - ); - - assert!(matcher.is_match(&record, &Default::default())); - Ok(()) -} - -#[test] -fn record_matcher_in() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let matcher = RecordMatcher::new("002@.0 in ['Tpz', 'Tp1']"); - assert!(matcher.is_match(&record, &Default::default())); - - Ok(()) -} - -#[test] -fn record_matcher_regex() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let matcher = RecordMatcher::new("047A/03.[er] =~ '^DE-\\\\d+6'"); - assert!(matcher.is_match(&record, &Default::default())); - - Ok(()) -} - -#[test] -fn record_matcher_eq() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let matcher = RecordMatcher::new("003@.0 == '119232022'"); - assert!(matcher.is_match(&record, &Default::default())); - - Ok(()) -} - -#[test] -fn record_matcher_not_equal() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let matcher = RecordMatcher::new("002@.0 != 'Ts1'"); - assert!(matcher.is_match(&record, &Default::default())); - - Ok(()) -} - -#[test] -fn record_matcher_starts_with() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let matcher = - RecordMatcher::new("003U.a =^ 'http://d-nb.info/gnd/'"); - assert!(matcher.is_match(&record, &Default::default())); - - Ok(()) -} - -#[test] -fn record_matcher_ends_with() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let matcher = RecordMatcher::new("042B.a =$ '-GB'"); - assert!(matcher.is_match(&record, &Default::default())); - - Ok(()) -} - -#[test] -fn record_matcher_group() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let matcher = - RecordMatcher::new("(002@.0 == 'Tp1' && 004B.a == 'pik')"); - assert!(matcher.is_match(&record, &Default::default())); - - Ok(()) -} - -#[test] -fn record_matcher_not() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let matcher = - RecordMatcher::new("!(002@.0 == 'Ts1' || 002@.0 =^ 'Tu')"); - assert!(matcher.is_match(&record, &Default::default())); - - let matcher = RecordMatcher::new("!012A.0?"); - assert!(matcher.is_match(&record, &Default::default())); - Ok(()) -} - -#[test] -fn record_matcher_and() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let matcher = - RecordMatcher::new("002@.0 == 'Tp1' && 004B.a == 'pik'"); - assert!(matcher.is_match(&record, &Default::default())); - - Ok(()) -} - -#[test] -fn record_matcher_or() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let matcher = - RecordMatcher::new("002@.0 == 'Ts1' || 004B.a == 'pik'"); - assert!(matcher.is_match(&record, &Default::default())); - - Ok(()) -} diff --git a/crates/pica-matcher/tests/subfield_matcher/mod.rs b/crates/pica-matcher/tests/subfield_matcher/mod.rs deleted file mode 100644 index 87120cdc8..000000000 --- a/crates/pica-matcher/tests/subfield_matcher/mod.rs +++ /dev/null @@ -1,1076 +0,0 @@ -use std::str::FromStr; - -use bstr::B; -use pica_matcher::subfield_matcher::*; -use pica_matcher::{ - MatcherOptions, ParseMatcherError, Quantifier, RelationalOp, -}; -use pica_record_v1::SubfieldRef; - -use crate::TestResult; - -macro_rules! subfield { - ($code:expr, $value:expr) => { - SubfieldRef::new($code, $value).unwrap() - }; -} - -#[test] -fn exists_matcher_new() { - let subfield = subfield!('0', "119232022"); - let options = MatcherOptions::default(); - - let matcher = ExistsMatcher::new(vec!['0']); - assert!(matcher.is_match(&subfield, &options)); - - let matcher = ExistsMatcher::new(vec!['2', '3']); - assert!(!matcher.is_match(&subfield, &options)); -} - -#[test] -#[should_panic] -fn exists_matcher_new_panic() { - let _ = ExistsMatcher::new(vec!['0', '!']); -} - -#[test] -fn exists_matcher_try_from() -> TestResult { - let subfield = subfield!('0', "119232022"); - let options = MatcherOptions::default(); - - let matcher = ExistsMatcher::try_from(B("0?"))?; - assert!(matcher.is_match(&subfield, &options)); - - assert!(matches!( - ExistsMatcher::try_from("ä?".as_bytes()).unwrap_err(), - ParseMatcherError::InvalidSubfieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn exists_matcher_from_str() -> TestResult { - let subfield = subfield!('0', "119232022"); - let options = MatcherOptions::default(); - - let matcher = ExistsMatcher::from_str("0?")?; - assert!(matcher.is_match(&subfield, &options)); - - assert!(matches!( - ExistsMatcher::from_str("ä?").unwrap_err(), - ParseMatcherError::InvalidSubfieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn exists_matcher_is_match() -> TestResult { - let matcher = ExistsMatcher::from_str("1?")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('1', "abc"), &options)); - assert!(!matcher.is_match(&subfield!('0', "abc"), &options)); - - assert!(matcher.is_match( - [&subfield!('3', "def"), &subfield!('1', "hij"),], - &options - )); - - let matcher = ExistsMatcher::from_str("[a12]?")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('1', "abc"), &options)); - assert!(!matcher.is_match(&subfield!('9', "abc"), &options)); - assert!(matcher.is_match( - [ - &subfield!('3', "def"), - &subfield!('9', "hij"), - &subfield!('2', "bsg"), - ], - &options - )); - - Ok(()) -} - -#[test] -fn relational_matcher_new() { - let matcher = RelationMatcher::new("0 == 'abc'"); - assert!(matcher - .is_match(&subfield!('0', "abc"), &MatcherOptions::new())); -} - -#[test] -#[should_panic] -fn relational_matcher_new_panic() { - let _ = RelationMatcher::new("! == 'abc'"); -} - -#[test] -fn relation_matcher_try_from() -> TestResult { - let matcher = RelationMatcher::try_from(B("0 == 'abc'"))?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('0', "abc"), &options)); - - Ok(()) -} - -#[test] -fn relation_matcher_from_str() -> TestResult { - let matcher = RelationMatcher::from_str("0 == 'abc'")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('0', "abc"), &options)); - - Ok(()) -} - -#[test] -fn relational_matcher_equal() { - // case sensitive - let matcher = RelationMatcher::new("0 == 'abc'"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('0', "abc"), &options)); - assert!(!matcher.is_match(&subfield!('0', "ABC"), &options)); - assert!(!matcher.is_match(&subfield!('1', "abc"), &options)); - assert!(matcher.is_match( - [ - &subfield!('3', "def"), - &subfield!('0', "abc"), - &subfield!('2', "bsg"), - ], - &options - )); - - // case insensitive - let matcher = RelationMatcher::new("0 == 'abc'"); - let options = MatcherOptions::new().case_ignore(true); - - assert!(matcher.is_match(&subfield!('0', "abc"), &options)); - assert!(matcher.is_match(&subfield!('0', "ABC"), &options)); - - // multiple subfields - let matcher = RelationMatcher::new("0 == 'abc'"); - let options = MatcherOptions::default(); - - let subfields = [ - &subfield!('3', "def"), - &subfield!('0', "abc"), - &subfield!('2', "hij"), - ]; - - assert!(matcher.is_match(subfields, &options)); - - let subfields = [&subfield!('3', "def"), &subfield!('2', "hij")]; - assert!(!matcher.is_match(subfields, &options)); -} - -#[test] -fn relational_matcher_not_equal() { - // case sensitive - let matcher = RelationMatcher::new("0 != 'abc'"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('0', "abc"), &options)); - assert!(matcher.is_match(&subfield!('0', "ABC"), &options)); - assert!(!matcher.is_match(&subfield!('1', "abc"), &options)); - - let subfields = [&subfield!('0', "abc"), &subfield!('2', "hij")]; - assert!(!matcher.is_match(subfields, &options)); - - // case insensitive - let matcher = RelationMatcher::new("0 != 'abc'"); - let options = MatcherOptions::new().case_ignore(true); - - assert!(!matcher.is_match(&subfield!('0', "abc"), &options)); - assert!(!matcher.is_match(&subfield!('0', "ABC"), &options)); - - // multiple subfields - let matcher = RelationMatcher::new("0 != 'abc'"); - let options = MatcherOptions::default(); - - let subfields = [&subfield!('3', "def"), &subfield!('0', "bsg")]; - assert!(matcher.is_match(subfields, &options)); - - let subfields = [ - &subfield!('3', "def"), - &subfield!('0', "abc"), - &subfield!('2', "bsg"), - ]; - - assert!(!matcher.is_match(subfields, &options)); -} - -#[test] -fn relational_matcher_starts_not_with() { - // case sensitive - let matcher = RelationMatcher::new("0 !^ 'ab'"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('0', "abc"), &options)); - assert!(matcher.is_match(&subfield!('0', "def"), &options)); - - // case insensitive - let matcher = RelationMatcher::new("0 !^ 'ab'"); - let options = MatcherOptions::new().case_ignore(true); - - assert!(!matcher.is_match(&subfield!('0', "abc"), &options)); - assert!(!matcher.is_match(&subfield!('0', "ABc"), &options)); - assert!(!matcher.is_match(&subfield!('0', "aBc"), &options)); - assert!(matcher.is_match(&subfield!('0', "def"), &options)); - - // multiple subfields - let matcher = RelationMatcher::new("0 !^ 'ab'"); - let options = MatcherOptions::default(); - - let subfields = [&subfield!('0', "baab"), &subfield!('0', "abba")]; - assert!(matcher.is_match(subfields, &options)); - - let subfields = [&subfield!('0', "abc"), &subfield!('1', "abba")]; - assert!(!matcher.is_match(subfields, &options)); -} - -#[test] -fn relational_matcher_ends_with() { - // case sensitive - let matcher = RelationMatcher::new("0 =$ 'ab'"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('0', "abab"), &options)); - assert!(!matcher.is_match(&subfield!('0', "abba"), &options)); - - // case insensitive - let matcher = RelationMatcher::new("0 =$ 'ab'"); - let options = MatcherOptions::new().case_ignore(true); - - assert!(matcher.is_match(&subfield!('0', "abab"), &options)); - assert!(matcher.is_match(&subfield!('0', "abab"), &options)); - - // multiple subfields - let matcher = RelationMatcher::new("0 =$ 'ab'"); - let options = MatcherOptions::default(); - - let subfields = [&subfield!('0', "baba"), &subfield!('0', "abab")]; - assert!(matcher.is_match(subfields, &options)); - - let subfields = [&subfield!('0', "def"), &subfield!('1', "aab")]; - assert!(!matcher.is_match(subfields, &options)); -} - -#[test] -fn relational_matcher_ends_not_with() { - // case sensitive - let matcher = RelationMatcher::new("0 !$ 'ab'"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('0', "abab"), &options)); - assert!(matcher.is_match(&subfield!('0', "abba"), &options)); - - // case insensitive - let matcher = RelationMatcher::new("0 !$ 'ab'"); - let options = MatcherOptions::new().case_ignore(true); - - assert!(!matcher.is_match(&subfield!('0', "abab"), &options)); - assert!(!matcher.is_match(&subfield!('0', "abAB"), &options)); - assert!(matcher.is_match(&subfield!('0', "abbba"), &options)); - - // multiple subfields - let matcher = RelationMatcher::new("0 !$ 'ab'"); - let options = MatcherOptions::default(); - - let subfields = [&subfield!('0', "baba"), &subfield!('0', "abab")]; - assert!(matcher.is_match(subfields, &options)); - - let subfields = [&subfield!('0', "abab"), &subfield!('1', "ab")]; - assert!(!matcher.is_match(subfields, &options)); -} - -#[test] -fn relational_matcher_similar() { - // default threshold - let matcher = RelationMatcher::new("a =* 'Heike'"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "Heike"), &options)); - assert!(!matcher.is_match(&subfield!('a', "Heiko"), &options)); - - // threshold set - let matcher = RelationMatcher::new("a =* 'Heike'"); - let options = MatcherOptions::new().strsim_threshold(0.7); - - assert!(matcher.is_match(&subfield!('a', "Heike"), &options)); - assert!(matcher.is_match(&subfield!('a', "Heiko"), &options)); - - // default threshold - let matcher = RelationMatcher::new("a =* 'Heike'"); - let options = MatcherOptions::new().case_ignore(true); - - assert!(matcher.is_match(&subfield!('a', "Heike"), &options)); - - // multiple subfields - let matcher = RelationMatcher::new("a =* 'Heike'"); - let options = MatcherOptions::default(); - - let subfields = - [&subfield!('a', "Heiko"), &subfield!('a', "Heike")]; - assert!(matcher.is_match(subfields, &options)); -} - -#[test] -fn relational_matcher_contains() { - // default options - let matcher = RelationMatcher::new("a =? 'aba'"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "aba"), &options)); - assert!(matcher.is_match(&subfield!('a', "xabax"), &options)); - assert!(!matcher.is_match(&subfield!('a', "abba"), &options)); - - // case ignore - let matcher = RelationMatcher::new("a =? 'AbA'"); - let options = MatcherOptions::default().case_ignore(true); - - assert!(matcher.is_match(&subfield!('a', "aba"), &options)); - assert!(matcher.is_match(&subfield!('a', "xabax"), &options)); - assert!(!matcher.is_match(&subfield!('a', "abba"), &options)); - - // multiple subfields - let matcher = RelationMatcher::new("a =? 'aba'"); - let options = MatcherOptions::default(); - - let subfields = - [&subfield!('a', "XabbaX"), &subfield!('a', "YabaY")]; - assert!(matcher.is_match(subfields, &options)); -} - -#[test] -fn regex_matcher_new() { - let _ = RegexMatcher::new( - vec!['0'], - "^T[gpsu][1z]$", - Quantifier::All, - false, - ); -} - -#[test] -#[should_panic] -fn regex_matcher_new_panic1() { - RegexMatcher::new( - vec!['0'], - "^T[[gpsu][1z]$", - Quantifier::Any, - false, - ); -} - -#[test] -fn regex_matcher_try_from() { - assert!(RegexMatcher::try_from(B("0 =~ '^T[gpsu][1z]$'")).is_ok()); - - let error = - RegexMatcher::try_from(B("0 =~ '^Tp[[1z]$'")).unwrap_err(); - assert!(matches!( - error, - ParseMatcherError::InvalidSubfieldMatcher(_) - )); -} - -#[test] -fn regex_matcher_from_str() { - assert!(RegexMatcher::from_str("0 =~ '^T[gpsu][1z]$'").is_ok()); - - let error = RegexMatcher::from_str("0 =~ '^Tp[[1z]$'").unwrap_err(); - - assert!(matches!( - error, - ParseMatcherError::InvalidSubfieldMatcher(_) - )); -} - -#[test] -fn regex_matcher_is_match() -> TestResult { - // case sensitive - let matcher = RegexMatcher::from_str("0 =~ '^ab'")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('0', "abba"), &options)); - assert!(!matcher.is_match(&subfield!('0', "bba"), &options)); - assert!(!matcher.is_match(&subfield!('a', "abba"), &options)); - - // case insensitive - let matcher = RegexMatcher::from_str("0 =~ '^ab'")?; - let options = MatcherOptions::new().case_ignore(true); - - assert!(matcher.is_match(&subfield!('0', "abba"), &options)); - assert!(matcher.is_match(&subfield!('0', "abba"), &options)); - - // invert match - let matcher = RegexMatcher::from_str("0 !~ '^ab'")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('0', "baba"), &options)); - assert!(!matcher.is_match(&subfield!('0', "abba"), &options)); - - // multiple subfields - let matcher = RegexMatcher::from_str("0 =~ '^ab'")?; - let options = MatcherOptions::default(); - - let subfields = - [&subfield!('0', "foobar"), &subfield!('0', "abba")]; - assert!(matcher.is_match(subfields, &options)); - - let subfields = [&subfield!('0', "foo"), &subfield!('0', "bar")]; - assert!(!matcher.is_match(subfields, &options)); - - Ok(()) -} - -#[test] -fn in_matcher_new() { - assert!(InMatcher::new( - vec!['0'], - vec!["abc", "def"], - Quantifier::All, - false - ) - .is_match(&subfield!('0', "abc"), &MatcherOptions::default())); -} - -#[test] -#[should_panic] -fn in_matcher_new_panic() { - let _ = InMatcher::new( - vec!['!'], - vec!["abc", "def"], - Quantifier::Any, - false, - ); -} - -#[test] -fn in_matcher_try_from() -> TestResult { - let matcher = InMatcher::try_from(B("0 in ['abc', 'def']"))?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('0', "abc"), &options)); - - Ok(()) -} - -#[test] -fn in_matcher_from_str() -> TestResult { - let matcher = InMatcher::from_str("0 in ['abc', 'def']")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('0', "abc"), &options)); - - Ok(()) -} - -#[test] -fn in_matcher_is_match() -> TestResult { - // case sensitive - let matcher = InMatcher::from_str("0 in ['abc', 'def']")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('0', "abc"), &options)); - assert!(!matcher.is_match(&subfield!('0', "ABC"), &options)); - assert!(matcher.is_match(&subfield!('0', "def"), &options)); - assert!(!matcher.is_match(&subfield!('0', "DEF"), &options)); - assert!(!matcher.is_match(&subfield!('0', "hij"), &options)); - - // case insensitive - let matcher = InMatcher::from_str("0 in ['abc', 'def']")?; - let options = MatcherOptions::new().case_ignore(true); - - assert!(matcher.is_match(&subfield!('0', "abc"), &options)); - assert!(matcher.is_match(&subfield!('0', "ABC"), &options)); - - // multiple subfields - let matcher = InMatcher::from_str("0 in ['abc', 'def']")?; - let options = MatcherOptions::default(); - - let subfields = [&subfield!('0', "hij"), &subfield!('0', "abc")]; - assert!(matcher.is_match(subfields, &options)); - - let matcher = InMatcher::from_str("a in ['000', '999']")?; - let options = MatcherOptions::default(); - - let subfields = [&subfield!('a', "000"), &subfield!('z', "xyz")]; - assert!(matcher.is_match(subfields, &options)); - - // invert - let matcher = InMatcher::from_str("a not in ['000', '999']")?; - let options = MatcherOptions::default(); - - let subfields = [&subfield!('a', "000"), &subfield!('a', "222")]; - assert!(matcher.is_match(subfields, &options)); - - let matcher = InMatcher::from_str("a not in ['000', '999']")?; - let options = MatcherOptions::default(); - - let subfields = [&subfield!('a', "000"), &subfield!('z', "xyz")]; - assert!(!matcher.is_match(subfields, &options)); - - Ok(()) -} - -#[test] -fn cardinality_matcher_new() { - let matcher = CardinalityMatcher::new('0', RelationalOp::Eq, 2); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('X', "abc"), &options)); -} - -#[test] -#[should_panic] -fn cardinality_matcher_new_panic1() { - let _ = CardinalityMatcher::new('!', RelationalOp::Eq, 2); -} - -#[test] -#[should_panic] -fn cardinality_matcher_new_panic2() { - let _ = CardinalityMatcher::new('!', RelationalOp::StartsWith, 2); -} - -#[test] -fn cardinality_matcher_try_from() -> TestResult { - let matcher = CardinalityMatcher::try_from(B("#0 == 2"))?; - assert!(!matcher - .is_match(&subfield!('X', "abc"), &MatcherOptions::default())); - - assert!(matches!( - CardinalityMatcher::try_from(B("#0 =~ 2")).unwrap_err(), - ParseMatcherError::InvalidSubfieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn cardinality_matcher_try_str() -> TestResult { - let matcher = CardinalityMatcher::from_str("#0 == 2")?; - assert!(!matcher - .is_match(&subfield!('X', "abc"), &MatcherOptions::default())); - - assert!(matches!( - CardinalityMatcher::from_str("#0 =~ 2").unwrap_err(), - ParseMatcherError::InvalidSubfieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn cardinality_matcher_equal() -> TestResult { - let matcher = CardinalityMatcher::from_str("#0 == 2")?; - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('X', "abc"), &options)); - assert!(!matcher.is_match(&subfield!('0', "abc"), &options)); - - let subfields = [&subfield!('0', "abc"), &subfield!('0', "def")]; - assert!(matcher.is_match(subfields, &options)); - - let subfields = [ - &subfield!('0', "abc"), - &subfield!('0', "def"), - &subfield!('0', "hij"), - ]; - assert!(!matcher.is_match(subfields, &options)); - - Ok(()) -} - -#[test] -fn cardinality_matcher_not_equal() -> TestResult { - let matcher = CardinalityMatcher::from_str("#0 != 2")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('X', "abc"), &options)); - assert!(matcher.is_match(&subfield!('0', "abc"), &options)); - - let subfields = [&subfield!('0', "abc"), &subfield!('0', "def")]; - assert!(!matcher.is_match(subfields, &options)); - - let subfields = [ - &subfield!('0', "abc"), - &subfield!('0', "def"), - &subfield!('0', "hij"), - ]; - assert!(matcher.is_match(subfields, &options)); - - Ok(()) -} - -#[test] -fn cardinality_matcher_greater_than_or_equal() -> TestResult { - let matcher = CardinalityMatcher::from_str("#0 >= 2")?; - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('X', "abc"), &options)); - assert!(!matcher.is_match(&subfield!('0', "abc"), &options)); - - let subfields = [&subfield!('0', "abc"), &subfield!('0', "def")]; - assert!(matcher.is_match(subfields, &options)); - - let subfields = [ - &subfield!('0', "abc"), - &subfield!('0', "def"), - &subfield!('0', "hij"), - ]; - assert!(matcher.is_match(subfields, &options)); - - Ok(()) -} - -#[test] -fn cardinality_matcher_greater_than() -> TestResult { - let matcher = CardinalityMatcher::from_str("#0 > 2")?; - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('X', "abc"), &options)); - assert!(!matcher.is_match(&subfield!('0', "abc"), &options)); - - let subfields = [&subfield!('0', "abc"), &subfield!('0', "def")]; - assert!(!matcher.is_match(subfields, &options)); - - let subfields = [ - &subfield!('0', "abc"), - &subfield!('0', "def"), - &subfield!('0', "hij"), - ]; - assert!(matcher.is_match(subfields, &options)); - - Ok(()) -} - -#[test] -fn cardinality_matcher_less_than_or_equal() -> TestResult { - let matcher = CardinalityMatcher::from_str("#0 <= 2")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('X', "abc"), &options)); - assert!(matcher.is_match(&subfield!('0', "abc"), &options)); - - let subfields = [&subfield!('0', "abc"), &subfield!('0', "def")]; - assert!(matcher.is_match(subfields, &options)); - - let subfields = [ - &subfield!('0', "abc"), - &subfield!('0', "def"), - &subfield!('0', "hij"), - ]; - assert!(!matcher.is_match(subfields, &options)); - - Ok(()) -} - -#[test] -fn cardinality_matcher_less_than() -> TestResult { - let matcher = CardinalityMatcher::from_str("#0 < 2")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('X', "abc"), &options)); - assert!(matcher.is_match(&subfield!('0', "abc"), &options)); - - let subfields = [&subfield!('0', "abc"), &subfield!('0', "def")]; - assert!(!matcher.is_match(subfields, &options)); - - let subfields = [ - &subfield!('0', "abc"), - &subfield!('0', "def"), - &subfield!('0', "hij"), - ]; - assert!(!matcher.is_match(subfields, &options)); - Ok(()) -} - -#[test] -fn subfield_matcher_new() { - let matcher = SubfieldMatcher::new("a == 'bcd'"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "bcd"), &options)); -} - -#[test] -#[should_panic] -fn subfield_matcher_new_panic() { - let _ = SubfieldMatcher::new("a == 'bcd"); -} - -#[test] -fn subfield_matcher_try_from() -> TestResult { - let matcher = SubfieldMatcher::try_from(B("a == 'bcd'"))?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "bcd"), &options)); - - assert!(matches!( - SubfieldMatcher::try_from(B("a == 'bcd")).unwrap_err(), - ParseMatcherError::InvalidSubfieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn subfield_matcher_from_str() -> TestResult { - let matcher = SubfieldMatcher::from_str("a == 'bcd'")?; - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "bcd"), &options)); - - assert!(matches!( - SubfieldMatcher::from_str("a == 'bcd").unwrap_err(), - ParseMatcherError::InvalidSubfieldMatcher(_) - )); - - Ok(()) -} - -#[test] -fn subfield_matcher_bit_and() -> TestResult { - let lhs = SubfieldMatcher::from_str("a =^ 'D'")?; - let rhs = SubfieldMatcher::from_str("a =$ 'NB'")?; - let matcher = lhs & rhs; - - assert!(matcher - .is_match(&subfield!('a', "DNB"), &MatcherOptions::default())); - - Ok(()) -} - -#[test] -fn subfield_matcher_bit_or() -> TestResult { - let lhs = SubfieldMatcher::from_str("a =^ 'f'")?; - let rhs = SubfieldMatcher::from_str("a =^ 'b'")?; - let matcher = lhs | rhs; - - assert!(matcher - .is_match(&subfield!('a', "foo"), &MatcherOptions::default())); - assert!(matcher - .is_match(&subfield!('a', "bar"), &MatcherOptions::default())); - - Ok(()) -} - -#[test] -fn subfield_matcher_bit_xor() -> TestResult { - let lhs = SubfieldMatcher::from_str("a =^ 'a'")?; - let rhs = SubfieldMatcher::from_str("a =$ 'b'")?; - let matcher = lhs ^ rhs; - - assert!(!matcher - .is_match(&subfield!('a', "cc"), &MatcherOptions::default())); - assert!(matcher - .is_match(&subfield!('a', "ac"), &MatcherOptions::default())); - assert!(matcher - .is_match(&subfield!('a', "bb"), &MatcherOptions::default())); - assert!(!matcher - .is_match(&subfield!('a', "ab"), &MatcherOptions::default())); - - Ok(()) -} - -#[test] -fn subfield_matcher_not() -> TestResult { - // group - let matcher = SubfieldMatcher::from_str("!(a == 'bcd')")?; - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "bcd"), &options)); - assert!(matcher.is_match(&subfield!('b', "cde"), &options)); - - // exists - let matcher = SubfieldMatcher::new("!a?"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "bcd"), &options)); - assert!(matcher.is_match(&subfield!('b', "cde"), &options)); - - // not - let matcher = SubfieldMatcher::new("!!!(a == 'bcd')"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "bcd"), &options)); - assert!(matcher.is_match(&subfield!('b', "cde"), &options)); - - Ok(()) -} - -#[test] -fn subfield_matcher_group() { - // and - let matcher = SubfieldMatcher::new("(a =^ 'ab' && a =$ 'ba')"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "abba"), &options)); - assert!(!matcher.is_match(&subfield!('b', "cde"), &options)); - - // or - let matcher = SubfieldMatcher::new("(a =^ 'ab' || a =^ 'ba')"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "abba"), &options)); - assert!(matcher.is_match(&subfield!('a', "baba"), &options)); - - // singleton - let matcher = SubfieldMatcher::new("(a == 'bcd')"); - let options = MatcherOptions::default(); - assert!(matcher.is_match(&subfield!('a', "bcd"), &options)); - assert!(!matcher.is_match(&subfield!('b', "cde"), &options)); - - // nested group - let matcher = SubfieldMatcher::new("(((a == 'bcd')))"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "bcd"), &options)); - - // not - let matcher = SubfieldMatcher::new("(!(a == 'bcd'))"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('h', "ijk"), &options)); -} - -#[test] -fn subfield_matcher_or() -> TestResult { - // singleton - let matcher = - SubfieldMatcher::new("a =^ 'ab' || a =^ 'bc' || a =^ 'cd'"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "abab"), &options)); - assert!(matcher.is_match(&subfield!('a', "bcbc"), &options)); - assert!(matcher.is_match(&subfield!('a', "cdcd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "dede"), &options)); - assert!(!matcher.is_match(&subfield!('b', "abab"), &options)); - - // group - let matcher = - SubfieldMatcher::new("a =^ 'ab' || (a =^ 'bc' && a =$ 'cd')"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "abab"), &options)); - assert!(matcher.is_match(&subfield!('a', "bccd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "bcbc"), &options)); - - // and - let matcher = - SubfieldMatcher::new("a =^ 'ab' || a =^ 'bc' && a =$ 'cd'"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "abab"), &options)); - assert!(matcher.is_match(&subfield!('a', "abcd"), &options)); - assert!(matcher.is_match(&subfield!('a', "bccd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "bcbc"), &options)); - - // or - let matcher = SubfieldMatcher::new("!a? || b == 'x'"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "abab"), &options)); - - let subfields = [&subfield!('a', "bccd"), &subfield!('b', "x")]; - assert!(matcher.is_match(subfields, &options)); - - // not - let matcher = SubfieldMatcher::new("a == 'bcd' || !(a != 'def')"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "bcd"), &options)); - assert!(matcher.is_match(&subfield!('a', "def"), &options)); - assert!(!matcher.is_match(&subfield!('a', "hij"), &options)); - - // boolean op precedence - let matcher = - SubfieldMatcher::new("(a =^ 'ab' || a =^ 'bc') && a =$ 'cd'"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "abab"), &options)); - assert!(matcher.is_match(&subfield!('a', "abcd"), &options)); - assert!(matcher.is_match(&subfield!('a', "bccd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "bcbc"), &options)); - - // multiple subfields - let matcher = SubfieldMatcher::new("#a == 2 || a =^ 'ab'"); - let options = MatcherOptions::default(); - - let subfields = [&subfield!('a', "def"), &subfield!('a', "hij")]; - assert!(matcher.is_match(subfields, &options)); - - let subfields = [ - &subfield!('a', "def"), - &subfield!('a', "hij"), - &subfield!('a', "abc"), - ]; - assert!(matcher.is_match(subfields, &options)); - - Ok(()) -} - -#[test] -fn subfield_matcher_and() -> anyhow::Result<()> { - // singleton - let matcher = - SubfieldMatcher::new("#a == 1 && a =^ 'ab' && a =$ 'ba'"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "abba"), &options)); - assert!(!matcher.is_match(&subfield!('a', "baba"), &options)); - - let subfields = [&subfield!('a', "abba"), &subfield!('a', "baba")]; - assert!(!matcher.is_match(subfields, &options)); - - // group - let matcher = - SubfieldMatcher::new("#a == 1 && (a =^ 'ab' || a =^ 'ba')"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "abba"), &options)); - assert!(matcher.is_match(&subfield!('a', "baba"), &options)); - - let subfields = [&subfield!('a', "abba"), &subfield!('a', "baba")]; - assert!(!matcher.is_match(subfields, &options)); - - // not - let matcher = - SubfieldMatcher::new("#a == 1 && !(a =^ 'ab' || a =^ 'ba')"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "abba"), &options)); - assert!(!matcher.is_match(&subfield!('a', "baba"), &options)); - assert!(matcher.is_match(&subfield!('a', "cbcb"), &options)); - - Ok(()) -} - -#[test] -fn subfield_matcher_xor() -> anyhow::Result<()> { - // singleton - let matcher = SubfieldMatcher::new("a =^ 'a' ^ a =$ 'b'"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "cc"), &options)); - assert!(matcher.is_match(&subfield!('a', "ac"), &options)); - assert!(matcher.is_match(&subfield!('a', "cb"), &options)); - assert!(!matcher.is_match(&subfield!('a', "ab"), &options)); - - let subfields = [&subfield!('a', "dd"), &subfield!('a', "cb")]; - assert!(matcher.is_match(subfields, &options)); - - let matcher = SubfieldMatcher::new("a =^ 'a' XOR a =$ 'b'"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "cc"), &options)); - assert!(matcher.is_match(&subfield!('a', "ac"), &options)); - assert!(matcher.is_match(&subfield!('a', "cb"), &options)); - assert!(!matcher.is_match(&subfield!('a', "ab"), &options)); - - // group - let matcher = - SubfieldMatcher::new("a =^ 'a' ^ (a =$ 'b' || a =$ 'c')"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "dd"), &options)); - assert!(matcher.is_match(&subfield!('a', "ad"), &options)); - assert!(matcher.is_match(&subfield!('a', "cb"), &options)); - assert!(matcher.is_match(&subfield!('a', "cc"), &options)); - assert!(!matcher.is_match(&subfield!('a', "ab"), &options)); - assert!(!matcher.is_match(&subfield!('a', "ac"), &options)); - - let subfields = [&subfield!('a', "ab"), &subfield!('a', "ac")]; - assert!(!matcher.is_match(subfields, &options)); - - let subfields = [&subfield!('a', "dd"), &subfield!('a', "db")]; - assert!(matcher.is_match(subfields, &options)); - - // not - let matcher = - SubfieldMatcher::new("a =^ 'a' ^ !(a =$ 'b' || a =$ 'c')"); - let options = MatcherOptions::default(); - - assert!(matcher.is_match(&subfield!('a', "dd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "ad"), &options)); - assert!(!matcher.is_match(&subfield!('a', "cb"), &options)); - assert!(!matcher.is_match(&subfield!('a', "cc"), &options)); - assert!(matcher.is_match(&subfield!('a', "ab"), &options)); - assert!(matcher.is_match(&subfield!('a', "ac"), &options)); - - // precedence - let matcher = - SubfieldMatcher::new("a =^ 'a' ^ a =$ 'b' || a =? 'c'"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "ddd"), &options)); - assert!(matcher.is_match(&subfield!('a', "dcd"), &options)); - assert!(matcher.is_match(&subfield!('a', "ddb"), &options)); - assert!(matcher.is_match(&subfield!('a', "dcb"), &options)); - assert!(matcher.is_match(&subfield!('a', "add"), &options)); - assert!(matcher.is_match(&subfield!('a', "acd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "adb"), &options)); - assert!(matcher.is_match(&subfield!('a', "acb"), &options)); - - let matcher = - SubfieldMatcher::new("a =? 'c' || a =^ 'a' ^ a =$ 'b'"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "ddd"), &options)); - assert!(matcher.is_match(&subfield!('a', "dcd"), &options)); - assert!(matcher.is_match(&subfield!('a', "ddb"), &options)); - assert!(matcher.is_match(&subfield!('a', "dcb"), &options)); - assert!(matcher.is_match(&subfield!('a', "add"), &options)); - assert!(matcher.is_match(&subfield!('a', "acd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "adb"), &options)); - assert!(matcher.is_match(&subfield!('a', "acb"), &options)); - - let matcher = - SubfieldMatcher::new("a =^ 'a' ^ a =$ 'b' && a =? 'c'"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "ddd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "dcd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "ddb"), &options)); - assert!(matcher.is_match(&subfield!('a', "dcb"), &options)); - assert!(matcher.is_match(&subfield!('a', "add"), &options)); - assert!(matcher.is_match(&subfield!('a', "acd"), &options)); - assert!(matcher.is_match(&subfield!('a', "adb"), &options)); - assert!(!matcher.is_match(&subfield!('a', "acb"), &options)); - - let matcher = - SubfieldMatcher::new("a =? 'c' && a =^ 'a' ^ a =$ 'b'"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "ddd"), &options)); - assert!(matcher.is_match(&subfield!('a', "ddb"), &options)); - assert!(!matcher.is_match(&subfield!('a', "add"), &options)); - assert!(matcher.is_match(&subfield!('a', "adb"), &options)); - assert!(!matcher.is_match(&subfield!('a', "dcd"), &options)); - assert!(matcher.is_match(&subfield!('a', "dcb"), &options)); - assert!(matcher.is_match(&subfield!('a', "acd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "acb"), &options)); - - let matcher = - SubfieldMatcher::new("a =? 'c' && (a =^ 'a' ^ a =$ 'b')"); - let options = MatcherOptions::default(); - - assert!(!matcher.is_match(&subfield!('a', "ddd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "dcd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "ddb"), &options)); - assert!(matcher.is_match(&subfield!('a', "dcb"), &options)); - assert!(!matcher.is_match(&subfield!('a', "add"), &options)); - assert!(matcher.is_match(&subfield!('a', "acd"), &options)); - assert!(!matcher.is_match(&subfield!('a', "adb"), &options)); - assert!(!matcher.is_match(&subfield!('a', "acb"), &options)); - - Ok(()) -} diff --git a/crates/pica-matcher/tests/tag_matcher/mod.rs b/crates/pica-matcher/tests/tag_matcher/mod.rs deleted file mode 100644 index ad69ab663..000000000 --- a/crates/pica-matcher/tests/tag_matcher/mod.rs +++ /dev/null @@ -1,65 +0,0 @@ -use pica_matcher::TagMatcher; -use pica_record_v1::TagRef; - -const TAGS: [&'static str; 24] = [ - "001A", "001B", "001D", "001U", "001X", "002@", "003@", "003U", - "004B", "007K", "007N", "008A", "008B", "010E", "028@", "028A", - "028R", "032T", "041R", "042A", "042B", "047A", "047C", "050C", -]; - -#[test] -fn tag_matcher_new() { - let matcher = TagMatcher::new("003@"); - assert!(matcher.is_match(&TagRef::new("003@"))); - assert!(!matcher.is_match(&TagRef::new("002@"))); - - let matcher = TagMatcher::new("01[2-4]A"); - assert!(!matcher.is_match(&TagRef::new("011A"))); - assert!(matcher.is_match(&TagRef::new("012A"))); - assert!(matcher.is_match(&TagRef::new("013A"))); - assert!(matcher.is_match(&TagRef::new("014A"))); - assert!(!matcher.is_match(&TagRef::new("015A"))); -} - -#[test] -#[should_panic] -fn tag_matcher_new_panic() { - let _matcher = TagMatcher::new("[0-5]03@"); -} - -#[test] -fn tag_matcher_is_match() { - for tag in TAGS { - let matcher = TagMatcher::new(tag); - assert!(matcher.is_match(&TagRef::new(tag))); - - let matcher = TagMatcher::new("...."); - assert!(matcher.is_match(&TagRef::new(tag))); - - let matcher = - TagMatcher::new("[0-2][0-5][01-78][ABDUX@KNECRT]"); - assert!(matcher.is_match(&TagRef::new(tag))); - } -} - -#[test] -fn tag_matcher_partial_eq() { - for tag in TAGS { - assert_eq!(TagRef::new(tag), TagMatcher::new(tag)); - assert_eq!(TagMatcher::new(tag), TagRef::new(tag)); - assert_eq!(TagRef::new(tag), TagMatcher::new("....")); - assert_eq!(TagMatcher::new("...."), TagRef::new(tag)); - assert_eq!( - TagRef::new(tag), - TagMatcher::new("[0-2][0-5][01-78][ABDUX@KNECRT]") - ); - } -} - -#[test] -fn tag_matcher_from_str() { - for tag in TAGS { - let matcher = tag.parse::().unwrap(); - assert!(matcher.is_match(&TagRef::new(tag))); - } -} diff --git a/crates/pica-path/Cargo.toml b/crates/pica-path/Cargo.toml deleted file mode 100644 index 4f77c335f..000000000 --- a/crates/pica-path/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] -name = "pica-path" -version.workspace = true -authors.workspace = true -license.workspace = true -readme.workspace = true -keywords.workspace = true -edition.workspace = true -rust-version.workspace = true - -[dependencies] -bstr = { workspace = true } -pica-matcher = { workspace = true } -pica-record-v1 = { workspace = true } -serde = { workspace = true, optional = true } -thiserror = { workspace = true } -winnow = { workspace = true, features = ["simd"] } - -[dev-dependencies] -anyhow = "1.0" - -[features] -serde = ["dep:serde"] diff --git a/crates/pica-path/src/lib.rs b/crates/pica-path/src/lib.rs deleted file mode 100644 index 7d19fbc42..000000000 --- a/crates/pica-path/src/lib.rs +++ /dev/null @@ -1,473 +0,0 @@ -use std::str::FromStr; - -use bstr::{BStr, ByteSlice}; -use pica_matcher::parser::{ - parse_occurrence_matcher, parse_tag_matcher, -}; -use pica_matcher::subfield_matcher::parse_subfield_matcher; -use pica_matcher::{ - MatcherOptions, OccurrenceMatcher, SubfieldMatcher, TagMatcher, -}; -use pica_record_v1::parser::parse_subfield_code; -use pica_record_v1::{FieldRef, RecordRef, SubfieldCode}; -#[cfg(feature = "serde")] -use serde::Deserialize; -use thiserror::Error; -use winnow::ascii::multispace0; -use winnow::combinator::{ - alt, delimited, opt, preceded, repeat, separated, separated_pair, -}; -use winnow::error::ParserError; -use winnow::prelude::*; -use winnow::stream::{AsChar, Stream, StreamIsPartial}; - -const SUBFIELD_CODES: &str = - "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; - -#[derive(Debug, Error)] -#[error("invalid path expression, got `{0}`")] -pub struct ParsePathError(pub String); - -#[derive(Clone, Debug, PartialEq)] -pub struct Path { - tag_matcher: TagMatcher, - occurrence_matcher: OccurrenceMatcher, - subfield_matcher: Option, - codes: Vec>, -} - -impl Path { - /// Create a new path from a string slice. - /// - /// # Panics - /// - /// This methods panics on invalid path expressions. - /// - /// # Example - /// - /// ```rust - /// use pica_path::Path; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let _path = Path::new("003@.0"); - /// Ok(()) - /// } - /// ``` - pub fn new>(data: &T) -> Self { - Self::try_from(data.as_ref()).expect("valid path expression.") - } - - /// Returns the list of codes. - /// - /// # Example - /// - /// ```rust - /// use pica_path::Path; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let path = Path::new("003@.0"); - /// assert_eq!(path.codes(), &[vec!['0']]); - /// Ok(()) - /// } - /// ``` - pub fn codes(&self) -> &Vec> { - &self.codes - } - - /// Returns the flat list of codes. - /// - /// # Example - /// - /// ```rust - /// use pica_path::Path; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let path = Path::new("003@.0"); - /// assert_eq!(path.codes_flat(), &['0']); - /// Ok(()) - /// } - /// ``` - pub fn codes_flat(&self) -> Vec { - self.codes.clone().into_iter().flatten().collect() - } - - /// Returns the tag matcher of the path. - pub fn tag_matcher(&self) -> &TagMatcher { - &self.tag_matcher - } - - /// Returns the occurrence matcher of the path. - pub fn occurrence_matcher(&self) -> &OccurrenceMatcher { - &self.occurrence_matcher - } - - /// Returns the subfield matcher of the path. - pub fn subfield_matcher(&self) -> Option<&SubfieldMatcher> { - self.subfield_matcher.as_ref() - } -} - -impl TryFrom<&[u8]> for Path { - type Error = ParsePathError; - - fn try_from(value: &[u8]) -> Result { - parse_path.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParsePathError(value) - }) - } -} - -impl FromStr for Path { - type Err = ParsePathError; - - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -/// Strip whitespaces from the beginning and end. -pub(crate) fn ws, F>( - mut inner: F, -) -> impl Parser -where - I: Stream + StreamIsPartial, - ::Token: AsChar + Clone, - F: Parser, -{ - move |i: &mut I| { - let _ = multispace0.parse_next(i)?; - let o = inner.parse_next(i); - let _ = multispace0.parse_next(i)?; - - o - } -} - -#[inline] -fn parse_subfield_code_range( - i: &mut &[u8], -) -> PResult> { - separated_pair(parse_subfield_code, '-', parse_subfield_code) - .verify(|(min, max)| min < max) - .map(|(min, max)| { - (min.as_byte()..=max.as_byte()) - .map(SubfieldCode::from_unchecked) - .collect() - }) - .parse_next(i) -} - -#[inline] -fn parse_subfield_code_single( - i: &mut &[u8], -) -> PResult> { - parse_subfield_code.map(|code| vec![code]).parse_next(i) -} - -#[inline] -fn parse_subfield_code_list( - i: &mut &[u8], -) -> PResult> { - delimited( - '[', - repeat( - 1.., - alt(( - parse_subfield_code_range, - parse_subfield_code_single, - )), - ) - .fold(Vec::new, |mut acc: Vec<_>, item| { - acc.extend_from_slice(&item); - acc - }), - ']', - ) - .parse_next(i) -} - -#[inline] -fn parse_subfield_codes(i: &mut &[u8]) -> PResult> { - alt(( - parse_subfield_code_list, - parse_subfield_code_single, - '*'.value( - SUBFIELD_CODES - .chars() - .map(|code| SubfieldCode::new(code).unwrap()) - .collect(), - ), - )) - .parse_next(i) -} - -fn parse_path_simple(i: &mut &[u8]) -> PResult { - ws(( - parse_tag_matcher, - parse_occurrence_matcher, - preceded('.', parse_subfield_codes), - )) - .map(|(t, o, c)| Path { - tag_matcher: t, - occurrence_matcher: o, - subfield_matcher: None, - codes: vec![c], - }) - .parse_next(i) -} - -fn parse_path_curly(i: &mut &[u8]) -> PResult { - ws(( - parse_tag_matcher, - parse_occurrence_matcher, - delimited( - ws('{'), - ( - alt(( - separated(1.., parse_subfield_codes, ws(',')), - delimited( - ws('('), - separated(1.., parse_subfield_codes, ws(',')), - ws(')'), - ), - )), - opt(preceded(ws('|'), parse_subfield_matcher)), - ), - ws('}'), - ), - )) - .map(|(t, o, (c, m))| Path { - tag_matcher: t, - occurrence_matcher: o, - subfield_matcher: m, - codes: c, - }) - .parse_next(i) -} - -pub fn parse_path(i: &mut &[u8]) -> PResult { - alt((parse_path_simple, parse_path_curly)).parse_next(i) -} - -pub trait PathExt { - fn path(&self, path: &Path, options: &MatcherOptions) - -> Vec<&BStr>; - - /// Returns the idn of the record. - /// - /// # Example - /// - /// ```rust - /// use bstr::ByteSlice; - /// use pica_path::{Path, PathExt}; - /// use pica_record_v1::ByteRecord; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let record = - /// ByteRecord::from_bytes(b"003@ \x1f0123456789X\x1e\n")?; - /// assert_eq!(record.idn(), Some(b"123456789X".as_bstr())); - /// - /// let record = ByteRecord::from_bytes(b"002@ \x1f0Olfo\x1e\n")?; - /// assert_eq!(record.idn(), None); - /// Ok(()) - /// } - /// ``` - fn idn(&self) -> Option<&BStr> { - self.path(&Path::new("003@.0"), &Default::default()) - .first() - .copied() - } - - /// Returns the first value (converted to string) of the path - /// expression. - /// - /// # Example - /// - /// ```rust - /// use bstr::ByteSlice; - /// use pica_path::{Path, PathExt}; - /// use pica_record_v1::ByteRecord; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let record = - /// ByteRecord::from_bytes(b"003@ \x1f0123456789X\x1e\n")?; - /// assert_eq!( - /// record.first("003@.0"), - /// Some("123456789X".to_string()) - /// ); - /// - /// let record = ByteRecord::from_bytes(b"002@ \x1f0Olfo\x1e\n")?; - /// assert_eq!(record.first("003@.0"), None); - /// Ok(()) - /// } - /// ``` - fn first>(&self, path: P) -> Option { - self.path(&Path::new(&path), &Default::default()) - .first() - .map(ToString::to_string) - } -} - -impl PathExt for RecordRef<'_> { - /// Returns all subfield values which satisfies the path matcher. - /// - /// # Example - /// - /// ```rust - /// use bstr::BString; - /// use pica_path::{Path, PathExt}; - /// use pica_record_v1::RecordRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let record = RecordRef::new(vec![ - /// ("012A", None, vec![('a', "123"), ('a', "456")]), - /// ("012A", Some("01"), vec![('a', "789"), ('b', "xyz")]), - /// ]); - /// - /// assert_eq!( - /// record.path(&Path::new("012A/*.a"), &Default::default()), - /// vec![ - /// &BString::from("123"), - /// &BString::from("456"), - /// &BString::from("789") - /// ] - /// ); - /// Ok(()) - /// } - /// ``` - fn path( - &self, - path: &Path, - options: &MatcherOptions, - ) -> Vec<&BStr> { - self.iter() - .filter(|field| { - path.tag_matcher == field.tag() - && path.occurrence_matcher == field.occurrence() - }) - .filter(|field| { - if let Some(ref matcher) = path.subfield_matcher { - matcher.is_match(field.subfields(), options) - } else { - true - } - }) - .flat_map(FieldRef::subfields) - .filter_map(|subfield| { - if path.codes_flat().contains(subfield.code()) { - Some(subfield.value().as_bstr()) - } else { - None - } - }) - .collect() - } -} - -#[cfg(feature = "serde")] -impl<'de> Deserialize<'de> for Path { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let s: String = Deserialize::deserialize(deserializer)?; - Path::try_from(s.as_bytes()).map_err(serde::de::Error::custom) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_subfield_code_single() { - use super::parse_subfield_code_single; - - assert_eq!( - parse_subfield_code_single.parse(b"a").unwrap(), - vec!['a'] - ); - } - - #[test] - fn parse_subfield_code_range() { - use super::parse_subfield_code_range; - - assert_eq!( - parse_subfield_code_range.parse(b"a-c").unwrap(), - vec!['a', 'b', 'c'] - ); - - assert!(parse_subfield_code_range.parse(b"a-a").is_err()); - assert!(parse_subfield_code_range.parse(b"c-a").is_err()); - assert!(parse_subfield_code_range.parse(b"a").is_err()); - } - - #[test] - fn parse_subfield_codes() { - use super::{parse_subfield_codes, SUBFIELD_CODES}; - - macro_rules! parse_success { - ($input:expr, $expected:expr) => { - assert_eq!( - parse_subfield_codes.parse($input).unwrap(), - $expected - ) - }; - } - - parse_success!(b"[a-cx]", vec!['a', 'b', 'c', 'x']); - parse_success!(b"[a-c]", vec!['a', 'b', 'c']); - parse_success!(b"a", vec!['a']); - parse_success!( - b"*", - SUBFIELD_CODES.chars().collect::>() - ); - } - - #[test] - fn parse_path_simple() { - macro_rules! parse_success { - ($input:expr) => { - assert!(super::parse_path_simple.parse($input).is_ok()) - }; - } - - parse_success!(b"021A/*.[a-cx]"); - parse_success!(b"021A.[a-cx]"); - parse_success!(b"021A/*.[a-c]"); - parse_success!(b"021A.[a-c]"); - parse_success!(b"021A/*.a"); - parse_success!(b"..../*.*"); - parse_success!(b"021A.a"); - parse_success!(b"021A.*"); - } - - #[test] - fn parse_path_curly() { - macro_rules! parse_success { - ($input:expr) => { - assert!(super::parse_path_curly.parse($input).is_ok()) - }; - } - - parse_success!(b"021A/*{ [a-cx] }"); - parse_success!(b"021A/*{ [a-cx], y }"); - parse_success!(b"021A/*{ ([a-cx], y) }"); - parse_success!(b"021A/*{ ([a-cx], y) | y? }"); - parse_success!(b"021A/*{ * | y? }"); - parse_success!(b"021A{ [a-cx] }"); - parse_success!(b"021A/*{[a-c]}"); - parse_success!(b"021A{[a-c]}"); - parse_success!(b"021A/*{a}"); - parse_success!(b"021A{a}"); - parse_success!(b"021A{*}"); - } -} diff --git a/crates/pica-path/tests/integration.rs b/crates/pica-path/tests/integration.rs deleted file mode 100644 index 943c062b2..000000000 --- a/crates/pica-path/tests/integration.rs +++ /dev/null @@ -1,133 +0,0 @@ -use std::str::FromStr; -use std::sync::OnceLock; - -use bstr::{ByteSlice, B}; -use pica_matcher::MatcherOptions; -use pica_path::{ParsePathError, Path, PathExt}; -use pica_record_v1::ByteRecord; - -type TestResult = anyhow::Result<()>; - -fn ada_lovelace() -> &'static [u8] { - use std::path::Path; - use std::{env, fs}; - - static DATA: OnceLock> = OnceLock::new(); - DATA.get_or_init(|| { - let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); - let path = Path::new(&manifest_dir) - .join("../pica-toolkit/tests/data/119232022.dat"); - fs::read_to_string(&path).unwrap().as_bytes().to_vec() - }) -} - -#[test] -fn path_new() -> TestResult { - let record = ByteRecord::from_bytes(ada_lovelace())?; - let options = MatcherOptions::default(); - let path = Path::new("003@.0"); - - assert_eq!(record.path(&path, &options), vec!["119232022"]); - Ok(()) -} - -#[test] -#[should_panic] -fn path_new_panic() { - let _ = Path::new("003@.!"); -} - -#[test] -fn path_try_from() -> TestResult { - let record = ByteRecord::from_bytes(ada_lovelace())?; - let options = MatcherOptions::default(); - let path = Path::try_from(B("003@.0"))?; - - assert_eq!(record.path(&path, &options), vec!["119232022"]); - assert!(matches!( - Path::try_from(B("003@.!")).unwrap_err(), - ParsePathError(_) - )); - - Ok(()) -} - -#[test] -fn path_from_str() -> TestResult { - let record = ByteRecord::from_bytes(ada_lovelace())?; - let options = MatcherOptions::default(); - let path = Path::from_str("003@.0")?; - - assert_eq!(record.path(&path, &options), vec!["119232022"]); - assert!(matches!( - Path::from_str("003@.!").unwrap_err(), - ParsePathError(_) - )); - - Ok(()) -} - -#[test] -fn path_codes() { - assert_eq!(Path::new("003@.0").codes(), &[vec!['0']]); - assert_eq!( - Path::new("003@{ [01], 2 }").codes(), - &[vec!['0', '1'], vec!['2']] - ); - - assert_eq!( - Path::new("003@{ ([0-2], 2) }").codes(), - &[vec!['0', '1', '2'], vec!['2']] - ); -} - -#[test] -fn path_codes_flat() { - assert_eq!(Path::new("003@.0").codes_flat(), &['0']); - assert_eq!( - Path::new("003@{ [01], 2 }").codes_flat(), - &['0', '1', '2'] - ); - - assert_eq!( - Path::new("003@{ ([0-2], 2) }").codes_flat(), - &['0', '1', '2', '2'] - ); -} - -#[test] -fn path_simple() -> TestResult { - let record = ByteRecord::from_bytes(ada_lovelace())?; - let options = MatcherOptions::default(); - let path = Path::new("003@.0"); - - assert_eq!(record.path(&path, &options), vec!["119232022"]); - Ok(()) -} - -#[test] -fn path_matcher() -> TestResult { - let record = ByteRecord::from_bytes(ada_lovelace())?; - let path = Path::new("065R{ 9 | 4 == 'ortg' }"); - let options = MatcherOptions::default(); - - assert_eq!(record.path(&path, &options), vec!["040743357"]); - Ok(()) -} - -#[test] -fn path_idn() -> TestResult { - let record = ByteRecord::from_bytes(ada_lovelace())?; - assert_eq!(record.idn(), Some(b"119232022".as_bstr())); - Ok(()) -} - -#[test] -fn test_path_codes() -> TestResult { - let record = ByteRecord::from_bytes(ada_lovelace())?; - let options = MatcherOptions::default(); - let path = Path::new("047A/03.[er]"); - - assert_eq!(record.path(&path, &options), vec!["DE-386", "DE-576"]); - Ok(()) -} diff --git a/crates/pica-record-v1/Cargo.toml b/crates/pica-record-v1/Cargo.toml deleted file mode 100644 index ab9d6f2ee..000000000 --- a/crates/pica-record-v1/Cargo.toml +++ /dev/null @@ -1,32 +0,0 @@ -[package] -name = "pica-record-v1" -resolver = "2" -version.workspace = true -authors.workspace = true -license.workspace = true -readme.workspace = true -keywords.workspace = true -edition.workspace = true -rust-version.workspace = true - -[dependencies] -quickcheck = { workspace = true, optional = true } -bstr = { workspace = true } -flate2 = { workspace = true } -sha2 = { workspace = true } -thiserror = { workspace = true } -winnow = { workspace = true, features = ["simd"] } - -[features] -arbitrary = ["dep:quickcheck"] - -[dev-dependencies] -anyhow = "1.0" -criterion = { version = "0.5", features = ["html_reports"] } -pica-record-v1 = { path = ".", features = ["arbitrary"] } -quickcheck_macros = "1.0" -tempfile = "3.8" - -[[bench]] -name = "main" -harness = false diff --git a/crates/pica-record-v1/benches/main.rs b/crates/pica-record-v1/benches/main.rs deleted file mode 100644 index c44fabe4e..000000000 --- a/crates/pica-record-v1/benches/main.rs +++ /dev/null @@ -1,40 +0,0 @@ -use std::path::Path; -use std::sync::OnceLock; -use std::{env, fs}; - -use criterion::*; -use pica_record_v1::{Record, RecordRef}; -use quickcheck::{Arbitrary, Gen}; - -fn ada_lovelace() -> &'static [u8] { - static DATA: OnceLock> = OnceLock::new(); - DATA.get_or_init(|| { - let path = Path::new(&env!("CARGO_MANIFEST_DIR")) - .join("../../tests/data/ada.dat"); - fs::read_to_string(&path).unwrap().as_bytes().to_vec() - }) -} - -fn arbitrary(size: usize) -> Vec { - let record = Record::arbitrary(&mut Gen::new(size)); - let mut bytes = vec![]; - let _ = record.write_to(&mut bytes); - bytes -} - -pub fn criterion_benchmark(c: &mut Criterion) { - c.bench_function("RecordRef::from_bytes (ada)", |b| { - b.iter(|| RecordRef::from_bytes(black_box(ada_lovelace()))) - }); - - c.bench_function("RecordRef::from_bytes (arbitrary)", |b| { - let bytes = arbitrary(100); - b.iter(|| { - let _record = - RecordRef::from_bytes(black_box(&bytes)).unwrap(); - }) - }); -} - -criterion_group!(benches, criterion_benchmark); -criterion_main!(benches); diff --git a/crates/pica-record-v1/fuzz/.gitignore b/crates/pica-record-v1/fuzz/.gitignore deleted file mode 100644 index 1a45eee77..000000000 --- a/crates/pica-record-v1/fuzz/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -target -corpus -artifacts -coverage diff --git a/crates/pica-record-v1/fuzz/Cargo.toml b/crates/pica-record-v1/fuzz/Cargo.toml deleted file mode 100644 index 56f9baaf3..000000000 --- a/crates/pica-record-v1/fuzz/Cargo.toml +++ /dev/null @@ -1,27 +0,0 @@ -[package] -name = "pica-record-fuzz" -version = "0.0.0" -publish = false -edition = "2021" - -[package.metadata] -cargo-fuzz = true - -[dependencies] -libfuzzer-sys = "0.4" - -[dependencies.pica-record-v1] -path = ".." - -# Prevent this from interfering with workspaces -[workspace] -members = ["."] - -[profile.release] -debug = 1 - -[[bin]] -name = "fuzz-record-ref" -path = "fuzz_targets/fuzz_record_ref.rs" -test = false -doc = false diff --git a/crates/pica-record-v1/fuzz/fuzz_targets/fuzz_record_ref.rs b/crates/pica-record-v1/fuzz/fuzz_targets/fuzz_record_ref.rs deleted file mode 100644 index 7d44c992a..000000000 --- a/crates/pica-record-v1/fuzz/fuzz_targets/fuzz_record_ref.rs +++ /dev/null @@ -1,8 +0,0 @@ -#![no_main] - -use libfuzzer_sys::fuzz_target; -use pica_record_v1::RecordRef; - -fuzz_target!(|data: &[u8]| { - let _record = RecordRef::from_bytes(data); -}); diff --git a/crates/pica-record-v1/src/error.rs b/crates/pica-record-v1/src/error.rs deleted file mode 100644 index 5f33201b6..000000000 --- a/crates/pica-record-v1/src/error.rs +++ /dev/null @@ -1,28 +0,0 @@ -use thiserror::Error; - -/// An error that can occur when ceparsing PICA+ records. -#[derive(Error, PartialEq, Eq, Debug)] -pub enum PicaError { - #[error("'{0}' is not a valid subfield code.")] - InvalidSubfieldCode(char), - #[error("'{0}' is not a valid subfield value.")] - InvalidSubfieldValue(String), - #[error("'{0}' is not a valid subfield.")] - InvalidSubfield(String), -} - -/// -----{ TODO }----------------------------------------- -/// An error that can occur when parsing PICA+ records. -#[derive(Error, PartialEq, Eq, Debug)] -pub enum ParsePicaError { - #[error("invalid subfield")] - InvalidSubfield, - #[error("invalid tag")] - InvalidTag, - #[error("invalid occurrence")] - InvalidOccurrence, - #[error("invalid field")] - InvalidField, - #[error("invalid record (expected valid record, got `{0:?}`)")] - InvalidRecord(Vec), -} diff --git a/crates/pica-record-v1/src/io/mod.rs b/crates/pica-record-v1/src/io/mod.rs deleted file mode 100644 index b21d058a3..000000000 --- a/crates/pica-record-v1/src/io/mod.rs +++ /dev/null @@ -1,165 +0,0 @@ -//! Utilities for reading and writing PICA+ records. - -use std::io; - -use bstr::ByteSlice; -use thiserror::Error; - -use crate::{ByteRecord, ParsePicaError}; - -type ParseResult<'a> = Result, ParsePicaError>; -type ReadResult = Result; - -mod reader; -mod writer; - -pub use reader::{Reader, ReaderBuilder, RecordsIterator}; -pub use writer::{ - ByteRecordWrite, GzipWriter, PlainWriter, WriterBuilder, -}; - -/// An error that can occur when reading PICA+ records from a -/// [BufReader](std::io::BufReader). -#[derive(Error, Debug)] -pub enum ReadPicaError { - #[error("parse error: {msg:?}")] - Parse { msg: String, err: ParsePicaError }, - - #[error("io error")] - Io(#[from] io::Error), -} - -impl ReadPicaError { - /// Returns true, if the underlying error was caused by parsing an - /// invalid record. - pub fn is_invalid_record(&self) -> bool { - matches!( - self, - Self::Parse { - msg: _, - err: ParsePicaError::InvalidRecord(_) - } - ) - } -} - -impl From for ReadPicaError { - fn from(err: ParsePicaError) -> Self { - Self::Parse { - msg: "invalid record".into(), - err, - } - } -} - -/// An extension of [BufRead](`std::io::BufRead`) which provides a -/// convenience API for reading [ByteRecord](`crate::ByteRecord`)s. -pub trait BufReadExt: io::BufRead { - /// Executes the given closure on each parsed line in the underlying - /// reader. - /// - /// If the underlying reader or the closure returns an error, then - /// the iteration stops and the error is returned. If the closure - /// returns `false` the iteration is stopped and no error is - /// returned. - /// - /// # Example - /// - /// ```rust - /// use std::io::{Cursor, Seek}; - /// - /// use pica_record_v1::io::BufReadExt; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let mut reader = - /// Cursor::new(b"003@ \x1f0abc\x1e\n003@ \x1f0def\x1e\n"); - /// - /// // iterate over all records - /// let mut count = 0; - /// reader.for_pica_record(|result| { - /// let _record = result?; - /// count += 1; - /// Ok(true) - /// })?; - /// - /// assert_eq!(count, 2); - /// - /// // stop iteration after first record - /// reader.rewind()?; - /// count = 0; - /// reader.for_pica_record(|result| { - /// let _record = result?; - /// count += 1; - /// Ok(false) - /// })?; - /// - /// assert_eq!(count, 1); - /// - /// Ok(()) - /// } - /// ``` - fn for_pica_record(&mut self, mut f: F) -> ReadResult<()> - where - F: FnMut(ParseResult) -> ReadResult, - { - // The following code is based on the `io::BufReadExt` trait of - // the `bstr` crate. It was necessary to duplicate the code, in - // order to use a different result type. - // https://docs.rs/bstr/1.0.1/src/bstr/io.rs.html#289-341 - - let mut bytes = vec![]; - let mut res = Ok(()); - let mut consumed = 0; - - 'outer: loop { - { - let mut buf = self.fill_buf()?; - - while let Some(index) = buf.find_byte(b'\n') { - let (line, rest) = buf.split_at(index + 1); - buf = rest; - consumed += line.len(); - - let result = ByteRecord::from_bytes(line); - match f(result) { - Ok(false) => break 'outer, - Err(err) => { - res = Err(err); - break 'outer; - } - _ => (), - } - } - - bytes.extend_from_slice(buf); - consumed += buf.len(); - } - - self.consume(consumed); - consumed = 0; - - self.read_until(b'\n', &mut bytes)?; - if bytes.is_empty() { - break; - } - - let result = ByteRecord::from_bytes(&bytes); - match f(result) { - Ok(false) => break, - Err(err) => { - res = Err(err); - break; - } - _ => (), - } - - bytes.clear(); - } - - self.consume(consumed); - res - } -} - -impl BufReadExt for B {} diff --git a/crates/pica-record-v1/src/io/reader.rs b/crates/pica-record-v1/src/io/reader.rs deleted file mode 100644 index e1a307d6f..000000000 --- a/crates/pica-record-v1/src/io/reader.rs +++ /dev/null @@ -1,187 +0,0 @@ -use std::ffi::OsStr; -use std::fs::File; -use std::io::{self, BufRead, BufReader, Read}; -use std::path::Path; - -use flate2::read::GzDecoder; - -use super::ReadPicaError; -use crate::ByteRecord; - -/// Configures and builds a PICA+ reader. -#[derive(Debug, Default)] -pub struct ReaderBuilder { - limit: usize, -} - -impl ReaderBuilder { - /// Create a new ReaderBuilder. - pub fn new() -> Self { - Self::default() - } - - /// Change the limit of records to read. - /// - /// # Example - /// - /// ```rust - /// use std::io::{Cursor, Seek}; - /// - /// use pica_record_v1::io::{ReaderBuilder, RecordsIterator}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let data = - /// Cursor::new(b"003@ \x1f0abc\x1e\n003@ \x1f0def\x1e\n"); - /// let mut reader = - /// ReaderBuilder::new().limit(1).from_reader(data, None); - /// - /// let mut count = 0; - /// while let Some(result) = reader.next() { - /// count += 1; - /// } - /// - /// assert_eq!(count, 1); - /// - /// Ok(()) - /// } - /// ``` - pub fn limit(mut self, limit: usize) -> Self { - self.limit = limit; - self - } - - /// ```rust - /// use std::io::{Cursor, Seek}; - /// - /// use pica_record_v1::io::{ReaderBuilder, RecordsIterator}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let data = - /// Cursor::new(b"003@ \x1f0abc\x1e\n003@ \x1f0def\x1e\n"); - /// let mut reader = ReaderBuilder::new().from_reader(data, None); - /// - /// let mut count = 0; - /// while let Some(result) = reader.next() { - /// count += 1; - /// } - /// - /// assert_eq!(count, 2); - /// - /// Ok(()) - /// } - /// ``` - pub fn from_reader( - &self, - reader: R, - source: Option, - ) -> Reader { - Reader::new(self, reader, source) - } - - pub fn from_path>( - &self, - path: P, - ) -> io::Result>> { - let path = path.as_ref(); - let source = path.to_string_lossy().to_string(); - - let reader: Box = match path - .extension() - .and_then(OsStr::to_str) - { - Some("gz") => Box::new(GzDecoder::new(File::open(path)?)), - _ => { - if path.to_str() != Some("-") { - Box::new(File::open(path)?) - } else { - Box::new(io::stdin()) - } - } - }; - - Ok(self.from_reader(reader, Some(source))) - } -} - -pub struct Reader { - inner: BufReader, - source: Option, - limit: usize, - count: usize, - buf: Vec, -} - -impl Reader { - pub fn new( - builder: &ReaderBuilder, - reader: R, - source: Option, - ) -> Self { - Self { - inner: BufReader::new(reader), - limit: builder.limit, - source, - buf: vec![], - count: 0, - } - } - - pub fn into_inner(self) -> BufReader { - self.inner - } -} - -pub trait RecordsIterator { - type Item<'a> - where - Self: 'a; - - fn next(&mut self) -> Option>; -} - -impl RecordsIterator for Reader { - type Item<'a> - = Result, ReadPicaError> - where - Self: 'a; - - fn next(&mut self) -> Option> { - if self.limit > 0 && self.count >= self.limit { - return None; - } - - self.buf.clear(); - match self.inner.read_until(b'\n', &mut self.buf) { - Err(e) => Some(Err(ReadPicaError::from(e))), - Ok(0) => None, - Ok(_) => { - let result = ByteRecord::from_bytes(&self.buf); - match result { - Err(err) => { - let msg = match &self.source { - Some(source) => { - if source == "-" { - format!("invalid record in line {} (stdin)", self.count) - } else { - format!("invalid record in line {} ({})", self.count, source) - } - } - None => format!( - "invalid record on line {}", - self.count - ), - }; - - Some(Err(ReadPicaError::Parse { msg, err })) - } - Ok(record) => { - self.count += 1; - Some(Ok(record)) - } - } - } - } - } -} diff --git a/crates/pica-record-v1/src/io/writer.rs b/crates/pica-record-v1/src/io/writer.rs deleted file mode 100644 index 7c61e7fe5..000000000 --- a/crates/pica-record-v1/src/io/writer.rs +++ /dev/null @@ -1,148 +0,0 @@ -use std::ffi::OsStr; -use std::fs::OpenOptions; -use std::io::{self, BufWriter, Result, Write}; -use std::path::Path; - -use flate2::write::GzEncoder; -use flate2::Compression; - -use crate::ByteRecord; - -/// A trait that permits writing [ByteRecord]s. -pub trait ByteRecordWrite { - /// Writes a [ByteRecord] into this writer. - fn write_byte_record(&mut self, record: &ByteRecord) -> Result<()>; - - /// Finish the underlying writer. - fn finish(&mut self) -> Result<()>; -} - -/// Configures and build a [ByteRecord] writer. -#[derive(Default)] -pub struct WriterBuilder { - append: bool, - gzip: bool, -} - -type WriterResult = io::Result>; - -impl WriterBuilder { - /// Creates a new builder with default settings. - pub fn new() -> Self { - Self::default() - } - - /// Builds a [ByteRecord] writer from this configuration that writes - /// to the given path. - pub fn from_path>(&self, path: P) -> WriterResult { - let path = path.as_ref(); - - let file = OpenOptions::new() - .write(true) - .create(true) - .truncate(!self.append) - .append(self.append) - .open(path)?; - - if self.gzip - || path.extension().and_then(OsStr::to_str) == Some("gz") - { - Ok(Box::new(GzipWriter::new(file))) - } else { - Ok(Box::new(PlainWriter::new(file))) - } - } - - /// Builds a [ByteRecord] writer from this configuration that writes - /// to the given path, if given, otherwise write to `stdout`. - pub fn from_path_or_stdout>( - &self, - path: Option

, - ) -> WriterResult { - match path { - Some(path) => self.from_path(path), - None => { - if self.gzip { - Ok(Box::new(GzipWriter::new( - Box::new(io::stdout()), - ))) - } else { - Ok(Box::new(PlainWriter::new(Box::new( - io::stdout(), - )))) - } - } - } - } - - /// Whether to use a gzip encoder or not. - /// - /// When this flag is set, the writer encode the records in gzip - /// format. This flag is disabled by default and has no effect when - /// writing to `stdout`. - /// - /// # Panics - /// - /// It's an error to use this flag in append-mode. - pub fn gzip(mut self, yes: bool) -> Self { - assert!(!yes || (yes ^ self.append)); - self.gzip = yes; - self - } - - /// Whether to append to a given file or not. - /// - /// When this flag is set, the writer appends to the given file. If - /// the file does not exists, the file is created. This flag has - /// no effect when writing to `stdout`. This option is disabled by - /// default. - /// - /// # Panics - /// - /// It's an error to use this flag in combination with a gzip - /// writer. - pub fn append(mut self, yes: bool) -> Self { - assert!(!yes || (yes ^ self.gzip)); - self.append = yes; - self - } -} - -/// A plain buffered [ByteRecord] writer. -pub struct PlainWriter(BufWriter); - -impl PlainWriter { - pub fn new(inner: W) -> Self { - Self(BufWriter::new(inner)) - } -} - -impl ByteRecordWrite for PlainWriter { - fn write_byte_record(&mut self, record: &ByteRecord) -> Result<()> { - record.write_to(&mut self.0) - } - - fn finish(&mut self) -> Result<()> { - self.0.flush() - } -} - -/// A [ByteRecord] writer that gzip encodes records. -pub struct GzipWriter(GzEncoder); - -impl GzipWriter { - pub fn new(inner: W) -> GzipWriter { - Self(GzEncoder::new(inner, Compression::default())) - } -} - -impl ByteRecordWrite for GzipWriter { - fn write_byte_record(&mut self, record: &ByteRecord) -> Result<()> { - record.write_to(&mut self.0) - } - - fn finish(&mut self) -> Result<()> { - self.0.try_finish()?; - Ok(()) - } -} diff --git a/crates/pica-record-v1/src/level.rs b/crates/pica-record-v1/src/level.rs deleted file mode 100644 index c6c88569e..000000000 --- a/crates/pica-record-v1/src/level.rs +++ /dev/null @@ -1,30 +0,0 @@ -use std::str::FromStr; - -use thiserror::Error; - -/// The level (main, local, copy) of a field (or tag). -#[derive(Debug, Default, Clone, PartialEq, Eq)] -pub enum Level { - #[default] - Main, - Local, - Copy, -} - -/// An error that can occur when parsing PICA+ level. -#[derive(Error, PartialEq, Eq, Debug)] -#[error("{0}")] -pub struct ParseLevelError(String); - -impl FromStr for Level { - type Err = ParseLevelError; - - fn from_str(s: &str) -> Result { - match s { - "main" => Ok(Self::Main), - "local" => Ok(Self::Local), - "copy" => Ok(Self::Copy), - _ => Err(ParseLevelError(format!("invalid level '{s}'"))), - } - } -} diff --git a/crates/pica-record-v1/src/lib.rs b/crates/pica-record-v1/src/lib.rs deleted file mode 100644 index fe6ce7531..000000000 --- a/crates/pica-record-v1/src/lib.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! This crate provides the low-level primitives to work with -//! bibliographic records encoded in PICA+. -//! -//! There exists an immutable and a mutable variant for each primitive. -//! The immutable variant is used to parse the corresponding component -//! of a record, without owning the data. This type is mostly a wrapper -//! of the underlying data (byte slices). On the other hand there is -//! also a mutable variant, which is used in upstream crates. This -//! variant owns it's data. -//! -//! This crate also provides two higher-level data structure to work -//! with records: [`ByteRecord`] and [`StringRecord`]. The first type -//! is a wrapper of a [`RecordRef`] and provides more functions to work -//! with records as well as an mechanism to cache the complete byte -//! sequence of the original record. This improves the performance when -//! the record is written back to a stream. It is important to note that -//! a [`ByteRecord`] might contain invalid UTF-8 data. When it's -//! important to guarantee valid UTF-8 data, use a [`StringRecord`] -//! instead. -//! -//! Finally, the [`io`] module provides utilities for reading and -//! writing PICA+ records and the [`parser`] module exposes the internal -//! parser combinators, which are used in upstream crates (matcher, -//! select). - -pub use error::PicaError; -pub use primitives::{ - Field, FieldRef, Subfield, SubfieldCode, SubfieldRef, - SubfieldValue, SubfieldValueRef, -}; - -/// Parsers recognizing low-level primitives (e.g. subfield codes). -#[rustfmt::skip] -pub mod parser { - pub use super::primitives::parse_subfield_code; - pub use super::primitives::parse_subfield_value_ref; - pub use super::primitives::parse_subfield_ref; - pub use super::primitives::parse_field_ref; - - // TODO - pub use super::occurrence::parse_occurrence_digits; - pub use super::tag::parse_tag; -} - -mod primitives; - -// -----{ TODO }----------------------------------------- - -mod error; -pub mod io; -mod level; -mod occurrence; -mod record; -mod tag; - -pub use error::ParsePicaError; -pub use level::{Level, ParseLevelError}; -pub use occurrence::{Occurrence, OccurrenceRef}; -pub use record::{ByteRecord, Record, RecordRef, StringRecord}; -pub use tag::{Tag, TagRef}; diff --git a/crates/pica-record-v1/src/occurrence.rs b/crates/pica-record-v1/src/occurrence.rs deleted file mode 100644 index 0510886fc..000000000 --- a/crates/pica-record-v1/src/occurrence.rs +++ /dev/null @@ -1,307 +0,0 @@ -use std::fmt::{self, Display}; -use std::io::{self, Write}; - -use bstr::{BStr, BString, ByteSlice}; -use winnow::combinator::preceded; -use winnow::stream::AsChar; -use winnow::token::take_while; -use winnow::{PResult, Parser}; - -use crate::ParsePicaError; - -/// An immutable PICA+ occurrence. -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd)] -pub struct OccurrenceRef<'a>(&'a BStr); - -/// A mutable PICA+ occurrence. -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd)] -pub struct Occurrence(BString); - -impl<'a> OccurrenceRef<'a> { - /// Create an immutable PICA+ occurrence. - /// - /// # Panics - /// - /// This method panics if the occurrence is invalid. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::OccurrenceRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let occurrence = OccurrenceRef::new("01"); - /// assert_eq!(occurrence, "01"); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>(value: &'a B) -> Self { - Self::try_from(value.as_ref()).expect("value occurrence") - } - - /// Creates an immutable PICA+ tag from a byte slice. - /// - /// If an invalid tag is given, an error is returned. - /// - /// ```rust - /// use pica_record_v1::OccurrenceRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// assert!(OccurrenceRef::from_bytes(b"/01").is_ok()); - /// assert!(OccurrenceRef::from_bytes(b"01").is_err()); - /// - /// Ok(()) - /// } - /// ``` - pub fn from_bytes(bytes: &'a [u8]) -> Result { - parse_occurrence - .parse(bytes) - .map_err(|_| ParsePicaError::InvalidOccurrence) - } - - /// Converts a occurrence reference into the underlying byte slice. - /// - /// ```rust - /// use pica_record_v1::OccurrenceRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let occurrence = OccurrenceRef::from_bytes(b"/01")?; - /// assert_eq!(occurrence.as_bytes(), b"01"); - /// - /// Ok(()) - /// } - /// ``` - pub fn as_bytes(&self) -> &[u8] { - self.0.as_ref() - } - - /// Creates an immutable PICA+ tag from a unchecked byte string. - /// - /// ```rust - /// use pica_record_v1::OccurrenceRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// assert_eq!(OccurrenceRef::new(b"01"), "01"); - /// assert_ne!(OccurrenceRef::new(b"01"), "02"); - /// - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn from_unchecked(value: &'a BStr) -> Self { - Self(value) - } - - /// Write the occurrence into the given writer. - /// - /// # Example - /// - /// ```rust - /// use std::io::Cursor; - /// - /// use pica_record_v1::OccurrenceRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let mut writer = Cursor::new(Vec::::new()); - /// let occurrence = OccurrenceRef::new("01"); - /// occurrence.write_to(&mut writer); - /// # - /// # assert_eq!( - /// # String::from_utf8(writer.into_inner())?, - /// # "/01" - /// # ); - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn write_to(&self, out: &mut impl Write) -> io::Result<()> { - write!(out, "/{}", self.0) - } -} - -impl> PartialEq for OccurrenceRef<'_> { - #[inline] - fn eq(&self, other: &T) -> bool { - self.0 == other.as_ref() - } -} - -impl Display for OccurrenceRef<'_> { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "/{}", self.0) - } -} - -/// Parse the digits of an PICA+ occurrence. -#[inline] -pub fn parse_occurrence_digits<'a>( - i: &mut &'a [u8], -) -> PResult<&'a BStr> { - take_while(2..=3, AsChar::is_dec_digit) - .map(ByteSlice::as_bstr) - .parse_next(i) -} - -/// Parse a PICA+ occurrence (read-only). -#[inline] -pub(crate) fn parse_occurrence<'a>( - i: &mut &'a [u8], -) -> PResult> { - preceded(b'/', parse_occurrence_digits) - .map(|value| OccurrenceRef(value.as_bstr())) - .parse_next(i) -} - -impl<'a> TryFrom<&'a [u8]> for OccurrenceRef<'a> { - type Error = ParsePicaError; - - fn try_from(value: &'a [u8]) -> Result { - if parse_occurrence_digits.parse(value).is_err() { - return Err(ParsePicaError::InvalidOccurrence); - } - - Ok(Self(value.into())) - } -} - -impl Occurrence { - /// Converts a occurrence into the underlying byte slice. - /// - /// ```rust - /// use pica_record_v1::{Occurrence, OccurrenceRef}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let occurrence = Occurrence::from(OccurrenceRef::new("01")); - /// assert_eq!(occurrence.as_bytes(), b"01"); - /// - /// Ok(()) - /// } - /// ``` - pub fn as_bytes(&self) -> &[u8] { - self.0.as_ref() - } - - /// Write the occurrence into the given writer. - /// - /// # Example - /// - /// ```rust - /// use std::io::Cursor; - /// - /// use pica_record_v1::{Occurrence, OccurrenceRef}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let mut writer = Cursor::new(Vec::::new()); - /// let occurrence: Occurrence = OccurrenceRef::new("01").into(); - /// occurrence.write_to(&mut writer); - /// # - /// # assert_eq!( - /// # String::from_utf8(writer.into_inner())?, - /// # "/01" - /// # ); - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn write_to(&self, out: &mut impl Write) -> io::Result<()> { - write!(out, "/{}", self.0) - } -} - -impl From> for Occurrence { - fn from(value: OccurrenceRef<'_>) -> Self { - Self(value.0.into()) - } -} - -impl AsRef<[u8]> for Occurrence { - fn as_ref(&self) -> &[u8] { - self.0.as_ref() - } -} - -#[cfg(feature = "arbitrary")] -impl quickcheck::Arbitrary for Occurrence { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let size = *g.choose(&[2, 3]).unwrap(); - let value = (0..size) - .map(|_| *g.choose(b"0123456789").unwrap()) - .collect::>(); - - Occurrence(value.into()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[quickcheck_macros::quickcheck] - fn parse_arbitrary_occurrence(occurrence: Occurrence) -> bool { - let mut bytes = Vec::::new(); - let _ = occurrence.write_to(&mut bytes); - super::parse_occurrence.parse(&bytes).is_ok() - } - - #[test] - fn parse_occurrence_digits() { - use super::parse_occurrence_digits; - - macro_rules! parse_success { - ($input:expr) => { - assert_eq!( - parse_occurrence_digits.parse($input).unwrap(), - $input.as_bstr() - ); - }; - } - - parse_success!(b"00"); - parse_success!(b"01"); - parse_success!(b"000"); - parse_success!(b"001"); - - assert!(parse_occurrence_digits.parse(b"").is_err()); - assert!(parse_occurrence_digits.parse(b"0").is_err()); - assert!(parse_occurrence_digits.parse(b"0001").is_err()); - assert!(parse_occurrence_digits.parse(b"0a").is_err()); - } - - #[test] - fn parse_occurrence() { - macro_rules! parse_success { - ($input:expr) => { - assert_eq!( - super::parse_occurrence.parse($input).unwrap(), - OccurrenceRef($input[1..].as_bstr()) - ); - }; - } - - parse_success!(b"/00"); - parse_success!(b"/000"); - parse_success!(b"/001"); - parse_success!(b"/01"); - - macro_rules! parse_error { - ($input:expr) => { - assert!(super::parse_occurrence.parse($input).is_err()); - }; - } - - parse_error!(b""); - parse_error!(b"/"); - parse_error!(b"/0a"); - parse_error!(b"/0001"); - parse_error!(b"/0"); - } -} diff --git a/crates/pica-record-v1/src/primitives/code.rs b/crates/pica-record-v1/src/primitives/code.rs deleted file mode 100644 index c8aaec165..000000000 --- a/crates/pica-record-v1/src/primitives/code.rs +++ /dev/null @@ -1,173 +0,0 @@ -use std::fmt::{self, Display}; - -use crate::PicaError; - -/// A PICA+ subfield code. -/// -/// This type represents a PICA+ subfield code, which is a ASCII -/// alpha-numeric chracter. -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] -pub struct SubfieldCode(char); - -impl SubfieldCode { - /// Creates a new subfield code. - /// - /// # Error - /// - /// This functions fails if the given code is not an ASCII - /// alpha-numeric character. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldCode; - /// - /// let code = SubfieldCode::new('a')?; - /// assert_eq!(code, 'a'); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn new(code: char) -> Result { - if !code.is_ascii_alphanumeric() { - return Err(PicaError::InvalidSubfieldCode(code)); - } - - Ok(Self(code)) - } - - /// Creates a subfied code without checking for validity. - /// - /// # Safety - /// - /// The caller *must* ensure that the given subfield code is valid. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldCode; - /// - /// let code = SubfieldCode::from_unchecked('a'); - /// assert_eq!(code, 'a'); - /// - /// # Ok::<(), Box>(()) - /// ``` - #[inline] - pub fn from_unchecked>(code: T) -> Self { - Self(code.into()) - } - - /// Returns the subfield code as a byte (`u8`). - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldCode; - /// - /// let code = SubfieldCode::new('a')?; - /// assert_eq!(code.as_byte(), b'a'); - /// - /// # Ok::<(), Box>(()) - /// ``` - #[inline] - pub fn as_byte(&self) -> u8 { - self.0 as u8 - } -} - -impl Display for SubfieldCode { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -impl PartialEq for SubfieldCode { - fn eq(&self, code: &char) -> bool { - self.0 == *code - } -} - -impl PartialEq for &SubfieldCode { - fn eq(&self, code: &char) -> bool { - self.0 == *code - } -} - -impl TryFrom for SubfieldCode { - type Error = PicaError; - - fn try_from(code: char) -> Result { - Self::new(code) - } -} - -#[cfg(feature = "arbitrary")] -impl quickcheck::Arbitrary for SubfieldCode { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let code = (1..) - .map(|_| char::arbitrary(g)) - .find(char::is_ascii_alphanumeric) - .unwrap(); - - Self(code) - } -} - -#[cfg(test)] -mod tests { - use quickcheck_macros::quickcheck; - - use super::*; - - #[quickcheck] - fn test_subfield_code_new(input: char) { - let result = SubfieldCode::new(input); - - if input.is_ascii_alphanumeric() { - assert_eq!(result.unwrap(), SubfieldCode(input)); - } else { - assert_eq!( - result.unwrap_err(), - PicaError::InvalidSubfieldCode(input) - ); - } - } - - #[test] - fn test_subfield_code_from_unchecked() { - for c in '0'..='z' { - if c.is_ascii_alphanumeric() { - assert_eq!( - SubfieldCode::from_unchecked(c), - SubfieldCode(c) - ); - } - } - } - - #[test] - fn test_subfield_code_as_byte() { - for c in '0'..='z' { - if c.is_ascii_alphanumeric() { - let code = SubfieldCode::new(c).unwrap(); - assert_eq!(code.as_byte(), c as u8); - } - } - } - - #[test] - fn test_subfield_code_try_from_char() { - for c in '0'..='z' { - if c.is_ascii_alphanumeric() { - assert_eq!( - SubfieldCode::try_from(c).unwrap(), - SubfieldCode(c) - ); - } else { - assert_eq!( - SubfieldCode::try_from(c).unwrap_err(), - PicaError::InvalidSubfieldCode(c) - ); - } - } - } -} diff --git a/crates/pica-record-v1/src/primitives/field.rs b/crates/pica-record-v1/src/primitives/field.rs deleted file mode 100644 index 0234667f3..000000000 --- a/crates/pica-record-v1/src/primitives/field.rs +++ /dev/null @@ -1,410 +0,0 @@ -use std::io::{self, Write}; -use std::iter; -use std::str::Utf8Error; - -use winnow::Parser; - -use super::parse_field_ref; -use crate::{ - Level, Occurrence, OccurrenceRef, ParsePicaError, Subfield, - SubfieldRef, Tag, TagRef, -}; - -/// An immutable PICA+ field. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct FieldRef<'a> { - pub(super) tag: TagRef<'a>, - pub(super) occurrence: Option>, - pub(super) subfields: Vec>, -} - -/// A mutable PICA+ field. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct Field { - tag: Tag, - occurrence: Option, - subfields: Vec, -} - -impl<'a> FieldRef<'a> { - /// Create a new field. - /// - /// # Panics - /// - /// This method panics if a parameter is invalid. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::{FieldRef, Tag}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let field = FieldRef::new("012A", None, vec![('0', "abc")]); - /// - /// assert_eq!(field.tag(), b"012A"); - /// assert_eq!(field.subfields().len(), 1); - /// assert!(field.occurrence().is_none()); - /// Ok(()) - /// } - /// ``` - pub fn new>( - tag: &'a B, - occurrence: Option<&'a B>, - subfields: Vec<(char, &'a B)>, - ) -> Self { - let occurrence = occurrence.map(OccurrenceRef::new); - let subfields: Vec> = subfields - .into_iter() - .map(TryFrom::try_from) - .collect::, _>>() - .expect("valid subfields"); - - Self { - tag: TagRef::new(tag), - occurrence, - subfields, - } - } - - /// Creates an immutable PICA+ field from a byte slice. - /// - /// If an invalid field is given, an error is returned. - /// - /// ```rust - /// use pica_record_v1::FieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let field = - /// FieldRef::from_bytes(b"003@ \x1f0123456789X\x1e").unwrap(); - /// Ok(()) - /// } - /// ``` - pub fn from_bytes(bytes: &'a [u8]) -> Result { - Self::try_from(bytes) - } - - /// Returns the tag of the field. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::{FieldRef, TagRef}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let field = - /// FieldRef::from_bytes(b"003@ \x1f0123456789X\x1e").unwrap(); - /// assert_eq!(field.tag(), &TagRef::new("003@")); - /// - /// Ok(()) - /// } - /// ``` - pub fn tag(&self) -> &TagRef { - &self.tag - } - - /// Returns a reference to the occurrence of the field. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::{FieldRef, Occurrence}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let field = FieldRef::new("012A", Some("01"), vec![]); - /// let occurrence = field.occurrence().unwrap(); - /// assert_eq!(*occurrence, "01"); - /// - /// Ok(()) - /// } - /// ``` - pub fn occurrence(&self) -> Option<&OccurrenceRef> { - self.occurrence.as_ref() - } - - /// Returns the subfields of the field. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::FieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let field = FieldRef::new( - /// "012A", - /// Some("01"), - /// vec![('a', "b"), ('c', "d")], - /// ); - /// - /// assert_eq!(field.subfields().len(), 2); - /// - /// Ok(()) - /// } - /// ``` - pub fn subfields(&self) -> &Vec { - self.subfields.as_ref() - } - - /// Returns `true` if the field contains a subfield with the given - /// code. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::FieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let field = - /// FieldRef::from_bytes(b"003@ \x1f0123456789X\x1e").unwrap(); - /// assert!(field.contains('0')); - /// assert!(!field.contains('a')); - /// - /// Ok(()) - /// } - /// ``` - pub fn contains(&self, code: char) -> bool { - self.subfields - .iter() - .any(|subfield| *subfield.code() == code) - } - - /// Searches for the first subfield that satisfies the given - /// predicate. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::{FieldRef, SubfieldRef}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let field = FieldRef::new( - /// "012A", - /// Some("01"), - /// vec![('a', "b"), ('c', "d")], - /// ); - /// - /// assert_eq!( - /// field.find(|subfield| subfield.code() == 'a').unwrap(), - /// &SubfieldRef::new('a', "b")? - /// ); - /// - /// assert!(field - /// .find(|subfield| subfield.code() == 'b') - /// .is_none()); - /// - /// Ok(()) - /// } - /// ``` - pub fn find(&self, predicate: F) -> Option<&SubfieldRef> - where - F: Fn(&&SubfieldRef) -> bool, - { - self.subfields().iter().find(predicate) - } - - /// Returns an [`std::str::Utf8Error`](Utf8Error) if the field - /// contains invalid UTF-8 data, otherwise the unit. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::FieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let field = FieldRef::from_bytes(b"003@ \x1f0123\x1e")?; - /// assert!(field.validate().is_ok()); - /// - /// let field = FieldRef::from_bytes(b"003@ \x1f0\x00\x9F\x1e")?; - /// /// assert!(field.validate().is_err()); - /// Ok(()) - /// } - /// ``` - pub fn validate(&self) -> Result<(), Utf8Error> { - for subfield in self.subfields() { - subfield.validate()?; - } - - Ok(()) - } - - /// Write the field into the given writer. - /// - /// # Example - /// - /// ```rust - /// use std::io::Cursor; - /// - /// use pica_record_v1::FieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let mut writer = Cursor::new(Vec::::new()); - /// let field = FieldRef::from_bytes(b"012A/01 \x1fab\x1fcd\x1e")?; - /// field.write_to(&mut writer); - /// # - /// # assert_eq!( - /// # String::from_utf8(writer.into_inner())?, - /// # "012A/01 \x1fab\x1fcd\x1e" - /// # ); - /// - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn write_to(&self, out: &mut impl Write) -> io::Result<()> { - write!(out, "{}", self.tag)?; - self.occurrence().map(|o| o.write_to(out)); - - write!(out, " ")?; - - for subfield in self.subfields.iter() { - subfield.write_to(out)?; - } - - write!(out, "\x1e") - } - - /// Returns the level of the field. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::{FieldRef, Level}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let field = FieldRef::from_bytes(b"012A/01 \x1fab\x1fcd\x1e")?; - /// assert_eq!(field.level(), Level::Main); - /// - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn level(&self) -> Level { - self.tag.level() - } -} - -impl<'a> TryFrom<&'a [u8]> for FieldRef<'a> { - type Error = ParsePicaError; - - fn try_from(value: &'a [u8]) -> Result { - parse_field_ref - .parse(value) - .map_err(|_| ParsePicaError::InvalidField) - } -} - -impl<'a> IntoIterator for &'a FieldRef<'a> { - type Item = &'a FieldRef<'a>; - type IntoIter = iter::Once; - - /// Creates an iterator from a single field. The iterator just - /// returns the field once. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::FieldRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let field = FieldRef::new("003@", None, vec![('0', "abc")]); - /// let mut iter = field.into_iter(); - /// - /// assert_eq!(iter.next(), Some(&field)); - /// assert_eq!(iter.next(), None); - /// - /// Ok(()) - /// } - /// ``` - fn into_iter(self) -> Self::IntoIter { - iter::once(self) - } -} - -impl From> for Field { - fn from(other: FieldRef<'_>) -> Self { - let FieldRef { - tag, - occurrence, - subfields, - } = other; - - Field { - tag: tag.into(), - occurrence: occurrence.map(Occurrence::from), - subfields: subfields - .into_iter() - .map(Subfield::from) - .collect(), - } - } -} - -impl Field { - /// Write the field into the given writer. - /// - /// # Example - /// - /// ```rust - /// use std::io::Cursor; - /// - /// use pica_record_v1::{Field, FieldRef}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let mut writer = Cursor::new(Vec::::new()); - /// let field: Field = - /// FieldRef::from_bytes(b"012A/01 \x1fab\x1fcd\x1e")?.into(); - /// field.write_to(&mut writer); - /// # - /// # assert_eq!( - /// # String::from_utf8(writer.into_inner())?, - /// # "012A/01 \x1fab\x1fcd\x1e" - /// # ); - /// - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn write_to(&self, out: &mut impl Write) -> io::Result<()> { - let _ = out.write(self.tag.as_bytes())?; - - if let Some(ref o) = self.occurrence { - o.write_to(out)?; - } - - write!(out, " ")?; - - for subfield in self.subfields.iter() { - subfield.write_to(out)?; - } - - write!(out, "\x1e") - } -} - -#[cfg(feature = "arbitrary")] -impl quickcheck::Arbitrary for Field { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let tag = Tag::arbitrary(g); - let occurrence = Option::::arbitrary(g); - let subfields = (0..g.size()) - .map(|_| Subfield::arbitrary(g)) - .collect::>(); - - Self { - tag, - occurrence, - subfields, - } - } -} diff --git a/crates/pica-record-v1/src/primitives/mod.rs b/crates/pica-record-v1/src/primitives/mod.rs deleted file mode 100644 index b95c6e4de..000000000 --- a/crates/pica-record-v1/src/primitives/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -pub use code::SubfieldCode; -pub use field::{Field, FieldRef}; -pub use parse::{ - parse_field_ref, parse_subfield_code, parse_subfield_ref, - parse_subfield_value_ref, -}; -pub use subfield::{Subfield, SubfieldRef}; -pub use value::{SubfieldValue, SubfieldValueRef}; - -mod code; -mod field; -mod parse; -mod subfield; -mod value; diff --git a/crates/pica-record-v1/src/primitives/parse.rs b/crates/pica-record-v1/src/primitives/parse.rs deleted file mode 100644 index 9a0b3e049..000000000 --- a/crates/pica-record-v1/src/primitives/parse.rs +++ /dev/null @@ -1,182 +0,0 @@ -use winnow::combinator::{opt, preceded, repeat}; -use winnow::prelude::*; -use winnow::token::{one_of, take_till}; - -use super::{FieldRef, SubfieldRef}; -use crate::occurrence::parse_occurrence; -use crate::tag::parse_tag; -use crate::{SubfieldCode, SubfieldValueRef}; - -/// Parse a PICA+ subfield code. -pub fn parse_subfield_code(i: &mut &[u8]) -> PResult { - one_of((b'0'..=b'9', b'a'..=b'z', b'A'..=b'Z')) - .map(SubfieldCode::from_unchecked) - .parse_next(i) -} - -/// Parse a PICA+ subfield value reference. -pub fn parse_subfield_value_ref<'a>( - i: &mut &'a [u8], -) -> PResult> { - take_till(0.., |c| c == b'\x1f' || c == b'\x1e') - .map(SubfieldValueRef::from_unchecked) - .parse_next(i) -} - -/// Parse a PICA+ subfield. -pub fn parse_subfield_ref<'a>( - i: &mut &'a [u8], -) -> PResult> { - preceded(b'\x1f', (parse_subfield_code, parse_subfield_value_ref)) - .map(|(code, value)| SubfieldRef { code, value }) - .parse_next(i) -} - -/// Parse a PICA+ field. -pub fn parse_field_ref<'a>(i: &mut &'a [u8]) -> PResult> { - ( - parse_tag, - opt(parse_occurrence), - b' ', - repeat(0.., parse_subfield_ref), - b'\x1e', - ) - .map(|(tag, occurrence, _, subfields, _)| FieldRef { - tag, - occurrence, - subfields, - }) - .parse_next(i) -} - -#[cfg(test)] -mod tests { - use bstr::ByteSlice; - use quickcheck_macros::quickcheck; - - use super::*; - use crate::{Field, Subfield}; - - #[quickcheck] - fn test_parse_arbitrary_subfield_code(code: u8) { - if code.is_ascii_alphanumeric() { - assert_eq!( - parse_subfield_code.parse(&[code]).unwrap(), - SubfieldCode::from_unchecked(char::from(code)) - ); - } else { - assert!(parse_subfield_code.parse(&[code]).is_err()); - } - } - - #[test] - fn test_parse_subfield_value_ref() { - macro_rules! parse_success { - ($input:expr, $expected:expr, $rest:expr) => { - let value = SubfieldValueRef::from_unchecked($expected); - assert_eq!( - parse_subfield_value_ref - .parse_peek($input) - .unwrap(), - ($rest.as_bytes(), value) - ); - }; - } - parse_success!(b"abc", b"abc", b""); - parse_success!(b"a\x1ebc", b"a", b"\x1ebc"); - parse_success!(b"a\x1fbc", b"a", b"\x1fbc"); - parse_success!(b"", b"", b""); - } - - #[quickcheck] - fn test_parse_arbitrary_subfield_value_ref(input: String) { - let input = input.replace(['\x1f', '\x1e'], ""); - let rest = b"".as_bytes(); - - let value = SubfieldValueRef::from_unchecked(input.as_bytes()); - assert_eq!( - parse_subfield_value_ref - .parse_peek(input.as_bytes()) - .unwrap(), - (rest, value) - ); - } - - #[test] - fn test_parse_subfield_ref() { - assert_eq!( - parse_subfield_ref.parse(b"\x1fa123").unwrap(), - SubfieldRef::new('a', "123").unwrap() - ); - - assert_eq!( - parse_subfield_ref.parse(b"\x1fa").unwrap(), - SubfieldRef::new('a', "").unwrap() - ); - - assert!(parse_subfield_ref.parse(b"a123").is_err()); - assert!(parse_subfield_ref.parse(b"").is_err()); - } - - #[cfg_attr(miri, ignore)] - #[quickcheck_macros::quickcheck] - fn test_parse_arbitrary_subfield_ref(subfield: Subfield) -> bool { - let mut bytes = Vec::::new(); - let _ = subfield.write_to(&mut bytes); - parse_subfield_ref.parse(&bytes).is_ok() - } - - #[test] - fn test_parse_field_ref() { - macro_rules! parse_success { - ($i:expr, $tag:expr, $occurrence:expr, $subfields:expr) => { - let field = - FieldRef::new($tag, $occurrence, $subfields); - let result = parse_field_ref.parse($i).unwrap(); - assert_eq!(result, field); - }; - ($i:expr, $tag:expr, $subfields:expr) => { - let field = FieldRef::new($tag, None, $subfields); - let result = parse_field_ref.parse($i).unwrap(); - assert_eq!(result, field); - }; - ($i:expr, $tag:expr) => { - let field = FieldRef::new($tag, None, vec![]); - let result = parse_field_ref.parse($i).unwrap(); - assert_eq!(result, field); - }; - } - - parse_success!( - b"012A/01 \x1fabc\x1e", - "012A", - Some("01"), - vec![('a', "bc")] - ); - - parse_success!(b"012A \x1fabc\x1e", "012A", vec![('a', "bc")]); - parse_success!(b"012A \x1e", "012A"); - - macro_rules! parse_error { - ($i:expr) => { - assert!(parse_field_ref.parse($i).is_err()); - }; - } - - parse_error!(b"012A/00\x1fabc\x1e"); - parse_error!(b"012A/00 abc\x1e"); - parse_error!(b"012A/00 \x1fabc"); - parse_error!(b"012!/01 \x1fabc\x1e"); - parse_error!(b"012A/0! \x1fabc\x1e"); - parse_error!(b"012A/00 \x1f!bc\x1e"); - } - - #[cfg_attr(miri, ignore)] - #[quickcheck_macros::quickcheck] - fn test_parse_arbitrary_field_ref(field: Field) -> bool { - let mut bytes = Vec::::new(); - let _ = field.write_to(&mut bytes); - - parse_field_ref.parse(&bytes).is_ok() - } -} diff --git a/crates/pica-record-v1/src/primitives/subfield.rs b/crates/pica-record-v1/src/primitives/subfield.rs deleted file mode 100644 index 53ccf54b9..000000000 --- a/crates/pica-record-v1/src/primitives/subfield.rs +++ /dev/null @@ -1,360 +0,0 @@ -use std::io::{self, Write}; -use std::iter; -use std::ops::Deref; -use std::str::Utf8Error; - -use bstr::ByteSlice; -use winnow::Parser; - -use super::parse::parse_subfield_ref; -use crate::{PicaError, SubfieldCode, SubfieldValue, SubfieldValueRef}; - -/// An immutable PICA+ subfield. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct SubfieldRef<'a> { - pub(super) code: SubfieldCode, - pub(super) value: SubfieldValueRef<'a>, -} - -impl<'a> SubfieldRef<'a> { - /// Create a new immutable PICA+ subfield. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldRef; - /// - /// let subfield = SubfieldRef::new('a', "bcd")?; - /// assert_eq!(subfield.code(), 'a'); - /// assert_eq!(subfield.value(), "bcd"); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn new(code: char, value: &'a T) -> Result - where - T: ?Sized + AsRef<[u8]>, - { - Ok(Self { - code: SubfieldCode::new(code)?, - value: SubfieldValueRef::new(value)?, - }) - } - - /// Creates an immutable PICA+ subfield from a byte slice. - /// - /// # Error - /// - /// If an invalid subfield is given, an error is returned. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldRef; - /// - /// let subfield = SubfieldRef::from_bytes(b"\x1f0123456789X")?; - /// assert_eq!(subfield.code(), '0'); - /// assert_eq!(subfield.value(), "123456789X"); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn from_bytes + ?Sized>( - bytes: &'a T, - ) -> Result { - let bytes = bytes.as_ref(); - - parse_subfield_ref.parse(bytes).map_err(|_| { - PicaError::InvalidSubfield(bytes.to_str_lossy().to_string()) - }) - } - - /// Returns the code of the subfield. - #[inline] - pub fn code(&self) -> &SubfieldCode { - &self.code - } - - /// Returns the value of the subfield. - #[inline] - pub fn value(&self) -> &SubfieldValueRef { - &self.value - } - - /// Returns an [`std::str::Utf8Error`](Utf8Error) if the subfield - /// value contains invalid UTF-8 data, otherwise the unit. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldRef; - /// - /// let subfield = SubfieldRef::new('0', "123456789X")?; - /// assert!(subfield.validate().is_ok()); - /// - /// let subfield = SubfieldRef::from_bytes(&[b'\x1f', b'0', 0, 159])?; - /// assert_eq!(subfield.validate().is_err(), true); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn validate(&self) -> Result<(), Utf8Error> { - if self.value.is_ascii() { - return Ok(()); - } - - std::str::from_utf8(&self.value)?; - Ok(()) - } - - /// Write the subfield into the given writer. - /// - /// # Example - /// - /// ```rust - /// use std::io::Cursor; - /// - /// use pica_record_v1::SubfieldRef; - /// - /// let mut writer = Cursor::new(Vec::::new()); - /// let subfield = SubfieldRef::new('0', "123456789X")?; - /// subfield.write_to(&mut writer); - /// # - /// # assert_eq!( - /// # String::from_utf8(writer.into_inner())?, - /// # "\x1f0123456789X" - /// # ); - /// - /// # Ok::<(), Box>(()) - /// ``` - #[inline] - pub fn write_to(&self, out: &mut impl Write) -> io::Result<()> { - write!(out, "\x1f{}{}", self.code, self.value) - } -} - -impl<'a> Deref for SubfieldRef<'a> { - type Target = SubfieldValueRef<'a>; - - fn deref(&self) -> &Self::Target { - &self.value - } -} - -impl<'a, T> TryFrom<(char, &'a T)> for SubfieldRef<'a> -where - T: AsRef<[u8]> + ?Sized, -{ - type Error = PicaError; - - fn try_from(value: (char, &'a T)) -> Result { - Ok(Self { - value: SubfieldValueRef::new(value.1)?, - code: SubfieldCode::new(value.0)?, - }) - } -} - -impl<'a> IntoIterator for &'a SubfieldRef<'a> { - type Item = &'a SubfieldRef<'a>; - type IntoIter = iter::Once; - - /// Creates an iterator from a single subfield. The iterator just - /// returns the subfield once. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldRef; - /// - /// let subfield = SubfieldRef::new('0', "123456789X")?; - /// let mut iter = subfield.into_iter(); - /// assert_eq!(iter.next(), Some(&subfield)); - /// assert_eq!(iter.next(), None); - /// - /// # Ok::<(), Box>(()) - /// ``` - fn into_iter(self) -> Self::IntoIter { - iter::once(self) - } -} - -/// A mutable PICA+ subfield. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Subfield { - code: SubfieldCode, - value: SubfieldValue, -} - -impl Subfield { - /// Write the subfield into the given writer. - /// - /// # Example - /// - /// ```rust - /// use std::io::Cursor; - /// - /// use pica_record_v1::{Subfield, SubfieldRef}; - /// - /// let mut writer = Cursor::new(Vec::::new()); - /// let subfield: Subfield = - /// SubfieldRef::new('0', "123456789X")?.into(); - /// subfield.write_to(&mut writer); - /// # - /// # assert_eq!( - /// # String::from_utf8(writer.into_inner())?, - /// # "\x1f0123456789X" - /// # ); - /// - /// # Ok::<(), Box>(()) - /// ``` - #[inline] - pub fn write_to(&self, out: &mut impl Write) -> io::Result<()> { - write!(out, "\x1f{}{}", self.code, self.value) - } -} - -impl PartialEq for SubfieldRef<'_> { - #[inline] - fn eq(&self, other: &Subfield) -> bool { - self.code == other.code && self.value == other.value - } -} - -impl PartialEq> for Subfield { - #[inline] - fn eq(&self, other: &SubfieldRef<'_>) -> bool { - self.code == other.code && self.value == other.value - } -} - -impl From> for Subfield { - #[inline] - fn from(other: SubfieldRef<'_>) -> Self { - Subfield { - value: other.value.into(), - code: other.code, - } - } -} - -#[cfg(feature = "arbitrary")] -impl quickcheck::Arbitrary for Subfield { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - Self { - code: SubfieldCode::arbitrary(g), - value: SubfieldValue::arbitrary(g), - } - } -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use super::*; - - #[test] - fn test_subfield_ref_new() { - let subfield = SubfieldRef::new('a', "abc").unwrap(); - assert_eq!(subfield.code(), 'a'); - assert_eq!(subfield.value(), "abc"); - - assert_eq!( - SubfieldRef::new('!', "abc").unwrap_err(), - PicaError::InvalidSubfieldCode('!') - ); - - assert_eq!( - SubfieldRef::new('a', "a\x1fbc").unwrap_err(), - PicaError::InvalidSubfieldValue("a\x1fbc".to_string()) - ); - } - - #[test] - fn test_subfield_ref_from_bytes() { - let subfield = SubfieldRef::from_bytes(b"\x1f0abc").unwrap(); - assert_eq!(subfield.code(), '0'); - assert_eq!(subfield.value(), "abc"); - - assert_eq!( - SubfieldRef::from_bytes("\x1f!abc").unwrap_err(), - PicaError::InvalidSubfield("\x1f!abc".to_string()) - ); - } - - #[test] - fn test_subfield_ref_code() { - let subfield = SubfieldRef::new('1', "abc").unwrap(); - assert_eq!(subfield.code(), '1'); - } - - #[test] - fn test_subfield_ref_value() { - let subfield = SubfieldRef::new('1', "abc").unwrap(); - assert_eq!(subfield.value(), "abc"); - } - - #[test] - fn test_subfield_ref_is_empty() { - let subfield = SubfieldRef::new('1', "abc").unwrap(); - assert!(!subfield.is_empty()); - - let subfield = SubfieldRef::new('1', "").unwrap(); - assert!(subfield.is_empty()); - } - - #[test] - fn test_subfield_ref_validate() { - let subfield = SubfieldRef::new('1', "abc").unwrap(); - assert!(subfield.validate().is_ok()); - - let subfield = - SubfieldRef::from_bytes(&[b'\x1f', b'0', 0, 159]).unwrap(); - assert!(subfield.validate().is_err()); - } - - #[test] - fn test_subfield_ref_write_to() { - let mut writer = Cursor::new(Vec::::new()); - let subfield = SubfieldRef::new('0', "abcdef").unwrap(); - let _ = subfield.write_to(&mut writer); - assert_eq!(writer.into_inner(), b"\x1f0abcdef"); - } - - #[test] - fn test_subfield_ref_try_from() { - let subfield = SubfieldRef::try_from(('a', "abc")).unwrap(); - assert_eq!(subfield.code(), 'a'); - assert_eq!(subfield.value(), "abc"); - - let err = SubfieldRef::try_from(('!', "abc")).unwrap_err(); - assert!(matches!(err, PicaError::InvalidSubfieldCode(_))); - - let err = SubfieldRef::try_from(('a', "a\x1fc")).unwrap_err(); - assert!(matches!(err, PicaError::InvalidSubfieldValue(_))); - - let err = SubfieldRef::try_from(('a', "a\x1ec")).unwrap_err(); - assert!(matches!(err, PicaError::InvalidSubfieldValue(_))); - } - - #[test] - fn test_subfield_ref_into_iter() { - let subfield = SubfieldRef::new('0', "abcdef").unwrap(); - let mut iter = subfield.into_iter(); - assert_eq!(iter.next(), Some(&subfield)); - assert_eq!(iter.next(), None); - } - - #[test] - fn test_subfield_write_to() { - let mut writer = Cursor::new(Vec::::new()); - let subfield: Subfield = - SubfieldRef::new('0', "abcdef").unwrap().into(); - let _ = subfield.write_to(&mut writer); - assert_eq!(writer.into_inner(), b"\x1f0abcdef"); - } - - #[test] - fn test_subfield_from_ref() { - let subfield_ref = SubfieldRef::new('0', "abc").unwrap(); - let _subfield = Subfield::from(subfield_ref.clone()); - } -} diff --git a/crates/pica-record-v1/src/primitives/value.rs b/crates/pica-record-v1/src/primitives/value.rs deleted file mode 100644 index 1fbf77cc0..000000000 --- a/crates/pica-record-v1/src/primitives/value.rs +++ /dev/null @@ -1,261 +0,0 @@ -use std::fmt::{self, Display}; -use std::ops::Deref; - -use bstr::{BStr, ByteSlice}; - -use crate::PicaError; - -/// An immutable PICA+ subfield value. -/// -/// This type behaves like byte slice but guarantees that the subfield -/// value does contain neither '\x1e' nor '\x1f'. -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] -pub struct SubfieldValueRef<'a>(&'a BStr); - -impl<'a> SubfieldValueRef<'a> { - /// Create a new subfield value reference from a byte slice. - /// - /// # Error - /// - /// This function fails if the subfield value contains either the - /// field separator '\x1f' or the record separator '\x1e'. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldValueRef; - /// - /// let value = SubfieldValueRef::new(b"abc")?; - /// assert_eq!(value, "abc"); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn new(value: &'a T) -> Result - where - T: AsRef<[u8]> + ?Sized, - { - let value = value.as_ref(); - if value.find_byteset(b"\x1f\x1e").is_some() { - return Err(PicaError::InvalidSubfieldValue( - value.to_str_lossy().to_string(), - )); - } - - Ok(Self(value.into())) - } - - /// Create a new subfield value reference from a byte slice without - /// checking for validity. - /// - /// # Safety - /// - /// The caller *must* ensure that the value neither contains the - /// record separator '\x1e' nor the field separator '\x1f'. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldValueRef; - /// - /// let value = SubfieldValueRef::from_unchecked("abc"); - /// assert_eq!(value, "abc"); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn from_unchecked(value: &'a T) -> Self - where - T: AsRef<[u8]> + ?Sized, - { - Self(value.as_ref().into()) - } - - /// Returns the subfield value as a byte slice. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldValueRef; - /// - /// let value = SubfieldValueRef::from_unchecked("abc"); - /// assert_eq!(value.as_bytes(), b"abc"); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn as_bytes(&self) -> &'a [u8] { - self.0 - } -} - -impl Deref for SubfieldValueRef<'_> { - type Target = BStr; - - fn deref(&self) -> &Self::Target { - self.0 - } -} - -impl Display for SubfieldValueRef<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -impl PartialEq for SubfieldValueRef<'_> { - fn eq(&self, value: &str) -> bool { - self.0 == value.as_bytes() - } -} - -impl PartialEq<&str> for SubfieldValueRef<'_> { - fn eq(&self, value: &&str) -> bool { - self.0 == value.as_bytes() - } -} - -impl PartialEq> for SubfieldValueRef<'_> { - fn eq(&self, other: &Vec) -> bool { - self.0 == other - } -} - -/// A mutable PICA+ subfield value. -/// -/// This type behaves like byte slice but guarantees that the subfield -/// value does not contain neither '\x1e' or '\x1f'. -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] -pub struct SubfieldValue(Vec); - -impl SubfieldValue { - /// Create a new subfield value from a byte slice. - /// - /// # Error - /// - /// This function fails if the subfield value contains either the - /// field separator '\x1f' or the record separator '\x1e'. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldValue; - /// - /// let value = SubfieldValue::new(b"abc")?; - /// assert_eq!(value, "abc"); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn new(value: &T) -> Result - where - T: AsRef<[u8]>, - { - let value = value.as_ref(); - if value.find_byteset(b"\x1f\x1e").is_some() { - return Err(PicaError::InvalidSubfieldValue( - value.to_str_lossy().to_string(), - )); - } - - Ok(Self(value.to_vec())) - } - - /// Create a new subfield value from a byte slice without checking - /// for validity. - /// - /// # Safety - /// - /// The caller *must* ensure that the value neither contains the - /// record separator '\x1e' nor the field separator '\x1f'. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::SubfieldValue; - /// - /// let value = SubfieldValue::from_unchecked("abc"); - /// assert_eq!(value, "abc"); - /// - /// # Ok::<(), Box>(()) - /// ``` - pub fn from_unchecked(value: &T) -> Self - where - T: AsRef<[u8]> + ?Sized, - { - Self(value.as_ref().to_vec()) - } -} - -impl Display for SubfieldValue { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0.as_bstr()) - } -} - -impl From> for SubfieldValue { - fn from(value: SubfieldValueRef<'_>) -> Self { - Self(value.to_vec()) - } -} - -impl PartialEq> for SubfieldValue { - fn eq(&self, other: &SubfieldValueRef<'_>) -> bool { - self.0 == other.0 - } -} - -impl PartialEq for SubfieldValueRef<'_> { - fn eq(&self, other: &SubfieldValue) -> bool { - self.0 == other.0 - } -} - -impl PartialEq<&str> for SubfieldValue { - fn eq(&self, other: &&str) -> bool { - self.0 == other.as_bytes() - } -} - -#[cfg(feature = "arbitrary")] -impl quickcheck::Arbitrary for SubfieldValue { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let value = String::arbitrary(g).replace(['\x1f', '\x1e'], ""); - Self::from_unchecked(&value) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_subfield_value_ref_new() { - let value = SubfieldValueRef::new("abc").unwrap(); - assert_eq!(value, "abc"); - - let value = SubfieldValueRef::new("").unwrap(); - assert_eq!(value, ""); - - assert_eq!( - SubfieldValueRef::new("abc\x1e").unwrap_err(), - PicaError::InvalidSubfieldValue("abc\x1e".to_string()) - ); - - assert_eq!( - SubfieldValueRef::new("abc\x1f").unwrap_err(), - PicaError::InvalidSubfieldValue("abc\x1f".to_string()) - ); - } - - #[test] - fn test_subfield_value_ref_from_unchecked() { - let value = SubfieldValueRef::from_unchecked("abc"); - assert_eq!(value, "abc"); - - let value = SubfieldValueRef::from_unchecked(""); - assert_eq!(value, ""); - } - - #[test] - fn test_subfield_value_ref_as_bytes() { - let value = SubfieldValueRef::from_unchecked("abc"); - assert_eq!(value.as_bytes(), b"abc"); - } -} diff --git a/crates/pica-record-v1/src/record.rs b/crates/pica-record-v1/src/record.rs deleted file mode 100644 index 627c362a1..000000000 --- a/crates/pica-record-v1/src/record.rs +++ /dev/null @@ -1,518 +0,0 @@ -use std::hash::{Hash, Hasher}; -use std::io::{self, Cursor, Write}; -use std::ops::{Deref, DerefMut}; -use std::slice::Iter; -use std::str::Utf8Error; - -use sha2::{Digest, Sha256}; -use winnow::combinator::{repeat, terminated}; -use winnow::{PResult, Parser}; - -use crate::parser::parse_field_ref; -use crate::{Field, FieldRef, ParsePicaError}; - -/// An immutable PICA+ record. -#[derive(Debug)] -pub struct RecordRef<'a>(Vec>); - -/// An immutable PICA+ record. -#[derive(Debug, Clone)] -pub struct Record(Vec); - -#[inline] -fn parse_record<'a>(i: &mut &'a [u8]) -> PResult> { - terminated(repeat(1.., parse_field_ref), b'\n') - .map(RecordRef) - .parse_next(i) -} - -impl<'a> RecordRef<'a> { - /// Create a new immutable record. - /// - /// # Panics - /// - /// This method panics if a parameter is invalid. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::RecordRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let record = - /// RecordRef::new(vec![("003@", None, vec![('0', "abc")])]); - /// assert_eq!(record.iter().len(), 1); - /// - /// Ok(()) - /// } - /// ``` - #[allow(clippy::type_complexity)] - pub fn new( - fields: Vec<(&'a B, Option<&'a B>, Vec<(char, &'a B)>)>, - ) -> Self - where - B: ?Sized + AsRef<[u8]>, - { - let fields = fields - .into_iter() - .map(|(t, o, s)| FieldRef::new(t, o, s)) - .collect(); - - Self(fields) - } - - /// Creates an PICA+ record from a byte slice. - /// - /// If an invalid record is given, an error is returned. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::RecordRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let record = RecordRef::from_bytes(b"003@ \x1f0abc\x1e\n"); - /// assert_eq!(record.iter().len(), 1); - /// - /// Ok(()) - /// } - /// ``` - pub fn from_bytes(bytes: &'a [u8]) -> Result { - parse_record - .parse(bytes) - .map_err(|_| ParsePicaError::InvalidRecord(bytes.into())) - } - - /// Returns `true` if the record contains no fields, otherwise - /// `false`. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::RecordRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let record = RecordRef::from_bytes(b"002@ \x1f0Oaf\x1e\n")?; - /// assert!(!record.is_empty()); - /// - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } - - /// Returns an iterator over the fields of the record. - /// - /// # Panics - /// - /// This method panics if a parameter is invalid. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::RecordRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let record = RecordRef::new(vec![ - /// ("003@", None, vec![('0', "123456789X")]), - /// ("002@", None, vec![('0', "Oaf")]), - /// ]); - /// - /// assert_eq!(record.iter().len(), 2); - /// Ok(()) - /// } - /// ``` - pub fn iter(&self) -> Iter { - self.0.iter() - } - - /// Retains only the fields specified by the predicate. - /// - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::{RecordRef, TagRef}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let mut record = RecordRef::new(vec![ - /// ("003@", None, vec![('0', "123456789X")]), - /// ("002@", None, vec![('0', "Oaf")]), - /// ]); - /// - /// record.retain(|field| field.tag() == &TagRef::new("003@")); - /// assert_eq!(record.iter().len(), 1); - /// - /// Ok(()) - /// } - /// ``` - pub fn retain bool>(&mut self, f: F) { - self.0.retain(f); - } - - /// Returns an [`std::str::Utf8Error`](Utf8Error) if the record - /// contains invalid UTF-8 data, otherwise the unit. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::RecordRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let record = RecordRef::from_bytes(b"003@ \x1f0a\x1e\n")?; - /// assert!(record.validate().is_ok()); - /// Ok(()) - /// } - /// ``` - pub fn validate(&self) -> Result<(), Utf8Error> { - for field in self.iter() { - field.validate()?; - } - - Ok(()) - } - - /// Write the record into the given writer. - /// - /// # Example - /// - /// ```rust - /// use std::io::Cursor; - /// - /// use pica_record_v1::RecordRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let mut writer = Cursor::new(Vec::::new()); - /// let record = RecordRef::from_bytes(b"003@ \x1f0a\x1e\n")?; - /// record.write_to(&mut writer); - /// # - /// # assert_eq!( - /// # String::from_utf8(writer.into_inner())?, - /// # "003@ \x1f0a\x1e\n" - /// # ); - /// - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn write_to(&self, out: &mut impl Write) -> io::Result<()> { - if !self.is_empty() { - for field in self.iter() { - field.write_to(out)?; - } - - writeln!(out)?; - } - - Ok(()) - } -} - -impl PartialEq> for RecordRef<'_> { - fn eq(&self, other: &RecordRef<'_>) -> bool { - self.0 == other.0 - } -} - -impl From> for Record { - fn from(other: RecordRef<'_>) -> Self { - Self(other.0.into_iter().map(Field::from).collect()) - } -} -impl Record { - /// Write the record into the given writer. - /// - /// # Example - /// - /// ```rust - /// use std::io::Cursor; - /// - /// use pica_record_v1::{Record, RecordRef}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let mut writer = Cursor::new(Vec::::new()); - /// let record: Record = - /// RecordRef::from_bytes(b"003@ \x1f0a\x1e\n")?.into(); - /// record.write_to(&mut writer); - /// # - /// # assert_eq!( - /// # String::from_utf8(writer.into_inner())?, - /// # "003@ \x1f0a\x1e\n" - /// # ); - /// - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn write_to(&self, out: &mut impl Write) -> io::Result<()> { - if !self.0.is_empty() { - for field in self.0.iter() { - field.write_to(out)?; - } - - writeln!(out)?; - } - - Ok(()) - } -} - -#[cfg(feature = "arbitrary")] -impl quickcheck::Arbitrary for Record { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let size = g.size(); - let fields = - (0..size).map(|_| Field::arbitrary(g)).collect::>(); - - Self(fields) - } -} - -/// A PICA+ record, that may contain invalid UTF-8 data. -#[derive(Debug)] -pub struct ByteRecord<'a> { - raw_data: Option<&'a [u8]>, - record: RecordRef<'a>, -} - -impl<'a> ByteRecord<'a> { - /// Creates an byte record from a byte slice. - /// - /// If an invalid record is given, an error is returned. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::ByteRecord; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let record = ByteRecord::from_bytes(b"003@ \x1f0abc\x1e\n"); - /// assert_eq!(record.iter().len(), 1); - /// Ok(()) - /// } - /// ``` - pub fn from_bytes(bytes: &'a [u8]) -> Result { - Ok(Self { - record: RecordRef::from_bytes(bytes)?, - raw_data: Some(bytes), - }) - } - - /// Write the record into the given writer. - /// - /// # Example - /// - /// ```rust - /// use std::io::Cursor; - /// - /// use pica_record_v1::ByteRecord; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let mut writer = Cursor::new(Vec::::new()); - /// let record = ByteRecord::from_bytes(b"003@ \x1f0a\x1e\n")?; - /// record.write_to(&mut writer); - /// # - /// # assert_eq!( - /// # String::from_utf8(writer.into_inner())?, - /// # "003@ \x1f0a\x1e\n" - /// # ); - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn write_to(&self, out: &mut impl Write) -> io::Result<()> { - match self.raw_data { - Some(data) => out.write_all(data), - None => self.record.write_to(out), - } - } - - /// Retains only the fields specified by the predicate. - /// - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::{ByteRecord, TagRef}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let mut record = ByteRecord::from_bytes( - /// b"003@ \x1f0a\x1e002@ \x1f0Olfo\x1e\n", - /// )?; - /// - /// record.retain(|field| field.tag() == &TagRef::new("003@")); - /// assert_eq!(record.iter().len(), 1); - /// Ok(()) - /// } - /// ``` - pub fn retain bool>(&mut self, f: F) { - self.record.retain(f); - self.raw_data = None; - } - - /// Returns the SHA-256 hash of the record. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::ByteRecord; - /// use std::fmt::Write; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let mut record = - /// ByteRecord::from_bytes(b"012A \x1fa123\x1e\n")?; - /// - /// let hash = record.sha256().iter().fold( - /// String::new(), |mut out, b| { - /// let _ = write!(out, "{b:02x}"); - /// out - /// }); - /// - /// assert!(hash.starts_with("95e266")); - /// Ok(()) - /// } - pub fn sha256(&self) -> Vec { - let mut writer = Cursor::new(Vec::::new()); - let mut hasher = Sha256::new(); - - let _ = self.write_to(&mut writer); - let data = writer.into_inner(); - hasher.update(data); - - let result = hasher.finalize(); - result.to_vec() - } -} - -impl<'a> Deref for ByteRecord<'a> { - type Target = RecordRef<'a>; - - #[inline] - fn deref(&self) -> &Self::Target { - &self.record - } -} - -impl DerefMut for ByteRecord<'_> { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.record - } -} - -impl PartialEq> for ByteRecord<'_> { - fn eq(&self, other: &ByteRecord<'_>) -> bool { - match (self.raw_data, other.raw_data) { - (Some(lhs), Some(rhs)) => lhs == rhs, - _ => self.record == other.record, - } - } -} - -impl<'a> From> for ByteRecord<'a> { - fn from(record: RecordRef<'a>) -> Self { - ByteRecord { - raw_data: None, - record, - } - } -} - -impl Hash for ByteRecord<'_> { - fn hash(&self, state: &mut H) { - match self.raw_data { - Some(data) => data.hash(state), - None => { - let mut writer = Cursor::new(Vec::::new()); - let _ = self.write_to(&mut writer); - let data = writer.into_inner(); - data.hash(state) - } - }; - } -} - -/// A PICA+ record, that guarantees valid UTF-8 data. -#[derive(Debug)] -pub struct StringRecord<'a>(ByteRecord<'a>); - -impl<'a> TryFrom> for StringRecord<'a> { - type Error = Utf8Error; - - fn try_from(record: ByteRecord<'a>) -> Result { - record.validate()?; - - Ok(StringRecord(record)) - } -} - -impl<'a> StringRecord<'a> { - /// Creates an PICA+ record from a byte slice. - /// - /// If an invalid record is given, an error is returned. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::StringRecord; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let record = StringRecord::from_bytes(b"003@ \x1f0a\x1e\n")?; - /// assert_eq!(record.iter().len(), 1); - /// - /// let result = - /// StringRecord::from_bytes(b"003@ \x1f0\x00\x9f\x1e\n"); - /// assert!(result.is_err()); - /// - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn from_bytes(data: &'a [u8]) -> Result { - Self::try_from(ByteRecord::from_bytes(data)?) - .map_err(|_| ParsePicaError::InvalidRecord(data.into())) - } -} - -impl<'a> Deref for StringRecord<'a> { - type Target = ByteRecord<'a>; - - #[inline] - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl DerefMut for StringRecord<'_> { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[cfg_attr(miri, ignore)] - #[quickcheck_macros::quickcheck] - fn parse_arbitrary_record(record: Record) -> bool { - let mut bytes = Vec::::new(); - let _ = record.write_to(&mut bytes); - - super::parse_record.parse(&bytes).is_ok() - } -} diff --git a/crates/pica-record-v1/src/tag.rs b/crates/pica-record-v1/src/tag.rs deleted file mode 100644 index 55191fa0c..000000000 --- a/crates/pica-record-v1/src/tag.rs +++ /dev/null @@ -1,275 +0,0 @@ -use std::fmt::{self, Display}; -use std::ops::{Deref, Index}; - -use bstr::{BStr, BString, ByteSlice}; -use winnow::token::one_of; -use winnow::{PResult, Parser}; - -use crate::{Level, ParsePicaError}; - -/// An immutable PICA+ tag. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct TagRef<'a>(&'a BStr); - -/// A mutable PICA+ tag. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Tag(BString); - -impl<'a> TagRef<'a> { - /// Create a new immutable PICA+ tag. - /// - /// # Panics - /// - /// This method panics if the tag is invalid. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::TagRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let tag = TagRef::new("003@"); - /// assert_eq!(tag, "003@"); - /// - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn new>(value: &'a B) -> Self { - Self::try_from(value.as_ref()).expect("valid tag") - } - - /// Creates an PICA+ tag from a byte slice. - /// - /// If an invalid tag is given, an error is returned. - /// - /// ```rust - /// use pica_record_v1::TagRef; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// assert!(TagRef::from_bytes(b"003@").is_ok()); - /// assert!(TagRef::from_bytes(b"!03@").is_err()); - /// Ok(()) - /// } - /// ``` - #[inline] - pub fn from_bytes(bytes: &'a [u8]) -> Result { - parse_tag - .parse(bytes) - .map_err(|_| ParsePicaError::InvalidTag) - } - - /// Returns the `Level` of the tag. - /// - /// ```rust - /// use pica_record_v1::{Level, TagRef}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// assert_eq!(TagRef::new("003@").level(), Level::Main); - /// assert_eq!(TagRef::new("101@").level(), Level::Local); - /// assert_eq!(TagRef::new("203@").level(), Level::Copy); - /// Ok(()) - /// } - /// ``` - pub fn level(&self) -> Level { - match self.0[0] { - b'0' => Level::Main, - b'1' => Level::Local, - b'2' => Level::Copy, - _ => unreachable!(), - } - } -} - -impl> PartialEq for TagRef<'_> { - #[inline] - fn eq(&self, other: &T) -> bool { - self.0 == other.as_ref() - } -} - -impl PartialEq for TagRef<'_> { - /// Compare a `TagRef` with a string slice. - /// - /// ```rust - /// use pica_record_v1::{Level, TagRef}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// assert_eq!(&TagRef::new("003@"), "003@"); - /// assert_eq!(TagRef::new("003@"), "003@"); - /// Ok(()) - /// } - /// ``` - #[inline] - fn eq(&self, other: &str) -> bool { - self.0 == other - } -} - -impl Deref for TagRef<'_> { - type Target = BStr; - - #[inline] - fn deref(&self) -> &Self::Target { - self.0 - } -} - -impl Index for TagRef<'_> { - type Output = u8; - - fn index(&self, index: usize) -> &Self::Output { - debug_assert!(index < self.0.len()); - &self.0[index] - } -} - -impl Display for TagRef<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -#[inline] -pub fn parse_tag<'a>(i: &mut &'a [u8]) -> PResult> { - ( - one_of([b'0', b'1', b'2']), - one_of(|c: u8| c.is_ascii_digit()), - one_of(|c: u8| c.is_ascii_digit()), - one_of(|c: u8| c.is_ascii_uppercase() || c == b'@'), - ) - .take() - .map(|tag| TagRef(ByteSlice::as_bstr(tag))) - .parse_next(i) -} - -impl<'a> TryFrom<&'a [u8]> for TagRef<'a> { - type Error = ParsePicaError; - - fn try_from(value: &'a [u8]) -> Result { - if parse_tag.parse(value).is_err() { - return Err(ParsePicaError::InvalidTag); - } - - Ok(Self(value.into())) - } -} - -impl Tag { - /// Create a new mutable PICA+ tag. - /// - /// # Panics - /// - /// This method panics if the tag is invalid. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::Tag; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let tag = Tag::new("003@"); - /// assert_eq!(tag, "003@"); - /// - /// Ok(()) - /// } - /// ``` - pub fn new>(value: &T) -> Self { - TagRef::new(value).into() - } - - /// Returns the tag as an byte slice. - /// - /// # Example - /// - /// ```rust - /// use pica_record_v1::Tag; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let tag = Tag::new("003@"); - /// assert_eq!(tag.as_bytes(), b"003@"); - /// - /// Ok(()) - /// } - /// ``` - pub fn as_bytes(&self) -> &[u8] { - &self.0 - } -} - -impl From> for Tag { - #[inline] - fn from(value: TagRef<'_>) -> Self { - Tag(value.0.into()) - } -} - -impl PartialEq> for Tag { - #[inline] - fn eq(&self, other: &TagRef<'_>) -> bool { - self.0 == other.0 - } -} - -impl PartialEq for TagRef<'_> { - #[inline] - fn eq(&self, other: &Tag) -> bool { - other.0 == self.0 - } -} - -impl> PartialEq for Tag { - fn eq(&self, other: &T) -> bool { - self.0 == other.as_ref() - } -} - -#[cfg(feature = "arbitrary")] -impl quickcheck::Arbitrary for Tag { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let p0 = *g.choose(b"012").unwrap(); - let p1 = *g.choose(b"0123456789").unwrap(); - let p2 = *g.choose(b"0123456789").unwrap(); - let p3 = *g.choose(b"ABCDEFGHIJKLMNOPQRSTUVWXYZ@").unwrap(); - - let inner = BString::from(&[p0, p1, p2, p3]); - - Tag(inner) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_tag() { - macro_rules! parse_success { - ($input:expr, $expected:expr) => { - assert_eq!( - super::parse_tag.parse($input).unwrap(), - $expected - ); - }; - } - - for tag in [b"003@", b"002@", b"123@", b"247C"] { - parse_success!(tag, TagRef(tag.as_bstr())); - } - - for tag in ["456@", "0A2A", "01AA", "01Aa", "003@0"] { - assert!(super::parse_tag.parse(tag.as_bytes()).is_err()); - } - } - - #[quickcheck_macros::quickcheck] - fn parse_arbitrary_tag(tag: Tag) -> bool { - super::parse_tag.parse(tag.as_bytes()).is_ok() - } -} diff --git a/crates/pica-record-v1/tests/field/mod.rs b/crates/pica-record-v1/tests/field/mod.rs deleted file mode 100644 index 78d4b54e9..000000000 --- a/crates/pica-record-v1/tests/field/mod.rs +++ /dev/null @@ -1,148 +0,0 @@ -use std::io::Cursor; - -use pica_record_v1::*; - -#[test] -fn field_ref_new() { - let subfield = SubfieldRef::try_from(('9', "040269019")).unwrap(); - let field = FieldRef::new("041A", None, vec![('9', "040269019")]); - - assert_eq!(field.tag(), b"041A"); - assert_eq!(field.occurrence(), None); - assert_eq!(field.subfields(), &[subfield]); -} - -#[test] -#[should_panic] -fn field_ref_new_panic() { - let _field = FieldRef::new("041A", None, vec![('!', "040269019")]); -} - -#[test] -fn field_ref_from_bytes() { - let field = - FieldRef::from_bytes(b"001A \x1f01140:20-11-22\x1e").unwrap(); - assert_eq!(field.tag(), b"001A"); -} - -#[test] -fn field_ref_try_from() { - let bytes = "021A \x1faGrundlagen der Informationswissenschaft\x1e"; - let field = FieldRef::try_from(bytes.as_bytes()).unwrap(); - assert_eq!(field.subfields().len(), 1); - assert_eq!(field.tag(), b"021A"); - - let bytes = "02!A \x1fa123\x1e"; - let err = FieldRef::try_from(bytes.as_bytes()).unwrap_err(); - assert_eq!(err, ParsePicaError::InvalidField); -} - -#[test] -fn field_ref_tag() { - let field = FieldRef::new("041A", None, vec![('9', "040269019")]); - assert_eq!(field.tag(), b"041A"); -} - -#[test] -fn field_ref_occurrence() { - let field = FieldRef::new("041A", None, vec![]); - assert_eq!(field.occurrence(), None); - - let occurrence = OccurrenceRef::new("01"); - let field = FieldRef::new("041A", Some("01"), vec![]); - assert_eq!(field.occurrence(), Some(&occurrence)); -} - -#[test] -fn field_ref_subfields() { - let subfield = SubfieldRef::try_from(('9', "040269019")).unwrap(); - let field = FieldRef::new("041A", None, vec![('9', "040269019")]); - assert_eq!(field.subfields(), &[subfield]); - - let field = FieldRef::new("041A", None, vec![]); - assert!(field.subfields().is_empty()); -} - -#[test] -fn field_ref_find() { - let subfield = SubfieldRef::try_from(('9', "040269019")).unwrap(); - let field = FieldRef::new("041A", None, vec![('9', "040269019")]); - - assert!(field.find(|subfield| subfield.code() == '8').is_none()); - assert_eq!( - field.find(|subfield| subfield.code() == '9').unwrap(), - &subfield - ); -} - -#[test] -fn field_ref_contains() { - let field = FieldRef::new( - "041A", - None, - vec![ - ('9', "040269019"), - ('a', "abc"), - ('a', "def"), - ('a', "xyz"), - ], - ); - - assert!(!field.contains('y')); - assert!(field.contains('9')); - assert!(field.contains('a')); -} - -#[test] -fn field_ref_validate() { - let field = FieldRef::from_bytes(b"019@ \x1fXA-DE-BE\x1e").unwrap(); - assert!(field.validate().is_ok()); - - let field = - FieldRef::from_bytes(b"019@ \x1f0\x00\x9F\x1e").unwrap(); - assert!(field.validate().is_err()); -} - -#[test] -fn field_ref_write_to() { - let mut writer = Cursor::new(Vec::::new()); - let field = - FieldRef::from_bytes(b"033A \x1fnDe Gruyter Saur\x1e").unwrap(); - let _ = field.write_to(&mut writer); - assert_eq!(writer.into_inner(), b"033A \x1fnDe Gruyter Saur\x1e"); - - let mut writer = Cursor::new(Vec::::new()); - let field = - FieldRef::from_bytes(b"203@/01 \x1f0850439868\x1e").unwrap(); - let _ = field.write_to(&mut writer); - assert_eq!(writer.into_inner(), b"203@/01 \x1f0850439868\x1e"); -} - -#[test] -fn field_ref_level() { - let field = FieldRef::from_bytes(b"011@ \x1fa2022\x1e").unwrap(); - assert_eq!(field.level(), Level::Main); - - let field = FieldRef::from_bytes(b"101@ \x1fa1\x1e").unwrap(); - assert_eq!(field.level(), Level::Local); - - let field = - FieldRef::from_bytes(b"203@/01 \x1f0850439868\x1e").unwrap(); - assert_eq!(field.level(), Level::Copy); -} - -#[test] -fn field_ref_into_iter() { - let field = FieldRef::from_bytes(b"002@ \x1f0Oaf\x1e").unwrap(); - let mut iter = field.into_iter(); - - assert_eq!(iter.next(), Some(&field)); - assert_eq!(iter.next(), None); -} - -#[test] -fn field_from_ref() { - let field_ref = - FieldRef::from_bytes(b"001U \x1f0utf8\x1e").unwrap(); - let _field = Field::from(field_ref); -} diff --git a/crates/pica-record-v1/tests/integration_tests.rs b/crates/pica-record-v1/tests/integration_tests.rs deleted file mode 100644 index 1c98ec5fa..000000000 --- a/crates/pica-record-v1/tests/integration_tests.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod field; -mod level; -mod occurrence; -mod reader; -mod record; -mod tag; -mod writer; diff --git a/crates/pica-record-v1/tests/level/mod.rs b/crates/pica-record-v1/tests/level/mod.rs deleted file mode 100644 index fe5b0711c..000000000 --- a/crates/pica-record-v1/tests/level/mod.rs +++ /dev/null @@ -1,16 +0,0 @@ -use pica_record_v1::Level; - -#[test] -fn level_from_str() { - assert_eq!("main".parse::().unwrap(), Level::Main); - assert_eq!("local".parse::().unwrap(), Level::Local); - assert_eq!("copy".parse::().unwrap(), Level::Copy); - - let err = "master".parse::().unwrap_err(); - assert_eq!(err.to_string(), "invalid level 'master'"); -} - -#[test] -fn level_default() { - assert_eq!(Level::default(), Level::Main); -} diff --git a/crates/pica-record-v1/tests/occurrence/mod.rs b/crates/pica-record-v1/tests/occurrence/mod.rs deleted file mode 100644 index 45cd5c9f3..000000000 --- a/crates/pica-record-v1/tests/occurrence/mod.rs +++ /dev/null @@ -1,96 +0,0 @@ -use std::io::Cursor; - -use pica_record_v1::{Occurrence, OccurrenceRef, ParsePicaError}; - -#[test] -fn occurrence_ref_new() { - assert_eq!(OccurrenceRef::new("00"), "00"); - assert_eq!(OccurrenceRef::new("000"), "000"); - assert_eq!(OccurrenceRef::new("001"), "001"); - assert_eq!(OccurrenceRef::new("01"), "01"); - assert_eq!(OccurrenceRef::new("99"), "99"); - assert_eq!(OccurrenceRef::new("999"), "999"); -} - -#[test] -#[should_panic] -fn occurrence_ref_new_panic() { - let _ = OccurrenceRef::new("/0A"); -} - -#[test] -fn occurrence_ref_from_bytes() { - assert_eq!(OccurrenceRef::from_bytes(b"/00").unwrap(), "00"); - assert_eq!(OccurrenceRef::from_bytes(b"/000").unwrap(), "000"); - assert_eq!(OccurrenceRef::from_bytes(b"/001").unwrap(), "001"); - assert_eq!(OccurrenceRef::from_bytes(b"/01").unwrap(), "01"); - assert_eq!(OccurrenceRef::from_bytes(b"/99").unwrap(), "99"); - assert_eq!(OccurrenceRef::from_bytes(b"/999").unwrap(), "999"); - - let err = OccurrenceRef::from_bytes(b"/0A").unwrap_err(); - assert_eq!(err, ParsePicaError::InvalidOccurrence); -} - -#[test] -fn occurrence_ref_try_from() { - for o in ["00", "000", "001", "01", "99", "999"] { - assert_eq!(OccurrenceRef::try_from(o.as_bytes()).unwrap(), o); - } - - let err = OccurrenceRef::try_from("0A".as_bytes()).unwrap_err(); - assert_eq!(err, ParsePicaError::InvalidOccurrence); -} - -#[test] -fn occurrence_ref_as_bytes() { - let occurrence = OccurrenceRef::new("01"); - assert_eq!(occurrence.as_bytes(), b"01"); -} - -#[test] -fn occurrence_ref_from_unchecked() { - let occurrence = OccurrenceRef::from_unchecked("01".into()); - assert_eq!(occurrence, "01"); -} - -#[test] -fn occurrence_ref_write_to() { - let mut writer = Cursor::new(Vec::::new()); - let occurrence = OccurrenceRef::new("001"); - let _ = occurrence.write_to(&mut writer); - assert_eq!(writer.into_inner(), b"/001"); -} - -#[test] -fn occurrence_ref_partial_eq() { - let occurrence = OccurrenceRef::new("001"); - assert_eq!(occurrence, b"001"); - assert_eq!(occurrence, "001"); -} - -#[test] -fn occurrence_ref_to_string() { - let occurrence = OccurrenceRef::new("001"); - assert_eq!(occurrence.to_string(), "/001".to_string()); -} - -#[test] -fn occurrence_as_ref() { - let occurrence_ref = OccurrenceRef::new("001"); - let occurrence = Occurrence::from(occurrence_ref); - assert_eq!(occurrence.as_ref(), b"001"); -} - -#[test] -fn occurrence_as_bytes() { - let occurrence: Occurrence = OccurrenceRef::new("01").into(); - assert_eq!(occurrence.as_bytes(), b"01"); -} - -#[test] -fn occurrence_write_to() { - let mut writer = Cursor::new(Vec::::new()); - let occurrence: Occurrence = OccurrenceRef::new("001").into(); - let _ = occurrence.write_to(&mut writer); - assert_eq!(writer.into_inner(), b"/001"); -} diff --git a/crates/pica-record-v1/tests/reader/mod.rs b/crates/pica-record-v1/tests/reader/mod.rs deleted file mode 100644 index 7cfe8e5a3..000000000 --- a/crates/pica-record-v1/tests/reader/mod.rs +++ /dev/null @@ -1,48 +0,0 @@ -use std::io::Cursor; -use std::path::PathBuf; - -use pica_record_v1::io::{ReaderBuilder, RecordsIterator}; - -#[test] -fn reader_builder_limit() { - let data = Cursor::new( - b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n\ - 003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n", - ); - - let mut reader = - ReaderBuilder::new().limit(1).from_reader(data, None); - - let mut count = 0; - while let Some(result) = reader.next() { - let _record = result.unwrap(); - count += 1; - } - - assert_eq!(count, 1); -} - -#[test] -fn reader_builder_from_reader() { - let data = - Cursor::new(b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"); - let mut reader = ReaderBuilder::new().from_reader(data, None); - assert!(reader.next().is_some()); - assert!(reader.next().is_none()); -} - -#[test] -fn reader_builder_from_path() { - let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join("../../tests/data/DUMP.dat.gz"); - - let mut reader = ReaderBuilder::new().from_path(path).unwrap(); - let mut count = 0; - while let Some(result) = reader.next() { - if result.is_ok() { - count += 1; - } - } - - assert_eq!(count, 12); -} diff --git a/crates/pica-record-v1/tests/record/mod.rs b/crates/pica-record-v1/tests/record/mod.rs deleted file mode 100644 index 9e2a476f4..000000000 --- a/crates/pica-record-v1/tests/record/mod.rs +++ /dev/null @@ -1,209 +0,0 @@ -use std::fmt::Write; -use std::io::Cursor; - -use pica_record_v1::{ - ByteRecord, ParsePicaError, Record, RecordRef, StringRecord, -}; - -#[test] -fn record_ref_new() { - let record = RecordRef::new(vec![ - ("003@", None, vec![('0', "123237025")]), - ("028A", None, vec![('d', "Rainer"), ('a', "Kuhlen")]), - ]); - - assert_eq!(record.iter().len(), 2); -} - -#[test] -#[should_panic] -fn record_ref_new_panic() { - let _record = - RecordRef::new(vec![("00!@", None, vec![('0', "123237025")])]); -} - -#[test] -fn record_ref_from_bytes() { - let record = - RecordRef::from_bytes(b"041R \x1faProf. Dr.\x1f4akad\x1e\n") - .unwrap(); - assert_eq!(record.iter().len(), 1); - - let err = - RecordRef::from_bytes(b"041R \x1faProf. Dr.\x1f!akad\x1e\n") - .unwrap_err(); - assert!(matches!(err, ParsePicaError::InvalidRecord(_))); -} - -#[test] -fn record_ref_is_empty() { - let record = - RecordRef::from_bytes(b"041R \x1faProf. Dr.\x1f4akad\x1e\n") - .unwrap(); - assert!(!record.is_empty()); - - let fields: Vec<(&str, Option<&str>, Vec<(char, &str)>)> = vec![]; - let record = RecordRef::new(fields); - assert!(record.is_empty()); -} - -#[test] -fn record_ref_iter() { - let record = - RecordRef::from_bytes(b"041R \x1faProf. Dr.\x1f4akad\x1e\n") - .unwrap(); - let mut iter = record.iter(); - - assert!(iter.next().is_some()); - assert!(iter.next().is_none()); -} - -#[test] -fn record_ref_retain() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let mut record = RecordRef::from_bytes(bytes).unwrap(); - assert_eq!(record.iter().len(), 2); - - record.retain(|field| field.tag() == b"012A"); - assert_eq!(record.iter().len(), 1); - - record.retain(|field| field.tag() == b"003@"); - assert!(record.is_empty()); -} - -#[test] -fn record_ref_validate() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let record = RecordRef::from_bytes(bytes).unwrap(); - assert!(record.validate().is_ok()); - - let bytes = b"003@ \x1f0\x00\x9F\x1e012A \x1fa123\x1e\n"; - let record = RecordRef::from_bytes(bytes).unwrap(); - assert!(record.validate().is_err()); -} - -#[test] -fn record_ref_write_to() { - let mut writer = Cursor::new(Vec::::new()); - let record = RecordRef::from_bytes(b"003@ \x1f0123\x1e\n").unwrap(); - let _ = record.write_to(&mut writer); - - assert_eq!(writer.into_inner(), b"003@ \x1f0123\x1e\n"); -} - -#[test] -fn record_from_ref() { - let record_ref = - RecordRef::from_bytes(b"003@ \x1f0123\x1e\n").unwrap(); - let _record = Record::from(record_ref); -} - -#[test] -fn byte_record_from_bytes() { - let record = - ByteRecord::from_bytes(b"041R \x1faProf. Dr.\x1f4akad\x1e\n") - .unwrap(); - assert_eq!(record.iter().len(), 1); - - let err = - ByteRecord::from_bytes(b"041R \x1faProf. Dr.\x1f!akad\x1e\n") - .unwrap_err(); - assert!(matches!(err, ParsePicaError::InvalidRecord(_))); -} - -#[test] -fn byte_record_write_to() { - let mut writer = Cursor::new(Vec::::new()); - let record = - ByteRecord::from_bytes(b"003@ \x1f0123\x1e\n").unwrap(); - let _ = record.write_to(&mut writer); - - assert_eq!(writer.into_inner(), b"003@ \x1f0123\x1e\n"); -} - -#[test] -fn byte_record_retain() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let mut record = ByteRecord::from_bytes(bytes).unwrap(); - assert_eq!(record.iter().len(), 2); - - record.retain(|field| field.tag() == b"012A"); - assert_eq!(record.iter().len(), 1); - - record.retain(|field| field.tag() == b"003@"); - assert!(record.is_empty()); -} - -#[test] -fn byte_record_hash() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let record = ByteRecord::from_bytes(bytes).unwrap(); - - let hash = - record.sha256().iter().fold(String::new(), |mut out, b| { - let _ = write!(out, "{b:02x}"); - out - }); - - let expected = "f9bf144682fe03f32b2ad2d4048c84a1\ - 2a4d58cb557dd8f44066ae7d81cebd5c"; - assert_eq!(hash, expected); -} - -#[test] -fn byte_record_from_ref() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let record_ref = RecordRef::from_bytes(bytes).unwrap(); - let record = ByteRecord::from(record_ref); - assert_eq!(record.iter().len(), 2); -} - -#[test] -fn string_record_try_from() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let byte_record = ByteRecord::from_bytes(bytes).unwrap(); - let record = StringRecord::try_from(byte_record).unwrap(); - assert_eq!(record.iter().len(), 2); -} - -#[test] -fn string_record_from_bytes() { - let record = - StringRecord::from_bytes(b"041R \x1faProf. Dr.\x1f4akad\x1e\n") - .unwrap(); - assert_eq!(record.iter().len(), 1); - - let err = - StringRecord::from_bytes(b"041R \x1faProf. Dr.\x1f!akad\x1e\n") - .unwrap_err(); - assert!(matches!(err, ParsePicaError::InvalidRecord(_))); -} - -#[test] -fn string_record_retain() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let mut record = StringRecord::from_bytes(bytes).unwrap(); - assert_eq!(record.iter().len(), 2); - - record.retain(|field| field.tag() == b"012A"); - assert_eq!(record.iter().len(), 1); - - record.retain(|field| field.tag() == b"003@"); - assert!(record.is_empty()); -} - -#[test] -fn string_record_hash() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let record = StringRecord::from_bytes(bytes).unwrap(); - - let hash = - record.sha256().iter().fold(String::new(), |mut out, b| { - let _ = write!(out, "{b:02x}"); - out - }); - - let expected = "f9bf144682fe03f32b2ad2d4048c84a1\ - 2a4d58cb557dd8f44066ae7d81cebd5c"; - assert_eq!(hash, expected); -} diff --git a/crates/pica-record-v1/tests/tag/mod.rs b/crates/pica-record-v1/tests/tag/mod.rs deleted file mode 100644 index 4b3789e41..000000000 --- a/crates/pica-record-v1/tests/tag/mod.rs +++ /dev/null @@ -1,98 +0,0 @@ -use pica_record_v1::{Level, ParsePicaError, Tag, TagRef}; - -#[test] -fn tag_ref_new() { - assert_eq!(TagRef::new(b"044H"), "044H"); - assert_eq!(TagRef::new("044H"), "044H"); -} - -#[test] -#[should_panic] -fn tag_ref_new_panic() { - let _tag = TagRef::new("303@"); -} - -#[test] -fn tag_ref_from_bytes() { - let tag = TagRef::from_bytes(b"003@").unwrap(); - assert_eq!(tag, "003@"); - - let err = TagRef::from_bytes(b"303@").unwrap_err(); - assert_eq!(err, ParsePicaError::InvalidTag); -} - -#[test] -fn tag_ref_try_from() { - let tag = TagRef::try_from("003@".as_bytes()).unwrap(); - assert_eq!(tag, "003@"); - - let err = TagRef::try_from("303@".as_bytes()).unwrap_err(); - assert_eq!(err, ParsePicaError::InvalidTag); -} - -#[test] -fn tag_ref_level() { - assert_eq!(TagRef::new("003@").level(), Level::Main); - assert_eq!(TagRef::new("101@").level(), Level::Local); - assert_eq!(TagRef::new("203@").level(), Level::Copy); -} - -#[test] -fn tag_ref_index() { - let tag = TagRef::new("012A"); - assert_eq!(tag[0], b'0'); - assert_eq!(tag[1], b'1'); - assert_eq!(tag[2], b'2'); - assert_eq!(tag[3], b'A'); -} - -#[test] -#[should_panic] -fn tag_ref_index_panic() { - let tag = TagRef::new("012A"); - assert_eq!(tag[4], b'A'); -} - -#[test] -fn tag_ref_to_string() { - let tag = TagRef::new("041A"); - assert_eq!(tag.to_string(), "041A".to_string()); -} - -#[test] -fn tag_new() { - assert_eq!(Tag::new(b"044H"), "044H"); - assert_eq!(Tag::new("044H"), "044H"); -} - -#[test] -fn tag_as_bytes() { - assert_eq!(Tag::new("044H").as_bytes(), b"044H"); -} - -#[test] -fn tag_from_tag_ref() { - let tag_ref = TagRef::new("041A"); - let tag = Tag::new("041A"); - - assert_eq!(Tag::from(tag_ref), tag); -} - -#[test] -fn tag_partial_eq() { - let tag_ref = TagRef::new("041A"); - let tag = Tag::new("041A"); - - assert_eq!(tag_ref, b"041A"); - assert_eq!(tag, b"041A"); - assert_eq!(tag, tag_ref); - assert_eq!(tag_ref, tag); - - let tag_ref = TagRef::new("041A"); - let tag = Tag::new("044H"); - - assert_ne!(tag_ref, b"044H"); - assert_ne!(tag, b"041A"); - assert_ne!(tag, tag_ref); - assert_ne!(tag_ref, tag); -} diff --git a/crates/pica-record-v1/tests/writer/mod.rs b/crates/pica-record-v1/tests/writer/mod.rs deleted file mode 100644 index 26aff57d4..000000000 --- a/crates/pica-record-v1/tests/writer/mod.rs +++ /dev/null @@ -1,109 +0,0 @@ -use pica_record_v1::io::{ - Reader, ReaderBuilder, RecordsIterator, WriterBuilder, -}; -use pica_record_v1::ByteRecord; -use tempfile::NamedTempFile; - -#[test] -fn writer_builder_from_path() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let record = ByteRecord::from_bytes(bytes).unwrap(); - - let tempfile = NamedTempFile::new().unwrap(); - let mut writer = - WriterBuilder::new().from_path(tempfile.path()).unwrap(); - assert!(writer.write_byte_record(&record).is_ok()); - assert!(writer.finish().is_ok()); - - let mut reader: Reader<_> = - ReaderBuilder::new().from_path(tempfile.path()).unwrap(); - - let mut count = 0; - while let Some(result) = reader.next() { - assert_eq!(result.unwrap(), record); - count += 1; - } - - assert_eq!(count, 1); -} - -#[test] -fn writer_builder_from_path_or_stdout() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let record = ByteRecord::from_bytes(bytes).unwrap(); - - let tempfile = NamedTempFile::new().unwrap(); - let mut writer = WriterBuilder::new() - .from_path_or_stdout(Some(tempfile.path())) - .unwrap(); - assert!(writer.write_byte_record(&record).is_ok()); - assert!(writer.finish().is_ok()); - - let mut reader: Reader<_> = - ReaderBuilder::new().from_path(tempfile.path()).unwrap(); - - let mut count = 0; - while let Some(result) = reader.next() { - assert_eq!(result.unwrap(), record); - count += 1; - } - - assert_eq!(count, 1); -} - -#[test] -fn writer_builder_gzip() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let record = ByteRecord::from_bytes(bytes).unwrap(); - - let tempfile = NamedTempFile::new().unwrap(); - let mut path = tempfile.path().to_str().unwrap().to_string(); - path.push_str(".gz"); - - let mut writer = - WriterBuilder::new().gzip(true).from_path(&path).unwrap(); - assert!(writer.write_byte_record(&record).is_ok()); - assert!(writer.finish().is_ok()); - - let mut reader: Reader<_> = - ReaderBuilder::new().from_path(&path).unwrap(); - - let mut count = 0; - while let Some(result) = reader.next() { - assert_eq!(result.unwrap(), record); - count += 1; - } - - assert_eq!(count, 1); -} - -#[test] -fn writer_builder_append() { - let bytes = b"003@ \x1f0123456789X\x1e012A \x1fa123\x1e\n"; - let record = ByteRecord::from_bytes(bytes).unwrap(); - - let tempfile = NamedTempFile::new().unwrap(); - - let mut writer = - WriterBuilder::new().from_path(tempfile.path()).unwrap(); - assert!(writer.write_byte_record(&record).is_ok()); - assert!(writer.finish().is_ok()); - - let mut writer = WriterBuilder::new() - .append(true) - .from_path(tempfile.path()) - .unwrap(); - assert!(writer.write_byte_record(&record).is_ok()); - assert!(writer.finish().is_ok()); - - let mut reader: Reader<_> = - ReaderBuilder::new().from_path(tempfile.path()).unwrap(); - - let mut count = 0; - while let Some(result) = reader.next() { - assert_eq!(result.unwrap(), record); - count += 1; - } - - assert_eq!(count, 2); -} diff --git a/crates/pica-select/Cargo.toml b/crates/pica-select/Cargo.toml deleted file mode 100644 index ef30c4f41..000000000 --- a/crates/pica-select/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "pica-select" -version.workspace = true -authors.workspace = true -license.workspace = true -readme.workspace = true -keywords.workspace = true -edition.workspace = true -rust-version.workspace = true - -[dependencies] -bstr = { workspace = true } -pica-matcher = { workspace = true } -pica-path = { workspace = true } -pica-record-v1 = { workspace = true } -thiserror = { workspace = true } -winnow = { workspace = true, features = ["simd"] } - -[dev-dependencies] -bstr = { workspace = true } -anyhow = { workspace = true } diff --git a/crates/pica-select/fuzz/.gitignore b/crates/pica-select/fuzz/.gitignore deleted file mode 100644 index 1a45eee77..000000000 --- a/crates/pica-select/fuzz/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -target -corpus -artifacts -coverage diff --git a/crates/pica-select/fuzz/Cargo.toml b/crates/pica-select/fuzz/Cargo.toml deleted file mode 100644 index 66bc825a0..000000000 --- a/crates/pica-select/fuzz/Cargo.toml +++ /dev/null @@ -1,27 +0,0 @@ -[package] -name = "pica-select-fuzz" -version = "0.0.0" -publish = false -edition = "2021" - -[package.metadata] -cargo-fuzz = true - -[dependencies] -libfuzzer-sys = "0.4" - -[dependencies.pica-select] -path = ".." - -# Prevent this from interfering with workspaces -[workspace] -members = ["."] - -[profile.release] -debug = 1 - -[[bin]] -name = "fuzz-query" -path = "fuzz_targets/fuzz_query.rs" -test = false -doc = false diff --git a/crates/pica-select/fuzz/fuzz_targets/fuzz_query.rs b/crates/pica-select/fuzz/fuzz_targets/fuzz_query.rs deleted file mode 100644 index ea0e49cd5..000000000 --- a/crates/pica-select/fuzz/fuzz_targets/fuzz_query.rs +++ /dev/null @@ -1,8 +0,0 @@ -#![no_main] - -use libfuzzer_sys::fuzz_target; -use pica_select::Query; - -fuzz_target!(|data: &[u8]| { - let _ = Query::try_from(data); -}); diff --git a/crates/pica-select/fuzz/regressions/crash-1065da7d802c4cec5ff86325a5629a0e4736191d b/crates/pica-select/fuzz/regressions/crash-1065da7d802c4cec5ff86325a5629a0e4736191d deleted file mode 100644 index dddbc0d14..000000000 Binary files a/crates/pica-select/fuzz/regressions/crash-1065da7d802c4cec5ff86325a5629a0e4736191d and /dev/null differ diff --git a/crates/pica-select/src/lib.rs b/crates/pica-select/src/lib.rs deleted file mode 100644 index 9e07e6b7c..000000000 --- a/crates/pica-select/src/lib.rs +++ /dev/null @@ -1,733 +0,0 @@ -use std::fmt::Debug; -use std::ops::{Add, Deref, Mul}; -use std::str::FromStr; - -use bstr::ByteSlice; -use pica_matcher::MatcherOptions; -use pica_path::{parse_path, Path}; -use pica_record_v1::RecordRef; -use thiserror::Error; -use winnow::ascii::{multispace0, multispace1}; -use winnow::combinator::{alt, delimited, preceded, repeat, separated}; -use winnow::error::{ContextError, ParserError}; -use winnow::prelude::*; -use winnow::stream::{AsChar, Compare, Stream, StreamIsPartial}; -use winnow::token::take_till; - -#[derive(Debug, Error)] -#[error("invalid selector, got `{0}`")] -pub struct ParseSelectorError(pub String); - -#[derive(Debug)] -pub enum QueryFragment { - Path(Path), - Const(String), -} - -impl From for QueryFragment { - fn from(value: Path) -> Self { - Self::Path(value) - } -} - -impl From for QueryFragment { - fn from(value: String) -> Self { - Self::Const(value) - } -} - -#[derive(Debug)] -pub struct Query(Vec); - -#[derive(Debug, Error)] -#[error("invalid query, got `{0}`")] -pub struct ParseQueryError(pub String); - -impl Query { - /// Create a new select query from a string slice. - /// - /// # Panics - /// - /// This methods panics on invalid query expressions. - /// - /// # Example - /// - /// ```rust - /// use pica_select::Query; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let _query = - /// Query::new("003@.0, 012A{ (a,b) | a? && b == 'foo' }"); - /// Ok(()) - /// } - /// ``` - pub fn new>(data: &T) -> Self { - Self::try_from(data.as_ref()).expect("valid query expression.") - } -} - -impl Deref for Query { - type Target = Vec; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl TryFrom<&[u8]> for Query { - type Error = ParseQueryError; - - fn try_from(value: &[u8]) -> Result { - parse_query.parse(value).map_err(|_| { - let value = value.to_str_lossy().to_string(); - ParseQueryError(value) - }) - } -} - -impl FromStr for Query { - type Err = ParseQueryError; - fn from_str(s: &str) -> Result { - Self::try_from(s.as_bytes()) - } -} - -impl From for Query { - #[inline] - fn from(path: Path) -> Self { - Self(vec![path.into()]) - } -} - -#[derive(Debug, Copy, Clone)] -enum Quotes { - Single, - Double, -} - -fn parse_literal( - quotes: Quotes, -) -> impl Parser::Slice, E> -where - I: Stream + StreamIsPartial, - ::Token: AsChar, - E: ParserError, -{ - match quotes { - Quotes::Single => take_till(1.., ['\'', '\\']), - Quotes::Double => take_till(1.., ['"', '\\']), - } -} - -fn parse_escaped_char(quotes: Quotes) -> impl Parser -where - I: Stream + StreamIsPartial + Compare, - ::Token: AsChar + Clone, - E: ParserError, -{ - let v = match quotes { - Quotes::Single => '\'', - Quotes::Double => '"', - }; - - preceded( - '\\', - alt(( - 'n'.value('\n'), - 'r'.value('\r'), - 't'.value('\t'), - 'b'.value('\u{08}'), - 'f'.value('\u{0C}'), - '\\'.value('\\'), - '/'.value('/'), - v.value(v), - )), - ) -} - -#[derive(Debug, Clone)] -enum StringFragment<'a> { - Literal(&'a [u8]), - EscapedChar(char), - EscapedWs, -} - -fn parse_quoted_fragment<'a, E: ParserError<&'a [u8]>>( - quotes: Quotes, -) -> impl Parser<&'a [u8], StringFragment<'a>, E> { - use StringFragment::*; - - alt(( - parse_literal::<&'a [u8], E>(quotes).map(Literal), - parse_escaped_char::<&'a [u8], E>(quotes).map(EscapedChar), - preceded('\\', multispace1).value(EscapedWs), - )) -} - -fn parse_quoted_string<'a, E>( - quotes: Quotes, -) -> impl Parser<&'a [u8], Vec, E> -where - E: ParserError<&'a [u8]>, -{ - use StringFragment::*; - - let string_builder = repeat( - 0.., - parse_quoted_fragment::(quotes), - ) - .fold(Vec::new, |mut acc, fragment| { - match fragment { - Literal(s) => acc.extend_from_slice(s), - EscapedChar(c) => acc.push(c as u8), - EscapedWs => {} - } - acc - }); - - match quotes { - Quotes::Single => delimited('\'', string_builder, '\''), - Quotes::Double => delimited('"', string_builder, '"'), - } -} - -#[inline] -fn parse_string_single_quoted(i: &mut &[u8]) -> PResult> { - parse_quoted_string::(Quotes::Single).parse_next(i) -} - -#[inline] -fn parse_string_double_quoted(i: &mut &[u8]) -> PResult> { - parse_quoted_string::(Quotes::Double).parse_next(i) -} - -pub(crate) fn parse_string(i: &mut &[u8]) -> PResult> { - alt((parse_string_single_quoted, parse_string_double_quoted)) - .parse_next(i) -} - -fn parse_query_fragment(i: &mut &[u8]) -> PResult { - alt(( - parse_path.map(QueryFragment::Path), - parse_string - .verify_map(|value| String::from_utf8(value).ok()) - .map(QueryFragment::Const), - )) - .parse_next(i) -} - -fn parse_query(i: &mut &[u8]) -> PResult { - separated( - 1.., - parse_query_fragment, - delimited(multispace0, ',', multispace0), - ) - .map(Query) - .parse_next(i) -} - -#[derive(Debug, Default, PartialEq, Eq)] -pub struct Outcome(Vec>); - -impl Outcome { - pub fn one() -> Self { - Self(vec![vec!["".to_string()]]) - } - - pub fn ones(n: usize) -> Self { - Self(vec![std::iter::repeat("".to_string()).take(n).collect()]) - } - - pub fn squash(self, sep: &str) -> Self { - let flattened = - self.0.into_iter().flatten().collect::>(); - - if flattened.len() > 1 - && !sep.is_empty() - && flattened.iter().any(|item| item.contains(sep)) - { - eprintln!( - "WARNING: A subfield value contains \ - squash separator '{sep}'." - ); - } - - Self(vec![vec![flattened.join(sep)]]) - } - - pub fn merge(self, sep: &str) -> Self { - let result = self.0.clone().into_iter().reduce(|acc, e| { - let mut result = Vec::new(); - - for i in 0..acc.len() { - let mut value = String::from(&acc[i]); - value.push_str(sep); - value.push_str(&e[i]); - result.push(value) - } - - result - }); - - Self(vec![result.unwrap()]) - } - - pub fn into_inner(self) -> Vec> { - self.0 - } -} - -impl Deref for Outcome { - type Target = Vec>; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl From> for Outcome { - fn from(values: Vec) -> Self { - Self(values.into_iter().map(|v| vec![v.to_string()]).collect()) - } -} - -impl Add for Outcome { - type Output = Outcome; - - fn add(self, rhs: Self) -> Self::Output { - let mut result = self.0; - result.extend(rhs.0); - Self(result) - } -} - -impl Mul for Outcome { - type Output = Outcome; - - fn mul(self, rhs: Self) -> Self::Output { - if self.is_empty() { - return rhs; - } - - if rhs.is_empty() { - return self; - } - - let mut rows = vec![]; - let xs = self.0; - let ys = rhs.0; - - for x in xs.into_iter() { - for y in ys.clone().into_iter() { - let mut row = x.clone(); - row.extend(y.clone()); - rows.push(row); - } - } - - Self(rows) - } -} - -/// Options and flags which can be used to configure a matcher. -#[derive(Debug)] -pub struct QueryOptions { - pub case_ignore: bool, - pub strsim_threshold: f64, - pub separator: String, - pub squash: bool, - pub merge: bool, -} - -impl Default for QueryOptions { - fn default() -> Self { - Self { - case_ignore: false, - strsim_threshold: 0.8, - separator: "|".into(), - squash: false, - merge: false, - } - } -} - -impl QueryOptions { - /// Create new matcher flags. - pub fn new() -> Self { - Self::default() - } - - /// Whether to ignore case when comparing strings or not. - pub fn case_ignore(mut self, yes: bool) -> Self { - self.case_ignore = yes; - self - } - - /// Set the similarity threshold for the similar operator (`=*`). - pub fn strsim_threshold(mut self, threshold: f64) -> Self { - self.strsim_threshold = threshold; - self - } - - /// Whether to squash subfield values or not. - pub fn squash(mut self, yes: bool) -> Self { - self.squash = yes; - self - } - - /// Whether to merge repeated fields or not. - pub fn merge(mut self, yes: bool) -> Self { - self.merge = yes; - self - } - - /// Set the squash or merge separator. - pub fn separator>(mut self, sep: S) -> Self { - self.separator = sep.into(); - self - } -} - -impl From<&QueryOptions> for MatcherOptions { - fn from(options: &QueryOptions) -> Self { - Self::new() - .strsim_threshold(options.strsim_threshold) - .case_ignore(options.case_ignore) - } -} - -pub trait QueryExt { - fn query(&self, query: &Query, options: &QueryOptions) -> Outcome; -} - -impl QueryExt for RecordRef<'_> { - /// Performs a query against a PICA+ record. - /// - /// # Example - /// - /// ```rust - /// use std::str::FromStr; - /// - /// use pica_record_v1::RecordRef; - /// use pica_select::{Outcome, Query, QueryExt}; - /// - /// # fn main() { example().unwrap(); } - /// fn example() -> anyhow::Result<()> { - /// let query = Query::new("003@.0, 012A{(a,b) | a == 'abc'}"); - /// let record = RecordRef::from_bytes( - /// b"003@ \x1f01234\x1e012A \x1faabc\x1e\n", - /// )?; - /// - /// assert_eq!( - /// record.query(&query, &Default::default()).into_inner(), - /// vec![vec![ - /// "1234".to_string(), - /// "abc".to_string(), - /// "".to_string() - /// ]] - /// ); - /// - /// Ok(()) - /// } - /// ``` - fn query(&self, query: &Query, options: &QueryOptions) -> Outcome { - let mut outcomes = vec![]; - - for fragment in query.iter() { - let outcome = match fragment { - QueryFragment::Const(value) => { - Outcome(vec![vec![value.to_owned()]]) - } - QueryFragment::Path(path) => { - let mut outcome = self - .iter() - .filter(|field| { - path.tag_matcher().is_match(field.tag()) - && *path.occurrence_matcher() - == field.occurrence() - }) - .filter(|field| { - if let Some(m) = path.subfield_matcher() { - m.is_match( - field.subfields(), - &options.into(), - ) - } else { - true - } - }) - .map(|field| { - path.codes() - .iter() - .map(|codes| { - field - .subfields() - .iter() - .filter(|subfield| { - codes.contains( - subfield.code(), - ) - }) - .map(|subfield| { - subfield.value() - }) - .collect::>() - }) - .map(|values| { - if !values.is_empty() { - Outcome::from(values) - } else { - Outcome::one() - } - }) - .map(|outcome| { - if options.squash { - outcome - .squash(&options.separator) - } else { - outcome - } - }) - .fold(Outcome::default(), |acc, e| { - acc * e - }) - }) - .fold(Outcome::default(), |acc, e| acc + e); - - if outcome.is_empty() { - outcome = Outcome::ones(path.codes().len()); - } - - outcome - } - }; - - outcomes.push(outcome); - } - - outcomes - .into_iter() - .map(|outcome| { - if options.merge { - outcome.merge(&options.separator) - } else { - outcome - } - }) - .reduce(|acc, e| acc * e) - .unwrap_or_default() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - macro_rules! s { - ($s:expr) => { - $s.to_string() - }; - } - - #[test] - fn test_outcome_from_vec() { - assert_eq!( - Outcome::from(vec![s!("abc"), s!("def")]), - Outcome(vec![vec![s!("abc")], vec![s!("def")]]) - ); - } - - #[test] - fn test_outcome_add() { - let lhs = Outcome::from(vec!["abc", "def"]); - let rhs = Outcome::from(vec!["123", "456"]); - - assert_eq!( - lhs + rhs, - Outcome::from(vec!["abc", "def", "123", "456"]) - ); - - let lhs = Outcome(vec![vec![s!("abc"), s!("def")]]); - let rhs = Outcome(vec![vec![s!("123"), s!("456")]]); - - assert_eq!( - lhs + rhs, - Outcome(vec![ - vec![s!("abc"), s!("def")], - vec![s!("123"), s!("456")] - ]) - ); - } - - #[test] - fn test_outcome_mul() { - let lhs = Outcome::from(vec!["abc", "def"]); - let rhs = Outcome::from(vec!["123", "456"]); - - assert_eq!( - lhs * rhs, - Outcome(vec![ - vec![s!("abc"), s!("123")], - vec![s!("abc"), s!("456")], - vec![s!("def"), s!("123")], - vec![s!("def"), s!("456")], - ]) - ); - - let lhs = Outcome(vec![vec![s!("abc"), s!("def")]]); - let rhs = Outcome::from(vec!["123", "456"]); - - assert_eq!( - lhs * rhs, - Outcome(vec![ - vec![s!("abc"), s!("def"), s!("123")], - vec![s!("abc"), s!("def"), s!("456")], - ]) - ); - - assert_eq!( - Outcome::default() * Outcome::from(vec!["123", "456"]), - Outcome::from(vec!["123", "456"]) - ); - - assert_eq!( - Outcome::from(vec!["123", "456"]) * Outcome::default(), - Outcome::from(vec!["123", "456"]) - ); - } - - #[test] - fn test_query() { - let options = QueryOptions::default(); - - let record = - RecordRef::new(vec![("012A", None, vec![('a', "1")])]); - assert_eq!( - record.query(&Query::new("012A.a"), &options), - Outcome::from(vec![s!("1")]) - ); - - let record = RecordRef::new(vec![( - "012A", - None, - vec![('a', "1"), ('a', "2")], - )]); - assert_eq!( - record.query(&Query::new("012A.a"), &options), - Outcome::from(vec![s!("1"), s!("2")]) - ); - - let record = RecordRef::new(vec![ - ("012A", None, vec![('a', "1")]), - ("012A", None, vec![('a', "2")]), - ]); - assert_eq!( - record.query(&Query::new("012A.a"), &options), - Outcome::from(vec![s!("1"), s!("2")]) - ); - - let record = RecordRef::new(vec![ - ("003@", None, vec![('0', "9")]), - ("012A", None, vec![('a', "1")]), - ]); - assert_eq!( - record.query(&Query::new("003@.0, 012A.a"), &options), - Outcome(vec![vec![s!("9"), s!("1")]]) - ); - - let record = RecordRef::new(vec![ - ("003@", None, vec![('0', "9")]), - ("012A", None, vec![('a', "1")]), - ("012A", None, vec![('a', "2")]), - ]); - assert_eq!( - record.query(&Query::new("003@.0, 012A.a"), &options), - Outcome(vec![ - vec![s!("9"), s!("1")], - vec![s!("9"), s!("2")], - ]) - ); - - let record = RecordRef::new(vec![ - ("003@", None, vec![('0', "9")]), - ("012A", None, vec![('a', "1"), ('b', "2")]), - ]); - assert_eq!( - record - .query(&Query::new("003@.0, 012A{ (a, b) }"), &options), - Outcome(vec![vec![s!("9"), s!("1"), s!("2")]]) - ); - - let record = RecordRef::new(vec![ - ("003@", None, vec![('0', "9")]), - ("012A", None, vec![('a', "1")]), - ]); - assert_eq!( - record - .query(&Query::new("003@.0, 012A{ (a, b) }"), &options), - Outcome(vec![vec![s!("9"), s!("1"), s!("")]]) - ); - - let record = RecordRef::new(vec![ - ("003@", None, vec![('0', "9")]), - ("012A", None, vec![('a', "1"), ('a', "2")]), - ]); - assert_eq!( - record - .query(&Query::new("003@.0, 012A{ (a, b) }"), &options), - Outcome(vec![ - vec![s!("9"), s!("1"), s!("")], - vec![s!("9"), s!("2"), s!("")], - ]) - ); - - let record = RecordRef::new(vec![ - ("003@", None, vec![('0', "9")]), - ("012A", None, vec![('a', "1"), ('a', "2")]), - ("012A", None, vec![('a', "3"), ('b', "4")]), - ]); - assert_eq!( - record - .query(&Query::new("003@.0, 012A{ (a, b) }"), &options), - Outcome(vec![ - vec![s!("9"), s!("1"), s!("")], - vec![s!("9"), s!("2"), s!("")], - vec![s!("9"), s!("3"), s!("4")], - ]) - ); - - let record = RecordRef::new(vec![ - ("003@", None, vec![('0', "9")]), - ("012A", None, vec![('a', "1"), ('a', "2")]), - ("012A", None, vec![('a', "3"), ('b', "4"), ('x', "5")]), - ]); - assert_eq!( - record.query( - &Query::new("003@.0, 012A{ (a,b) | x? }"), - &options - ), - Outcome(vec![vec![s!("9"), s!("3"), s!("4")],]) - ); - - let record = - RecordRef::new(vec![("012A", None, vec![('a', "1")])]); - assert_eq!( - record.query(&Query::new("012A.a, 'foo'"), &options), - Outcome(vec![vec![s!("1"), s!("foo")]]) - ); - - let record = RecordRef::new(vec![ - ("003@", None, vec![('0', "9")]), - ("012A", None, vec![('a', "1"), ('a', "2")]), - ("012A", None, vec![('a', "3"), ('b', "4"), ('x', "5")]), - ]); - assert_eq!( - record.query( - &Query::new("003@.0, \"bar\", 012A{ (a,b) | x? }"), - &options - ), - Outcome(vec![vec![s!("9"), s!("bar"), s!("3"), s!("4")],]) - ); - } -} diff --git a/crates/pica-select/tests/integration.rs b/crates/pica-select/tests/integration.rs deleted file mode 100644 index 1dee51d6d..000000000 --- a/crates/pica-select/tests/integration.rs +++ /dev/null @@ -1,196 +0,0 @@ -use std::str::FromStr; -use std::sync::OnceLock; - -use bstr::B; -use pica_path::Path; -use pica_record_v1::RecordRef; -use pica_select::{ParseQueryError, Query, QueryExt, QueryOptions}; - -type TestResult = anyhow::Result<()>; - -fn ada_lovelace() -> &'static [u8] { - use std::path::Path; - use std::{env, fs}; - - static DATA: OnceLock> = OnceLock::new(); - DATA.get_or_init(|| { - let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); - let path = Path::new(&manifest_dir) - .join("../pica-toolkit/tests/data/119232022.dat"); - fs::read_to_string(&path).unwrap().as_bytes().to_vec() - }) -} - -#[test] -fn query_new() -> TestResult { - let query = Query::new("003@.0"); - let record = RecordRef::from_bytes(ada_lovelace())?; - let options = QueryOptions::default(); - - assert_eq!( - record.query(&query, &options).as_ref(), - [["119232022"]] - ); - - Ok(()) -} - -#[test] -#[should_panic] -fn query_new_panic() { - let _ = Query::new("003@.!"); -} - -#[test] -fn query_try_from() -> TestResult { - let query = Query::try_from(B("003@.0"))?; - let record = RecordRef::from_bytes(ada_lovelace())?; - let options = QueryOptions::default(); - - assert_eq!( - record.query(&query, &options).as_ref(), - [["119232022"]] - ); - - assert!(matches!( - Query::try_from(B("003@.!")).unwrap_err(), - ParseQueryError(_) - )); - - Ok(()) -} - -#[test] -fn query_from_str() -> TestResult { - let query = Query::from_str("003@.0")?; - let record = RecordRef::from_bytes(ada_lovelace())?; - let options = QueryOptions::default(); - - assert_eq!( - record.query(&query, &options).as_ref(), - [["119232022"]] - ); - - assert!(matches!( - Query::from_str("003@.!").unwrap_err(), - ParseQueryError(_) - )); - - Ok(()) -} - -#[test] -fn query_from_path() -> TestResult { - let query = Query::from(Path::new("003@.0")); - let record = RecordRef::from_bytes(ada_lovelace())?; - let options = QueryOptions::default(); - - assert_eq!( - record.query(&query, &options).as_ref(), - [["119232022"]] - ); - - Ok(()) -} - -#[test] -fn record_query_default() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let query = Query::new("065R{ (9,7) | 4 == 'ortg'}"); - let options = QueryOptions::default(); - - assert_eq!( - record.query(&query, &options).as_ref(), - [["040743357", "Tgz"]] - ); - - Ok(()) -} - -#[test] -fn record_query_case_ignore() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let query = Query::new("028R{ d, a | a == 'KING' }"); - - let options = QueryOptions::default().case_ignore(true); - assert_eq!( - record.query(&query, &options).as_ref(), - [["william", "king"]] - ); - - let options = QueryOptions::default().case_ignore(false); - assert_eq!(record.query(&query, &options).as_ref(), [["", ""]]); - - Ok(()) -} - -#[test] -fn record_query_squash() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let query = Query::new("008A.a"); - - let options = QueryOptions::default().squash(true); - assert_eq!(record.query(&query, &options).as_ref(), [["s|z|f"]]); - - let options = QueryOptions::default().squash(false); - assert_eq!( - record.query(&query, &options).as_ref(), - [["s"], ["z"], ["f"]] - ); - - Ok(()) -} - -#[test] -fn record_query_merge() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let query = Query::new("003@.0, 008A.a"); - - let options = QueryOptions::default().merge(true); - assert_eq!( - record.query(&query, &options).as_ref(), - [["119232022", "s|z|f"]] - ); - - let options = QueryOptions::default().merge(false); - assert_eq!( - record.query(&query, &options).as_ref(), - [["119232022", "s"], ["119232022", "z"], ["119232022", "f"]] - ); - - Ok(()) -} - -#[test] -fn record_query_separator() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let query = Query::new("003@.0, 008A.a"); - - let options = QueryOptions::default().squash(true).separator("+"); - assert_eq!( - record.query(&query, &options).as_ref(), - [["119232022", "s+z+f"]] - ); - - let options = QueryOptions::default().merge(true).separator("+"); - assert_eq!( - record.query(&query, &options).as_ref(), - [["119232022", "s+z+f"]] - ); - - Ok(()) -} - -#[test] -fn record_query_const() -> TestResult { - let record = RecordRef::from_bytes(ada_lovelace())?; - let query = Query::new("003@.0, 'abc', 003@.0"); - let options = QueryOptions::default(); - - assert_eq!( - record.query(&query, &options).as_ref(), - [["119232022", "abc", "119232022"]] - ); - - Ok(()) -} diff --git a/crates/pica-toolkit/Cargo.toml b/crates/pica-toolkit/Cargo.toml deleted file mode 100644 index 4e4b4a63a..000000000 --- a/crates/pica-toolkit/Cargo.toml +++ /dev/null @@ -1,33 +0,0 @@ -[package] -name = "pica-toolkit" -version.workspace = true -authors.workspace = true -license.workspace = true -readme.workspace = true -keywords.workspace = true -edition.workspace = true -rust-version.workspace = true - -[dependencies] -bstr = { workspace = true } -clap = { workspace = true, features = ["cargo", "derive", "wrap_help"] } -csv = { workspace = true } -directories = { version = "5.0" } -flate2 = { workspace = true } -indicatif = { version = "0.17" } -pica-matcher = { workspace = true } -pica-path = { workspace = true } -pica-record-v1 = { workspace = true } -pica-utils = { workspace = true } -quick-xml = { version = "0.37" } -regex = { workspace = true } -serde = { workspace = true, features = ["derive"] } -serde_json = { workspace = true } -strsim = { workspace = true } -thiserror = { workspace = true } -toml = { workspace = true } -unicode-normalization = { version = "0.1" } - -[[bin]] -path = "src/main.rs" -name = "pica-v1" diff --git a/crates/pica-toolkit/src/cli.rs b/crates/pica-toolkit/src/cli.rs deleted file mode 100644 index 5b0fecc7c..000000000 --- a/crates/pica-toolkit/src/cli.rs +++ /dev/null @@ -1,19 +0,0 @@ -use clap::{crate_version, Arg, Command}; - -pub(crate) fn build_cli() -> Command<'static> { - Command::new("pica") - .about( - "Tools to work with bibliographic records encoded in Pica+", - ) - .subcommand_required(true) - .version(crate_version!()) - .author(crate_authors!()) - .arg( - Arg::new("config") - .short('c') - .long("config") - .takes_value(true) - .value_name("filename"), - ) - .subcommands(crate::cmds::subcmds()) -} diff --git a/crates/pica-toolkit/src/commands/mod.rs b/crates/pica-toolkit/src/commands/mod.rs deleted file mode 100644 index 0efca0eb6..000000000 --- a/crates/pica-toolkit/src/commands/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod convert; - -pub(crate) use convert::{Convert, ConvertConfig}; diff --git a/crates/pica-toolkit/src/config.rs b/crates/pica-toolkit/src/config.rs deleted file mode 100644 index c714fc040..000000000 --- a/crates/pica-toolkit/src/config.rs +++ /dev/null @@ -1,69 +0,0 @@ -use std::fs::{create_dir_all, read_to_string}; -use std::path::{Path, PathBuf}; - -use directories::ProjectDirs; -use pica_utils::NormalizationForm; -use serde::{Deserialize, Serialize}; - -use crate::commands::*; - -#[derive(Debug, Default, Deserialize, Serialize)] -#[serde(rename_all = "kebab-case")] -pub(crate) struct GlobalConfig { - pub(crate) translit: Option, - pub(crate) skip_invalid: Option, -} - -#[derive(Debug, Default, Serialize, Deserialize)] -pub(crate) struct Config { - #[serde(skip)] - pub(crate) path: Option, - pub(crate) convert: Option, - pub(crate) global: Option, -} - -impl Config { - pub(crate) fn new() -> Result { - let mut config = Config::default(); - - if let Some(project_dirs) = - ProjectDirs::from("de.dnb", "DNB", "pica-rs") - { - let config_dir = project_dirs.config_dir(); - if !config_dir.exists() { - create_dir_all(config_dir)?; - } - - let config_file = config_dir.join("Pica.toml"); - if config_file.exists() { - return Self::from_path(config_file); - } - - config.path = Some(config_file); - } - - Ok(config) - } - - pub(crate) fn from_path>( - path: P, - ) -> Result { - let path = PathBuf::from(path.as_ref()); - let content = read_to_string(&path)?; - - // FIXME: handle unwrap() - let mut config: Config = toml::from_str(&content).unwrap(); - config.path = Some(path); - - Ok(config) - } - - pub(crate) fn from_path_or_default>( - path: Option

, - ) -> Result { - match path { - Some(path) => Self::from_path(path), - None => Self::new(), - } - } -} diff --git a/crates/pica-toolkit/src/error.rs b/crates/pica-toolkit/src/error.rs deleted file mode 100644 index 311c7139a..000000000 --- a/crates/pica-toolkit/src/error.rs +++ /dev/null @@ -1,27 +0,0 @@ -use std::io; - -pub(crate) type CliResult = Result; - -#[derive(Debug, thiserror::Error)] -pub(crate) enum CliError { - #[error(transparent)] - Io(#[from] io::Error), - - #[error(transparent)] - Csv(#[from] csv::Error), - - #[error(transparent)] - ParsePica(#[from] pica_record_v1::ParsePicaError), - - #[error(transparent)] - ReadPica(#[from] pica_record_v1::io::ReadPicaError), - - #[error(transparent)] - ParsePath(#[from] pica_path::ParsePathError), - - #[error(transparent)] - ParseMatcher(#[from] pica_matcher::ParseMatcherError), - - #[error("{0}")] - Other(String), -} diff --git a/crates/pica-toolkit/src/macros.rs b/crates/pica-toolkit/src/macros.rs deleted file mode 100644 index 77ec93d0f..000000000 --- a/crates/pica-toolkit/src/macros.rs +++ /dev/null @@ -1,44 +0,0 @@ -#[macro_export] -macro_rules! skip_invalid_flag { - ($skip_invalid: expr, $local:expr, $global:expr) => { - if $skip_invalid { - true - } else if let Some(ref config) = $local { - config.skip_invalid.unwrap_or_default() - } else if let Some(ref config) = $global { - config.skip_invalid.unwrap_or_default() - } else { - false - } - }; -} - -#[macro_export] -macro_rules! gzip_flag { - ($gzip: expr, $local:expr) => { - if $gzip { - true - } else if let Some(ref config) = $local { - config.gzip.unwrap_or_default() - } else { - false - } - }; -} - -#[macro_export] -macro_rules! template_opt { - ($args: expr, $local:expr, $default:expr) => { - if let Some(filename_template) = $args { - filename_template - } else if let Some(ref config) = $local { - config - .template - .as_ref() - .map(|x| x.to_owned()) - .unwrap_or($default.to_string()) - } else { - $default.to_string() - } - }; -} diff --git a/crates/pica-toolkit/src/main.rs b/crates/pica-toolkit/src/main.rs deleted file mode 100644 index c83375253..000000000 --- a/crates/pica-toolkit/src/main.rs +++ /dev/null @@ -1,86 +0,0 @@ -extern crate clap; -extern crate csv; -extern crate regex; -extern crate serde; - -mod commands; -mod config; -mod error; -mod macros; -mod progress; - -use std::path::PathBuf; -use std::{io, process}; - -use clap::{Parser, Subcommand}; -use commands::Convert; -use config::Config; -use error::{CliError, CliResult}; - -#[derive(Debug, Parser)] -#[clap(version, author, infer_subcommands = true, max_term_width = 72)] -#[command(name = "pica")] -#[command( - about = "Tools to work with bibliographic records encoded in PICA+." -)] -struct Cli { - #[command(subcommand)] - command: Commands, - - #[arg(short, long)] - config: Option, -} - -#[derive(Debug, Subcommand)] -#[allow(clippy::large_enum_variant)] -enum Commands { - Convert(Convert), -} - -fn run() -> CliResult<()> { - let args = Cli::parse(); - let config = Config::from_path_or_default(args.config)?; - - match args.command { - Commands::Convert(cmd) => cmd.run(&config), - } -} - -fn main() { - match run() { - Ok(()) => process::exit(0), - Err(CliError::Io(ref err)) - if err.kind() == io::ErrorKind::BrokenPipe => - { - process::exit(0); // no-coverage - } - Err(CliError::ParsePica(err)) => { - eprintln!("error: {err}"); - process::exit(1); - } - Err(CliError::ReadPica(err)) => { - eprintln!("error: {err}"); - process::exit(1); - } - Err(CliError::ParsePath(err)) => { - eprintln!("error: {err}"); - process::exit(1); - } - Err(CliError::ParseMatcher(err)) => { - eprintln!("error: {err}"); - process::exit(1); - } - Err(CliError::Io(err)) => { - eprintln!("error: {err}"); - process::exit(1); - } - Err(CliError::Csv(err)) => { - eprintln!("error: {err}"); - process::exit(1); - } - Err(CliError::Other(err)) => { - eprintln!("error: {err}"); - process::exit(1); - } - } -} diff --git a/crates/pica-toolkit/src/progress.rs b/crates/pica-toolkit/src/progress.rs deleted file mode 100644 index 5ff421fc2..000000000 --- a/crates/pica-toolkit/src/progress.rs +++ /dev/null @@ -1,59 +0,0 @@ -use indicatif::{HumanCount, ProgressBar, ProgressStyle}; - -pub(crate) struct Progress { - bar: ProgressBar, - records: u64, - invalid: u64, -} - -impl Progress { - pub(crate) fn new(enable: bool) -> Self { - let bar = if enable { - ProgressBar::new_spinner() - } else { - ProgressBar::hidden() - }; - - bar.set_style( - ProgressStyle::with_template( - "{spinner} {msg}, elapsed: {elapsed_precise}", - ) - .unwrap(), - ); - - Self { - bar, - records: 0, - invalid: 0, - } - } - - #[inline] - pub(crate) fn record(&mut self) { - self.records += 1; - self.update(); - } - - #[inline] - pub(crate) fn invalid(&mut self) { - self.invalid += 1; - self.update(); - } - - pub(crate) fn update(&mut self) { - self.bar.inc(1); - let per_sec = self.bar.per_sec(); - - self.bar.set_message(format!( - "records: {}, invalid: {} | {} records/s", - HumanCount(self.records), - HumanCount(self.invalid), - per_sec.round() as i64, - )); - } - - #[inline] - pub(crate) fn finish(&self) { - self.bar.finish(); - } -} diff --git a/crates/pica-toolkit/tests/data/000008672.dat b/crates/pica-toolkit/tests/data/000008672.dat deleted file mode 100644 index 6d615c4da..000000000 --- a/crates/pica-toolkit/tests/data/000008672.dat +++ /dev/null @@ -1 +0,0 @@ -001@ 0-001A 09002:18-04-89001B 09999:27-09-17t00:43:48.000001D 09999:23-04-10001U 0utf8001X 00002@ 0Tb1003@ 0000008672003U ahttp://d-nb.info/gnd/867-9zhttp://d-nb.info/gnd/7538748-7004B akiz007K agnd0867-9007N agnd07538748-7007N agnd01085295990007N aswd07538748-7vzg007N agkd0867-9vzg008A afas008B azav029@ aVacuum SocietygUSA029@ aAVS4abku029@ aCommittee on Vacuum Techniques029@ aCVT4abku029A aAmerican Vacuum Society029R 99840020737Tb1Agnd010168051-XaAVS, the Science and Technology Society4nach041R 90406226657Ts1Agnd04062266-6aVakuum4them041R 904066581X7Ts1VsazAgnd04066581-1aWissenschaftliche Gesellschaft4obin042A a6.5a9.3ca21.5a31.1ba31.9a042B aXD-US042B aXD-US047A/03 eDE-1047A/03 rDE-1047C SgkdiaaAmerican Vacuum Society0867-9047C SswdikaAmerican Vacuum Society07538748-7050C aMMi050E aGKD050H a1953 gegr. wiss. Ges., die sich mit Problemen des Vakuums und vor allem seiner Anwendung in der Technik beschäftigt065R 90407870447TgzVgikAgnd04078704-7aUSA4geow diff --git a/crates/pica-toolkit/tests/data/000009229.dat b/crates/pica-toolkit/tests/data/000009229.dat deleted file mode 100644 index 230e7858b..000000000 --- a/crates/pica-toolkit/tests/data/000009229.dat +++ /dev/null @@ -1 +0,0 @@ -001@ 0-001A 09002:18-04-89001B 09999:05-06-20t05:40:04.000001D 09999:23-04-10001U 0utf8001X 00002@ 0Tb1003@ 0000009229003U ahttp://d-nb.info/gnd/922-2zhttp://d-nb.info/gnd/4499175-7zhttp://d-nb.info/gnd/1090453043004B akiz007K agnd0922-2007N agnd01090453043007N agnd04499175-7007N aswd04499175-7vzg007N agkd0922-2vzg008A afas008B azavawae010E bgererda029@ aCancer SocietygUSA029@ aACS4abku029A aAmerican Cancer Society029R 90050780917Tb1VkizAgnd0507809-XaAmerican Society for the Control of Cancer4vorg042A a27.4042B aXD-US047A/03 eDE-1047A/03 rDE-1047C SgkdiaaAmerican Cancer Society0922-2047C SswdikaAmerican Cancer Society04499175-7050E aGKD050E aHomepagebStand: 08.10.2018uhttps://www.cancer.org060R a19454datb065R 90407870447TgzVgikAgnd04078704-7aUSA4geow070A/03 0(DE-588)922-2 diff --git a/crates/pica-toolkit/tests/data/000016586.dat b/crates/pica-toolkit/tests/data/000016586.dat deleted file mode 100644 index 25cd69a85..000000000 --- a/crates/pica-toolkit/tests/data/000016586.dat +++ /dev/null @@ -1 +0,0 @@ -001@ 0-001A 09002:18-04-89001B 01240:19-04-17t14:37:25.000001D 09999:23-04-10001U 0utf8001X 00002@ 0Tb1003@ 0000016586003U ahttp://d-nb.info/gnd/1658-5zhttp://d-nb.info/gnd/4318278-1004B akiz007K agnd01658-5007N agnd04318278-1007N aswd04318278-1vzg007N agkd016291087-3007N agkd01658-5vzg008A afas008B azav029@ aUniv. of Minnesota, Minneapolis029@ aUniversidad de Minnesota029@ aU of M029@ aUM4abku029A aUniversity of Minnesota042A a6.6042B aXD-US047A/03 eDE-1047A/03 rDE-1047C SgkdiaaUniversity of Minnesota 01658-5047C SswdicaMinneapolis / University of Minnesota04318278-1050E aHomepagebStand: 19.04.2017uhttp://www.umn.edu060R a18514datb065R 90403949727Tg1VgikAgnd04039497-9aMinneapolis, Minn.4orta070A/03 SIDS0320104243 diff --git a/crates/pica-toolkit/tests/data/000016756.dat b/crates/pica-toolkit/tests/data/000016756.dat deleted file mode 100644 index 8bc41576b..000000000 --- a/crates/pica-toolkit/tests/data/000016756.dat +++ /dev/null @@ -1 +0,0 @@ -001@ 0-001A 09002:18-04-89001B 01601:26-11-19t10:50:14.000001D 09999:23-04-10001U 0utf8001X 00002@ 0Tb1003@ 0000016756003U ahttp://d-nb.info/gnd/1675-5zhttp://d-nb.info/gnd/1088210104zhttp://d-nb.info/gnd/7542287-6zhttp://d-nb.info/gnd/1086256751004B akiz007K agnd01675-5007N agnd01086256751007N agnd07542287-6007N aswd07542287-6vzg007N agnd01088210104007N agkd01675-5vzg008A afas008B avaz010E bger029@ aInstitut International de Philosophie029@ aInternationales Institut für Philosophie029@ aInstituto Internacional de Filosofia029@ aInstitute of PhilosophygInternational Institute of Philosophie029@ aInstitut für Philosophie029@ aIIP4abku029A aInternational Institute of Philosophy029R 90002612467Tb1VkizAgnd026124-5aInstitut International de Collaboration PhilosophiquegParis4vorg041R 90419009447Ts1Agnd04190094-7aWissenschaftliche Einrichtung4obin042A a6.5a4.1042B aXP047A/03 eDE-1047A/03 rDE-1047C SgkdiaaInternational Institute of Philosophy01675-5047C SswdicaParis / Internationales Institut für Philosophie07542287-6050E aYearb. 1995060R a19374datb065R 90401814567TgzVgikAgnd04018145-5aFrankreich4geow065R 90404466037TgzVgikAgnd04044660-8aParis4orta070A/03 SIDS0000000307 diff --git a/crates/pica-toolkit/tests/data/004732650-nfc.dat.gz b/crates/pica-toolkit/tests/data/004732650-nfc.dat.gz deleted file mode 100644 index eba015243..000000000 Binary files a/crates/pica-toolkit/tests/data/004732650-nfc.dat.gz and /dev/null differ diff --git a/crates/pica-toolkit/tests/data/004732650-nfc.json b/crates/pica-toolkit/tests/data/004732650-nfc.json deleted file mode 100644 index 18890c3dc..000000000 --- a/crates/pica-toolkit/tests/data/004732650-nfc.json +++ /dev/null @@ -1 +0,0 @@ -[{"fields":[{"tag":"029A","subfields":[{"tag":"a","value":"Goethe-Universität Frankfurt am Main"},{"tag":"b","value":"Institut für Sozialforschung"}]}]}] \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/004732650-nfc.txt b/crates/pica-toolkit/tests/data/004732650-nfc.txt deleted file mode 100644 index 8fdc25882..000000000 --- a/crates/pica-toolkit/tests/data/004732650-nfc.txt +++ /dev/null @@ -1,2 +0,0 @@ -029A $aGoethe-Universität Frankfurt am Main$bInstitut für Sozialforschung - diff --git a/crates/pica-toolkit/tests/data/004732650-nfc.xml b/crates/pica-toolkit/tests/data/004732650-nfc.xml deleted file mode 100644 index 864c271e7..000000000 --- a/crates/pica-toolkit/tests/data/004732650-nfc.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - Goethe-Universität Frankfurt am Main - Institut für Sozialforschung - - - \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/004732650-nfd.json b/crates/pica-toolkit/tests/data/004732650-nfd.json deleted file mode 100644 index 7cb185495..000000000 --- a/crates/pica-toolkit/tests/data/004732650-nfd.json +++ /dev/null @@ -1 +0,0 @@ -[{"fields":[{"tag":"029A","subfields":[{"tag":"a","value":"Goethe-Universität Frankfurt am Main"},{"tag":"b","value":"Institut für Sozialforschung"}]}]}] \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/004732650-nfd.txt b/crates/pica-toolkit/tests/data/004732650-nfd.txt deleted file mode 100644 index 321884069..000000000 --- a/crates/pica-toolkit/tests/data/004732650-nfd.txt +++ /dev/null @@ -1,2 +0,0 @@ -029A $aGoethe-Universität Frankfurt am Main$bInstitut für Sozialforschung - diff --git a/crates/pica-toolkit/tests/data/004732650-nfd.xml b/crates/pica-toolkit/tests/data/004732650-nfd.xml deleted file mode 100644 index 305eb7c92..000000000 --- a/crates/pica-toolkit/tests/data/004732650-nfd.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - Goethe-Universität Frankfurt am Main - Institut für Sozialforschung - - - \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/004732650-reduced.dat.gz b/crates/pica-toolkit/tests/data/004732650-reduced.dat.gz deleted file mode 100644 index 2af46ebe8..000000000 Binary files a/crates/pica-toolkit/tests/data/004732650-reduced.dat.gz and /dev/null differ diff --git a/crates/pica-toolkit/tests/data/004732650.dat.gz b/crates/pica-toolkit/tests/data/004732650.dat.gz deleted file mode 100644 index a10455e12..000000000 Binary files a/crates/pica-toolkit/tests/data/004732650.dat.gz and /dev/null differ diff --git a/crates/pica-toolkit/tests/data/004732650.txt b/crates/pica-toolkit/tests/data/004732650.txt deleted file mode 100644 index 916484d80..000000000 --- a/crates/pica-toolkit/tests/data/004732650.txt +++ /dev/null @@ -1,46 +0,0 @@ -001@ $0- -001A $09002:18-04-89 -001B $00281:23-07-21$t14:15:09.000 -001D $09999:23-04-10 -001U $0utf8 -001X $00 -002@ $0Tb1 -003@ $0004732650 -003U $ahttp://d-nb.info/gnd/2014261-4$zhttp://d-nb.info/gnd/4027194-8$zhttp://d-nb.info/gnd/1086389794 -004B $akiz -007K $agnd$02014261-4 -007N $agnd$01086389794 -007N $agnd$04027194-8 -007N $aswd$04027194-8$vzg -007N $agkd$02014261-4$vzg -008A $af$as$ah -008B $av$az$aw -010E $bger$erda -022R $91053648979$7Tu3$Vwip$Agnd$01053648979$tStempel$gGoethe-Universität Frankfurt am Main. Institut für Sozialforschung$n01$4rela -022R $91053649002$7Tu3$Vwip$Agnd$01053649002$tStempel$gGoethe-Universität Frankfurt am Main. Institut für Sozialforschung. Bücherei$n01$4rela -029@ $aInstitut für Sozialforschung$gFrankfurt am Main -029@ $aGoethe-Universität Frankfurt am Main$bInstitut für Soziale Forschung -029@ $aGoethe-Universität Frankfurt am Main$bIstituto per la Ricerca Sociale di Francoforte -029@ $aInstitut f. Sozialforschg. an d. Universität Ffm$4nauv -029@ $aInstitut für Sozialforschung an der Johann Wolfgang Goethe-Universität$4nauv -029@ $aIfS$4abku -029@ $aUniversität Frankfurt am Main$bInstitut für Sozialforschung -029A $aGoethe-Universität Frankfurt am Main$bInstitut für Sozialforschung -029R $9004735110$7Tb1$Vkiz$Agnd$02014567-6$aInternational Institute of Social Research$gNew York, NY$4nazw -029R $9004734041$7Tb1$Agnd$02014423-4$aInstitute of Social Research$gNew York, NY$4nazw -029R $9000350745$7Tb1$Vkiz$Agnd$035074-6$aGoethe-Universität Frankfurt am Main$4adue -041R $9040559165$7Ts1$Vsaz$Agnd$04055916-6$aSozialwissenschaften$4them -041R $9041602145$7Ts1$Vsaz$Agnd$04160214-6$aHochschulinstitut$4obin -042A $a6.6$a9.2a -042B $aXA-DE-HE -047A/03 $eDE-1 -047A/03 $rDE-1 -047C $Sgkd$ia$aInstitut für Sozialforschung $02014261-4 -047C $Sswd$ic$aFrankfurt am Main / Institut für Sozialforschung$04027194-8 -050E $aHomepage$bStand: 20.07.2021$uhttp://www.ifs.uni-frankfurt.de -050G $bGegründet 1923; 1933-1951 im Exil in New York, ab 1951 Fortführung in Frankfurt am Main; Verantwortlicher Träger des IfS ist – 1951 vom Hessischen Ministerium des Innern genehmigt und als gemeinnützig anerkannt – die Stiftung Institut für Sozialforschung. -060R $a1923$4datb -065R $9040247295$7Tg1$Vgik$Agnd$04024729-6$aHessen$4geow -065R $9040181189$7Tg1$Vgik$Agnd$04018118-2$aFrankfurt am Main$4orta -070A/03 $SDE-101$0004732650 - diff --git a/crates/pica-toolkit/tests/data/1004916019-color1.txt b/crates/pica-toolkit/tests/data/1004916019-color1.txt deleted file mode 100644 index cc2f112bf..000000000 --- a/crates/pica-toolkit/tests/data/1004916019-color1.txt +++ /dev/null @@ -1,24 +0,0 @@ -001A $08999:22-07-10 -001B $01250:10-09-14$t08:28:16.000 -001D $00384:27-07-10 -001U $0utf8 -001X $00 -002@ $0Ts1 -003@ $01004916019 -003U $ahttp://d-nb.info/gnd/7710287-3 -004B $asip -007K $agnd$07710287-3 -007N $aswd$07710287-3$vzg -008A $as -029R $9952570254$7Tb1$Vkiz$Agnd$05263070-5$aChrysler Corporation$4hers -041A $aPlymouth$gMarke -041R $9041145135$7Ts1$Vsaz$Agnd$04114513-6$aMarkenname$4obin -042A $a31.7 -047A/03 $eDE-210 -047A/03 $rDE-384 -047C $Sswd$is$aPlymouth $07710287-3 -050D $aKombiniere mit einer Produktgruppe, z.B. Personenkraftwagen -050E $aWikipedia, Internet$uhttp://www.mobile.de/modellverzeichnis/plymouth/ -050H $aMarkenname - - \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/1004916019-color2.txt b/crates/pica-toolkit/tests/data/1004916019-color2.txt deleted file mode 100644 index 20249728f..000000000 --- a/crates/pica-toolkit/tests/data/1004916019-color2.txt +++ /dev/null @@ -1,24 +0,0 @@ -001A $08999:22-07-10 -001B $01250:10-09-14$t08:28:16.000 -001D $00384:27-07-10 -001U $0utf8 -001X $00 -002@ $0Ts1 -003@ $01004916019 -003U $ahttp://d-nb.info/gnd/7710287-3 -004B $asip -007K $agnd$07710287-3 -007N $aswd$07710287-3$vzg -008A $as -029R $9952570254$7Tb1$Vkiz$Agnd$05263070-5$aChrysler Corporation$4hers -041A $aPlymouth$gMarke -041R $9041145135$7Ts1$Vsaz$Agnd$04114513-6$aMarkenname$4obin -042A $a31.7 -047A/03 $eDE-210 -047A/03 $rDE-384 -047C $Sswd$is$aPlymouth $07710287-3 -050D $aKombiniere mit einer Produktgruppe, z.B. Personenkraftwagen -050E $aWikipedia, Internet$uhttp://www.mobile.de/modellverzeichnis/plymouth/ -050H $aMarkenname - - \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/1004916019-spaces.txt b/crates/pica-toolkit/tests/data/1004916019-spaces.txt deleted file mode 100644 index 638ace9c3..000000000 --- a/crates/pica-toolkit/tests/data/1004916019-spaces.txt +++ /dev/null @@ -1,23 +0,0 @@ -001A $0 8999:22-07-10 -001B $0 1250:10-09-14 $t 08:28:16.000 -001D $0 0384:27-07-10 -001U $0 utf8 -001X $0 0 -002@ $0 Ts1 -003@ $0 1004916019 -003U $a http://d-nb.info/gnd/7710287-3 -004B $a sip -007K $a gnd $0 7710287-3 -007N $a swd $0 7710287-3 $v zg -008A $a s -029R $9 952570254 $7 Tb1 $V kiz $A gnd $0 5263070-5 $a Chrysler Corporation $4 hers -041A $a Plymouth $g Marke -041R $9 041145135 $7 Ts1 $V saz $A gnd $0 4114513-6 $a Markenname $4 obin -042A $a 31.7 -047A/03 $e DE-210 -047A/03 $r DE-384 -047C $S swd $i s $a Plymouth $0 7710287-3 -050D $a Kombiniere mit einer Produktgruppe, z.B. Personenkraftwagen -050E $a Wikipedia, Internet $u http://www.mobile.de/modellverzeichnis/plymouth/ -050H $a Markenname - diff --git a/crates/pica-toolkit/tests/data/1004916019.dat b/crates/pica-toolkit/tests/data/1004916019.dat deleted file mode 100644 index f860869b8..000000000 --- a/crates/pica-toolkit/tests/data/1004916019.dat +++ /dev/null @@ -1 +0,0 @@ -001A 08999:22-07-10001B 01250:10-09-14t08:28:16.000001D 00384:27-07-10001U 0utf8001X 00002@ 0Ts1003@ 01004916019003U ahttp://d-nb.info/gnd/7710287-3004B asip007K agnd07710287-3007N aswd07710287-3vzg008A as029R 99525702547Tb1VkizAgnd05263070-5aChrysler Corporation4hers041A aPlymouthgMarke041R 90411451357Ts1VsazAgnd04114513-6aMarkenname4obin042A a31.7047A/03 eDE-210047A/03 rDE-384047C SswdisaPlymouth 07710287-3050D aKombiniere mit einer Produktgruppe, z.B. Personenkraftwagen050E aWikipedia, Internetuhttp://www.mobile.de/modellverzeichnis/plymouth/050H aMarkenname diff --git a/crates/pica-toolkit/tests/data/1004916019.dat.gz b/crates/pica-toolkit/tests/data/1004916019.dat.gz deleted file mode 100644 index fb5eb98c8..000000000 Binary files a/crates/pica-toolkit/tests/data/1004916019.dat.gz and /dev/null differ diff --git a/crates/pica-toolkit/tests/data/1004916019.json b/crates/pica-toolkit/tests/data/1004916019.json deleted file mode 100644 index 34f4fd2f7..000000000 --- a/crates/pica-toolkit/tests/data/1004916019.json +++ /dev/null @@ -1 +0,0 @@ -[{"fields":[{"tag":"001A","subfields":[{"tag":"0","value":"8999:22-07-10"}]},{"tag":"001B","subfields":[{"tag":"0","value":"1250:10-09-14"},{"tag":"t","value":"08:28:16.000"}]},{"tag":"001D","subfields":[{"tag":"0","value":"0384:27-07-10"}]},{"tag":"001U","subfields":[{"tag":"0","value":"utf8"}]},{"tag":"001X","subfields":[{"tag":"0","value":"0"}]},{"tag":"002@","subfields":[{"tag":"0","value":"Ts1"}]},{"tag":"003@","subfields":[{"tag":"0","value":"1004916019"}]},{"tag":"003U","subfields":[{"tag":"a","value":"http://d-nb.info/gnd/7710287-3"}]},{"tag":"004B","subfields":[{"tag":"a","value":"sip"}]},{"tag":"007K","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"7710287-3"}]},{"tag":"007N","subfields":[{"tag":"a","value":"swd"},{"tag":"0","value":"7710287-3"},{"tag":"v","value":"zg"}]},{"tag":"008A","subfields":[{"tag":"a","value":"s"}]},{"tag":"029R","subfields":[{"tag":"9","value":"952570254"},{"tag":"7","value":"Tb1"},{"tag":"V","value":"kiz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"5263070-5"},{"tag":"a","value":"Chrysler Corporation"},{"tag":"4","value":"hers"}]},{"tag":"041A","subfields":[{"tag":"a","value":"Plymouth"},{"tag":"g","value":"Marke"}]},{"tag":"041R","subfields":[{"tag":"9","value":"041145135"},{"tag":"7","value":"Ts1"},{"tag":"V","value":"saz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4114513-6"},{"tag":"a","value":"Markenname"},{"tag":"4","value":"obin"}]},{"tag":"042A","subfields":[{"tag":"a","value":"31.7"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"e","value":"DE-210"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"r","value":"DE-384"}]},{"tag":"047C","subfields":[{"tag":"S","value":"swd"},{"tag":"i","value":"s"},{"tag":"a","value":"Plymouth "},{"tag":"0","value":"7710287-3"}]},{"tag":"050D","subfields":[{"tag":"a","value":"Kombiniere mit einer Produktgruppe, z.B. Personenkraftwagen"}]},{"tag":"050E","subfields":[{"tag":"a","value":"Wikipedia, Internet"},{"tag":"u","value":"http://www.mobile.de/modellverzeichnis/plymouth/"}]},{"tag":"050H","subfields":[{"tag":"a","value":"Markenname"}]}]}] \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/1004916019.txt b/crates/pica-toolkit/tests/data/1004916019.txt deleted file mode 100644 index 0606fc6d4..000000000 --- a/crates/pica-toolkit/tests/data/1004916019.txt +++ /dev/null @@ -1,23 +0,0 @@ -001A $08999:22-07-10 -001B $01250:10-09-14$t08:28:16.000 -001D $00384:27-07-10 -001U $0utf8 -001X $00 -002@ $0Ts1 -003@ $01004916019 -003U $ahttp://d-nb.info/gnd/7710287-3 -004B $asip -007K $agnd$07710287-3 -007N $aswd$07710287-3$vzg -008A $as -029R $9952570254$7Tb1$Vkiz$Agnd$05263070-5$aChrysler Corporation$4hers -041A $aPlymouth$gMarke -041R $9041145135$7Ts1$Vsaz$Agnd$04114513-6$aMarkenname$4obin -042A $a31.7 -047A/03 $eDE-210 -047A/03 $rDE-384 -047C $Sswd$is$aPlymouth $07710287-3 -050D $aKombiniere mit einer Produktgruppe, z.B. Personenkraftwagen -050E $aWikipedia, Internet$uhttp://www.mobile.de/modellverzeichnis/plymouth/ -050H $aMarkenname - diff --git a/crates/pica-toolkit/tests/data/1004916019.xml b/crates/pica-toolkit/tests/data/1004916019.xml deleted file mode 100644 index 3eefe2de8..000000000 --- a/crates/pica-toolkit/tests/data/1004916019.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - - 8999:22-07-10 - - - 1250:10-09-14 - 08:28:16.000 - - - 0384:27-07-10 - - - utf8 - - - 0 - - - Ts1 - - - 1004916019 - - - http://d-nb.info/gnd/7710287-3 - - - sip - - - gnd - 7710287-3 - - - swd - 7710287-3 - zg - - - s - - - 952570254 - Tb1 - kiz - gnd - 5263070-5 - Chrysler Corporation - hers - - - Plymouth - Marke - - - 041145135 - Ts1 - saz - gnd - 4114513-6 - Markenname - obin - - - 31.7 - - - DE-210 - - - DE-384 - - - swd - s - Plymouth <Marke> - 7710287-3 - - - Kombiniere mit einer Produktgruppe, z.B. Personenkraftwagen - - - Wikipedia, Internet - http://www.mobile.de/modellverzeichnis/plymouth/ - - - Markenname - - - \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/1004916019_reduced.dat b/crates/pica-toolkit/tests/data/1004916019_reduced.dat deleted file mode 100644 index c8c98e69c..000000000 --- a/crates/pica-toolkit/tests/data/1004916019_reduced.dat +++ /dev/null @@ -1 +0,0 @@ -003@ 01004916019047A/03 eDE-210047A/03 rDE-384 diff --git a/crates/pica-toolkit/tests/data/1029350469.dat.gz b/crates/pica-toolkit/tests/data/1029350469.dat.gz deleted file mode 100644 index 09006712e..000000000 Binary files a/crates/pica-toolkit/tests/data/1029350469.dat.gz and /dev/null differ diff --git a/crates/pica-toolkit/tests/data/1029350469_r1.dat b/crates/pica-toolkit/tests/data/1029350469_r1.dat deleted file mode 100644 index 65d190eaa..000000000 --- a/crates/pica-toolkit/tests/data/1029350469_r1.dat +++ /dev/null @@ -1 +0,0 @@ -003@ 01029350469041A 91185519817Tp1VpizAgnd0118551981E1770G1843dFriedrichaHölderlin041A/01 90403677467TszVsazAgnd04036774-5aLyrik041A/02 90404644907Ts1VsazAgnd04046449-0aPoetik041A/09 eDE-101rDE-101041A/10 91185477397Tp1VpizAgnd0118547739E1770G1831dGeorg Wilhelm FriedrichaHegel041A/11 90413728757Ts1VsazAgnd04137287-6aGattungstheorie041A/19 eDE-101rDE-101041A/20 90412245907TpzVpizAgnd0118540238E1749G1832dJohann WolfgangaGoethecvon7Tu1VwitAgnd04122459-0tTorquato Tasso041A/21 90409923737Tp1VpizAgnd0118551981E1770G1843dFriedrichaHölderlin7Tu1VwitAgnd04099237-8tDer @Tod des Empedokles041A/22 904099273X7Tp1VpizAgnd0118563076E1777G1811dHeinrichaKleistcvon7Tu1VwitAgnd04099273-1tPrinz Friedrich von Homburg041A/29 eDE-101rDE-101 diff --git a/crates/pica-toolkit/tests/data/1029350469_r2.dat b/crates/pica-toolkit/tests/data/1029350469_r2.dat deleted file mode 100644 index 7dee2518b..000000000 --- a/crates/pica-toolkit/tests/data/1029350469_r2.dat +++ /dev/null @@ -1 +0,0 @@ -003@ 01029350469041A/01 90403677467TszVsazAgnd04036774-5aLyrik diff --git a/crates/pica-toolkit/tests/data/1029350469_r3.dat b/crates/pica-toolkit/tests/data/1029350469_r3.dat deleted file mode 100644 index dd4d0468e..000000000 --- a/crates/pica-toolkit/tests/data/1029350469_r3.dat +++ /dev/null @@ -1 +0,0 @@ -003@ 01029350469041A/01 90403677467TszVsazAgnd04036774-5aLyrik041A/02 90404644907Ts1VsazAgnd04046449-0aPoetik041A/09 eDE-101rDE-101 diff --git a/crates/pica-toolkit/tests/data/1029350469_r4.dat b/crates/pica-toolkit/tests/data/1029350469_r4.dat deleted file mode 100644 index 69de98230..000000000 --- a/crates/pica-toolkit/tests/data/1029350469_r4.dat +++ /dev/null @@ -1 +0,0 @@ -003@ 01029350469041A/01 90403677467TszVsazAgnd04036774-5aLyrik041A/02 90404644907Ts1VsazAgnd04046449-0aPoetik041A/09 eDE-101rDE-101041A/20 90412245907TpzVpizAgnd0118540238E1749G1832dJohann WolfgangaGoethecvon7Tu1VwitAgnd04122459-0tTorquato Tasso041A/21 90409923737Tp1VpizAgnd0118551981E1770G1843dFriedrichaHölderlin7Tu1VwitAgnd04099237-8tDer @Tod des Empedokles041A/22 904099273X7Tp1VpizAgnd0118563076E1777G1811dHeinrichaKleistcvon7Tu1VwitAgnd04099273-1tPrinz Friedrich von Homburg041A/29 eDE-101rDE-101 diff --git a/crates/pica-toolkit/tests/data/118515551.dat b/crates/pica-toolkit/tests/data/118515551.dat deleted file mode 100644 index 6530c91b0..000000000 --- a/crates/pica-toolkit/tests/data/118515551.dat +++ /dev/null @@ -1 +0,0 @@ -001A 01250:01-07-88001B 01140:06-02-20t08:16:26.000001D 09999:06-04-08001U 0utf8001X 00002@ 0Tp1003@ 0118515551003U ahttp://d-nb.info/gnd/118515551zhttp://d-nb.info/gnd/185847277004B apiz007K agnd0118515551007N agnd0185847277007N apnd0185847277vzg007N apnd0118515551vzg007N apnd0131290991007N apnd0127231935007N aswd04008313-5vzg008A asafazad008B avawakaeamao010E bger028@ dHermanaBroch028@ dHermanaBroh028@ dChermanaMproch028@ dGermanaBroch028@ dHermanaBroxi028@ T01UKored헤르만a브로흐5DE-576028@ T01UHebrdהרמןaברוך5DE-576028@ T01UJpanPヘルマン・ブロッホ5DE-576028A dHermannaBroch028P dHermannaBrochSDLC0n 790658212nafv1886-1951028R 91165257037Tp1VpizAgnd0116525703E1910G1994dHermann F.aBroch de Rothermann4bezfvSohn028R 91168169027Tp1VpizAgnd0116816902E1884G1949dAliceaSchmutzer4bezfvCousine028R 91230011457Tp1VpizAgnd0123001145E1881G1976dGeorgaMerkel4bezavFreund028R 912987468X7Tp1VpizAgnd012987468XE1906G1977dRuthaNorden4beza028R 911569612117Tp3VpipAgnd01156961211dK. L.aHib4pseu032T am041R 90404579077Ts1VsazAgnd04045790-4aPhilosoph4berc041R 90402878157Ts1VsazAgnd04028781-6aJournalist4beru041R 90405330937TszVsazAgnd04053309-8aSchriftsteller4beru041R 91991064607Ts7Agnd07773640-0aTextilingenieur4beru042A a12.2pa4.7p042B aXA-ATaXD-US042C ager046G aBroch, Hermann: James Joyce und die Gegenwart. - 1936047A/03 eDE-101047A/03 rDE-101047C SpndiaaBroch, Hermann0118515551047C SpndiaaBroch, Hermann0185847277050C aOENAK050E aArchiv der American Guild for German Cultural Freedom, New York im Deutschen Exilarchiv 1933 - 1945050E aB 1986050E aLCAuth050E aNDB050E aKosch Lit.050E aM050E aBiogr. H Emigr.050E aWikipediauhttp://de.wikipedia.org/wiki/Hermann%5FBroch050E aM; B 1986; LoC-NA; NDB; Kosch Lit.; Biogr. H Emigr050G bKulturphilosoph, ursprünglich Textilingenieur, emigrierte 1938 in die USA; Vater des Hermann Friedrich Broch050G bemigrierte 1938 in die USA050G bSchriftsteller, Österreich, USA060R a01.11.1886b30.05.19514datx060R a1886b19514datl065R 90406600957TgzVgikAgnd04066009-6aWien4ortg065R 90411785487Tg1VgikAgnd04117854-3aNew Haven, Conn.4orts065R 90411785487Tg1VgikAgnd04117854-3aNew Haven, Conn.4ortw065R 90407870447TgzVgikAgnd04078704-7aUSA4ortxZ1938070A/03 0(DE-588)118515551070A/03 SIDS0110070566070A/03 SIDS0120055561 diff --git a/crates/pica-toolkit/tests/data/118515551.dat.gz b/crates/pica-toolkit/tests/data/118515551.dat.gz deleted file mode 100644 index d72ebc665..000000000 Binary files a/crates/pica-toolkit/tests/data/118515551.dat.gz and /dev/null differ diff --git a/crates/pica-toolkit/tests/data/119232022.dat.gz b/crates/pica-toolkit/tests/data/119232022.dat.gz deleted file mode 100644 index 69095ae21..000000000 Binary files a/crates/pica-toolkit/tests/data/119232022.dat.gz and /dev/null differ diff --git a/crates/pica-toolkit/tests/data/121169502.dat b/crates/pica-toolkit/tests/data/121169502.dat deleted file mode 100644 index 4f65c14a5..000000000 --- a/crates/pica-toolkit/tests/data/121169502.dat +++ /dev/null @@ -1 +0,0 @@ -001@ 0-001A 00386:17-06-99001B 09999:17-10-16t17:00:44.000001D 09999:06-04-08001U 0utf8001X 00002@ 0Tp1003@ 0121169502003U ahttp://d-nb.info/gnd/121169502zhttp://d-nb.info/gnd/183361946004B apiz007K agnd0121169502007N agnd0183361946007N apnd0183361946vzg007N apnd0121169502vzg007N aswd04549141-0vzg008A asaf008B avaw028@ dHeikeaKlußmann028A dHeikeaKlussmann041R aInstallationskünstlerin4berc041R aPhotographin4beru042A a13.7pa13.5p042B aXA-DE047A/03 eDE-386047A/03 rDE-576047C SpndiaaKlussmann, Heike0121169502047C SpndiaaKlussmann, Heike0183361946050C aNDSBIO050E aInternet060R a19684datl diff --git a/crates/pica-toolkit/tests/data/algebra.dat.gz b/crates/pica-toolkit/tests/data/algebra.dat.gz deleted file mode 100644 index 9176fb738..000000000 Binary files a/crates/pica-toolkit/tests/data/algebra.dat.gz and /dev/null differ diff --git a/crates/pica-toolkit/tests/data/allow_list.arrow b/crates/pica-toolkit/tests/data/allow_list.arrow deleted file mode 100644 index e65d3c4c4..000000000 Binary files a/crates/pica-toolkit/tests/data/allow_list.arrow and /dev/null differ diff --git a/crates/pica-toolkit/tests/data/allow_list.csv b/crates/pica-toolkit/tests/data/allow_list.csv deleted file mode 100644 index 73c689d12..000000000 --- a/crates/pica-toolkit/tests/data/allow_list.csv +++ /dev/null @@ -1,3 +0,0 @@ -1004916019 -119232022 -000008672 diff --git a/crates/pica-toolkit/tests/data/deny_list.arrow b/crates/pica-toolkit/tests/data/deny_list.arrow deleted file mode 100644 index fa733b622..000000000 Binary files a/crates/pica-toolkit/tests/data/deny_list.arrow and /dev/null differ diff --git a/crates/pica-toolkit/tests/data/deny_list.csv b/crates/pica-toolkit/tests/data/deny_list.csv deleted file mode 100644 index e179109d8..000000000 --- a/crates/pica-toolkit/tests/data/deny_list.csv +++ /dev/null @@ -1,2 +0,0 @@ -119232022 -000008672 diff --git a/crates/pica-toolkit/tests/data/dollar.dat b/crates/pica-toolkit/tests/data/dollar.dat deleted file mode 100644 index b24542283..000000000 --- a/crates/pica-toolkit/tests/data/dollar.dat +++ /dev/null @@ -1 +0,0 @@ -012A a$bcab diff --git a/crates/pica-toolkit/tests/data/dollar.txt b/crates/pica-toolkit/tests/data/dollar.txt deleted file mode 100644 index 955492355..000000000 --- a/crates/pica-toolkit/tests/data/dollar.txt +++ /dev/null @@ -1,2 +0,0 @@ -012A $a$$bc$ab - diff --git a/crates/pica-toolkit/tests/data/dump.csv b/crates/pica-toolkit/tests/data/dump.csv deleted file mode 100644 index 7eccc1af3..000000000 --- a/crates/pica-toolkit/tests/data/dump.csv +++ /dev/null @@ -1,7 +0,0 @@ -1004916019,foo,Ts1 -119232022,foo,Tp1 -000008672,foo,Tb1 -000016586,foo,Tb1 -000016756,foo,Tb1 -000009229,foo,Tb1 -121169502,foo,Tp1 diff --git a/crates/pica-toolkit/tests/data/dump.dat.gz b/crates/pica-toolkit/tests/data/dump.dat.gz deleted file mode 100644 index 68e51fbde..000000000 Binary files a/crates/pica-toolkit/tests/data/dump.dat.gz and /dev/null differ diff --git a/crates/pica-toolkit/tests/data/dump.json b/crates/pica-toolkit/tests/data/dump.json deleted file mode 100644 index dec413fb1..000000000 --- a/crates/pica-toolkit/tests/data/dump.json +++ /dev/null @@ -1 +0,0 @@ -[{"fields":[{"tag":"001A","subfields":[{"tag":"0","value":"8999:22-07-10"}]},{"tag":"001B","subfields":[{"tag":"0","value":"1250:10-09-14"},{"tag":"t","value":"08:28:16.000"}]},{"tag":"001D","subfields":[{"tag":"0","value":"0384:27-07-10"}]},{"tag":"001U","subfields":[{"tag":"0","value":"utf8"}]},{"tag":"001X","subfields":[{"tag":"0","value":"0"}]},{"tag":"002@","subfields":[{"tag":"0","value":"Ts1"}]},{"tag":"003@","subfields":[{"tag":"0","value":"1004916019"}]},{"tag":"003U","subfields":[{"tag":"a","value":"http://d-nb.info/gnd/7710287-3"}]},{"tag":"004B","subfields":[{"tag":"a","value":"sip"}]},{"tag":"007K","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"7710287-3"}]},{"tag":"007N","subfields":[{"tag":"a","value":"swd"},{"tag":"0","value":"7710287-3"},{"tag":"v","value":"zg"}]},{"tag":"008A","subfields":[{"tag":"a","value":"s"}]},{"tag":"029R","subfields":[{"tag":"9","value":"952570254"},{"tag":"7","value":"Tb1"},{"tag":"V","value":"kiz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"5263070-5"},{"tag":"a","value":"Chrysler Corporation"},{"tag":"4","value":"hers"}]},{"tag":"041A","subfields":[{"tag":"a","value":"Plymouth"},{"tag":"g","value":"Marke"}]},{"tag":"041R","subfields":[{"tag":"9","value":"041145135"},{"tag":"7","value":"Ts1"},{"tag":"V","value":"saz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4114513-6"},{"tag":"a","value":"Markenname"},{"tag":"4","value":"obin"}]},{"tag":"042A","subfields":[{"tag":"a","value":"31.7"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"e","value":"DE-210"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"r","value":"DE-384"}]},{"tag":"047C","subfields":[{"tag":"S","value":"swd"},{"tag":"i","value":"s"},{"tag":"a","value":"Plymouth "},{"tag":"0","value":"7710287-3"}]},{"tag":"050D","subfields":[{"tag":"a","value":"Kombiniere mit einer Produktgruppe, z.B. Personenkraftwagen"}]},{"tag":"050E","subfields":[{"tag":"a","value":"Wikipedia, Internet"},{"tag":"u","value":"http://www.mobile.de/modellverzeichnis/plymouth/"}]},{"tag":"050H","subfields":[{"tag":"a","value":"Markenname"}]}]},{"fields":[{"tag":"001A","subfields":[{"tag":"0","value":"0386:16-03-95"}]},{"tag":"001B","subfields":[{"tag":"0","value":"8999:20-07-20"},{"tag":"t","value":"13:19:49.000"}]},{"tag":"001D","subfields":[{"tag":"0","value":"9999:06-04-08"}]},{"tag":"001U","subfields":[{"tag":"0","value":"utf8"}]},{"tag":"001X","subfields":[{"tag":"0","value":"0"}]},{"tag":"002@","subfields":[{"tag":"0","value":"Tp1"}]},{"tag":"003@","subfields":[{"tag":"0","value":"119232022"}]},{"tag":"003U","subfields":[{"tag":"a","value":"http://d-nb.info/gnd/119232022"},{"tag":"z","value":"http://d-nb.info/gnd/172642531"}]},{"tag":"004B","subfields":[{"tag":"a","value":"pik"}]},{"tag":"007K","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"119232022"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"172642531"}]},{"tag":"007N","subfields":[{"tag":"a","value":"pnd"},{"tag":"0","value":"172642531"},{"tag":"v","value":"zg"}]},{"tag":"007N","subfields":[{"tag":"a","value":"pnd"},{"tag":"0","value":"119232022"},{"tag":"v","value":"zg"}]},{"tag":"007N","subfields":[{"tag":"a","value":"swd"},{"tag":"0","value":"4370325-2"},{"tag":"v","value":"zg"}]},{"tag":"008A","subfields":[{"tag":"a","value":"s"},{"tag":"a","value":"z"},{"tag":"a","value":"f"}]},{"tag":"008B","subfields":[{"tag":"a","value":"w"},{"tag":"a","value":"k"},{"tag":"a","value":"v"}]},{"tag":"010E","subfields":[{"tag":"e","value":"rda"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Ada K."},{"tag":"c","value":"of"},{"tag":"a","value":"Lovelace"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Augusta Ada"},{"tag":"c","value":"of"},{"tag":"a","value":"Lovelace"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Ada Augusta"},{"tag":"c","value":"of"},{"tag":"a","value":"Lovelace"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Ada"},{"tag":"a","value":"Byron"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Augusta Ada"},{"tag":"a","value":"Byron King"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Augusta Ada"},{"tag":"a","value":"King"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Ada"},{"tag":"a","value":"King"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Ada Augusta"},{"tag":"a","value":"Byron"},{"tag":"4","value":"nafr"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Augusta Ada"},{"tag":"a","value":"Byron"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Ada"},{"tag":"a","value":"Byron Lovelace"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Ada"},{"tag":"a","value":"Lovelace"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Ada King, Countess of"},{"tag":"a","value":"Lovelace"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Augusta Ada King"},{"tag":"a","value":"Lovelace"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Augusta Ada"},{"tag":"a","value":"Lovelace"}]},{"tag":"028A","subfields":[{"tag":"d","value":"Ada King"},{"tag":"c","value":"of"},{"tag":"a","value":"Lovelace"}]},{"tag":"028R","subfields":[{"tag":"9","value":"118518208"},{"tag":"7","value":"Tp1"},{"tag":"V","value":"piz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"118518208"},{"tag":"E","value":"1788"},{"tag":"G","value":"1824"},{"tag":"d","value":"George Gordon Byron"},{"tag":"a","value":"Byron"},{"tag":"l","value":"Baron"},{"tag":"4","value":"bezf"},{"tag":"v","value":"Vater"}]},{"tag":"028R","subfields":[{"tag":"9","value":"118638130"},{"tag":"7","value":"Tp1"},{"tag":"V","value":"piz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"118638130"},{"tag":"E","value":"1792"},{"tag":"G","value":"1860"},{"tag":"d","value":"Anne Isabella Milbanke Byron"},{"tag":"a","value":"Byron"},{"tag":"4","value":"bezf"},{"tag":"v","value":"Mutter"}]},{"tag":"028R","subfields":[{"tag":"9","value":"119389991"},{"tag":"7","value":"Tp1"},{"tag":"V","value":"piz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"119389991"},{"tag":"E","value":"1837"},{"tag":"G","value":"1917"},{"tag":"d","value":"Anne Isabella"},{"tag":"a","value":"Blunt"},{"tag":"4","value":"bezf"},{"tag":"v","value":"Tochter"}]},{"tag":"028R","subfields":[{"tag":"d","value":"william"},{"tag":"a","value":"king"},{"tag":"4","value":"bezf"}]},{"tag":"032T","subfields":[{"tag":"a","value":"f"}]},{"tag":"041R","subfields":[{"tag":"9","value":"042527880"},{"tag":"7","value":"Ts1"},{"tag":"V","value":"saz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4252788-0"},{"tag":"a","value":"Mathematikerin"},{"tag":"4","value":"berc"}]},{"tag":"042A","subfields":[{"tag":"a","value":"28p"},{"tag":"a","value":"9.5p"}]},{"tag":"042B","subfields":[{"tag":"a","value":"XA-GB"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"e","value":"DE-386"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"r","value":"DE-576"}]},{"tag":"047C","subfields":[{"tag":"S","value":"pnd"},{"tag":"i","value":"a"},{"tag":"a","value":"Lovelace, Ada King /of"},{"tag":"0","value":"119232022"}]},{"tag":"047C","subfields":[{"tag":"S","value":"pnd"},{"tag":"i","value":"a"},{"tag":"a","value":"Lovelace, Ada K. /of"},{"tag":"0","value":"172642531"}]},{"tag":"050C","subfields":[{"tag":"a","value":"Der Ehemann Baron William King (1805-1893) wurde 1838 zum 1. Earl of Lovelace erhoben."}]},{"tag":"050E","subfields":[{"tag":"a","value":"LoC-Na gegen Modern Engl. biogr."}]},{"tag":"050E","subfields":[{"tag":"a","value":"https://de.wikipedia.org/wiki/Ada_Lovelace"}]},{"tag":"050E","subfields":[{"tag":"a","value":"LCAuth, (OGND)"}]},{"tag":"050G","subfields":[{"tag":"b","value":"Brit. Mathematikerin; Countess of Lovelace"}]},{"tag":"050G","subfields":[{"tag":"b","value":"Informatikerin, Mathematikerin, Grossbritannien"}]},{"tag":"060R","subfields":[{"tag":"a","value":"10.12.1815"},{"tag":"b","value":"27.12.1852"},{"tag":"4","value":"datx"}]},{"tag":"060R","subfields":[{"tag":"a","value":"1815"},{"tag":"b","value":"1852"},{"tag":"4","value":"datl"}]},{"tag":"065R","subfields":[{"tag":"9","value":"040743357"},{"tag":"7","value":"Tgz"},{"tag":"V","value":"gik"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4074335-4"},{"tag":"a","value":"London"},{"tag":"4","value":"ortg"}]},{"tag":"065R","subfields":[{"tag":"9","value":"040743357"},{"tag":"7","value":"Tgz"},{"tag":"V","value":"gik"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4074335-4"},{"tag":"a","value":"London"},{"tag":"4","value":"orts"}]},{"tag":"070A","occurrence":"03","subfields":[{"tag":"0","value":"(DE-588)119232022"}]}]},{"fields":[{"tag":"001@","subfields":[{"tag":"0","value":"-"}]},{"tag":"001A","subfields":[{"tag":"0","value":"9002:18-04-89"}]},{"tag":"001B","subfields":[{"tag":"0","value":"9999:27-09-17"},{"tag":"t","value":"00:43:48.000"}]},{"tag":"001D","subfields":[{"tag":"0","value":"9999:23-04-10"}]},{"tag":"001U","subfields":[{"tag":"0","value":"utf8"}]},{"tag":"001X","subfields":[{"tag":"0","value":"0"}]},{"tag":"002@","subfields":[{"tag":"0","value":"Tb1"}]},{"tag":"003@","subfields":[{"tag":"0","value":"000008672"}]},{"tag":"003U","subfields":[{"tag":"a","value":"http://d-nb.info/gnd/867-9"},{"tag":"z","value":"http://d-nb.info/gnd/7538748-7"}]},{"tag":"004B","subfields":[{"tag":"a","value":"kiz"}]},{"tag":"007K","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"867-9"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"7538748-7"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"1085295990"}]},{"tag":"007N","subfields":[{"tag":"a","value":"swd"},{"tag":"0","value":"7538748-7"},{"tag":"v","value":"zg"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gkd"},{"tag":"0","value":"867-9"},{"tag":"v","value":"zg"}]},{"tag":"008A","subfields":[{"tag":"a","value":"f"},{"tag":"a","value":"s"}]},{"tag":"008B","subfields":[{"tag":"a","value":"z"},{"tag":"a","value":"v"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Vacuum Society"},{"tag":"g","value":"USA"}]},{"tag":"029@","subfields":[{"tag":"a","value":"AVS"},{"tag":"4","value":"abku"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Committee on Vacuum Techniques"}]},{"tag":"029@","subfields":[{"tag":"a","value":"CVT"},{"tag":"4","value":"abku"}]},{"tag":"029A","subfields":[{"tag":"a","value":"American Vacuum Society"}]},{"tag":"029R","subfields":[{"tag":"9","value":"984002073"},{"tag":"7","value":"Tb1"},{"tag":"A","value":"gnd"},{"tag":"0","value":"10168051-X"},{"tag":"a","value":"AVS, the Science and Technology Society"},{"tag":"4","value":"nach"}]},{"tag":"041R","subfields":[{"tag":"9","value":"040622665"},{"tag":"7","value":"Ts1"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4062266-6"},{"tag":"a","value":"Vakuum"},{"tag":"4","value":"them"}]},{"tag":"041R","subfields":[{"tag":"9","value":"04066581X"},{"tag":"7","value":"Ts1"},{"tag":"V","value":"saz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4066581-1"},{"tag":"a","value":"Wissenschaftliche Gesellschaft"},{"tag":"4","value":"obin"}]},{"tag":"042A","subfields":[{"tag":"a","value":"6.5"},{"tag":"a","value":"9.3c"},{"tag":"a","value":"21.5"},{"tag":"a","value":"31.1b"},{"tag":"a","value":"31.9a"}]},{"tag":"042B","subfields":[{"tag":"a","value":"XD-US"}]},{"tag":"042B","subfields":[{"tag":"a","value":"XD-US"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"e","value":"DE-1"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"r","value":"DE-1"}]},{"tag":"047C","subfields":[{"tag":"S","value":"gkd"},{"tag":"i","value":"a"},{"tag":"a","value":"American Vacuum Society"},{"tag":"0","value":"867-9"}]},{"tag":"047C","subfields":[{"tag":"S","value":"swd"},{"tag":"i","value":"k"},{"tag":"a","value":"American Vacuum Society"},{"tag":"0","value":"7538748-7"}]},{"tag":"050C","subfields":[{"tag":"a","value":"MMi"}]},{"tag":"050E","subfields":[{"tag":"a","value":"GKD"}]},{"tag":"050H","subfields":[{"tag":"a","value":"1953 gegr. wiss. Ges., die sich mit Problemen des Vakuums und vor allem seiner Anwendung in der Technik beschäftigt"}]},{"tag":"065R","subfields":[{"tag":"9","value":"040787044"},{"tag":"7","value":"Tgz"},{"tag":"V","value":"gik"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4078704-7"},{"tag":"a","value":"USA"},{"tag":"4","value":"geow"}]}]},{"fields":[{"tag":"001@","subfields":[{"tag":"0","value":"-"}]},{"tag":"001A","subfields":[{"tag":"0","value":"9002:18-04-89"}]},{"tag":"001B","subfields":[{"tag":"0","value":"1240:19-04-17"},{"tag":"t","value":"14:37:25.000"}]},{"tag":"001D","subfields":[{"tag":"0","value":"9999:23-04-10"}]},{"tag":"001U","subfields":[{"tag":"0","value":"utf8"}]},{"tag":"001X","subfields":[{"tag":"0","value":"0"}]},{"tag":"002@","subfields":[{"tag":"0","value":"Tb1"}]},{"tag":"003@","subfields":[{"tag":"0","value":"000016586"}]},{"tag":"003U","subfields":[{"tag":"a","value":"http://d-nb.info/gnd/1658-5"},{"tag":"z","value":"http://d-nb.info/gnd/4318278-1"}]},{"tag":"004B","subfields":[{"tag":"a","value":"kiz"}]},{"tag":"007K","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"1658-5"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"4318278-1"}]},{"tag":"007N","subfields":[{"tag":"a","value":"swd"},{"tag":"0","value":"4318278-1"},{"tag":"v","value":"zg"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gkd"},{"tag":"0","value":"16291087-3"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gkd"},{"tag":"0","value":"1658-5"},{"tag":"v","value":"zg"}]},{"tag":"008A","subfields":[{"tag":"a","value":"f"},{"tag":"a","value":"s"}]},{"tag":"008B","subfields":[{"tag":"a","value":"z"},{"tag":"a","value":"v"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Univ. of Minnesota, Minneapolis"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Universidad de Minnesota"}]},{"tag":"029@","subfields":[{"tag":"a","value":"U of M"}]},{"tag":"029@","subfields":[{"tag":"a","value":"UM"},{"tag":"4","value":"abku"}]},{"tag":"029A","subfields":[{"tag":"a","value":"University of Minnesota"}]},{"tag":"042A","subfields":[{"tag":"a","value":"6.6"}]},{"tag":"042B","subfields":[{"tag":"a","value":"XD-US"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"e","value":"DE-1"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"r","value":"DE-1"}]},{"tag":"047C","subfields":[{"tag":"S","value":"gkd"},{"tag":"i","value":"a"},{"tag":"a","value":"University of Minnesota "},{"tag":"0","value":"1658-5"}]},{"tag":"047C","subfields":[{"tag":"S","value":"swd"},{"tag":"i","value":"c"},{"tag":"a","value":"Minneapolis / University of Minnesota"},{"tag":"0","value":"4318278-1"}]},{"tag":"050E","subfields":[{"tag":"a","value":"Homepage"},{"tag":"b","value":"Stand: 19.04.2017"},{"tag":"u","value":"http://www.umn.edu"}]},{"tag":"060R","subfields":[{"tag":"a","value":"1851"},{"tag":"4","value":"datb"}]},{"tag":"065R","subfields":[{"tag":"9","value":"040394972"},{"tag":"7","value":"Tg1"},{"tag":"V","value":"gik"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4039497-9"},{"tag":"a","value":"Minneapolis, Minn."},{"tag":"4","value":"orta"}]},{"tag":"070A","occurrence":"03","subfields":[{"tag":"S","value":"IDS"},{"tag":"0","value":"320104243"}]}]},{"fields":[{"tag":"001@","subfields":[{"tag":"0","value":"-"}]},{"tag":"001A","subfields":[{"tag":"0","value":"9002:18-04-89"}]},{"tag":"001B","subfields":[{"tag":"0","value":"1601:26-11-19"},{"tag":"t","value":"10:50:14.000"}]},{"tag":"001D","subfields":[{"tag":"0","value":"9999:23-04-10"}]},{"tag":"001U","subfields":[{"tag":"0","value":"utf8"}]},{"tag":"001X","subfields":[{"tag":"0","value":"0"}]},{"tag":"002@","subfields":[{"tag":"0","value":"Tb1"}]},{"tag":"003@","subfields":[{"tag":"0","value":"000016756"}]},{"tag":"003U","subfields":[{"tag":"a","value":"http://d-nb.info/gnd/1675-5"},{"tag":"z","value":"http://d-nb.info/gnd/1088210104"},{"tag":"z","value":"http://d-nb.info/gnd/7542287-6"},{"tag":"z","value":"http://d-nb.info/gnd/1086256751"}]},{"tag":"004B","subfields":[{"tag":"a","value":"kiz"}]},{"tag":"007K","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"1675-5"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"1086256751"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"7542287-6"}]},{"tag":"007N","subfields":[{"tag":"a","value":"swd"},{"tag":"0","value":"7542287-6"},{"tag":"v","value":"zg"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"1088210104"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gkd"},{"tag":"0","value":"1675-5"},{"tag":"v","value":"zg"}]},{"tag":"008A","subfields":[{"tag":"a","value":"f"},{"tag":"a","value":"s"}]},{"tag":"008B","subfields":[{"tag":"a","value":"v"},{"tag":"a","value":"z"}]},{"tag":"010E","subfields":[{"tag":"b","value":"ger"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Institut International de Philosophie"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Internationales Institut für Philosophie"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Instituto Internacional de Filosofia"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Institute of Philosophy"},{"tag":"g","value":"International Institute of Philosophie"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Institut für Philosophie"}]},{"tag":"029@","subfields":[{"tag":"a","value":"IIP"},{"tag":"4","value":"abku"}]},{"tag":"029A","subfields":[{"tag":"a","value":"International Institute of Philosophy"}]},{"tag":"029R","subfields":[{"tag":"9","value":"000261246"},{"tag":"7","value":"Tb1"},{"tag":"V","value":"kiz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"26124-5"},{"tag":"a","value":"Institut International de Collaboration Philosophique"},{"tag":"g","value":"Paris"},{"tag":"4","value":"vorg"}]},{"tag":"041R","subfields":[{"tag":"9","value":"041900944"},{"tag":"7","value":"Ts1"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4190094-7"},{"tag":"a","value":"Wissenschaftliche Einrichtung"},{"tag":"4","value":"obin"}]},{"tag":"042A","subfields":[{"tag":"a","value":"6.5"},{"tag":"a","value":"4.1"}]},{"tag":"042B","subfields":[{"tag":"a","value":"XP"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"e","value":"DE-1"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"r","value":"DE-1"}]},{"tag":"047C","subfields":[{"tag":"S","value":"gkd"},{"tag":"i","value":"a"},{"tag":"a","value":"International Institute of Philosophy"},{"tag":"0","value":"1675-5"}]},{"tag":"047C","subfields":[{"tag":"S","value":"swd"},{"tag":"i","value":"c"},{"tag":"a","value":"Paris / Internationales Institut für Philosophie"},{"tag":"0","value":"7542287-6"}]},{"tag":"050E","subfields":[{"tag":"a","value":"Yearb. 1995"}]},{"tag":"060R","subfields":[{"tag":"a","value":"1937"},{"tag":"4","value":"datb"}]},{"tag":"065R","subfields":[{"tag":"9","value":"040181456"},{"tag":"7","value":"Tgz"},{"tag":"V","value":"gik"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4018145-5"},{"tag":"a","value":"Frankreich"},{"tag":"4","value":"geow"}]},{"tag":"065R","subfields":[{"tag":"9","value":"040446603"},{"tag":"7","value":"Tgz"},{"tag":"V","value":"gik"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4044660-8"},{"tag":"a","value":"Paris"},{"tag":"4","value":"orta"}]},{"tag":"070A","occurrence":"03","subfields":[{"tag":"S","value":"IDS"},{"tag":"0","value":"000000307"}]}]},{"fields":[{"tag":"001@","subfields":[{"tag":"0","value":"-"}]},{"tag":"001A","subfields":[{"tag":"0","value":"9002:18-04-89"}]},{"tag":"001B","subfields":[{"tag":"0","value":"9999:05-06-20"},{"tag":"t","value":"05:40:04.000"}]},{"tag":"001D","subfields":[{"tag":"0","value":"9999:23-04-10"}]},{"tag":"001U","subfields":[{"tag":"0","value":"utf8"}]},{"tag":"001X","subfields":[{"tag":"0","value":"0"}]},{"tag":"002@","subfields":[{"tag":"0","value":"Tb1"}]},{"tag":"003@","subfields":[{"tag":"0","value":"000009229"}]},{"tag":"003U","subfields":[{"tag":"a","value":"http://d-nb.info/gnd/922-2"},{"tag":"z","value":"http://d-nb.info/gnd/4499175-7"},{"tag":"z","value":"http://d-nb.info/gnd/1090453043"}]},{"tag":"004B","subfields":[{"tag":"a","value":"kiz"}]},{"tag":"007K","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"922-2"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"1090453043"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"4499175-7"}]},{"tag":"007N","subfields":[{"tag":"a","value":"swd"},{"tag":"0","value":"4499175-7"},{"tag":"v","value":"zg"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gkd"},{"tag":"0","value":"922-2"},{"tag":"v","value":"zg"}]},{"tag":"008A","subfields":[{"tag":"a","value":"f"},{"tag":"a","value":"s"}]},{"tag":"008B","subfields":[{"tag":"a","value":"z"},{"tag":"a","value":"v"},{"tag":"a","value":"w"},{"tag":"a","value":"e"}]},{"tag":"010E","subfields":[{"tag":"b","value":"ger"},{"tag":"e","value":"rda"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Cancer Society"},{"tag":"g","value":"USA"}]},{"tag":"029@","subfields":[{"tag":"a","value":"ACS"},{"tag":"4","value":"abku"}]},{"tag":"029A","subfields":[{"tag":"a","value":"American Cancer Society"}]},{"tag":"029R","subfields":[{"tag":"9","value":"005078091"},{"tag":"7","value":"Tb1"},{"tag":"V","value":"kiz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"507809-X"},{"tag":"a","value":"American Society for the Control of Cancer"},{"tag":"4","value":"vorg"}]},{"tag":"042A","subfields":[{"tag":"a","value":"27.4"}]},{"tag":"042B","subfields":[{"tag":"a","value":"XD-US"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"e","value":"DE-1"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"r","value":"DE-1"}]},{"tag":"047C","subfields":[{"tag":"S","value":"gkd"},{"tag":"i","value":"a"},{"tag":"a","value":"American Cancer Society"},{"tag":"0","value":"922-2"}]},{"tag":"047C","subfields":[{"tag":"S","value":"swd"},{"tag":"i","value":"k"},{"tag":"a","value":"American Cancer Society"},{"tag":"0","value":"4499175-7"}]},{"tag":"050E","subfields":[{"tag":"a","value":"GKD"}]},{"tag":"050E","subfields":[{"tag":"a","value":"Homepage"},{"tag":"b","value":"Stand: 08.10.2018"},{"tag":"u","value":"https://www.cancer.org"}]},{"tag":"060R","subfields":[{"tag":"a","value":"1945"},{"tag":"4","value":"datb"}]},{"tag":"065R","subfields":[{"tag":"9","value":"040787044"},{"tag":"7","value":"Tgz"},{"tag":"V","value":"gik"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4078704-7"},{"tag":"a","value":"USA"},{"tag":"4","value":"geow"}]},{"tag":"070A","occurrence":"03","subfields":[{"tag":"0","value":"(DE-588)922-2"}]}]},{"fields":[{"tag":"001@","subfields":[{"tag":"0","value":"-"}]},{"tag":"001A","subfields":[{"tag":"0","value":"0386:17-06-99"}]},{"tag":"001B","subfields":[{"tag":"0","value":"9999:17-10-16"},{"tag":"t","value":"17:00:44.000"}]},{"tag":"001D","subfields":[{"tag":"0","value":"9999:06-04-08"}]},{"tag":"001U","subfields":[{"tag":"0","value":"utf8"}]},{"tag":"001X","subfields":[{"tag":"0","value":"0"}]},{"tag":"002@","subfields":[{"tag":"0","value":"Tp1"}]},{"tag":"003@","subfields":[{"tag":"0","value":"121169502"}]},{"tag":"003U","subfields":[{"tag":"a","value":"http://d-nb.info/gnd/121169502"},{"tag":"z","value":"http://d-nb.info/gnd/183361946"}]},{"tag":"004B","subfields":[{"tag":"a","value":"piz"}]},{"tag":"007K","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"121169502"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"183361946"}]},{"tag":"007N","subfields":[{"tag":"a","value":"pnd"},{"tag":"0","value":"183361946"},{"tag":"v","value":"zg"}]},{"tag":"007N","subfields":[{"tag":"a","value":"pnd"},{"tag":"0","value":"121169502"},{"tag":"v","value":"zg"}]},{"tag":"007N","subfields":[{"tag":"a","value":"swd"},{"tag":"0","value":"4549141-0"},{"tag":"v","value":"zg"}]},{"tag":"008A","subfields":[{"tag":"a","value":"s"},{"tag":"a","value":"f"}]},{"tag":"008B","subfields":[{"tag":"a","value":"v"},{"tag":"a","value":"w"}]},{"tag":"028@","subfields":[{"tag":"d","value":"Heike"},{"tag":"a","value":"Klußmann"}]},{"tag":"028A","subfields":[{"tag":"d","value":"Heike"},{"tag":"a","value":"Klussmann"}]},{"tag":"041R","subfields":[{"tag":"a","value":"Installationskünstlerin"},{"tag":"4","value":"berc"}]},{"tag":"041R","subfields":[{"tag":"a","value":"Photographin"},{"tag":"4","value":"beru"}]},{"tag":"042A","subfields":[{"tag":"a","value":"13.7p"},{"tag":"a","value":"13.5p"}]},{"tag":"042B","subfields":[{"tag":"a","value":"XA-DE"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"e","value":"DE-386"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"r","value":"DE-576"}]},{"tag":"047C","subfields":[{"tag":"S","value":"pnd"},{"tag":"i","value":"a"},{"tag":"a","value":"Klussmann, Heike"},{"tag":"0","value":"121169502"}]},{"tag":"047C","subfields":[{"tag":"S","value":"pnd"},{"tag":"i","value":"a"},{"tag":"a","value":"Klussmann, Heike"},{"tag":"0","value":"183361946"}]},{"tag":"050C","subfields":[{"tag":"a","value":"NDSBIO"}]},{"tag":"050E","subfields":[{"tag":"a","value":"Internet"}]},{"tag":"060R","subfields":[{"tag":"a","value":"1968"},{"tag":"4","value":"datl"}]}]}] \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/dump.txt b/crates/pica-toolkit/tests/data/dump.txt deleted file mode 100644 index 844b56f79..000000000 --- a/crates/pica-toolkit/tests/data/dump.txt +++ /dev/null @@ -1,254 +0,0 @@ -001A $08999:22-07-10 -001B $01250:10-09-14$t08:28:16.000 -001D $00384:27-07-10 -001U $0utf8 -001X $00 -002@ $0Ts1 -003@ $01004916019 -003U $ahttp://d-nb.info/gnd/7710287-3 -004B $asip -007K $agnd$07710287-3 -007N $aswd$07710287-3$vzg -008A $as -029R $9952570254$7Tb1$Vkiz$Agnd$05263070-5$aChrysler Corporation$4hers -041A $aPlymouth$gMarke -041R $9041145135$7Ts1$Vsaz$Agnd$04114513-6$aMarkenname$4obin -042A $a31.7 -047A/03 $eDE-210 -047A/03 $rDE-384 -047C $Sswd$is$aPlymouth $07710287-3 -050D $aKombiniere mit einer Produktgruppe, z.B. Personenkraftwagen -050E $aWikipedia, Internet$uhttp://www.mobile.de/modellverzeichnis/plymouth/ -050H $aMarkenname - -001A $00386:16-03-95 -001B $08999:20-07-20$t13:19:49.000 -001D $09999:06-04-08 -001U $0utf8 -001X $00 -002@ $0Tp1 -003@ $0119232022 -003U $ahttp://d-nb.info/gnd/119232022$zhttp://d-nb.info/gnd/172642531 -004B $apik -007K $agnd$0119232022 -007N $agnd$0172642531 -007N $apnd$0172642531$vzg -007N $apnd$0119232022$vzg -007N $aswd$04370325-2$vzg -008A $as$az$af -008B $aw$ak$av -010E $erda -028@ $dAda K.$cof$aLovelace -028@ $dAugusta Ada$cof$aLovelace -028@ $dAda Augusta$cof$aLovelace -028@ $dAda$aByron -028@ $dAugusta Ada$aByron King -028@ $dAugusta Ada$aKing -028@ $dAda$aKing -028@ $dAda Augusta$aByron$4nafr -028@ $dAugusta Ada$aByron -028@ $dAda$aByron Lovelace -028@ $dAda$aLovelace -028@ $dAda King, Countess of$aLovelace -028@ $dAugusta Ada King$aLovelace -028@ $dAugusta Ada$aLovelace -028A $dAda King$cof$aLovelace -028R $9118518208$7Tp1$Vpiz$Agnd$0118518208$E1788$G1824$dGeorge Gordon Byron$aByron$lBaron$4bezf$vVater -028R $9118638130$7Tp1$Vpiz$Agnd$0118638130$E1792$G1860$dAnne Isabella Milbanke Byron$aByron$4bezf$vMutter -028R $9119389991$7Tp1$Vpiz$Agnd$0119389991$E1837$G1917$dAnne Isabella$aBlunt$4bezf$vTochter -028R $dwilliam$aking$4bezf -032T $af -041R $9042527880$7Ts1$Vsaz$Agnd$04252788-0$aMathematikerin$4berc -042A $a28p$a9.5p -042B $aXA-GB -047A/03 $eDE-386 -047A/03 $rDE-576 -047C $Spnd$ia$aLovelace, Ada King /of$0119232022 -047C $Spnd$ia$aLovelace, Ada K. /of$0172642531 -050C $aDer Ehemann Baron William King (1805-1893) wurde 1838 zum 1. Earl of Lovelace erhoben. -050E $aLoC-Na gegen Modern Engl. biogr. -050E $ahttps://de.wikipedia.org/wiki/Ada_Lovelace -050E $aLCAuth, (OGND) -050G $bBrit. Mathematikerin; Countess of Lovelace -050G $bInformatikerin, Mathematikerin, Grossbritannien -060R $a10.12.1815$b27.12.1852$4datx -060R $a1815$b1852$4datl -065R $9040743357$7Tgz$Vgik$Agnd$04074335-4$aLondon$4ortg -065R $9040743357$7Tgz$Vgik$Agnd$04074335-4$aLondon$4orts -070A/03 $0(DE-588)119232022 - -001@ $0- -001A $09002:18-04-89 -001B $09999:27-09-17$t00:43:48.000 -001D $09999:23-04-10 -001U $0utf8 -001X $00 -002@ $0Tb1 -003@ $0000008672 -003U $ahttp://d-nb.info/gnd/867-9$zhttp://d-nb.info/gnd/7538748-7 -004B $akiz -007K $agnd$0867-9 -007N $agnd$07538748-7 -007N $agnd$01085295990 -007N $aswd$07538748-7$vzg -007N $agkd$0867-9$vzg -008A $af$as -008B $az$av -029@ $aVacuum Society$gUSA -029@ $aAVS$4abku -029@ $aCommittee on Vacuum Techniques -029@ $aCVT$4abku -029A $aAmerican Vacuum Society -029R $9984002073$7Tb1$Agnd$010168051-X$aAVS, the Science and Technology Society$4nach -041R $9040622665$7Ts1$Agnd$04062266-6$aVakuum$4them -041R $904066581X$7Ts1$Vsaz$Agnd$04066581-1$aWissenschaftliche Gesellschaft$4obin -042A $a6.5$a9.3c$a21.5$a31.1b$a31.9a -042B $aXD-US -042B $aXD-US -047A/03 $eDE-1 -047A/03 $rDE-1 -047C $Sgkd$ia$aAmerican Vacuum Society$0867-9 -047C $Sswd$ik$aAmerican Vacuum Society$07538748-7 -050C $aMMi -050E $aGKD -050H $a1953 gegr. wiss. Ges., die sich mit Problemen des Vakuums und vor allem seiner Anwendung in der Technik beschäftigt -065R $9040787044$7Tgz$Vgik$Agnd$04078704-7$aUSA$4geow - -001@ $0- -001A $09002:18-04-89 -001B $01240:19-04-17$t14:37:25.000 -001D $09999:23-04-10 -001U $0utf8 -001X $00 -002@ $0Tb1 -003@ $0000016586 -003U $ahttp://d-nb.info/gnd/1658-5$zhttp://d-nb.info/gnd/4318278-1 -004B $akiz -007K $agnd$01658-5 -007N $agnd$04318278-1 -007N $aswd$04318278-1$vzg -007N $agkd$016291087-3 -007N $agkd$01658-5$vzg -008A $af$as -008B $az$av -029@ $aUniv. of Minnesota, Minneapolis -029@ $aUniversidad de Minnesota -029@ $aU of M -029@ $aUM$4abku -029A $aUniversity of Minnesota -042A $a6.6 -042B $aXD-US -047A/03 $eDE-1 -047A/03 $rDE-1 -047C $Sgkd$ia$aUniversity of Minnesota $01658-5 -047C $Sswd$ic$aMinneapolis / University of Minnesota$04318278-1 -050E $aHomepage$bStand: 19.04.2017$uhttp://www.umn.edu -060R $a1851$4datb -065R $9040394972$7Tg1$Vgik$Agnd$04039497-9$aMinneapolis, Minn.$4orta -070A/03 $SIDS$0320104243 - -001@ $0- -001A $09002:18-04-89 -001B $01601:26-11-19$t10:50:14.000 -001D $09999:23-04-10 -001U $0utf8 -001X $00 -002@ $0Tb1 -003@ $0000016756 -003U $ahttp://d-nb.info/gnd/1675-5$zhttp://d-nb.info/gnd/1088210104$zhttp://d-nb.info/gnd/7542287-6$zhttp://d-nb.info/gnd/1086256751 -004B $akiz -007K $agnd$01675-5 -007N $agnd$01086256751 -007N $agnd$07542287-6 -007N $aswd$07542287-6$vzg -007N $agnd$01088210104 -007N $agkd$01675-5$vzg -008A $af$as -008B $av$az -010E $bger -029@ $aInstitut International de Philosophie -029@ $aInternationales Institut für Philosophie -029@ $aInstituto Internacional de Filosofia -029@ $aInstitute of Philosophy$gInternational Institute of Philosophie -029@ $aInstitut für Philosophie -029@ $aIIP$4abku -029A $aInternational Institute of Philosophy -029R $9000261246$7Tb1$Vkiz$Agnd$026124-5$aInstitut International de Collaboration Philosophique$gParis$4vorg -041R $9041900944$7Ts1$Agnd$04190094-7$aWissenschaftliche Einrichtung$4obin -042A $a6.5$a4.1 -042B $aXP -047A/03 $eDE-1 -047A/03 $rDE-1 -047C $Sgkd$ia$aInternational Institute of Philosophy$01675-5 -047C $Sswd$ic$aParis / Internationales Institut für Philosophie$07542287-6 -050E $aYearb. 1995 -060R $a1937$4datb -065R $9040181456$7Tgz$Vgik$Agnd$04018145-5$aFrankreich$4geow -065R $9040446603$7Tgz$Vgik$Agnd$04044660-8$aParis$4orta -070A/03 $SIDS$0000000307 - -001@ $0- -001A $09002:18-04-89 -001B $09999:05-06-20$t05:40:04.000 -001D $09999:23-04-10 -001U $0utf8 -001X $00 -002@ $0Tb1 -003@ $0000009229 -003U $ahttp://d-nb.info/gnd/922-2$zhttp://d-nb.info/gnd/4499175-7$zhttp://d-nb.info/gnd/1090453043 -004B $akiz -007K $agnd$0922-2 -007N $agnd$01090453043 -007N $agnd$04499175-7 -007N $aswd$04499175-7$vzg -007N $agkd$0922-2$vzg -008A $af$as -008B $az$av$aw$ae -010E $bger$erda -029@ $aCancer Society$gUSA -029@ $aACS$4abku -029A $aAmerican Cancer Society -029R $9005078091$7Tb1$Vkiz$Agnd$0507809-X$aAmerican Society for the Control of Cancer$4vorg -042A $a27.4 -042B $aXD-US -047A/03 $eDE-1 -047A/03 $rDE-1 -047C $Sgkd$ia$aAmerican Cancer Society$0922-2 -047C $Sswd$ik$aAmerican Cancer Society$04499175-7 -050E $aGKD -050E $aHomepage$bStand: 08.10.2018$uhttps://www.cancer.org -060R $a1945$4datb -065R $9040787044$7Tgz$Vgik$Agnd$04078704-7$aUSA$4geow -070A/03 $0(DE-588)922-2 - -001@ $0- -001A $00386:17-06-99 -001B $09999:17-10-16$t17:00:44.000 -001D $09999:06-04-08 -001U $0utf8 -001X $00 -002@ $0Tp1 -003@ $0121169502 -003U $ahttp://d-nb.info/gnd/121169502$zhttp://d-nb.info/gnd/183361946 -004B $apiz -007K $agnd$0121169502 -007N $agnd$0183361946 -007N $apnd$0183361946$vzg -007N $apnd$0121169502$vzg -007N $aswd$04549141-0$vzg -008A $as$af -008B $av$aw -028@ $dHeike$aKlußmann -028A $dHeike$aKlussmann -041R $aInstallationskünstlerin$4berc -041R $aPhotographin$4beru -042A $a13.7p$a13.5p -042B $aXA-DE -047A/03 $eDE-386 -047A/03 $rDE-576 -047C $Spnd$ia$aKlussmann, Heike$0121169502 -047C $Spnd$ia$aKlussmann, Heike$0183361946 -050C $aNDSBIO -050E $aInternet -060R $a1968$4datl - diff --git a/crates/pica-toolkit/tests/data/dump.xml b/crates/pica-toolkit/tests/data/dump.xml deleted file mode 100644 index 19d170440..000000000 --- a/crates/pica-toolkit/tests/data/dump.xml +++ /dev/null @@ -1,1060 +0,0 @@ - - - - - 8999:22-07-10 - - - 1250:10-09-14 - 08:28:16.000 - - - 0384:27-07-10 - - - utf8 - - - 0 - - - Ts1 - - - 1004916019 - - - http://d-nb.info/gnd/7710287-3 - - - sip - - - gnd - 7710287-3 - - - swd - 7710287-3 - zg - - - s - - - 952570254 - Tb1 - kiz - gnd - 5263070-5 - Chrysler Corporation - hers - - - Plymouth - Marke - - - 041145135 - Ts1 - saz - gnd - 4114513-6 - Markenname - obin - - - 31.7 - - - DE-210 - - - DE-384 - - - swd - s - Plymouth <Marke> - 7710287-3 - - - Kombiniere mit einer Produktgruppe, z.B. Personenkraftwagen - - - Wikipedia, Internet - http://www.mobile.de/modellverzeichnis/plymouth/ - - - Markenname - - - - - 0386:16-03-95 - - - 8999:20-07-20 - 13:19:49.000 - - - 9999:06-04-08 - - - utf8 - - - 0 - - - Tp1 - - - 119232022 - - - http://d-nb.info/gnd/119232022 - http://d-nb.info/gnd/172642531 - - - pik - - - gnd - 119232022 - - - gnd - 172642531 - - - pnd - 172642531 - zg - - - pnd - 119232022 - zg - - - swd - 4370325-2 - zg - - - s - z - f - - - w - k - v - - - rda - - - Ada K. - of - Lovelace - - - Augusta Ada - of - Lovelace - - - Ada Augusta - of - Lovelace - - - Ada - Byron - - - Augusta Ada - Byron King - - - Augusta Ada - King - - - Ada - King - - - Ada Augusta - Byron - nafr - - - Augusta Ada - Byron - - - Ada - Byron Lovelace - - - Ada - Lovelace - - - Ada King, Countess of - Lovelace - - - Augusta Ada King - Lovelace - - - Augusta Ada - Lovelace - - - Ada King - of - Lovelace - - - 118518208 - Tp1 - piz - gnd - 118518208 - 1788 - 1824 - George Gordon Byron - Byron - Baron - bezf - Vater - - - 118638130 - Tp1 - piz - gnd - 118638130 - 1792 - 1860 - Anne Isabella Milbanke Byron - Byron - bezf - Mutter - - - 119389991 - Tp1 - piz - gnd - 119389991 - 1837 - 1917 - Anne Isabella - Blunt - bezf - Tochter - - - william - king - bezf - - - f - - - 042527880 - Ts1 - saz - gnd - 4252788-0 - Mathematikerin - berc - - - 28p - 9.5p - - - XA-GB - - - DE-386 - - - DE-576 - - - pnd - a - Lovelace, Ada King /of - 119232022 - - - pnd - a - Lovelace, Ada K. /of - 172642531 - - - Der Ehemann Baron William King (1805-1893) wurde 1838 zum 1. Earl of Lovelace erhoben. - - - LoC-Na gegen Modern Engl. biogr. - - - https://de.wikipedia.org/wiki/Ada_Lovelace - - - LCAuth, (OGND) - - - Brit. Mathematikerin; Countess of Lovelace - - - Informatikerin, Mathematikerin, Grossbritannien - - - 10.12.1815 - 27.12.1852 - datx - - - 1815 - 1852 - datl - - - 040743357 - Tgz - gik - gnd - 4074335-4 - London - ortg - - - 040743357 - Tgz - gik - gnd - 4074335-4 - London - orts - - - (DE-588)119232022 - - - - - - - - - 9002:18-04-89 - - - 9999:27-09-17 - 00:43:48.000 - - - 9999:23-04-10 - - - utf8 - - - 0 - - - Tb1 - - - 000008672 - - - http://d-nb.info/gnd/867-9 - http://d-nb.info/gnd/7538748-7 - - - kiz - - - gnd - 867-9 - - - gnd - 7538748-7 - - - gnd - 1085295990 - - - swd - 7538748-7 - zg - - - gkd - 867-9 - zg - - - f - s - - - z - v - - - Vacuum Society - USA - - - AVS - abku - - - Committee on Vacuum Techniques - - - CVT - abku - - - American Vacuum Society - - - 984002073 - Tb1 - gnd - 10168051-X - AVS, the Science and Technology Society - nach - - - 040622665 - Ts1 - gnd - 4062266-6 - Vakuum - them - - - 04066581X - Ts1 - saz - gnd - 4066581-1 - Wissenschaftliche Gesellschaft - obin - - - 6.5 - 9.3c - 21.5 - 31.1b - 31.9a - - - XD-US - - - XD-US - - - DE-1 - - - DE-1 - - - gkd - a - American Vacuum Society - 867-9 - - - swd - k - American Vacuum Society - 7538748-7 - - - MMi - - - GKD - - - 1953 gegr. wiss. Ges., die sich mit Problemen des Vakuums und vor allem seiner Anwendung in der Technik beschäftigt - - - 040787044 - Tgz - gik - gnd - 4078704-7 - USA - geow - - - - - - - - - 9002:18-04-89 - - - 1240:19-04-17 - 14:37:25.000 - - - 9999:23-04-10 - - - utf8 - - - 0 - - - Tb1 - - - 000016586 - - - http://d-nb.info/gnd/1658-5 - http://d-nb.info/gnd/4318278-1 - - - kiz - - - gnd - 1658-5 - - - gnd - 4318278-1 - - - swd - 4318278-1 - zg - - - gkd - 16291087-3 - - - gkd - 1658-5 - zg - - - f - s - - - z - v - - - Univ. of Minnesota, Minneapolis - - - Universidad de Minnesota - - - U of M - - - UM - abku - - - University of Minnesota - - - 6.6 - - - XD-US - - - DE-1 - - - DE-1 - - - gkd - a - University of Minnesota <Minneapolis, Minn.> - 1658-5 - - - swd - c - Minneapolis <Minn.> / University of Minnesota - 4318278-1 - - - Homepage - Stand: 19.04.2017 - http://www.umn.edu - - - 1851 - datb - - - 040394972 - Tg1 - gik - gnd - 4039497-9 - Minneapolis, Minn. - orta - - - IDS - 320104243 - - - - - - - - - 9002:18-04-89 - - - 1601:26-11-19 - 10:50:14.000 - - - 9999:23-04-10 - - - utf8 - - - 0 - - - Tb1 - - - 000016756 - - - http://d-nb.info/gnd/1675-5 - http://d-nb.info/gnd/1088210104 - http://d-nb.info/gnd/7542287-6 - http://d-nb.info/gnd/1086256751 - - - kiz - - - gnd - 1675-5 - - - gnd - 1086256751 - - - gnd - 7542287-6 - - - swd - 7542287-6 - zg - - - gnd - 1088210104 - - - gkd - 1675-5 - zg - - - f - s - - - v - z - - - ger - - - Institut International de Philosophie - - - Internationales Institut für Philosophie - - - Instituto Internacional de Filosofia - - - Institute of Philosophy - International Institute of Philosophie - - - Institut für Philosophie - - - IIP - abku - - - International Institute of Philosophy - - - 000261246 - Tb1 - kiz - gnd - 26124-5 - Institut International de Collaboration Philosophique - Paris - vorg - - - 041900944 - Ts1 - gnd - 4190094-7 - Wissenschaftliche Einrichtung - obin - - - 6.5 - 4.1 - - - XP - - - DE-1 - - - DE-1 - - - gkd - a - International Institute of Philosophy - 1675-5 - - - swd - c - Paris / Internationales Institut für Philosophie - 7542287-6 - - - Yearb. 1995 - - - 1937 - datb - - - 040181456 - Tgz - gik - gnd - 4018145-5 - Frankreich - geow - - - 040446603 - Tgz - gik - gnd - 4044660-8 - Paris - orta - - - IDS - 000000307 - - - - - - - - - 9002:18-04-89 - - - 9999:05-06-20 - 05:40:04.000 - - - 9999:23-04-10 - - - utf8 - - - 0 - - - Tb1 - - - 000009229 - - - http://d-nb.info/gnd/922-2 - http://d-nb.info/gnd/4499175-7 - http://d-nb.info/gnd/1090453043 - - - kiz - - - gnd - 922-2 - - - gnd - 1090453043 - - - gnd - 4499175-7 - - - swd - 4499175-7 - zg - - - gkd - 922-2 - zg - - - f - s - - - z - v - w - e - - - ger - rda - - - Cancer Society - USA - - - ACS - abku - - - American Cancer Society - - - 005078091 - Tb1 - kiz - gnd - 507809-X - American Society for the Control of Cancer - vorg - - - 27.4 - - - XD-US - - - DE-1 - - - DE-1 - - - gkd - a - American Cancer Society - 922-2 - - - swd - k - American Cancer Society - 4499175-7 - - - GKD - - - Homepage - Stand: 08.10.2018 - https://www.cancer.org - - - 1945 - datb - - - 040787044 - Tgz - gik - gnd - 4078704-7 - USA - geow - - - (DE-588)922-2 - - - - - - - - - 0386:17-06-99 - - - 9999:17-10-16 - 17:00:44.000 - - - 9999:06-04-08 - - - utf8 - - - 0 - - - Tp1 - - - 121169502 - - - http://d-nb.info/gnd/121169502 - http://d-nb.info/gnd/183361946 - - - piz - - - gnd - 121169502 - - - gnd - 183361946 - - - pnd - 183361946 - zg - - - pnd - 121169502 - zg - - - swd - 4549141-0 - zg - - - s - f - - - v - w - - - Heike - Klußmann - - - Heike - Klussmann - - - Installationskünstlerin - berc - - - Photographin - beru - - - 13.7p - 13.5p - - - XA-DE - - - DE-386 - - - DE-576 - - - pnd - a - Klussmann, Heike - 121169502 - - - pnd - a - Klussmann, Heike - 183361946 - - - NDSBIO - - - Internet - - - 1968 - datl - - - \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/dump_cnt.csv b/crates/pica-toolkit/tests/data/dump_cnt.csv deleted file mode 100644 index 42bb7d8ec..000000000 --- a/crates/pica-toolkit/tests/data/dump_cnt.csv +++ /dev/null @@ -1,2 +0,0 @@ -records,fields,subfields -7,247,549 diff --git a/crates/pica-toolkit/tests/data/dump_cnt.tsv b/crates/pica-toolkit/tests/data/dump_cnt.tsv deleted file mode 100644 index a549efe0e..000000000 --- a/crates/pica-toolkit/tests/data/dump_cnt.tsv +++ /dev/null @@ -1,2 +0,0 @@ -records fields subfields -7 247 549 diff --git a/crates/pica-toolkit/tests/data/dump_cnt.txt b/crates/pica-toolkit/tests/data/dump_cnt.txt deleted file mode 100644 index 3bea20be3..000000000 --- a/crates/pica-toolkit/tests/data/dump_cnt.txt +++ /dev/null @@ -1,3 +0,0 @@ -records: 7 -fields: 247 -subfields: 549 diff --git a/crates/pica-toolkit/tests/data/filter.txt b/crates/pica-toolkit/tests/data/filter.txt deleted file mode 100644 index f0aa7f9c0..000000000 --- a/crates/pica-toolkit/tests/data/filter.txt +++ /dev/null @@ -1,2 +0,0 @@ -003@.0 == '119232022' && -042B.a == 'XA-GB' diff --git a/crates/pica-toolkit/tests/data/invalid.dat b/crates/pica-toolkit/tests/data/invalid.dat deleted file mode 100644 index bf3be0604..000000000 --- a/crates/pica-toolkit/tests/data/invalid.dat +++ /dev/null @@ -1 +0,0 @@ -003! 0123456789X002@ 0Tp1012A/00 a1a2b1 diff --git a/crates/pica-toolkit/tests/data/invalid.xml b/crates/pica-toolkit/tests/data/invalid.xml deleted file mode 100644 index daddcf981..000000000 --- a/crates/pica-toolkit/tests/data/invalid.xml +++ /dev/null @@ -1,2 +0,0 @@ - - \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/invalid_cnt.txt b/crates/pica-toolkit/tests/data/invalid_cnt.txt deleted file mode 100644 index 9b8758915..000000000 --- a/crates/pica-toolkit/tests/data/invalid_cnt.txt +++ /dev/null @@ -1,3 +0,0 @@ -records: 0 -fields: 0 -subfields: 0 diff --git a/crates/pica-toolkit/tests/data/two-records.xml b/crates/pica-toolkit/tests/data/two-records.xml deleted file mode 100644 index 33d426638..000000000 --- a/crates/pica-toolkit/tests/data/two-records.xml +++ /dev/null @@ -1,248 +0,0 @@ - - - - - 8999:22-07-10 - - - 1250:10-09-14 - 08:28:16.000 - - - 0384:27-07-10 - - - utf8 - - - 0 - - - Ts1 - - - 1004916019 - - - http://d-nb.info/gnd/7710287-3 - - - sip - - - gnd - 7710287-3 - - - swd - 7710287-3 - zg - - - s - - - 952570254 - Tb1 - kiz - gnd - 5263070-5 - Chrysler Corporation - hers - - - Plymouth - Marke - - - 041145135 - Ts1 - saz - gnd - 4114513-6 - Markenname - obin - - - 31.7 - - - DE-210 - - - DE-384 - - - swd - s - Plymouth <Marke> - 7710287-3 - - - Kombiniere mit einer Produktgruppe, z.B. Personenkraftwagen - - - Wikipedia, Internet - http://www.mobile.de/modellverzeichnis/plymouth/ - - - Markenname - - - - - - - - - 9002:18-04-89 - - - 9999:27-09-17 - 00:43:48.000 - - - 9999:23-04-10 - - - utf8 - - - 0 - - - Tb1 - - - 000008672 - - - http://d-nb.info/gnd/867-9 - http://d-nb.info/gnd/7538748-7 - - - kiz - - - gnd - 867-9 - - - gnd - 7538748-7 - - - gnd - 1085295990 - - - swd - 7538748-7 - zg - - - gkd - 867-9 - zg - - - f - s - - - z - v - - - Vacuum Society - USA - - - AVS - abku - - - Committee on Vacuum Techniques - - - CVT - abku - - - American Vacuum Society - - - 984002073 - Tb1 - gnd - 10168051-X - AVS, the Science and Technology Society - nach - - - 040622665 - Ts1 - gnd - 4062266-6 - Vakuum - them - - - 04066581X - Ts1 - saz - gnd - 4066581-1 - Wissenschaftliche Gesellschaft - obin - - - 6.5 - 9.3c - 21.5 - 31.1b - 31.9a - - - XD-US - - - XD-US - - - DE-1 - - - DE-1 - - - gkd - a - American Vacuum Society - 867-9 - - - swd - k - American Vacuum Society - 7538748-7 - - - MMi - - - GKD - - - 1953 gegr. wiss. Ges., die sich mit Problemen des Vakuums und vor allem seiner Anwendung in der Technik beschäftigt - - - 040787044 - Tgz - gik - gnd - 4078704-7 - USA - geow - - - \ No newline at end of file diff --git a/crates/pica-toolkit/tests/data/tworecs.json b/crates/pica-toolkit/tests/data/tworecs.json deleted file mode 100644 index 023888174..000000000 --- a/crates/pica-toolkit/tests/data/tworecs.json +++ /dev/null @@ -1 +0,0 @@ -[{"fields":[{"tag":"001A","subfields":[{"tag":"0","value":"8999:22-07-10"}]},{"tag":"001B","subfields":[{"tag":"0","value":"1250:10-09-14"},{"tag":"t","value":"08:28:16.000"}]},{"tag":"001D","subfields":[{"tag":"0","value":"0384:27-07-10"}]},{"tag":"001U","subfields":[{"tag":"0","value":"utf8"}]},{"tag":"001X","subfields":[{"tag":"0","value":"0"}]},{"tag":"002@","subfields":[{"tag":"0","value":"Ts1"}]},{"tag":"003@","subfields":[{"tag":"0","value":"1004916019"}]},{"tag":"003U","subfields":[{"tag":"a","value":"http://d-nb.info/gnd/7710287-3"}]},{"tag":"004B","subfields":[{"tag":"a","value":"sip"}]},{"tag":"007K","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"7710287-3"}]},{"tag":"007N","subfields":[{"tag":"a","value":"swd"},{"tag":"0","value":"7710287-3"},{"tag":"v","value":"zg"}]},{"tag":"008A","subfields":[{"tag":"a","value":"s"}]},{"tag":"029R","subfields":[{"tag":"9","value":"952570254"},{"tag":"7","value":"Tb1"},{"tag":"V","value":"kiz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"5263070-5"},{"tag":"a","value":"Chrysler Corporation"},{"tag":"4","value":"hers"}]},{"tag":"041A","subfields":[{"tag":"a","value":"Plymouth"},{"tag":"g","value":"Marke"}]},{"tag":"041R","subfields":[{"tag":"9","value":"041145135"},{"tag":"7","value":"Ts1"},{"tag":"V","value":"saz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4114513-6"},{"tag":"a","value":"Markenname"},{"tag":"4","value":"obin"}]},{"tag":"042A","subfields":[{"tag":"a","value":"31.7"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"e","value":"DE-210"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"r","value":"DE-384"}]},{"tag":"047C","subfields":[{"tag":"S","value":"swd"},{"tag":"i","value":"s"},{"tag":"a","value":"Plymouth "},{"tag":"0","value":"7710287-3"}]},{"tag":"050D","subfields":[{"tag":"a","value":"Kombiniere mit einer Produktgruppe, z.B. Personenkraftwagen"}]},{"tag":"050E","subfields":[{"tag":"a","value":"Wikipedia, Internet"},{"tag":"u","value":"http://www.mobile.de/modellverzeichnis/plymouth/"}]},{"tag":"050H","subfields":[{"tag":"a","value":"Markenname"}]}]}{"fields":[{"tag":"001@","subfields":[{"tag":"0","value":"-"}]},{"tag":"001A","subfields":[{"tag":"0","value":"9002:18-04-89"}]},{"tag":"001B","subfields":[{"tag":"0","value":"9999:27-09-17"},{"tag":"t","value":"00:43:48.000"}]},{"tag":"001D","subfields":[{"tag":"0","value":"9999:23-04-10"}]},{"tag":"001U","subfields":[{"tag":"0","value":"utf8"}]},{"tag":"001X","subfields":[{"tag":"0","value":"0"}]},{"tag":"002@","subfields":[{"tag":"0","value":"Tb1"}]},{"tag":"003@","subfields":[{"tag":"0","value":"000008672"}]},{"tag":"003U","subfields":[{"tag":"a","value":"http://d-nb.info/gnd/867-9"},{"tag":"z","value":"http://d-nb.info/gnd/7538748-7"}]},{"tag":"004B","subfields":[{"tag":"a","value":"kiz"}]},{"tag":"007K","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"867-9"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"7538748-7"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gnd"},{"tag":"0","value":"1085295990"}]},{"tag":"007N","subfields":[{"tag":"a","value":"swd"},{"tag":"0","value":"7538748-7"},{"tag":"v","value":"zg"}]},{"tag":"007N","subfields":[{"tag":"a","value":"gkd"},{"tag":"0","value":"867-9"},{"tag":"v","value":"zg"}]},{"tag":"008A","subfields":[{"tag":"a","value":"f"},{"tag":"a","value":"s"}]},{"tag":"008B","subfields":[{"tag":"a","value":"z"},{"tag":"a","value":"v"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Vacuum Society"},{"tag":"g","value":"USA"}]},{"tag":"029@","subfields":[{"tag":"a","value":"AVS"},{"tag":"4","value":"abku"}]},{"tag":"029@","subfields":[{"tag":"a","value":"Committee on Vacuum Techniques"}]},{"tag":"029@","subfields":[{"tag":"a","value":"CVT"},{"tag":"4","value":"abku"}]},{"tag":"029A","subfields":[{"tag":"a","value":"American Vacuum Society"}]},{"tag":"029R","subfields":[{"tag":"9","value":"984002073"},{"tag":"7","value":"Tb1"},{"tag":"A","value":"gnd"},{"tag":"0","value":"10168051-X"},{"tag":"a","value":"AVS, the Science and Technology Society"},{"tag":"4","value":"nach"}]},{"tag":"041R","subfields":[{"tag":"9","value":"040622665"},{"tag":"7","value":"Ts1"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4062266-6"},{"tag":"a","value":"Vakuum"},{"tag":"4","value":"them"}]},{"tag":"041R","subfields":[{"tag":"9","value":"04066581X"},{"tag":"7","value":"Ts1"},{"tag":"V","value":"saz"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4066581-1"},{"tag":"a","value":"Wissenschaftliche Gesellschaft"},{"tag":"4","value":"obin"}]},{"tag":"042A","subfields":[{"tag":"a","value":"6.5"},{"tag":"a","value":"9.3c"},{"tag":"a","value":"21.5"},{"tag":"a","value":"31.1b"},{"tag":"a","value":"31.9a"}]},{"tag":"042B","subfields":[{"tag":"a","value":"XD-US"}]},{"tag":"042B","subfields":[{"tag":"a","value":"XD-US"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"e","value":"DE-1"}]},{"tag":"047A","occurrence":"03","subfields":[{"tag":"r","value":"DE-1"}]},{"tag":"047C","subfields":[{"tag":"S","value":"gkd"},{"tag":"i","value":"a"},{"tag":"a","value":"American Vacuum Society"},{"tag":"0","value":"867-9"}]},{"tag":"047C","subfields":[{"tag":"S","value":"swd"},{"tag":"i","value":"k"},{"tag":"a","value":"American Vacuum Society"},{"tag":"0","value":"7538748-7"}]},{"tag":"050C","subfields":[{"tag":"a","value":"MMi"}]},{"tag":"050E","subfields":[{"tag":"a","value":"GKD"}]},{"tag":"050H","subfields":[{"tag":"a","value":"1953 gegr. wiss. Ges., die sich mit Problemen des Vakuums und vor allem seiner Anwendung in der Technik beschäftigt"}]},{"tag":"065R","subfields":[{"tag":"9","value":"040787044"},{"tag":"7","value":"Tgz"},{"tag":"V","value":"gik"},{"tag":"A","value":"gnd"},{"tag":"0","value":"4078704-7"},{"tag":"a","value":"USA"},{"tag":"4","value":"geow"}]}]}] \ No newline at end of file diff --git a/crates/pica-utils/Cargo.toml b/crates/pica-utils/Cargo.toml deleted file mode 100644 index ec3e05a31..000000000 --- a/crates/pica-utils/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "pica-utils" -version.workspace = true -authors.workspace = true -license.workspace = true -readme.workspace = true -keywords.workspace = true -edition.workspace = true -rust-version.workspace = true - -[dependencies] -serde = { workspace = true, features = ["derive"] } -unicode-normalization = { version = "0.1.22" } diff --git a/crates/pica-utils/README.md b/crates/pica-utils/README.md deleted file mode 100644 index cd215713e..000000000 --- a/crates/pica-utils/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# pica-utils - -This crate is an **internal sub-crate** of the _pica-rs_ toolkit, -providing internal utility functions. This crate is not intended for -external usage. diff --git a/crates/pica-utils/src/lib.rs b/crates/pica-utils/src/lib.rs deleted file mode 100644 index 86d4dd8f6..000000000 --- a/crates/pica-utils/src/lib.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod translit; - -pub use translit::NormalizationForm; diff --git a/crates/pica-utils/src/translit.rs b/crates/pica-utils/src/translit.rs deleted file mode 100644 index 8a6c27bd6..000000000 --- a/crates/pica-utils/src/translit.rs +++ /dev/null @@ -1,86 +0,0 @@ -use std::str::FromStr; - -use serde::{Deserialize, Serialize}; -use unicode_normalization::UnicodeNormalization; - -#[derive(Debug, PartialEq, Clone, Copy, Serialize, Deserialize)] -pub enum NormalizationForm { - #[serde(alias = "nfc")] - Nfc, - #[serde(alias = "nfkc")] - Nfkc, - #[serde(alias = "nfd")] - Nfd, - #[serde(alias = "nfkd")] - Nfkd, -} - -impl FromStr for NormalizationForm { - type Err = String; - - fn from_str(s: &str) -> Result { - match s { - "nfc" => Ok(Self::Nfc), - "nfkc" => Ok(Self::Nfkc), - "nfd" => Ok(Self::Nfd), - "nfkd" => Ok(Self::Nfkd), - _ => Err(format!("invalid normalization form '{s}'")), - } - } -} - -impl NormalizationForm { - pub fn translit(self, value: S) -> String - where - S: AsRef, - { - match self { - Self::Nfc => value.as_ref().nfc().collect::(), - Self::Nfkc => value.as_ref().nfkc().collect::(), - Self::Nfd => value.as_ref().nfd().collect::(), - Self::Nfkd => value.as_ref().nfkd().collect::(), - } - } - - pub fn translit_opt(value: S, nf: Option) -> String - where - S: AsRef, - { - match nf { - Some(nf) => nf.translit(value), - None => value.as_ref().into(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_translit() { - use NormalizationForm::*; - - assert_eq!(Nfc.translit("Am\u{0e9}lie"), "Am\u{0e9}lie"); - assert_eq!(Nfkc.translit("Am\u{0e9}lie"), "Am\u{0e9}lie"); - assert_eq!(Nfd.translit("Am\u{0e9}lie"), "Ame\u{301}lie"); - assert_eq!(Nfkd.translit("Am\u{0e9}lie"), "Ame\u{301}lie"); - assert_eq!(Nfd.translit("Ame\u{301}lie"), "Ame\u{301}lie"); - assert_eq!(Nfkd.translit("Ame\u{301}lie"), "Ame\u{301}lie"); - assert_eq!(Nfc.translit("Ame\u{301}lie"), "Am\u{0e9}lie"); - assert_eq!(Nfkc.translit("Ame\u{301}lie"), "Am\u{0e9}lie"); - } - - #[test] - fn test_from_str() { - use NormalizationForm::*; - - assert_eq!(NormalizationForm::from_str("nfc").unwrap(), Nfc); - assert_eq!(NormalizationForm::from_str("nfkc").unwrap(), Nfkc); - assert_eq!(NormalizationForm::from_str("nfd").unwrap(), Nfd); - assert_eq!(NormalizationForm::from_str("nfkd").unwrap(), Nfkd); - - assert!(NormalizationForm::from_str("foo").is_err()); - assert!(NormalizationForm::from_str("").is_err()); - } -} diff --git a/docs.bak/content/docs/kommandos/convert.md b/docs.bak/content/docs/kommandos/convert.md deleted file mode 100644 index 8b7a693e1..000000000 --- a/docs.bak/content/docs/kommandos/convert.md +++ /dev/null @@ -1,70 +0,0 @@ -# convert - -Das `convert`-Kommando wird für die Konvertierung von und in andere -Datenformate verwendet. - -{{< hint danger >}} -**Hinweis:** -Das `convert`-Kommando befindet sich in der aktiven Entwicklung. -Funktionalitäten können unvollständig oder fehlerhaft sein. Änderungen -am _command-line interface_ (CLI) sind nicht ausgeschlossen. -{{< /hint >}} - -## Beschreibung - -Das PICA-Format kann in verschiedene Datenformate serialisiert werden. -Das `convert`-Kommando ermöglicht es, Datensätze von einem Format in ein -anderes Format zu konvertieren. Es bietet insbesondere die Möglichkeit, -Datensätze, die nicht in normalisiertem PICA+ vorliegen, nach PICA+ zu -konvertieren, um sie durch andere Kommandos verarbeiten zu können. - -Folgende Formate werden unterstützt: - -* normalisiertes PICA+ (`plus`), -* binäres PICA (`binary`), -* PICA-Importformat (`import`), -* PICA-Plain (`plain`), -* PICA-JSON (`json`), -* und PICA-XML (`xml`). - -Die Angabe der Datenformate erfolgt über die Optionen `--from`/`-f` und -`--to`/`-t`: - -```bash -$ pica convert --from plus --to binary DUMP.dat.gz -o dump.bin -$ pica convert --from plus --to json DUMP.dat.gz -o dump.json -$ pica convert --from plus --to plain DUMP.dat.gz -o dump.plain -$ pica convert --from plus --to plus DUMP.dat.gz -o dump.dat -$ pica convert --from plus --to xml DUMP.dat.gz -o dump.xml -``` - -## Optionen - -* `-s`, `--skip-invalid` — überspringt jene Zeilen aus der Eingabe, die nicht - dekodiert werden konnten. -* `-f`, `--from` — Auswahl des Datenformats der Eingabe. -* `-t`, `--to` — Auswahl des Datenformats der Ausgabe. -* `-p`, `--progress` — Anzeige des Fortschritts, der die Anzahl der - eingelesenen gültigen sowie invaliden Datensätze anzeigt. Das - Aktivieren der Option erfordert das Schreiben der Datensätze in eine - Datei mittels `-o` bzw. `--output`. -* `-o`, `--output` — Angabe, in welche Datei die Ausgabe geschrieben - werden soll. Standardmäßig wird die Ausgabe in die Standardausgabe - `stdout` geschrieben. - - -[cat]: {{< relref "cat.md" >}} -[completions]: {{< relref "completions.md" >}} -[convert]: {{< relref "convert.md" >}} -[count]: {{< relref "count.md" >}} -[explode]: {{< relref "explode.md" >}} -[filter]: {{< relref "filter.md" >}} -[frequency]: {{< relref "frequency.md" >}} -[hash]: {{< relref "hash.md" >}} -[invalid]: {{< relref "invalid.md" >}} -[partition]: {{< relref "partition.md" >}} -[print]: {{< relref "print.md" >}} -[slice]: {{< relref "slice.md" >}} -[split]: {{< relref "split.md" >}} - -[Gzip]: https://de.wikipedia.org/wiki/Gzip diff --git a/docs/book/_quarto.yml b/docs/book/_quarto.yml index cf8408cc5..12f6f4d26 100644 --- a/docs/book/_quarto.yml +++ b/docs/book/_quarto.yml @@ -23,6 +23,7 @@ book: - commands/completions.qmd - commands/concat.qmd - commands/config.qmd + - commands/convert.qmd - commands/count.qmd - commands/explode.qmd - commands/filter.qmd diff --git a/docs/book/commands/convert.qmd b/docs/book/commands/convert.qmd new file mode 100644 index 000000000..2afc3fa71 --- /dev/null +++ b/docs/book/commands/convert.qmd @@ -0,0 +1,47 @@ +# convert {.unnumbered} + +Das PICA-Format kann in verschiedene Datenformate serialisiert werden. +Das `convert`-Kommando ermöglicht es, Datensätze von einem Format in ein +anderes Format zu konvertieren. Es bietet insbesondere die Möglichkeit, +Datensätze, die nicht in normalisiertem PICA+ vorliegen, nach PICA+ zu +konvertieren, um sie durch andere Kommandos verarbeiten zu können. + +Folgende Formate werden unterstützt: + +* normalisiertes PICA+ (`plus`) +* binäres PICA (`binary`) +* PICA-Importformat (`import`) +* PICA-Plain (`plain`) +* PICA-JSON (`json`) +* PICA-XML (`xml`) + +Die Angabe der Datenformate erfolgt über die Optionen `--from`/`-f` und +`--to`/`-t`: + +```{.bash} +$ pica convert --from plus --to binary DUMP.dat.gz -o dump.bin +$ pica convert --from plus --to json DUMP.dat.gz -o dump.json +$ pica convert --from plus --to plain DUMP.dat.gz -o dump.plain +$ pica convert --from plus --to plus DUMP.dat.gz -o dump.dat +$ pica convert --from plus --to xml DUMP.dat.gz -o dump.xml +``` + +## Optionen + +`-s`, `--skip-invalid` +: Überspringt jene Zeilen aus der Eingabe, die nicht dekodiert werden konnten. + +`-f `, `--from ` +: Auswahl des Datenformats der Eingabe. + +`-t `, `--to ` +: Auswahl des Datenformats der Ausgabe. + +`-p`, `--progress` +: Anzeige des Fortschritts, der die Anzahl der eingelesenen gültigen sowie +invaliden Datensätze anzeigt. Das Aktivieren der Option erfordert das +Schreiben der Datensätze in eine Datei mittels `-o` bzw. `--output`. + +`-o `, `--output ` +: Angabe, in welche Datei die Ausgabe geschrieben werden soll. Standardmäßig +wird die Ausgabe in die Standardausgabe `stdout` geschrieben. diff --git a/src/primitives/occurrence.rs b/src/primitives/occurrence.rs index 88f2c1954..d543a3f2c 100644 --- a/src/primitives/occurrence.rs +++ b/src/primitives/occurrence.rs @@ -146,6 +146,12 @@ impl> PartialEq for OccurrenceRef<'_> { } } +impl Display for OccurrenceRef<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "/{}", self.0) + } +} + /// A mutable occurrence. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/src/primitives/tag.rs b/src/primitives/tag.rs index 4bf93a598..268b89366 100644 --- a/src/primitives/tag.rs +++ b/src/primitives/tag.rs @@ -154,6 +154,12 @@ impl<'a> TagRef<'a> { } } +impl Display for TagRef<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + impl Index for TagRef<'_> { type Output = u8; diff --git a/crates/pica-toolkit/tests/data/119232022.dat b/tests/data/ada.bin similarity index 99% rename from crates/pica-toolkit/tests/data/119232022.dat rename to tests/data/ada.bin index 8642bc1ea..f0753552e 100644 --- a/crates/pica-toolkit/tests/data/119232022.dat +++ b/tests/data/ada.bin @@ -1 +1 @@ -001A 00386:16-03-95001B 08999:20-07-20t13:19:49.000001D 09999:06-04-08001U 0utf8001X 00002@ 0Tp1003@ 0119232022003U ahttp://d-nb.info/gnd/119232022zhttp://d-nb.info/gnd/172642531004B apik007K agnd0119232022007N agnd0172642531007N apnd0172642531vzg007N apnd0119232022vzg007N aswd04370325-2vzg008A asazaf008B awakav010E erda028@ dAda K.cofaLovelace028@ dAugusta AdacofaLovelace028@ dAda AugustacofaLovelace028@ dAdaaByron028@ dAugusta AdaaByron King028@ dAugusta AdaaKing028@ dAdaaKing028@ dAda AugustaaByron4nafr028@ dAugusta AdaaByron028@ dAdaaByron Lovelace028@ dAdaaLovelace028@ dAda King, Countess ofaLovelace028@ dAugusta Ada KingaLovelace028@ dAugusta AdaaLovelace028A dAda KingcofaLovelace028R 91185182087Tp1VpizAgnd0118518208E1788G1824dGeorge Gordon ByronaByronlBaron4bezfvVater028R 91186381307Tp1VpizAgnd0118638130E1792G1860dAnne Isabella Milbanke ByronaByron4bezfvMutter028R 91193899917Tp1VpizAgnd0119389991E1837G1917dAnne IsabellaaBlunt4bezfvTochter028R dwilliamaking4bezf032T af041R 90425278807Ts1VsazAgnd04252788-0aMathematikerin4berc042A a28pa9.5p042B aXA-GB047A/03 eDE-386047A/03 rDE-576047C SpndiaaLovelace, Ada King /of0119232022047C SpndiaaLovelace, Ada K. /of0172642531050C aDer Ehemann Baron William King (1805-1893) wurde 1838 zum 1. Earl of Lovelace erhoben.050E aLoC-Na gegen Modern Engl. biogr.050E ahttps://de.wikipedia.org/wiki/Ada_Lovelace050E aLCAuth, (OGND)050G bBrit. Mathematikerin; Countess of Lovelace050G bInformatikerin, Mathematikerin, Grossbritannien060R a10.12.1815b27.12.18524datx060R a1815b18524datl065R 90407433577TgzVgikAgnd04074335-4aLondon4ortg065R 90407433577TgzVgikAgnd04074335-4aLondon4orts070A/03 0(DE-588)119232022 +001A 00386:16-03-95001B 08999:20-07-20t13:19:49.000001D 09999:06-04-08001U 0utf8001X 00002@ 0Tp1003@ 0119232022003U ahttp://d-nb.info/gnd/119232022zhttp://d-nb.info/gnd/172642531004B apik007K agnd0119232022007N agnd0172642531007N apnd0172642531vzg007N apnd0119232022vzg007N aswd04370325-2vzg008A asazaf008B awakav010E erda028@ dAda K.cofaLovelace028@ dAugusta AdacofaLovelace028@ dAda AugustacofaLovelace028@ dAdaaByron028@ dAugusta AdaaByron King028@ dAugusta AdaaKing028@ dAdaaKing028@ dAda AugustaaByron4nafr028@ dAugusta AdaaByron028@ dAdaaByron Lovelace028@ dAdaaLovelace028@ dAda King, Countess ofaLovelace028@ dAugusta Ada KingaLovelace028@ dAugusta AdaaLovelace028A dAda KingcofaLovelace028R 91185182087Tp1VpizAgnd0118518208E1788G1824dGeorge Gordon ByronaByronlBaron4bezfvVater028R 91186381307Tp1VpizAgnd0118638130E1792G1860dAnne Isabella Milbanke ByronaByron4bezfvMutter028R 91193899917Tp1VpizAgnd0119389991E1837G1917dAnne IsabellaaBlunt4bezfvTochter028R dwilliamaking4bezf032T af041R 90425278807Ts1VsazAgnd04252788-0aMathematikerin4berc042A a28pa9.5p042B aXA-GB047A/03 eDE-386047A/03 rDE-576047C SpndiaaLovelace, Ada King /of0119232022047C SpndiaaLovelace, Ada K. /of0172642531050C aDer Ehemann Baron William King (1805-1893) wurde 1838 zum 1. Earl of Lovelace erhoben.050E aLoC-Na gegen Modern Engl. biogr.050E ahttps://de.wikipedia.org/wiki/Ada_Lovelace050E aLCAuth, (OGND)050G bBrit. Mathematikerin; Countess of Lovelace050G bInformatikerin, Mathematikerin, Grossbritannien060R a10.12.1815b27.12.18524datx060R a1815b18524datl065R 90407433577TgzVgikAgnd04074335-4aLondon4ortg065R 90407433577TgzVgikAgnd04074335-4aLondon4orts070A/03 0(DE-588)119232022 \ No newline at end of file diff --git a/tests/data/ada.import b/tests/data/ada.import new file mode 100644 index 000000000..6633e4b8f --- /dev/null +++ b/tests/data/ada.import @@ -0,0 +1,56 @@ +' +001A 00386:16-03-95 +001B 08999:20-07-20t13:19:49.000 +001D 09999:06-04-08 +001U 0utf8 +001X 00 +002@ 0Tp1 +003@ 0119232022 +003U ahttp://d-nb.info/gnd/119232022zhttp://d-nb.info/gnd/172642531 +004B apik +007K agnd0119232022 +007N agnd0172642531 +007N apnd0172642531vzg +007N apnd0119232022vzg +007N aswd04370325-2vzg +008A asazaf +008B awakav +010E erda +028@ dAda K.cofaLovelace +028@ dAugusta AdacofaLovelace +028@ dAda AugustacofaLovelace +028@ dAdaaByron +028@ dAugusta AdaaByron King +028@ dAugusta AdaaKing +028@ dAdaaKing +028@ dAda AugustaaByron4nafr +028@ dAugusta AdaaByron +028@ dAdaaByron Lovelace +028@ dAdaaLovelace +028@ dAda King, Countess ofaLovelace +028@ dAugusta Ada KingaLovelace +028@ dAugusta AdaaLovelace +028A dAda KingcofaLovelace +028R 91185182087Tp1VpizAgnd0118518208E1788G1824dGeorge Gordon ByronaByronlBaron4bezfvVater +028R 91186381307Tp1VpizAgnd0118638130E1792G1860dAnne Isabella Milbanke ByronaByron4bezfvMutter +028R 91193899917Tp1VpizAgnd0119389991E1837G1917dAnne IsabellaaBlunt4bezfvTochter +028R dwilliamaking4bezf +032T af +041R 90425278807Ts1VsazAgnd04252788-0aMathematikerin4berc +042A a28pa9.5p +042B aXA-GB +047A/03 eDE-386 +047A/03 rDE-576 +047C SpndiaaLovelace, Ada King /of0119232022 +047C SpndiaaLovelace, Ada K. /of0172642531 +050C aDer Ehemann Baron William King (1805-1893) wurde 1838 zum 1. Earl of Lovelace erhoben. +050E aLoC-Na gegen Modern Engl. biogr. +050E ahttps://de.wikipedia.org/wiki/Ada_Lovelace +050E aLCAuth, (OGND) +050G bBrit. Mathematikerin; Countess of Lovelace +050G bInformatikerin, Mathematikerin, Grossbritannien +060R a10.12.1815b27.12.18524datx +060R a1815b18524datl +065R 90407433577TgzVgikAgnd04074335-4aLondon4ortg +065R 90407433577TgzVgikAgnd04074335-4aLondon4orts +070A/03 0(DE-588)119232022 diff --git a/tests/data/ada.json b/tests/data/ada.json new file mode 100644 index 000000000..29491b314 --- /dev/null +++ b/tests/data/ada.json @@ -0,0 +1 @@ +[[["001A",null,"0","0386:16-03-95"],["001B",null,"0","8999:20-07-20","t","13:19:49.000"],["001D",null,"0","9999:06-04-08"],["001U",null,"0","utf8"],["001X",null,"0","0"],["002@",null,"0","Tp1"],["003@",null,"0","119232022"],["003U",null,"a","http://d-nb.info/gnd/119232022","z","http://d-nb.info/gnd/172642531"],["004B",null,"a","pik"],["007K",null,"a","gnd","0","119232022"],["007N",null,"a","gnd","0","172642531"],["007N",null,"a","pnd","0","172642531","v","zg"],["007N",null,"a","pnd","0","119232022","v","zg"],["007N",null,"a","swd","0","4370325-2","v","zg"],["008A",null,"a","s","a","z","a","f"],["008B",null,"a","w","a","k","a","v"],["010E",null,"e","rda"],["028@",null,"d","Ada K.","c","of","a","Lovelace"],["028@",null,"d","Augusta Ada","c","of","a","Lovelace"],["028@",null,"d","Ada Augusta","c","of","a","Lovelace"],["028@",null,"d","Ada","a","Byron"],["028@",null,"d","Augusta Ada","a","Byron King"],["028@",null,"d","Augusta Ada","a","King"],["028@",null,"d","Ada","a","King"],["028@",null,"d","Ada Augusta","a","Byron","4","nafr"],["028@",null,"d","Augusta Ada","a","Byron"],["028@",null,"d","Ada","a","Byron Lovelace"],["028@",null,"d","Ada","a","Lovelace"],["028@",null,"d","Ada King, Countess of","a","Lovelace"],["028@",null,"d","Augusta Ada King","a","Lovelace"],["028@",null,"d","Augusta Ada","a","Lovelace"],["028A",null,"d","Ada King","c","of","a","Lovelace"],["028R",null,"9","118518208","7","Tp1","V","piz","A","gnd","0","118518208","E","1788","G","1824","d","George Gordon Byron","a","Byron","l","Baron","4","bezf","v","Vater"],["028R",null,"9","118638130","7","Tp1","V","piz","A","gnd","0","118638130","E","1792","G","1860","d","Anne Isabella Milbanke Byron","a","Byron","4","bezf","v","Mutter"],["028R",null,"9","119389991","7","Tp1","V","piz","A","gnd","0","119389991","E","1837","G","1917","d","Anne Isabella","a","Blunt","4","bezf","v","Tochter"],["028R",null,"d","william","a","king","4","bezf"],["032T",null,"a","f"],["041R",null,"9","042527880","7","Ts1","V","saz","A","gnd","0","4252788-0","a","Mathematikerin","4","berc"],["042A",null,"a","28p","a","9.5p"],["042B",null,"a","XA-GB"],["047A","/03","e","DE-386"],["047A","/03","r","DE-576"],["047C",null,"S","pnd","i","a","a","Lovelace, Ada King /of","0","119232022"],["047C",null,"S","pnd","i","a","a","Lovelace, Ada K. /of","0","172642531"],["050C",null,"a","Der Ehemann Baron William King (1805-1893) wurde 1838 zum 1. Earl of Lovelace erhoben."],["050E",null,"a","LoC-Na gegen Modern Engl. biogr."],["050E",null,"a","https://de.wikipedia.org/wiki/Ada_Lovelace"],["050E",null,"a","LCAuth, (OGND)"],["050G",null,"b","Brit. Mathematikerin; Countess of Lovelace"],["050G",null,"b","Informatikerin, Mathematikerin, Grossbritannien"],["060R",null,"a","10.12.1815","b","27.12.1852","4","datx"],["060R",null,"a","1815","b","1852","4","datl"],["065R",null,"9","040743357","7","Tgz","V","gik","A","gnd","0","4074335-4","a","London","4","ortg"],["065R",null,"9","040743357","7","Tgz","V","gik","A","gnd","0","4074335-4","a","London","4","orts"],["070A","/03","0","(DE-588)119232022"]]] \ No newline at end of file diff --git a/tests/data/ada.plain b/tests/data/ada.plain new file mode 100644 index 000000000..1118553c8 --- /dev/null +++ b/tests/data/ada.plain @@ -0,0 +1,55 @@ +001A $00386:16-03-95 +001B $08999:20-07-20$t13:19:49.000 +001D $09999:06-04-08 +001U $0utf8 +001X $00 +002@ $0Tp1 +003@ $0119232022 +003U $ahttp://d-nb.info/gnd/119232022$zhttp://d-nb.info/gnd/172642531 +004B $apik +007K $agnd$0119232022 +007N $agnd$0172642531 +007N $apnd$0172642531$vzg +007N $apnd$0119232022$vzg +007N $aswd$04370325-2$vzg +008A $as$az$af +008B $aw$ak$av +010E $erda +028@ $dAda K.$cof$aLovelace +028@ $dAugusta Ada$cof$aLovelace +028@ $dAda Augusta$cof$aLovelace +028@ $dAda$aByron +028@ $dAugusta Ada$aByron King +028@ $dAugusta Ada$aKing +028@ $dAda$aKing +028@ $dAda Augusta$aByron$4nafr +028@ $dAugusta Ada$aByron +028@ $dAda$aByron Lovelace +028@ $dAda$aLovelace +028@ $dAda King, Countess of$aLovelace +028@ $dAugusta Ada King$aLovelace +028@ $dAugusta Ada$aLovelace +028A $dAda King$cof$aLovelace +028R $9118518208$7Tp1$Vpiz$Agnd$0118518208$E1788$G1824$dGeorge Gordon Byron$aByron$lBaron$4bezf$vVater +028R $9118638130$7Tp1$Vpiz$Agnd$0118638130$E1792$G1860$dAnne Isabella Milbanke Byron$aByron$4bezf$vMutter +028R $9119389991$7Tp1$Vpiz$Agnd$0119389991$E1837$G1917$dAnne Isabella$aBlunt$4bezf$vTochter +028R $dwilliam$aking$4bezf +032T $af +041R $9042527880$7Ts1$Vsaz$Agnd$04252788-0$aMathematikerin$4berc +042A $a28p$a9.5p +042B $aXA-GB +047A/03 $eDE-386 +047A/03 $rDE-576 +047C $Spnd$ia$aLovelace, Ada King /of$0119232022 +047C $Spnd$ia$aLovelace, Ada K. /of$0172642531 +050C $aDer Ehemann Baron William King (1805-1893) wurde 1838 zum 1. Earl of Lovelace erhoben. +050E $aLoC-Na gegen Modern Engl. biogr. +050E $ahttps://de.wikipedia.org/wiki/Ada_Lovelace +050E $aLCAuth, (OGND) +050G $bBrit. Mathematikerin; Countess of Lovelace +050G $bInformatikerin, Mathematikerin, Grossbritannien +060R $a10.12.1815$b27.12.1852$4datx +060R $a1815$b1852$4datl +065R $9040743357$7Tgz$Vgik$Agnd$04074335-4$aLondon$4ortg +065R $9040743357$7Tgz$Vgik$Agnd$04074335-4$aLondon$4orts +070A/03 $0(DE-588)119232022 diff --git a/tests/data/ada.xml b/tests/data/ada.xml new file mode 100644 index 000000000..41f95806d --- /dev/null +++ b/tests/data/ada.xml @@ -0,0 +1,266 @@ + + + + + 0386:16-03-95 + + + 8999:20-07-20 + 13:19:49.000 + + + 9999:06-04-08 + + + utf8 + + + 0 + + + Tp1 + + + 119232022 + + + http://d-nb.info/gnd/119232022 + http://d-nb.info/gnd/172642531 + + + pik + + + gnd + 119232022 + + + gnd + 172642531 + + + pnd + 172642531 + zg + + + pnd + 119232022 + zg + + + swd + 4370325-2 + zg + + + s + z + f + + + w + k + v + + + rda + + + Ada K. + of + Lovelace + + + Augusta Ada + of + Lovelace + + + Ada Augusta + of + Lovelace + + + Ada + Byron + + + Augusta Ada + Byron King + + + Augusta Ada + King + + + Ada + King + + + Ada Augusta + Byron + nafr + + + Augusta Ada + Byron + + + Ada + Byron Lovelace + + + Ada + Lovelace + + + Ada King, Countess of + Lovelace + + + Augusta Ada King + Lovelace + + + Augusta Ada + Lovelace + + + Ada King + of + Lovelace + + + 118518208 + Tp1 + piz + gnd + 118518208 + 1788 + 1824 + George Gordon Byron + Byron + Baron + bezf + Vater + + + 118638130 + Tp1 + piz + gnd + 118638130 + 1792 + 1860 + Anne Isabella Milbanke Byron + Byron + bezf + Mutter + + + 119389991 + Tp1 + piz + gnd + 119389991 + 1837 + 1917 + Anne Isabella + Blunt + bezf + Tochter + + + william + king + bezf + + + f + + + 042527880 + Ts1 + saz + gnd + 4252788-0 + Mathematikerin + berc + + + 28p + 9.5p + + + XA-GB + + + DE-386 + + + DE-576 + + + pnd + a + Lovelace, Ada King /of + 119232022 + + + pnd + a + Lovelace, Ada K. /of + 172642531 + + + Der Ehemann Baron William King (1805-1893) wurde 1838 zum 1. Earl of Lovelace erhoben. + + + LoC-Na gegen Modern Engl. biogr. + + + https://de.wikipedia.org/wiki/Ada_Lovelace + + + LCAuth, (OGND) + + + Brit. Mathematikerin; Countess of Lovelace + + + Informatikerin, Mathematikerin, Grossbritannien + + + 10.12.1815 + 27.12.1852 + datx + + + 1815 + 1852 + datl + + + 040743357 + Tgz + gik + gnd + 4074335-4 + London + ortg + + + 040743357 + Tgz + gik + gnd + 4074335-4 + London + orts + + + (DE-588)119232022 + + + \ No newline at end of file