From d23e50c0c338881a21d2135ff391b8cc8e24e595 Mon Sep 17 00:00:00 2001 From: Iikka Hauhio Date: Thu, 28 Feb 2019 00:44:05 +0200 Subject: [PATCH] space optimization --- Cargo.toml | 5 +++-- src/lib.rs | 38 +++++++++++++++++++------------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4a7c9c1..463222a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,8 @@ [package] name = "conllu-rs" -version = "0.2.1" +version = "0.2.5" authors = ["Iikka Hauhio "] edition = "2018" -[dependencies] \ No newline at end of file +[dependencies] +tendril = "*" diff --git a/src/lib.rs b/src/lib.rs index f568d0f..612bf2d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,20 +1,21 @@ -use std::collections::HashMap; -use std::hash::Hash; use std::str::FromStr; use std::fmt::Debug; +use tendril::Tendril; +use tendril::fmt::UTF8; +use tendril::SliceExt; #[derive(Debug, Clone)] pub struct Word { pub id: usize, - pub form: String, - pub lemma: String, - pub upos: String, - pub xpos: String, - pub feats: HashMap, + pub form: Tendril, + pub lemma: Tendril, + pub upos: Tendril, + pub xpos: Tendril, + pub feats: Vec<(Tendril, Tendril)>, pub head: usize, - pub deprel: String, - pub deps: HashMap, - pub misc: HashMap + pub deprel: Tendril, + pub deps: Vec<(usize, Tendril)>, + pub misc: Vec<(Tendril, Tendril)> } pub fn parse_conllu(lines: impl Iterator) -> Vec> { @@ -29,13 +30,13 @@ pub fn parse_conllu(lines: impl Iterator) -> Vec> { } ans.last_mut().unwrap().push(Word { id: id, - form: fields[1].to_string(), - lemma: fields[2].to_string(), - upos: fields[3].to_string(), - xpos: fields[4].to_string(), + form: fields[1].to_tendril(), + lemma: fields[2].to_tendril(), + upos: fields[3].to_tendril(), + xpos: fields[4].to_tendril(), feats: parse_attrs(fields[5], '='), head: fields[6].parse().unwrap_or(0), - deprel: fields[7].to_string(), + deprel: fields[7].to_tendril(), deps: parse_attrs(fields[8], ':'), misc: parse_attrs(fields[9], '=') }); @@ -43,19 +44,18 @@ pub fn parse_conllu(lines: impl Iterator) -> Vec> { ans } -fn parse_attrs(text: &str, sep: char) -> HashMap +fn parse_attrs(text: &str, sep: char) -> Vec<(T, Tendril)> where T: Eq, - T: Hash, T: FromStr, ::Err: Debug { - let mut ans = HashMap::new(); + let mut ans = Vec::new(); if text == "_" { return ans; } for item in text.split('|') { let pair: Vec<&str> = item.split(sep).collect(); if pair.len() != 2 { continue; } - ans.insert(pair[0].parse().unwrap(), pair[1].to_string()); + ans.push((pair[0].parse().unwrap(), pair[1].to_tendril())); } ans }