Commit c6b5378

Update dependencies and fix warnings

All tests still pass. The `geocode-csv` update required a significant bump to our `tokio` version.

1 parent: 6f19a75

19 files changed: +315 -770 lines

Cargo.lock (+230 -649)

Generated file; diff not rendered by default.

README.md (+11)

@@ -8,3 +8,14 @@ This repository contains tools for manipulating CSV files, all written in Rust.
 - [`geocode-csv`](./geocode-csv): Geocode CSV files in bulk using the SmartyStreets API.
 - [`scrubcsv`](./scrubcsv): Turn messy, slightly corrupt CSV files into something clean and standardized.
 - [`hashcsv`](./hashcsv): Add a new column to a CSV file, containing a hash of the other columns. Useful for de-duplicating.
+
+## Current coding standards
+
+In general, this repository should contain standard modern Rust code, formatted using `cargo fmt` and the supplied settings. The code should have no warnings when run with `clippy`.
+
+These tools were written over several years, and they represent a history of Rust at Faraday. The following dependencies should be replaced if we get the chance:
+
+- `docopt`: Replace with `structopt`.
+- `error_chain` and `failure`: Replace with `anyhow` (plus `thiserror` if we need specific custom error types).
+
+In general, it's a good idea to update any older code to match the newest code.
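As a reference point for the migrations suggested above, here is a minimal sketch of the target style; the `Opt` struct and the file-reading logic are hypothetical examples, not code from this repository:

```rust
use anyhow::{Context, Result};
use structopt::StructOpt;

/// Hypothetical arguments in the `structopt` style that should replace `docopt`.
#[derive(Debug, StructOpt)]
struct Opt {
    /// The input CSV file to process.
    input: std::path::PathBuf,
}

fn main() -> Result<()> {
    let opt = Opt::from_args();
    // `anyhow`'s `with_context` plays the role of `error-chain`'s
    // `chain_err` and `failure`'s `context`.
    let data = std::fs::read_to_string(&opt.input)
        .with_context(|| format!("could not read {}", opt.input.display()))?;
    println!("read {} bytes", data.len());
    Ok(())
}
```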

catcsv/Cargo.toml (+8 -11)

@@ -13,14 +13,11 @@ repository = "https://github.com/faradayio/catcsv"
 cli_test_dir = "0.1.2"
 
 [dependencies]
-# Enable this by passing `--features "clippy"` to cargo. Needs nightly Rust.
-clippy = { version = "0.0.*", optional = true }
-csv = "0.15.0"
-docopt = "0.7.0"
-env_logger = "0.4.2"
-error-chain = "0.10.0"
-log = "0.3.7"
-# Deprecated in favor of serde, but needed for doctopt:
-rustc-serialize = "0.3"
-snap = "0.2.1"
-walkdir = "1.0.7"
+csv = "1"
+docopt = "1"
+env_logger = "0.8"
+error-chain = "0.12.4"
+log = "0.4.14"
+serde = "1.0.123"
+snap = "1"
+walkdir = "2.3"

catcsv/src/main.rs (+16 -30)

@@ -5,40 +5,25 @@
 //! This replaces a 20-line shell script with two pages of Rust. But it has
 //! a much nicer UI and it handles the corner cases better.
 
-// `error_chain!` can recurse deeply
-#![recursion_limit = "1024"]
-// Enable clippy if we were asked to do so.
-#![cfg_attr(feature = "clippy", feature(plugin))]
-#![cfg_attr(feature = "clippy", plugin(clippy))]
-
-use csv;
-
-use env_logger;
-#[macro_use]
-extern crate error_chain;
-#[macro_use]
-extern crate log;
-
-use snap;
-use walkdir;
-
-use csv::ByteString;
+use csv::ByteRecord;
 use docopt::Docopt;
-use std::borrow::Cow;
+use error_chain::quick_main;
+use log::debug;
+use serde::Deserialize;
 use std::fs::File;
 use std::io::prelude::*;
 use std::io::{self, BufReader, BufWriter};
 use std::process;
+use std::{borrow::Cow, io::Cursor};
 use walkdir::WalkDir;
 
 use crate::errors::*;
 
 /// A module to hold `Error`, etc., types generated by `error-chain`.
 mod errors {
-    use csv;
+    use error_chain::error_chain;
     use std::io;
     use std::path::PathBuf;
-    use walkdir;
 
     error_chain! {
         foreign_links {
@@ -60,7 +45,7 @@ mod errors {
 // delegate the real work to `run`.
 quick_main!(run);
 
-const USAGE: &'static str = "
+const USAGE: &str = "
 catcsv - Combine many CSV files into one
 
 Usage:
@@ -78,7 +63,7 @@ If passed a directory, this will recurse over all files in that directory.
 ";
 
 /// Our command-line arguments.
-#[derive(Debug, RustcDecodable)]
+#[derive(Debug, Deserialize)]
 struct Args {
     /// Should we show the version of the program and exit?
     flag_version: bool,
@@ -88,10 +73,10 @@ struct Args {
 
 /// Our real `main` function that parses arguments and figures out what to do.
 fn run() -> Result<()> {
-    env_logger::init().expect("could not initialize log subsystem");
+    env_logger::init();
 
     let args: Args = Docopt::new(USAGE)
-        .and_then(|dopt| dopt.decode())
+        .and_then(|dopt| dopt.deserialize())
         .unwrap_or_else(|e| e.exit());
     debug!("{:?}", args);
 
@@ -108,7 +93,7 @@ fn run() -> Result<()> {
 
     // Iterate over our arguments. We do this without using recursion, mostly
    // to see how that looks in Rust.
-    let mut first_headers: Option<Vec<ByteString>> = None;
+    let mut first_headers: Option<ByteRecord> = None;
     let mut files_processed: u64 = 0;
     for input in &args.arg_input_file_or_dir {
        for entry in WalkDir::new(input).follow_links(true) {
@@ -132,7 +117,7 @@ fn run() -> Result<()> {
            } else if filename.ends_with(".csv.sz") {
                debug!("Processing as *.csv.sz");
                let file = File::open(path).chain_err(&mkerr)?;
-                let mut decompressed = snap::Reader::new(file);
+                let mut decompressed = snap::read::FrameDecoder::new(file);
                output_csv(&mut decompressed, &mut first_headers, &mut out)
                    .chain_err(&mkerr)?;
            } else {
@@ -159,7 +144,7 @@ fn run() -> Result<()> {
 /// Output the specified CSV data to standard output.
 fn output_csv(
     file: &mut dyn Read,
-    first_headers: &mut Option<Vec<ByteString>>,
+    first_headers: &mut Option<ByteRecord>,
     output: &mut dyn Write,
 ) -> Result<()> {
     // Force buffered input for a big performance boost and so we can
@@ -171,10 +156,11 @@ fn output_csv(
     // NOTE: This will fail if there are escaped newlines in the header line.
     let mut first_line = String::new();
     input.read_line(&mut first_line)?;
-    let mut rdr = csv::Reader::from_string(first_line.clone());
+    let mut cursor = Cursor::new(&first_line);
+    let mut rdr = csv::Reader::from_reader(&mut cursor);
 
     // Get our header line only.
-    let headers = rdr.byte_headers()?;
+    let headers = rdr.byte_headers()?.to_owned();
 
     // If this is the first set of headers we've found, save them. If not,
     // make sure that the headers match between files.
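The subtlest change above is in header parsing: csv 1.x removed `Reader::from_string`, so the first line is now wrapped in a `Cursor` and fed to `Reader::from_reader`, and the borrowed `ByteRecord` from `byte_headers` is copied out with `to_owned`. A standalone sketch of the same pattern (the `parse_headers` helper is illustrative, not part of the diff):

```rust
use csv::{ByteRecord, Reader};
use std::io::Cursor;

/// Parse the header row out of a single line of CSV text. Assumes the
/// header contains no escaped newlines, like the code above.
fn parse_headers(first_line: &str) -> csv::Result<ByteRecord> {
    let mut rdr = Reader::from_reader(Cursor::new(first_line));
    // `byte_headers` returns a `&ByteRecord` borrowed from the reader;
    // `to_owned` detaches it so it can outlive `rdr`.
    Ok(rdr.byte_headers()?.to_owned())
}

fn main() -> csv::Result<()> {
    let headers = parse_headers("name,zip,population\n")?;
    assert_eq!(headers, ByteRecord::from(vec!["name", "zip", "population"]));
    Ok(())
}
```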

fixed2csv/Cargo.toml (+3 -3)

@@ -7,9 +7,9 @@ license = "MIT/Apache-2.0"
 
 [dependencies]
 csv = "1.0.2"
-env_logger = "0.5.13"
+env_logger = "0.8.3"
 failure = "0.1.3"
 humansize = "1"
-humantime = "1"
+humantime = "2"
 log = "0.4.6"
-structopt = "0.2.12"
+structopt = "0.3.21"

fixed2csv/src/main.rs (+5 -8)

@@ -1,12 +1,7 @@
-use csv;
-
-#[macro_use]
-extern crate log;
-use structopt;
-
 use failure::Error;
 use humansize::{file_size_opts, FileSize};
 use humantime::format_duration;
+use log::debug;
 use std::{
     cmp::min,
     io::{prelude::*, stdin, stdout, BufReader},
@@ -95,8 +90,10 @@ fn extract_fields(
         // Get our column.
         let mut field = &line[min(offset, end)..min(offset + width, end)];
 
-        // Strip spaces and add to our output record.
-        while field.len() > 0 && field[field.len() - 1] == b' ' {
+        // Strip spaces and add to our output record. We do this the hard
+        // way because there's no stable API on `&[u8]` that works like
+        // `trim_end_matches`.
+        while !field.is_empty() && field[field.len() - 1] == b' ' {
             field = &field[..field.len() - 1];
         }
         record.push_field(field);
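As the expanded comment says, stable Rust had no `trim_end_matches` equivalent for `&[u8]` when this was written (`<[u8]>::trim_ascii_end` was only stabilized much later), so trailing spaces are stripped manually. The same loop as a standalone, hypothetical helper:

```rust
/// Trim trailing ASCII spaces from a byte slice without allocating.
fn trim_trailing_spaces(mut field: &[u8]) -> &[u8] {
    while !field.is_empty() && field[field.len() - 1] == b' ' {
        // Shrink the slice by one byte; no copying involved.
        field = &field[..field.len() - 1];
    }
    field
}

fn main() {
    assert_eq!(trim_trailing_spaces(b"ACME CORP   "), b"ACME CORP");
    assert_eq!(trim_trailing_spaces(b"   "), b"");
}
```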

geochunk/Cargo.toml (+8 -10)

@@ -14,16 +14,14 @@ homepage = "http://blog.faraday.io/geochunk-fast-intelligent-splitting-for-piles
 cli_test_dir = "0.1.2"
 
 [dependencies]
-# Enable this by passing `--features "clippy"` to cargo. Needs nightly Rust.
-clippy = { version = "0.0.*", optional = true }
 csv = "1"
-docopt = "0.8"
-env_logger = "0.4"
-error-chain = "0.11"
-lazy_static = "0.2"
-log = "0.3.7"
-regex = "0.2"
+docopt = "1.1"
+env_logger = "0.8.3"
+error-chain = "0.12.4"
+lazy_static = "1.4"
+log = "0.4.14"
+regex = "1.4.3"
 serde = "1.0"
 serde_derive = "1.0"
-snap = "0.2.1"
-walkdir = "1.0.7"
+snap = "1.0.4"
+walkdir = "2.3.1"

geochunk/src/errors.rs (-1)

@@ -1,6 +1,5 @@
 //! A module to hold `Error`, etc., types generated by `error-chain`.
 
-use csv;
 use std::io;
 
 error_chain! {

geochunk/src/main.rs (+2 -12)

@@ -1,20 +1,10 @@
-// `error_chain!` can recurse deeply
-#![recursion_limit = "1024"]
-// Enable clippy if we were asked to do so.
-#![cfg_attr(feature = "clippy", feature(plugin))]
-#![cfg_attr(feature = "clippy", plugin(clippy))]
-
-use env_logger;
 #[macro_use]
 extern crate error_chain;
 #[macro_use]
 extern crate lazy_static;
 #[macro_use]
 extern crate log;
 
-#[macro_use]
-extern crate serde_derive;
-
 use docopt::Docopt;
 use serde::de::Error as DeError;
 use serde::{Deserialize, Deserializer};
@@ -52,7 +42,7 @@ impl<'de> Deserialize<'de> for ChunkType {
     }
 }
 
-const USAGE: &'static str = "
+const USAGE: &str = "
 geochunk - Partition data sets by estimated population.
 
 Usage:
@@ -89,7 +79,7 @@ quick_main!(run);
 
 /// Our actual `main` function, called by the `quick_main!` macro above.
 fn run() -> Result<()> {
-    env_logger::init().expect("Could not initialize logging");
+    env_logger::init();
     let args: Args = Docopt::new(USAGE)
         .and_then(|d| d.deserialize())
         .unwrap_or_else(|e| e.exit());

geochunk/src/zip2010.rs (+7 -10)

@@ -1,8 +1,5 @@
 //! Support for chunks based on 2010 census population data.
 
-use csv;
-#[cfg(test)]
-use env_logger;
 use regex::Regex;
 use std::collections::hash_map::Entry;
 use std::collections::HashMap;
@@ -35,8 +32,8 @@ impl Classifier {
            &mut chunk_id_for_prefix,
        );
        Classifier {
-            target_population: target_population,
-            chunk_id_for_prefix: chunk_id_for_prefix,
+            target_population,
+            chunk_id_for_prefix,
        }
    }
 
@@ -142,7 +139,7 @@ impl Classifier {
 
 #[test]
 fn classifies_sample_zip_codes_as_expected() {
-    let _ = env_logger::init();
+    let _ = env_logger::try_init();
     let classifier = Classifier::new(250000);
     assert_eq!(classifier.chunk_for("01000").unwrap(), "010_0");
     assert_eq!(classifier.chunk_for("07720").unwrap(), "077_1");
@@ -151,7 +148,7 @@
 
 #[test]
 fn does_not_assign_geochunks_to_missing_or_invalid_zips() {
-    let _ = env_logger::init();
+    let _ = env_logger::try_init();
     let classifier = Classifier::new(250000);
     assert!(classifier.chunk_for("").is_none());
     assert!(classifier.chunk_for("0").is_none());
@@ -160,7 +157,7 @@ does_not_assign_geochunks_to_missing_or_invalid_zips() {
 
 #[test]
 fn does_not_panic_on_corner_cases() {
-    let _ = env_logger::init();
+    let _ = env_logger::try_init();
     let classifier = Classifier::new(250000);
     // I don't actually care whether or not this is mapped to a geochunk or
     // not, because we don't try to do detailed validation until _after_
@@ -173,7 +170,7 @@ type PrefixPopulationMaps = [HashMap<String, u64>; ZIP_CODE_LENGTH + 1];
 
 /// Directly include our zip code population data in our application binary
 /// for ease of distribution and packaging.
-const ZIP_POPULATION_CSV: &'static str = include_str!("zip2010.csv");
+const ZIP_POPULATION_CSV: &str = include_str!("zip2010.csv");
 
 /// The population associated with a zip code prefix.
 struct PrefixPopulation {
@@ -206,7 +203,7 @@ impl PrefixPopulation {
            }
        }
 
-        PrefixPopulation { maps: maps }
+        PrefixPopulation { maps }
    }
 
    /// Look up the population of a zip code prefix. Calling this function
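The switch from `env_logger::init()` to `try_init()` in the tests matters because `init` panics if a logger is already registered, and the test harness runs many `#[test]` functions in one process. With `try_init`, every test can attempt registration and ignore the failure. A minimal sketch of the pattern:

```rust
#[test]
fn some_classifier_test() {
    // Any test might run first, so each one tries to install the logger
    // and discards the error if another test already installed it.
    let _ = env_logger::try_init();
    // ... actual test assertions go here ...
}
```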

geocode-csv/Cargo.toml (+7 -6)

@@ -16,18 +16,19 @@ cli_test_dir = "0.1.7"
 [dependencies]
 common_failures = "0.1.1"
 csv = "1.0.7"
-env_logger = "0.7.1"
+env_logger = "0.8.3"
 failure = "0.1.5"
 futures = "0.3.4"
-hyper = "0.13.4"
-hyper-tls = "0.4.1"
+hyper = { version = "0.14.4", features = ["client", "http2", "stream"] }
+hyper-tls = "0.5.0"
 log = "0.4.6"
 serde = { version = "1.0.92", features = ["derive"] }
 # IMPORTANT: We require `preserve_order` to correctly handle "structure" JSON
 # specifications in a way that puts the right data in the right output columns.
 serde_json = { version = "1.0.39", features = ["preserve_order"] }
 structopt = "0.3.4"
-strum = "0.18.0"
-strum_macros = "0.18.0"
-tokio = { version = "0.2.13", features = ["io-util", "stream", "sync"] }
+strum = "0.20.0"
+strum_macros = "0.20.1"
+tokio = { version = "1.2.0", features = ["io-util", "rt-multi-thread", "sync"] }
+tokio-stream = "0.1.3"
 url = "2.1.1"
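The new `tokio-stream` dependency reflects a tokio 1.0 change: the `Stream` utilities (and the `stream` feature flag) moved out of the main crate, which is why the feature list above swaps `stream` for `rt-multi-thread`. A hedged sketch of the migration, using an illustrative channel rather than code from geocode-csv:

```rust
use tokio_stream::{wrappers::ReceiverStream, StreamExt};

fn main() {
    // `rt-multi-thread` (enabled above) provides this runtime constructor.
    let rt = tokio::runtime::Runtime::new().expect("could not start runtime");
    rt.block_on(async {
        let (tx, rx) = tokio::sync::mpsc::channel(8);
        tokio::spawn(async move {
            for i in 0..3 {
                tx.send(i).await.expect("receiver dropped");
            }
        });
        // Under tokio 0.2, `rx` itself was a `Stream`; under 1.x it must be
        // wrapped, and `StreamExt` now comes from the `tokio-stream` crate.
        let mut stream = ReceiverStream::new(rx);
        while let Some(i) = stream.next().await {
            println!("got {}", i);
        }
    });
}
```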

geocode-csv/src/addresses.rs (+4 -4)

@@ -60,7 +60,7 @@ impl ColumnKeyOrKeys<usize> {
                // Already there, so ignore it. This appears in a lot of
                // real-world databases, for some reason.
            } else {
-                extracted.push_str(" ");
+                extracted.push(' ');
                extracted.push_str(s);
            }
        }
@@ -110,9 +110,9 @@ pub struct AddressColumnKeys<K: Default + Eq> {
 
 impl AddressColumnKeys<usize> {
     /// Given a CSV row, extract an `Address` value to send to SmartyStreets.
-    pub fn extract_address_from_record<'a>(
+    pub fn extract_address_from_record(
        &self,
-        record: &'a StringRecord,
+        record: &StringRecord,
    ) -> Result<Address> {
        Ok(Address {
            street: self.street.extract_from_record(record)?.into_owned(),
@@ -201,7 +201,7 @@ impl<Key: Default + Eq> AddressColumnSpec<Key> {
            .map(|k| &k[..])
            .collect::<Vec<_>>();
        // Do not remove this `sort`!
-        prefixes.sort();
+        prefixes.sort_unstable();
        prefixes
    }
 
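Replacing `sort` with `sort_unstable` is safe here: the two differ only in how *equal* elements are ordered (and the unstable variant avoids a temporary allocation), and since these prefixes appear to come from distinct map keys, the output should be identical. A small illustration with hypothetical data:

```rust
fn main() {
    // With distinct elements, stable and unstable sorts agree exactly.
    let mut stable = vec!["street", "prefix_1", "prefix_2", "zip"];
    let mut unstable = stable.clone();
    stable.sort();
    unstable.sort_unstable();
    assert_eq!(stable, unstable);
}
```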

geocode-csv/src/async_util.rs (+1 -1)

@@ -18,7 +18,7 @@ where
     T: Send + 'static,
 {
     // Spawn a worker thread outside our thread pool to do the actual work.
-    let (mut sender, mut receiver) = mpsc::channel(1);
+    let (sender, mut receiver) = mpsc::channel(1);
     let thr = thread::Builder::new().name(thread_name);
     let handle = thr
         .spawn(move || {
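Dropping `mut` on `sender` is another tokio 1.0 artifact: `mpsc::Sender::send` (and the new `blocking_send`) take `&self` rather than `&mut self`, so the old binding would now trigger an `unused_mut` warning. A hedged sketch of the overall pattern; the worker body is illustrative, since the diff doesn't show the original:

```rust
use tokio::sync::mpsc;

fn main() {
    let rt = tokio::runtime::Runtime::new().expect("could not start runtime");
    rt.block_on(async {
        // No `mut` needed: `Sender` methods take `&self` in tokio 1.x.
        let (sender, mut receiver) = mpsc::channel(1);
        let worker = std::thread::spawn(move || {
            // `blocking_send` lets a plain OS thread feed an async channel.
            sender.blocking_send("work item").expect("receiver dropped");
        });
        // The loop ends when the worker finishes and drops its `sender`.
        while let Some(item) = receiver.recv().await {
            println!("received: {}", item);
        }
        worker.join().expect("worker panicked");
    });
}
```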

0 commit comments

Comments
 (0)