From 90ab61bbfd67522529235583bec09ce0f4eec4f7 Mon Sep 17 00:00:00 2001 From: dazzag24 Date: Tue, 8 Jan 2019 19:25:13 +0000 Subject: [PATCH 1/3] Add na_filter=False to read_csv() --- csvs_to_sqlite/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csvs_to_sqlite/utils.py b/csvs_to_sqlite/utils.py index 720268d..f3b4fc7 100644 --- a/csvs_to_sqlite/utils.py +++ b/csvs_to_sqlite/utils.py @@ -24,7 +24,7 @@ def load_csv(filepath, separator, skip_errors, quoting, shape, encodings_to_try= return pd.read_csv( filepath, sep=separator, quoting=quoting, error_bad_lines=not skip_errors, low_memory=True, - encoding=encoding, usecols=usecols + encoding=encoding, usecols=usecols, na_filter=False ) except UnicodeDecodeError: continue From a2535ee371dfb93018cd290b92b6b0eef81f59e8 Mon Sep 17 00:00:00 2001 From: DazzaG Date: Fri, 18 Jan 2019 10:25:47 +0000 Subject: [PATCH 2/3] Add cli option to control na_filter in pandas.read_csv() --- csvs_to_sqlite/cli.py | 10 +++++++++- csvs_to_sqlite/utils.py | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/csvs_to_sqlite/cli.py b/csvs_to_sqlite/cli.py index 5cca320..a777e77 100644 --- a/csvs_to_sqlite/cli.py +++ b/csvs_to_sqlite/cli.py @@ -116,6 +116,13 @@ is_flag=True, help="Skip adding full-text index on values extracted using --extract-column (default is to add them)", ) +@click.option( + "--na-filter", + "na_filter", + is_flag=True, + default=True, + help="Detect missing value markers (empty strings and the value of na_values). 
See pandas.read_csv() documentation", +) @click.version_option() def cli( paths, @@ -136,6 +143,7 @@ def cli( filename_column, no_index_fks, no_fulltext_fks, + na_filter ): """ PATHS: paths to individual .csv files or to directories containing .csvs @@ -162,7 +170,7 @@ def cli( sql_type_overrides = None for name, path in csvs.items(): try: - df = load_csv(path, separator, skip_errors, quoting, shape) + df = load_csv(path, separator, skip_errors, quoting, shape, na_filter) df.table_name = table or name if filename_column: df[filename_column] = name diff --git a/csvs_to_sqlite/utils.py b/csvs_to_sqlite/utils.py index dd03383..3d99659 100644 --- a/csvs_to_sqlite/utils.py +++ b/csvs_to_sqlite/utils.py @@ -25,6 +25,7 @@ def load_csv( skip_errors, quoting, shape, + na_filter, encodings_to_try=("utf8", "latin-1"), ): usecols = None @@ -41,7 +42,7 @@ def load_csv( low_memory=True, encoding=encoding, usecols=usecols, - na_filter=False + na_filter=na_filter ) except UnicodeDecodeError: continue From c40c2c45d7aa31a8e00b220384bc46995199f14a Mon Sep 17 00:00:00 2001 From: DazzaG Date: Fri, 18 Jan 2019 11:05:21 +0000 Subject: [PATCH 3/3] Corrected implementation of command line flags --- csvs_to_sqlite/cli.py | 11 +++++------ csvs_to_sqlite/utils.py | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/csvs_to_sqlite/cli.py b/csvs_to_sqlite/cli.py index a777e77..259b288 100644 --- a/csvs_to_sqlite/cli.py +++ b/csvs_to_sqlite/cli.py @@ -117,11 +117,10 @@ help="Skip adding full-text index on values extracted using --extract-column (default is to add them)", ) @click.option( - "--na-filter", - "na_filter", + "--no-na-filter", + "no_na_filter", is_flag=True, - default=True, - help="Detect missing value markers (empty strings and the value of na_values). See pandas.read_csv() documentation", + help="Skip detection of missing value markers (empty strings and the value of na_values). 
See pandas.read_csv() documentation", ) @click.version_option() def cli( @@ -143,7 +142,7 @@ def cli( filename_column, no_index_fks, no_fulltext_fks, - na_filter + no_na_filter ): """ PATHS: paths to individual .csv files or to directories containing .csvs @@ -170,7 +169,7 @@ def cli( sql_type_overrides = None for name, path in csvs.items(): try: - df = load_csv(path, separator, skip_errors, quoting, shape, na_filter) + df = load_csv(path, separator, skip_errors, quoting, shape, no_na_filter) df.table_name = table or name if filename_column: df[filename_column] = name diff --git a/csvs_to_sqlite/utils.py b/csvs_to_sqlite/utils.py index 3d99659..bfa13e8 100644 --- a/csvs_to_sqlite/utils.py +++ b/csvs_to_sqlite/utils.py @@ -25,7 +25,7 @@ def load_csv( skip_errors, quoting, shape, - na_filter, + no_na_filter, encodings_to_try=("utf8", "latin-1"), ): usecols = None @@ -42,7 +42,7 @@ def load_csv( low_memory=True, encoding=encoding, usecols=usecols, - na_filter=na_filter + na_filter=not no_na_filter ) except UnicodeDecodeError: continue