Add a --no-na-filter option to the CLI.

cli() receives the flag as the boolean no_na_filter and passes it to
load_csv(), which forwards the inverse as na_filter= to its reader call
(presumably pandas.read_csv, per the option's help text -- confirm).
load_csv() defaults no_na_filter to False so callers that omit it keep working.

NOTE(review): line breaks and context indentation were restored from a
whitespace-collapsed copy of this patch; the blob ids on the index lines are
carried over unchanged. Verify against the target files before applying.

diff --git a/csvs_to_sqlite/cli.py b/csvs_to_sqlite/cli.py
index 5cca320..259b288 100644
--- a/csvs_to_sqlite/cli.py
+++ b/csvs_to_sqlite/cli.py
@@ -116,6 +116,12 @@
     is_flag=True,
     help="Skip adding full-text index on values extracted using --extract-column (default is to add them)",
 )
+@click.option(
+    "--no-na-filter",
+    "no_na_filter",
+    is_flag=True,
+    help="Skip detection of missing value markers (empty strings and the value of na_values). See pandas.read_csv() documentation",
+)
 @click.version_option()
 def cli(
     paths,
@@ -136,6 +142,7 @@ def cli(
     filename_column,
     no_index_fks,
     no_fulltext_fks,
+    no_na_filter,
 ):
     """
     PATHS: paths to individual .csv files or to directories containing .csvs
@@ -162,7 +169,7 @@ def cli(
     sql_type_overrides = None
     for name, path in csvs.items():
         try:
-            df = load_csv(path, separator, skip_errors, quoting, shape)
+            df = load_csv(path, separator, skip_errors, quoting, shape, no_na_filter)
             df.table_name = table or name
             if filename_column:
                 df[filename_column] = name
diff --git a/csvs_to_sqlite/utils.py b/csvs_to_sqlite/utils.py
index bb4ae4f..bfa13e8 100644
--- a/csvs_to_sqlite/utils.py
+++ b/csvs_to_sqlite/utils.py
@@ -25,6 +25,7 @@ def load_csv(
     skip_errors,
     quoting,
     shape,
+    no_na_filter=False,
     encodings_to_try=("utf8", "latin-1"),
 ):
     usecols = None
@@ -41,6 +42,8 @@ def load_csv(
                     low_memory=True,
                     encoding=encoding,
                     usecols=usecols,
+                    # --no-na-filter is a negative flag; na_filter wants the inverse.
+                    na_filter=not no_na_filter,
                 )
             except UnicodeDecodeError:
                 continue