From 28705bc282411be8714c2f14f60159be04f05a14 Mon Sep 17 00:00:00 2001 From: Mark Parrish Date: Wed, 8 Dec 2021 11:55:32 -0800 Subject: [PATCH 1/2] Update __init__.py Added address_type as determined by usaddress (https://usaddress.readthedocs.io/en/latest/); it returns Street Address, Intersection, PO Box, or Ambiguous --- pandas_usaddress/__init__.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas_usaddress/__init__.py b/pandas_usaddress/__init__.py index 14bd25c..1bcd22a 100644 --- a/pandas_usaddress/__init__.py +++ b/pandas_usaddress/__init__.py @@ -63,6 +63,12 @@ def usaddress_field_creation(x,i): except: None +def usaddress_address_type(x): + try: + return x[1][0] + else: + None + def trim(x): x = str(x) x = x.split() @@ -99,8 +105,8 @@ def tag(dfa, address_columns, granularity='full', standardize=False): for i in usaddress_fields: df[i] = df['odictaddress'].apply(lambda x: usaddress_field_creation(x,i)) - - + df['address_type'] = df['odictaddress'].apply(usaddress_address_type) + df = df.drop(columns='odictaddress') @@ -269,4 +275,4 @@ def tag(dfa, address_columns, granularity='full', standardize=False): df = df.replace({'None': np.nan, 'none': np.nan, 'nan': np.nan, 'NaN': np.nan, None: np.nan, '': np.nan}).copy() - return df \ No newline at end of file + return df From 50f7211c8ed524f3f3ddc0f6a22fe03cd2cd7a2f Mon Sep 17 00:00:00 2001 From: Mark Parrish Date: Wed, 8 Dec 2021 20:11:13 -0800 Subject: [PATCH 2/2] fixed bad reference and changed else to except --- pandas_usaddress/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas_usaddress/__init__.py b/pandas_usaddress/__init__.py index 1bcd22a..01bf42a 100644 --- a/pandas_usaddress/__init__.py +++ b/pandas_usaddress/__init__.py @@ -65,8 +65,8 @@ def usaddress_field_creation(x,i): def usaddress_address_type(x): try: - return x[1][0] - else: + return x[1] + except: None def trim(x): @@ -105,7 +105,8 @@ def tag(dfa, address_columns, 
granularity='full', standardize=False): for i in usaddress_fields: df[i] = df['odictaddress'].apply(lambda x: usaddress_field_creation(x,i)) - df['address_type'] = df['odictaddress'].apply(usaddress_address_type) + + df['address_type'] = df['odictaddress'].apply(usaddress_address_type) df = df.drop(columns='odictaddress')