Skip to content

Commit

Permalink
Refactor column renaming in lease_dataframe.py to use exact matches for improved clarity and accuracy
Browse files Browse the repository at this point in the history
  • Loading branch information
perfectly-preserved-pie committed Dec 3, 2024
1 parent 4031407 commit 68b6611
Showing 1 changed file with 23 additions and 23 deletions.
46 changes: 23 additions & 23 deletions lease_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,35 +50,35 @@

# NOTE(review): this excerpt looks like a rendered diff hunk with its +/- markers
# stripped, so the pattern-based (presumably old) and exact-match (presumably new)
# variants of the comments, dict entries, and rename call all appear together.
# Confirm against the actual lease_dataframe.py before relying on it as written.

# Standardize the column names by renaming them
# https://stackoverflow.com/a/65332240
# Define a renaming dictionary based on patterns
# Define a renaming dictionary with exact matches
# NOTE(review): as written, 'city', 'furnished', 'laundry', 'pet deposit' and
# 'sqft' each appear twice in this literal. Python keeps the later entry for a
# repeated key; every repeat here maps to the same value, so the resulting
# mapping is unaffected by the duplication.
rename_dict = {
'agent': 'phone_number',
'allowed': 'pet_policy',
'baths': 'bathrooms',
'bedrooms': 'bedrooms',
'city': 'city',
'furnished': 'furnished',
'key': 'key_deposit',
'laundry': 'laundry',
'list': 'list_price',
'lot': 'lot_size',
'mls': 'mls_number',
'other': 'other_deposit',
'pet deposit': 'pet_deposit',
'prking': 'parking_spaces',
'security': 'security_deposit',
'sqft': 'sqft',
'square': 'ppsqft',
'prop subtype': 'subtype',
'st #': 'street_number',
'st name': 'street_name',
'sub': 'subtype',
'terms': 'terms',
'yr': 'year_built',
'address': 'street_name',
'city': 'city',
'zip': 'zip_code',
'br': 'bedrooms',
'baths(fthq)': 'bathrooms',
'other deposit': 'other_deposit',
'pet deposit': 'pet_deposit',
'key deposit': 'key_deposit',
'security deposit': 'security_deposit',
'lp': 'list_price',
'sqft': 'sqft',
'lp $/sqft': 'ppsqft',
'yb': 'year_built',
'# prking spaces': 'parking_spaces',
'laundry': 'laundry',
'pets': 'pet_policy',
'lease terms': 'terms',
'furnished': 'furnished',
"seller's agent 1 cell": 'phone_number',
'lot sz': 'lot_size',
}

# Rename columns based on substrings in the column names
# Each column name c becomes the value of the first rename_dict key (in dict
# order) that occurs inside c; c is kept unchanged when no key matches.
df = df.rename(columns=lambda c: next((v for k, v in rename_dict.items() if k in c), c))
# Rename columns based on exact matches
# Presumably df is a pandas DataFrame, in which case only columns whose full
# name equals a key are renamed — TODO confirm where df is created.
df = df.rename(columns=rename_dict)

# Drop the numbers in the first group of characters in the street_name column
# The pattern ^\d+\s* removes leading digits and any whitespace that follows them.
df['street_name'] = df['street_name'].str.replace(r'^\d+\s*', '', regex=True)
Expand Down

0 comments on commit 68b6611

Please sign in to comment.