Skip to content

Commit

Permalink
creating DG tests for census_tract level
Browse files Browse the repository at this point in the history
  • Loading branch information
Niklewa committed Jun 25, 2024
1 parent 18140f6 commit 29f5f0d
Showing 1 changed file with 68 additions and 21 deletions.
89 changes: 68 additions & 21 deletions tests/test_data_grabber.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from cities.utils.data_grabber import (
DataGrabber,
MSADataGrabber,
CTDataGrabberCSV, # TODO: Change to CTDataGrabber() in the future
list_available_features,
list_interventions,
list_outcomes,
Expand All @@ -13,7 +14,7 @@

# Available features for each data level, looked up once at module level and
# shared by the tests in this file.
features = list_available_features()
features_msa = list_available_features("msa")

features_ct = list_available_features("census_tract")

def test_non_emptiness_DataGrabber():
assert features is not None
Expand All @@ -40,9 +41,9 @@ def test_non_emptiness_DataGrabber():
)


def test_non_emptiness_MSADataGrabber():
def test_non_emptiness_MSADataGrabber():
os.chdir(os.path.dirname(os.getcwd()))
data_msa = MSADataGrabber()
data_msa = MSADataGrabber()

data_msa.get_features_wide(features_msa)
data_msa.get_features_std_wide(features_msa)
Expand All @@ -56,7 +57,24 @@ def test_non_emptiness_MSADataGrabber():
assert data_msa.std_long[feature].shape[1] == 4


def general_data_format_testing(data, features):

def test_non_emptiness_CTDataGrabber():
    """Check that census-tract tables load with plausibly sized shapes.

    Populates the ``wide``, ``std_wide``, ``long`` and ``std_long`` tables
    for every feature in ``features_ct`` and asserts coarse bounds on the
    row/column counts of each one.
    """
    # Move one directory up from the current working directory before loading.
    parent_dir = os.path.dirname(os.getcwd())
    os.chdir(parent_dir)

    # TODO: Change to CTDataGrabber() in the future
    grabber = CTDataGrabberCSV()

    grabber.get_features_wide(features_ct)
    grabber.get_features_std_wide(features_ct)
    grabber.get_features_long(features_ct)
    grabber.get_features_std_long(features_ct)

    for name in features_ct:
        # Each table is looked up immediately before its own assertion so the
        # checks are evaluated one at a time, in this order.
        wide_rows = grabber.wide[name].shape[0]
        assert wide_rows > 100

        std_wide_cols = grabber.std_wide[name].shape[1]
        assert std_wide_cols < 100

        long_rows = grabber.long[name].shape[0]
        assert long_rows > 100

        std_long_cols = grabber.std_long[name].shape[1]
        assert std_long_cols == 4


def general_data_format_testing(data, features, level = "county_msa"):
assert features is not None

data.get_features_wide(features)
Expand All @@ -76,23 +94,30 @@ def general_data_format_testing(data, features):
assert data.std_long[feature].iloc[:, 1].dtype == object, dataTypeError

for feature in features:
namesFipsError = "FIPS codes and GeoNames don't match!"
assert (
data.wide[feature]["GeoFIPS"].nunique()
== data.wide[feature]["GeoName"].nunique()
), namesFipsError
assert (
data.long[feature]["GeoFIPS"].nunique()
== data.long[feature]["GeoName"].nunique()
), namesFipsError
assert (
data.std_wide[feature]["GeoFIPS"].nunique()
== data.std_wide[feature]["GeoName"].nunique()
), namesFipsError
assert (
data.std_long[feature]["GeoFIPS"].nunique()
== data.std_long[feature]["GeoName"].nunique()
), namesFipsError
if level == "county_msa":

namesFipsError = "FIPS codes and GeoNames don't match!"
assert (
data.wide[feature]["GeoFIPS"].nunique()
== data.wide[feature]["GeoName"].nunique()
), namesFipsError
assert (
data.long[feature]["GeoFIPS"].nunique()
== data.long[feature]["GeoName"].nunique()
), namesFipsError
assert (
data.std_wide[feature]["GeoFIPS"].nunique()
== data.std_wide[feature]["GeoName"].nunique()
), namesFipsError
assert (
data.std_long[feature]["GeoFIPS"].nunique()
== data.std_long[feature]["GeoName"].nunique()
), namesFipsError

elif level == "census_tract":

pass # TODO: check whether the county number is correct as indicated by the CT number


for feature in features:
for column in data.wide[feature].columns[2:]:
Expand Down Expand Up @@ -148,6 +173,13 @@ def test_MSADataGrabber_data_types():
general_data_format_testing(data_msa, features_msa)



def test_CTDataGrabber_data_types():
    """Run the shared data-format checks at the census-tract level."""
    # TODO: Change to CTDataGrabber() in the future
    ct_grabber = CTDataGrabberCSV()

    general_data_format_testing(
        data=ct_grabber,
        features=features_ct,
        level="census_tract",
    )


def test_feature_listing_runtime():
features = list_available_features()
tensed_features = list_tensed_features()
Expand Down Expand Up @@ -180,3 +212,18 @@ def test_GeoFIPS_ma_column_values():
column_values = data_msa.long[feature]["GeoFIPS"]

assert all(value > 9999 and str(value)[-1] == "0" for value in column_values)



# Module-level census-tract grabber read by test_GeoFIPS_ct_column_values;
# only the wide tables are loaded here.
data_ct = CTDataGrabberCSV() # TODO: Change to CTDataGrabber() in the future
data_ct.get_features_wide(features_ct)


def test_GeoFIPS_ct_column_values():
    """Check that every census-tract GeoFIPS value exceeds 999,999,999.

    For each feature in ``features_ct``, reads the ``GeoFIPS`` column of the
    corresponding wide table in the module-level ``data_ct`` and asserts the
    lower bound on every value.
    """
    for feature in features_ct:
        column_values = data_ct.wide[feature]["GeoFIPS"]

        # For integer codes this means at least ten digits.
        assert all(value > 999999999 for value in column_values), (
            f"GeoFIPS values for feature {feature!r} must all exceed 999999999"
        )


0 comments on commit 29f5f0d

Please sign in to comment.