Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rough version of importer that uses CSV config for imports #612

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions conf/imports.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name,label,description,data_type,category,subcategory,release_date,source_label,source,source_type,data_url,table,default_value,exclude_countries,unit_type,unit_distribution,fill_blanks,is_public,is_filterable,is_shadeable,data_file,uses_gss,constituency_col,data_col,file_type,area_type
import_test,"Berkshire, Buckinghamshire and Oxfordshire Wildlife Trust members",,integer,movement,places_and_spaces,August 2024,"Data from Berkshire, Buckinghamshire and Oxfordshire Wildlife Trusts",,xlsl,,areadata,10,"Scotland, Northern Ireland",raw,people_in_area,FALSE,FALSE,FALSE,FALSE,berks_bucks_oxon_wildlife_trust_member_counts.xlsx,FALSE,Constituency,Total_Members,excel,WMC23
86 changes: 86 additions & 0 deletions hub/management/commands/import_from_comfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from django.conf import settings

import pandas as pd

from .base_importers import BaseImportFromDataFrameCommand


class Command(BaseImportFromDataFrameCommand):
    """Import one data set described by a row of ``conf/imports.csv``.

    The row is chosen with ``--import_name`` (matched against the ``name``
    column). It supplies the data file, its file type, the area type, the
    constituency and data column names, and the values collected into the
    ``defaults`` entry of ``self.data_sets``.
    """

    help = "Import based on config"

    config_file = settings.BASE_DIR / "conf" / "imports.csv"

    # Config columns copied as-is into the data set's "defaults" dict.
    defaults_cols = [
        "label",
        "data_type",
        "category",
        "subcategory",
        "release_date",
        "source_label",
        "source",
        "source_type",
        "data_url",
        "table",
        "default_value",
        "exclude_countries",
        # "comparators",
        "unit_type",
        "unit_distribution",
        "fill_blanks",
    ]

    def add_arguments(self, parser):
        """Add the required ``--import_name`` option to the parent's arguments."""
        super().add_arguments(parser)

        parser.add_argument(
            "--import_name", action="store", required=True, help="Name of import to run"
        )

    def setup(self, import_name):
        """Load the config row named ``import_name`` and configure this command.

        Sets ``message``, ``cons_row``, ``cons_col``, ``data_file``,
        ``file_type``, ``area_type``, ``uses_gss`` and ``data_sets`` from the
        matching row of ``config_file``.

        Raises:
            CommandError: if no row in the config has ``name == import_name``.
        """
        # Imported here so the fix does not depend on the file's import block.
        from django.core.management.base import CommandError

        df = pd.read_csv(self.config_file)

        # Keep the filtered frame: taking ``df.iloc[0]`` would always use the
        # first config row regardless of the requested import.
        matches = df.loc[df["name"] == import_name]
        if matches.empty:
            raise CommandError(
                f"No import named {import_name!r} found in {self.config_file}"
            )
        row = matches.iloc[0]

        self.message = f"Importing {row['label']}"
        # Both attributes are set from the same config column.
        self.cons_row = row["constituency_col"]
        self.cons_col = row["constituency_col"]
        self.data_file = settings.BASE_DIR / "data" / row["data_file"]
        self.file_type = row["file_type"]
        self.area_type = row["area_type"]

        # pandas turns an all TRUE/FALSE column into booleans, so a plain
        # comparison with the string "TRUE" would never match. Accept both
        # forms; a blank (NaN) cell counts as False.
        gss_flag = row["uses_gss"]
        if isinstance(gss_flag, str):
            self.uses_gss = gss_flag.strip().upper() == "TRUE"
        else:
            self.uses_gss = bool(gss_flag) and not pd.isna(gss_flag)

        defaults = {col: row[col] for col in self.defaults_cols}

        self.data_sets = {import_name: {"defaults": defaults, "col": row["data_col"]}}

    def get_dataframe(self):
        """Read ``self.data_file`` according to ``self.file_type``.

        Returns:
            The loaded DataFrame with the constituency column cast to ``str``,
            or ``None`` (after writing to stderr) when the file type is
            neither ``"csv"`` nor ``"excel"``.
        """
        if self.file_type == "csv":
            df = pd.read_csv(self.data_file)
        elif self.file_type == "excel":
            df = pd.read_excel(self.data_file)
        else:
            self.stderr.write(f"Unknown file type: {self.file_type}")
            return None

        # get_cons_col() is not defined in this class; presumably it comes
        # from the base importer -- confirm there.
        df = df.astype({self.get_cons_col(): "str"})
        return df

    def handle(
        self,
        quiet=False,
        skip_new_areatype_conversion=False,
        import_name=None,
        *args,
        **options,
    ):
        """Configure the command from the config file, then run the parent import."""
        self.setup(import_name)

        super().handle(quiet, skip_new_areatype_conversion, *args, **options)
Loading