Skip to content
This repository has been archived by the owner on Feb 28, 2018. It is now read-only.

Importing and serializing company data #263

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 17 additions & 28 deletions jarbas/core/management/commands/companies.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
import csv
import lzma

from django.core.exceptions import ValidationError
from django.core.validators import validate_email
import rows

from jarbas.core.management.commands import LoadCommand
from jarbas.core.models import Activity, Company


# Date field type used for the date columns of the companies CSV; it
# subclasses rows.fields.DateField and only overrides the input format.
class CompaniesDate(rows.fields.DateField):
# Day/month/four-digit-year pattern (e.g. 31/12/2016). Presumably this is the
# format rows applies when parsing the cell text -- confirm in the rows docs.
INPUT_FORMAT = '%d/%m/%Y'

# Maps CSV column names to the rows field class to use for that column.
# It is handed to rows.import_from_csv as force_types when the companies
# file is read; columns not listed here are left to the importer.
companies_csv_field_types = {
'email': rows.fields.EmailField,
'opening': CompaniesDate,
'situation_date': CompaniesDate,
'special_situation_date': CompaniesDate,
'latitude': rows.fields.FloatField,
'longitude': rows.fields.FloatField
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a constant; could you name it in caps (i.e. COMPANIES_CSV_FIELDS) and put it before the class definition (just after the imports)?

Also, in Python we use one of these formatting styles:

COMPANIES_CSV_FIELDS = {
    'email': rows.fields.EmailField,
    'opening': CompaniesDate,
    'situation_date': CompaniesDate,
    'special_situation_date': CompaniesDate,
    'latitude': rows.fields.FloatField,
    'longitude': rows.fields.FloatField
}

Or:

COMPANIES_CSV_FIELDS = {'email': rows.fields.EmailField,
                        'opening': CompaniesDate,
                        'situation_date': CompaniesDate,
                        'special_situation_date': CompaniesDate,
                        'latitude': rows.fields.FloatField,
                        'longitude': rows.fields.FloatField}

Also, can you reflect this formatting decision in jarbas/core/tests/test_companies_command.py (line 46), listing one field per line?



class Command(LoadCommand):
help = 'Load Serenata de Amor companies dataset into the database'

Expand All @@ -31,11 +41,12 @@ def save_companies(self):
skip = ('main_activity', 'secondary_activity')
keys = tuple(f.name for f in Company._meta.fields if f not in skip)
with lzma.open(self.path, mode='rt', encoding='utf-8') as file_handler:
for row in csv.DictReader(file_handler):
for row in rows.import_from_csv(file_handler, force_types=companies_csv_field_types):
row = row._asdict()
main, secondary = self.save_activities(row)

filtered = {k: v for k, v in row.items() if k in keys}
obj = Company.objects.create(**self.serialize(filtered))
obj = Company.objects.create(**filtered)
for activity in main:
obj.main_activity.add(activity)
for activity in secondary:
Expand All @@ -62,25 +73,3 @@ def save_activities(self, row):
secondaries.append(obj)

return [main], secondaries

# Convert selected values of a company row and return the row.
# The dict passed in is modified in place (the same object is returned), and
# every key touched below must be present or the lookup raises KeyError.
# to_date and to_number are not defined in this view -- presumably inherited
# from LoadCommand; confirm.
def serialize(self, row):
# The email value is replaced by whatever self.to_email returns for it.
row['email'] = self.to_email(row['email'])

# Date columns, each passed through self.to_date.
dates = ('opening', 'situation_date', 'special_situation_date')
for key in dates:
row[key] = self.to_date(row[key])

# Coordinate columns, each passed through self.to_number.
decimals = ('latitude', 'longitude')
for key in decimals:
row[key] = self.to_number(row[key])

return row

# Return `email` unchanged when validate_email accepts it, otherwise None.
@staticmethod
def to_email(email):
try:
# validate_email raises ValidationError for a value it rejects.
validate_email(email)
return email

# A rejected address is mapped to None instead of propagating the error.
except ValidationError:
return None