Skip to content

Commit

Permalink
Merge pull request #3 from acdh-oeaw/model-all
Browse files Browse the repository at this point in the history
Model entities
  • Loading branch information
gythaogg authored Apr 22, 2024
2 parents 8ab3aff + 0590458 commit 8c92f38
Show file tree
Hide file tree
Showing 8 changed files with 1,379 additions and 8 deletions.
29 changes: 21 additions & 8 deletions apis_ontology/management/commands/import_entities.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import logging
from pprint import pprint

import pandas as pd
from apis_ontology.models import Place
from django.apps import apps
from django.core.management.base import BaseCommand
from tqdm.auto import tqdm

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Show full column contents when pandas renders a frame (None = no width limit).
pd.set_option("display.max_colwidth", None)

# Entity type names the command accepts as positional arguments; anything
# else is rejected before import.
ENTITY_MODELS = ["person", "place", "instance", "work"]


class Command(BaseCommand):
"""
Expand All @@ -18,35 +18,43 @@ class Command(BaseCommand):

import_file = "data/dump_test.json"

def add_arguments(self, parser):
    """Register the positional ``args`` option: zero or more strings."""
    parser.add_argument(
        "args",
        type=str,
        nargs="*",
    )

def __init__(self, *args, **kwargs):
    """
    Load the JSON dump into ``self.df`` and clear the existing places.

    The file named by ``self.import_file`` is read with pandas, rows whose
    model is "apis_metainfo.uri" are dropped, and a few summary figures are
    written to the debug log.
    """
    super().__init__(*args, **kwargs)

    dump = pd.read_json(self.import_file)
    keep = dump.model != "apis_metainfo.uri"
    self.df = dump[keep]

    logger.debug("Columns: %s", self.df.columns)
    logger.debug("Number of rows: %d", len(self.df))
    logger.debug("Models: %s", self.df.model.unique())

    # NOTE(review): this removes every Place row each time the command
    # object is constructed, whichever entity types are requested --
    # confirm this is intended.
    Place.objects.all().delete()

def handle(self, *args, **kwargs):
"""
parses the data dump from the old schema and imports it into the current schema
"""
if not args:
logging.error("Please enter the names of entity models to import")
return

def get_entity_data(model_name):
ENTITY_MODELS = ["person", "place", "instance", "work"]
if model_name not in ENTITY_MODELS:
logging.error(
"%s is unknown. Expecting one of %s", model_name, ENTITY_MODELS
)
enitity_rows = self.df[self.df.model == f"apis_ontology.{model_name}"]
logging.debug("Found %d rows of type %s", enitity_rows.shape[0], model_name)
entity_rows = self.df[self.df.model == f"apis_ontology.{model_name}"]
logging.debug("Found %d rows of type %s", entity_rows.shape[0], model_name)
other_fields = {}
for _, row in tqdm(enitity_rows.iterrows(), total=enitity_rows.shape[0]):
for _, row in tqdm(entity_rows.iterrows(), total=entity_rows.shape[0]):
# get data from root object
other_object_info = self.df[
(self.df.pk == row.pk) & (self.df.model != row.model)
]
for _, extra_fields in other_object_info.iterrows():
if extra_fields.model.startswith("apis_ontology"):
continue

other_fields = {**other_fields, **extra_fields.fields}

field_values = {
Expand All @@ -72,4 +80,9 @@ def get_entity_data(model_name):
model_object = model_class(**field_values)
model_object.save()

df = get_entity_data("place")
for arg in args:
if arg not in ENTITY_MODELS:
logging.error("Unrecognised entity type %s for import", arg)
break

get_entity_data(arg)
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
# Generated by Django 4.2.11 on 2024-04-22 05:57

import apis_core.generic.abc
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import simple_history.models


# Schema migration that adds the Person entity model together with two
# companion tables, VersionPerson and VersionPerson_collection, which carry
# the simple_history base classes and history bookkeeping columns.
class Migration(migrations.Migration):

# Migrations that must be applied first: they provide the tables that the
# foreign keys below point at (rootobject/collection, content types, the
# configured user model) and the initial apis_ontology schema.
dependencies = [
("apis_metainfo", "0012_remove_rootobject_deprecated_name"),
("contenttypes", "0002_remove_content_type_name"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("apis_ontology", "0001_initial"),
]

operations = [
# Person: child table of apis_metainfo.rootobject (see `bases`).
migrations.CreateModel(
name="Person",
fields=[
# Parent link to rootobject; doubles as this table's primary key.
(
"rootobject_ptr",
models.OneToOneField(
auto_created=True,
on_delete=django.db.models.deletion.CASCADE,
parent_link=True,
primary_key=True,
serialize=False,
to="apis_metainfo.rootobject",
),
),
# Date columns are created non-editable (editable=False); presumably
# they are filled from the *_date_written text fields -- TODO confirm
# in the model code.
("start_date", models.DateField(blank=True, editable=False, null=True)),
(
"start_start_date",
models.DateField(blank=True, editable=False, null=True),
),
(
"start_end_date",
models.DateField(blank=True, editable=False, null=True),
),
("end_date", models.DateField(blank=True, editable=False, null=True)),
(
"end_start_date",
models.DateField(blank=True, editable=False, null=True),
),
(
"end_end_date",
models.DateField(blank=True, editable=False, null=True),
),
# Free-text start/end dates, labelled "Start" and "End".
(
"start_date_written",
models.CharField(
blank=True, max_length=255, null=True, verbose_name="Start"
),
),
(
"end_date_written",
models.CharField(
blank=True, max_length=255, null=True, verbose_name="End"
),
),
(
"alternative_names",
models.TextField(
blank=True, null=True, verbose_name="Alternative names"
),
),
(
"external_links",
models.TextField(
blank=True, null=True, verbose_name="External links"
),
),
(
"review",
models.BooleanField(
default=False,
help_text="Should be set to True, if the data record holds up quality standards.",
),
),
(
"notes",
models.TextField(blank=True, null=True, verbose_name="Notes"),
),
("published", models.BooleanField(default=False)),
(
"name",
models.CharField(
blank=True, default="", max_length=255, verbose_name="Name"
),
),
# Required choice field defaulting to "male"; max_length=6 fits the
# longest stored value, "female".
(
"gender",
models.CharField(
choices=[("male", "Male"), ("female", "Female")],
default="male",
max_length=6,
),
),
# Optional choice field: "Indic" or "Tibetan", may be NULL.
(
"nationality",
models.CharField(
blank=True,
choices=[("Indic", "Indic"), ("Tibetan", "Tibetan")],
max_length=10,
null=True,
),
),
("comments", models.TextField(blank=True, null=True)),
# Non-editable many-to-many link to apis_metainfo.collection.
(
"collection",
models.ManyToManyField(
editable=False, to="apis_metainfo.collection"
),
),
],
options={
"abstract": False,
},
bases=("apis_metainfo.rootobject", models.Model),
),
# VersionPerson: repeats Person's columns and adds version_tag plus the
# history_* columns; based on simple_history's HistoricalChanges.
migrations.CreateModel(
name="VersionPerson",
fields=[
# Unlike Person's one-to-one primary-key link, this is a nullable
# foreign key created without a database-level constraint.
(
"rootobject_ptr",
models.ForeignKey(
auto_created=True,
blank=True,
db_constraint=False,
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
parent_link=True,
related_name="+",
to="apis_metainfo.rootobject",
),
),
# Indexed integer column only; the primary key of this table is
# history_id (declared further down).
(
"id",
models.IntegerField(
auto_created=True, blank=True, db_index=True, verbose_name="ID"
),
),
("start_date", models.DateField(blank=True, editable=False, null=True)),
(
"start_start_date",
models.DateField(blank=True, editable=False, null=True),
),
(
"start_end_date",
models.DateField(blank=True, editable=False, null=True),
),
("end_date", models.DateField(blank=True, editable=False, null=True)),
(
"end_start_date",
models.DateField(blank=True, editable=False, null=True),
),
(
"end_end_date",
models.DateField(blank=True, editable=False, null=True),
),
(
"start_date_written",
models.CharField(
blank=True, max_length=255, null=True, verbose_name="Start"
),
),
(
"end_date_written",
models.CharField(
blank=True, max_length=255, null=True, verbose_name="End"
),
),
# Optional label column that exists only on the version table.
(
"version_tag",
models.CharField(blank=True, max_length=255, null=True),
),
(
"alternative_names",
models.TextField(
blank=True, null=True, verbose_name="Alternative names"
),
),
(
"external_links",
models.TextField(
blank=True, null=True, verbose_name="External links"
),
),
(
"review",
models.BooleanField(
default=False,
help_text="Should be set to True, if the data record holds up quality standards.",
),
),
(
"notes",
models.TextField(blank=True, null=True, verbose_name="Notes"),
),
("published", models.BooleanField(default=False)),
(
"name",
models.CharField(
blank=True, default="", max_length=255, verbose_name="Name"
),
),
(
"gender",
models.CharField(
choices=[("male", "Male"), ("female", "Female")],
default="male",
max_length=6,
),
),
(
"nationality",
models.CharField(
blank=True,
choices=[("Indic", "Indic"), ("Tibetan", "Tibetan")],
max_length=10,
null=True,
),
),
("comments", models.TextField(blank=True, null=True)),
# History bookkeeping: primary key, timestamp, optional reason, and a
# one-character type code ("+" created, "~" changed, "-" deleted).
("history_id", models.AutoField(primary_key=True, serialize=False)),
("history_date", models.DateTimeField(db_index=True)),
("history_change_reason", models.CharField(max_length=100, null=True)),
(
"history_type",
models.CharField(
choices=[("+", "Created"), ("~", "Changed"), ("-", "Deleted")],
max_length=1,
),
),
# User tied to the history row; set to NULL if that user is deleted.
(
"history_user",
models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="+",
to=settings.AUTH_USER_MODEL,
),
),
# Content-type reference, again without a database-level constraint.
(
"self_contenttype",
models.ForeignKey(
blank=True,
db_constraint=False,
editable=False,
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="+",
to="contenttypes.contenttype",
),
),
],
# Default ordering is newest first (descending history_date, history_id).
options={
"verbose_name": "Version",
"verbose_name_plural": "Versions",
"ordering": ("-history_date", "-history_id"),
"get_latest_by": ("history_date", "history_id"),
},
bases=(
simple_history.models.HistoricalChanges,
models.Model,
apis_core.generic.abc.GenericModel,
),
),
# VersionPerson_collection: rows linking a VersionPerson entry (`history`)
# to a person and a collection; presumably the history of the
# Person.collection many-to-many relation -- TODO confirm.
migrations.CreateModel(
name="VersionPerson_collection",
fields=[
(
"id",
models.IntegerField(
auto_created=True, blank=True, db_index=True, verbose_name="ID"
),
),
# Primary key of this table.
("m2m_history_id", models.AutoField(primary_key=True, serialize=False)),
(
"collection",
models.ForeignKey(
blank=True,
db_constraint=False,
db_tablespace="",
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="+",
to="apis_metainfo.collection",
),
),
# The VersionPerson row this entry belongs to.
(
"history",
models.ForeignKey(
db_constraint=False,
on_delete=django.db.models.deletion.DO_NOTHING,
to="apis_ontology.versionperson",
),
),
(
"person",
models.ForeignKey(
blank=True,
db_constraint=False,
db_tablespace="",
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="+",
to="apis_ontology.person",
),
),
],
options={
"verbose_name": "VersionPerson_collection",
},
bases=(simple_history.models.HistoricalChanges, models.Model),
),
]
Loading

0 comments on commit 8c92f38

Please sign in to comment.