Skip to content

Commit

Permalink
Merge pull request #39 from g0v/reduce-db-size-#35
Browse files Browse the repository at this point in the history
Fix wrong cursor usage and add tgz packaging to reduce file size, #35
  • Loading branch information
ddio authored Aug 17, 2019
2 parents db77e29 + dfcef05 commit a37fd49
Showing 1 changed file with 44 additions and 9 deletions.
53 changes: 44 additions & 9 deletions backend/rental/management/commands/archivehistory.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from os import path, makedirs
from glob import glob
from datetime import timedelta, datetime
from uuid import UUID
import time
import shutil
import tarfile
import json
from django.core.management.base import BaseCommand, CommandError
from django.forms.models import model_to_dict
from django.utils import timezone
from django.core.paginator import Paginator
from django.contrib.gis.geos import Point

from rental.models import HouseEtc, HouseTS
Expand Down Expand Up @@ -46,6 +49,8 @@ class Command(BaseCommand):
requires_migrations_checks = True
default_days_ago = 60
items_per_page = 3000
ts_dir = 'timeseries'
tgz_dir = 'compressed'

def add_arguments(self, parser):
parser.add_argument('output_dir')
Expand Down Expand Up @@ -86,7 +91,26 @@ def handle(self, *args, **options):
else:
before_date -= timedelta(self.default_days_ago)

self.remove_old_ts(output, before_date)
# self.remove_old_ts(output, before_date)
self.make_tgz(output)

def make_tgz(self, output_dir):
ts_base = path.join(output_dir, self.ts_dir)
timestamp = int(time.time())
if path.exists(ts_base):
makedirs(path.join(output_dir, self.tgz_dir), exist_ok=True)
for rel_ts_dir in glob('{}/*/*/*'.format(ts_base)):
year_month_day = '-'.join(rel_ts_dir.split('/')[-3:])
self.stdout.write('Compressing HouseTS {}'.format(year_month_day))
arcname = path.join(self.ts_dir, year_month_day)
output_path = path.join(
output_dir,
self.tgz_dir,
'{}-{}-{}.tgz'.format(self.ts_dir, year_month_day, timestamp)
)
with tarfile.open(output_path, 'w:gz') as tgz:
tgz.add(rel_ts_dir, arcname=arcname)
shutil.rmtree(rel_ts_dir)

def remove_old_ts(self, output_dir, before_date: datetime.date):
before_date += timedelta(1)
Expand All @@ -95,17 +119,27 @@ def remove_old_ts(self, output_dir, before_date: datetime.date):
)

total_house = old_houses.count()
pages = Paginator(old_houses, self.items_per_page)
n_done = 0
total_done = 0
done_this_ite = 1
self.stdout.ending = ''
self.stdout.write("[HouseTS] Start to backup {} rows before {}.\n".format(
total_house,
before_date.isoformat()
))

for page_num in pages.page_range:
for house in pages.page(page_num):
sub_dir = 'ts/{:04d}/{:02d}/{:02d}'.format(house.created.year, house.created.month, house.created.day)
while done_this_ite:
old_houses = HouseTS.objects.filter(
created__lt=before_date
)[:self.items_per_page]
done_this_ite = 0

for house in old_houses:
sub_dir = '{}/{:04d}/{:02d}/{:02d}'.format(
self.ts_dir,
house.created.year,
house.created.month,
house.created.day
)
filename = 'house.{}.{}.json'.format(house.vendor.name, house.vendor_house_id)
self.dump_row(
base_dir=output_dir,
Expand All @@ -114,9 +148,10 @@ def remove_old_ts(self, output_dir, before_date: datetime.date):
house=house
)
house.delete()
n_done += 1
total_done += 1
done_this_ite += 1
self.stdout.write("\r[HouseTS] {:3.0f}% done".format(
100 * n_done / total_house
100 * total_done / total_house
))

self.stdout.write("\n[HouseTS] done!\n")
Expand Down

0 comments on commit a37fd49

Please sign in to comment.