From 78b63388abf57476b9ec298371b7ce44e305772c Mon Sep 17 00:00:00 2001
From: Evgeny Fadeev
Date: Sun, 2 Jun 2024 21:51:00 -0400
Subject: [PATCH] WIP analytics: * finishes the population of base analytics events and initialization of sessions

---
 .../0027_populate_analytics_events.py         | 118 +++++++++++++++---
 1 file changed, 103 insertions(+), 15 deletions(-)

diff --git a/askbot/migrations/0027_populate_analytics_events.py b/askbot/migrations/0027_populate_analytics_events.py
index 200d87440b..18b6a7a0d1 100644
--- a/askbot/migrations/0027_populate_analytics_events.py
+++ b/askbot/migrations/0027_populate_analytics_events.py
@@ -1,10 +1,13 @@
 """Copies some events from Activity to analytics.Event"""
 # Generated by Django 4.2.4 on 2024-04-20 23:55
 # pylint: disable=missing-docstring, invalid-name
+from datetime import timedelta
 from django.db import migrations
 from django.utils.translation import gettext_lazy as _
 from askbot.utils.console import ProgressBar
 
+SESSION_IDLE_TIMEOUT = timedelta(minutes=30)
+
 EVENT_TYPES = (
     (1, _('registered')), # Activity.activity_type == 51
     (4, _('question viewed')), # Activity.activity_type == 52
@@ -25,13 +28,23 @@ def delete_analytics_objects(apps, schema_editor): #pylint: disable=missing-docs
     Session.objects.all().delete()
 
 
-def create_session(user_id, timestamp, apps): #pylint: disable=missing-docstring
-    """Creates a session for the given user and timestamp"""
+def get_user_session(user_id, timestamp, apps): #pylint: disable=missing-docstring
+    """Returns first user session which is within SESSION_IDLE_TIMEOUT of the given timestamp
+    for this user.
+    If such session does not exist, creates it"""
     Session = apps.get_model('askbot', 'Session')
+    dt_range = (timestamp - SESSION_IDLE_TIMEOUT, timestamp + SESSION_IDLE_TIMEOUT)
+    sessions = Session.objects.filter(user_id=user_id, updated_at__range=dt_range)
+    if sessions.exists():
+        return sessions[0]
+
     return Session.objects.create(user_id=user_id, created_at=timestamp, updated_at=timestamp)
 
 
 def populate_user_registered_events(apps): #pylint: disable=missing-docstring
+    """For each user, creates a user registered event.
+    With the timestamp of user's registration date.
+    """
     User = apps.get_model('auth', 'User')
     Event = apps.get_model('askbot', 'Event')
     ContentType = apps.get_model('contenttypes', 'ContentType')
@@ -39,11 +52,10 @@ def populate_user_registered_events(apps): #pylint: disable=missing-docstring
 
     users = User.objects.all().only('pk', 'date_joined')
     count = users.count()
-    print('')
     message = 'Populating user registered events'
     for user in ProgressBar(users.iterator(), count, message):
         timestamp = user.date_joined
-        session = create_session(user.pk, timestamp, apps)
+        session = get_user_session(user.pk, timestamp, apps)
         Event.objects.create(
             session=session,
             event_type=1,
@@ -54,6 +66,8 @@ def populate_user_registered_events(apps): #pylint: disable=missing-docstring
 
 
 def populate_question_viewed_events(apps): #pylint: disable=missing-docstring
+    """For each QuestionView record,
+    creates a corresponding analytics.Event object"""
     QuestionView = apps.get_model('askbot', 'QuestionView')
     Event = apps.get_model('askbot', 'Event')
     Post = apps.get_model('askbot', 'Post')
@@ -62,11 +76,10 @@ def populate_question_viewed_events(apps): #pylint: disable=missing-docstring
 
     views = QuestionView.objects.all().only('when', 'who_id', 'question_id')
     count = views.count()
-    print('')
-    message = 'Population Question View events'
+    message = 'Populating Question View events'
     for view in ProgressBar(views.iterator(), count, message):
         timestamp = view.when
-        session = create_session(view.who_id, timestamp, apps)
+        session = get_user_session(view.who_id, timestamp, apps)
         Event.objects.create(
             session=session,
             event_type=4,
@@ -76,7 +89,8 @@ def populate_question_viewed_events(apps): #pylint: disable=missing-docstring
         )
 
 
-def populate_voted_events(apps, activity_type=None, event_type=None, message=None): #pylint: disable=missing-docstring
+def populate_voted_events(apps, activity_type=None, event_type=None, message=None): #pylint: disable=too-many-locals
+    """For each vote activity, creates a corresponding analytics Event object"""
     Activity = apps.get_model('askbot', 'Activity')
     Event = apps.get_model('askbot', 'Event')
     Post = apps.get_model('askbot', 'Post')
@@ -88,7 +102,7 @@ def populate_voted_events(apps, activity_type=None, event_type=None, message=Non
     acts = Activity.objects.filter(activity_type=activity_type).only(*activity_fields)
     count = acts.count()
     for act in ProgressBar(acts.iterator(), count, message):
-        session = create_session(act.user_id, act.active_at, apps)
+        session = get_user_session(act.user_id, act.active_at, apps)
         if act.content_type.model != 'vote':
             continue
 
@@ -109,16 +123,90 @@ def populate_voted_events(apps, activity_type=None, event_type=None, message=Non
         )
 
 
+def populate_posted_events(apps, post_type=None, event_type=None, message=None): #pylint: disable=too-many-locals
+    """For each question, creates a corresponding analytics Event object"""
+    Event = apps.get_model('askbot', 'Event')
+    Post = apps.get_model('askbot', 'Post')
+    ContentType = apps.get_model('contenttypes', 'ContentType')
+    content_type = ContentType.objects.get_for_model(Post)
+
+    posts = Post.objects.filter(post_type=post_type).only('id', 'added_at', 'author_id')
+    count = posts.count()
+    for post in ProgressBar(posts.iterator(), count, message):
+        timestamp = post.added_at
+        session = get_user_session(post.author_id, timestamp, apps)
+        Event.objects.create(
+            session=session,
+            event_type=event_type,
+            timestamp = timestamp,
+            content_type=content_type,
+            object_id=post.id
+        )
+
+
+def populate_commented_events(apps, parent_post_type=None, event_type=None, message=None): #pylint: disable=missing-docstring
+    Event = apps.get_model('askbot', 'Event')
+    Post = apps.get_model('askbot', 'Post')
+    ContentType = apps.get_model('contenttypes', 'ContentType')
+    content_type = ContentType.objects.get_for_model(Post)
+
+    comments = Post.objects.filter(
+        post_type='comment',
+        parent__post_type=parent_post_type
+    ).only('id', 'added_at', 'author_id')
+    count = comments.count()
+    for question in ProgressBar(comments.iterator(), count, message):
+        timestamp = question.added_at
+        session = get_user_session(question.author_id, timestamp, apps)
+        Event.objects.create(
+            session=session,
+            event_type=event_type,
+            timestamp = timestamp,
+            content_type=content_type,
+            object_id=question.id
+        )
+
+
+def populate_retagged_question_events(apps): #pylint: disable=missing-docstring
+    Event = apps.get_model('askbot', 'Event')
+    Activity = apps.get_model('askbot', 'Activity')
+    Post = apps.get_model('askbot', 'Post')
+    act_fields = ('active_at', 'user_id', 'object_id', 'content_type')
+    acts = Activity.objects.filter(activity_type=15).only(*act_fields)
+    count = acts.count()
+    message = 'Populating retagged question events'
+    ContentType = apps.get_model('contenttypes', 'ContentType')
+    post_content_type_id = ContentType.objects.get_for_model(Post).id
+    for act in ProgressBar(acts.iterator(), count, message):
+        if act.content_type_id != post_content_type_id:
+            continue
+
+        session = get_user_session(act.user_id, act.active_at, apps)
+        Event.objects.create(
+            session=session,
+            event_type=13,
+            timestamp=act.active_at,
+            content_type=act.content_type,
+            object_id=act.object_id
+        )
+
 
 def populate_analytics_events(apps, schema_editor): #pylint: disable=unused-argument, missing-docstring
     populate_user_registered_events(apps)
     populate_question_viewed_events(apps)
-    populate_voted_events(apps, activity_type=9, event_type=6, message='Populating upvoted events')
-    populate_downvoted_events(apps, activity_type=10, event_type=7, message='Populating downvoted events')
-    #populate_asked_events(apps)
-    #populate_commented_question_events(apps)
-    #populate_commented_answer_events(apps)
-    #populate_retagged_question_events(apps)
+    populate_voted_events(apps, activity_type=9, event_type=6,
+                          message='Populating upvoted events')
+    populate_voted_events(apps, activity_type=10, event_type=7,
+                          message='Populating downvoted events')
+    populate_posted_events(apps, post_type='question', event_type=9,
+                           message='Populating asked events')
+    populate_posted_events(apps, post_type='answer', event_type=10,
+                           message='Populating answered events')
+    populate_commented_events(apps, parent_post_type='question',
+                              event_type=11, message='Populating commented question events')
+    populate_commented_events(apps, parent_post_type='answer',
+                              event_type=12, message='Populating commented answer events')
+    populate_retagged_question_events(apps)
 
 
 class Migration(migrations.Migration):