diff --git a/listenbrainz/background/export.py b/listenbrainz/background/export.py index 9e04923e74..deb6fb18d4 100644 --- a/listenbrainz/background/export.py +++ b/listenbrainz/background/export.py @@ -77,17 +77,81 @@ def export_query_to_jsonl(conn, file_path, query, **kwargs): def export_listens_for_time_range(ts_conn, file_path, user_id: int, start_time: datetime, end_time: datetime): """ Export user's listens for a given time period. """ query = """ - SELECT jsonb_build_object( - 'listened_at' - , extract(epoch from listened_at) - , 'track_metadata' - , jsonb_set(data, '{recording_msid}'::text[], to_jsonb(recording_msid::text)) - )::text as line - FROM listen - WHERE listened_at >= :start_time - AND listened_at <= :end_time - AND user_id = :user_id - ORDER BY listened_at ASC + WITH selected_listens AS ( + SELECT l.listened_at + , l.data + , l.recording_msid + , COALESCE((data->'additional_info'->>'recording_mbid')::uuid, user_mm.recording_mbid, mm.recording_mbid, other_mm.recording_mbid) AS recording_mbid + FROM listen l + LEFT JOIN mbid_mapping mm + ON l.recording_msid = mm.recording_msid + LEFT JOIN mbid_manual_mapping user_mm + ON l.recording_msid = user_mm.recording_msid + AND user_mm.user_id = l.user_id + LEFT JOIN mbid_manual_mapping_top other_mm + ON l.recording_msid = other_mm.recording_msid + WHERE listened_at >= :start_time + AND listened_at <= :end_time + AND l.user_id = :user_id + ) + SELECT jsonb_build_object( + 'listened_at' + , extract(epoch from listened_at) + , 'track_metadata' + , jsonb_set( + jsonb_set(data, '{recording_msid}'::text[], to_jsonb(recording_msid::text)), + '{mbid_mapping}'::text[] + , CASE + WHEN mbc.recording_mbid IS NULL + THEN 'null'::jsonb + ELSE + jsonb_build_object( + 'recording_name' + , mbc.recording_data->>'name' + , 'recording_mbid' + , mbc.recording_mbid::text + , 'release_mbid' + , mbc.release_mbid::text + , 'artist_mbids' + , mbc.artist_mbids::TEXT[] + , 'caa_id' + , (mbc.release_data->>'caa_id')::bigint + , 'caa_release_mbid' + , (mbc.release_data->>'caa_release_mbid')::text + , 'artists' + , jsonb_agg( + jsonb_build_object( + 'artist_credit_name' + , artist->>'name' + , 'join_phrase' + , artist->>'join_phrase' + , 'artist_mbid' + , mbc.artist_mbids[position] + ) + ORDER BY position + ) + ) + END + ) + )::text as line + FROM selected_listens sl + LEFT JOIN mapping.mb_metadata_cache mbc + ON sl.recording_mbid = mbc.recording_mbid + LEFT JOIN LATERAL jsonb_array_elements(artist_data->'artists') WITH ORDINALITY artists(artist, position) + ON TRUE + GROUP BY sl.listened_at + , sl.recording_msid + , sl.data + , mbc.recording_mbid + , recording_data->>'name' + , release_mbid + , artist_mbids + , artist_data->>'name' + , recording_data->>'name' + , release_data->>'name' + , release_data->>'caa_id' + , release_data->>'caa_release_mbid' + ORDER BY listened_at """ return export_query_to_jsonl(ts_conn, file_path, query, user_id=user_id, start_time=start_time, end_time=end_time) diff --git a/listenbrainz/tests/integration/test_export.py b/listenbrainz/tests/integration/test_export.py index c45ed31c49..a1f026f4ca 100644 --- a/listenbrainz/tests/integration/test_export.py +++ b/listenbrainz/tests/integration/test_export.py @@ -7,12 +7,11 @@ from brainzutils import cache from sqlalchemy import text -import listenbrainz.db.user as db_user -import listenbrainz.db.pinned_recording as db_pinned_rec import listenbrainz.db.feedback as db_feedback +import listenbrainz.db.pinned_recording as db_pinned_rec +import listenbrainz.db.user as db_user from listenbrainz.background.export import cleanup_old_exports from listenbrainz.db.model.feedback import Feedback - from listenbrainz.db.model.pinned_recording import WritablePinnedRecording from listenbrainz.listenstore.timescale_utils import recalculate_all_user_data from listenbrainz.tests.integration import ListenAPIIntegrationTestCase @@ -26,6 +25,52 @@ def setUp(self): self.user = db_user.get_or_create(self.db_conn, 1799, 'lucifer-export') db_user.agree_to_gdpr(self.db_conn, self.user['musicbrainz_id']) self.redis = cache._r + self.recording = { + "recording_mbid": "6b64a82d-0aa8-430e-bf25-26aa4c569af0", + "artist_mbids": ["d15721d8-56b4-453d-b506-fc915b14cba2"], + "release_mbid": "f10badef-094b-48b1-b345-cddfc3d41673", + "recording_data": { + "name": "Sister", + "rels": [ + { + "type": "performer", "artist_mbid": "d15721d8-56b4-453d-b506-fc915b14cba2", + "artist_name": "The Black Keys" + } + ], + "length": 205000 + }, + "artist_data": { + "name": "The Black Keys", + "artists": [ + { + "area": "United States", + "name": "The Black Keys", + "rels": { + "lyrics": "https://www.musixmatch.com/artist/The-Black-Keys", + "youtube": "https://www.youtube.com/channel/UCJL3h2-wEOB6EigQOBZ3ryg", + "wikidata": "https://www.wikidata.org/wiki/Q606226", + "streaming": "https://tidal.com/artist/64643", + "free streaming": "https://www.deezer.com/artist/2483", + "social network": "https://www.instagram.com/theblackkeys/", + "official homepage": "https://www.theblackkeys.com/", + "purchase for download": "https://www.qobuz.com/us-en/interpreter/-/40589", + "purchase for mail-order": "https://www.cdjapan.co.jp/person/700225155"}, + "type": "Group", "begin_year": 2001, + "join_phrase": ""} + ], + "artist_credit_id": 59036 + }, + "tag_data": {}, + "release_data": { + "mbid": "f10badef-094b-48b1-b345-cddfc3d41673", + "name": "El Camino", + "year": 2011, + "caa_id": 39543436175, + "caa_release_mbid": "2fa2b8f0-e4ab-46e8-8ab4-91d0e1aecfad", + "album_artist_name": "The Black Keys", + "release_group_mbid": "c2eed4c1-5cd9-469a-9075-b82077093967" + }, + } def tearDown(self): with self.app.app_context(): @@ -34,6 +79,29 @@ def tearDown(self): os.remove(os.path.join(self.app.config["USER_DATA_EXPORT_BASE_DIR"], archive)) super(ExportTestCase, self).tearDown() + def create_mapping_record(self, recording_msid): + self.ts_conn.execute(text(""" + INSERT INTO mapping.mb_metadata_cache + (recording_mbid, artist_mbids, release_mbid, recording_data, artist_data, tag_data, release_data, dirty) + VALUES (:recording_mbid ::UUID, :artist_mbids ::UUID[], :release_mbid ::UUID, :recording_data, :artist_data, :tag_data, :release_data, 'f') + """), { + "recording_mbid": self.recording["recording_mbid"], + "artist_mbids": self.recording["artist_mbids"], + "release_mbid": self.recording["release_mbid"], + "recording_data": json.dumps(self.recording["recording_data"]), + "artist_data": json.dumps(self.recording["artist_data"]), + "release_data": json.dumps(self.recording["release_data"]), + "tag_data": json.dumps(self.recording["tag_data"]) + }) + self.ts_conn.execute(text(""" + INSERT INTO mbid_mapping (recording_msid, recording_mbid, match_type) + VALUES (:recording_msid, :recording_mbid, 'exact_match') + """), { + "recording_msid": recording_msid, + "recording_mbid": self.recording["recording_mbid"] + }) + self.ts_conn.commit() + def send_listens(self): with open(self.path_to_data_file('user_export_test.json')) as f: payload = json.load(f) @@ -52,9 +120,11 @@ def test_export(self): self.send_listens() url = self.custom_url_for('api_v1.get_listens', user_name=self.user['musicbrainz_id']) - response = self.wait_for_query_to_have_items(url, 1, query_string={'count': '1'}) + response = self.wait_for_query_to_have_items(url, 1, query_string={'count': '3'}) data = json.loads(response.data)['payload'] self.assert200(response) + recording_msid = data["listens"][1]["recording_msid"] + self.create_mapping_record(recording_msid) pinned_recordings = [ { @@ -141,13 +211,8 @@ def test_export(self): else: break - with self.app.app_context(): - for task in db_conn.execute(text("SELECT * FROM user_data_export")).all(): - print(task) - self.assert200(response) with zipfile.ZipFile(BytesIO(response.data), "r") as export_zip: - export_zip.printdir() with export_zip.open("user.json", "r") as f: data = json.load(f) self.assertEqual(data, { @@ -170,6 +235,24 @@ def test_export(self): self.assertEqual(expected["track_metadata"]["artist_name"], received["track_metadata"]["artist_name"]) self.assertEqual(expected["track_metadata"]["release_name"], received["track_metadata"]["release_name"]) self.assertEqual(expected["listened_at"], received["listened_at"]) + if received["track_metadata"]["track_name"] == "Sister": + self.assertEqual({ + "caa_id": self.recording["release_data"]["caa_id"], + "caa_release_mbid": self.recording["release_data"]["caa_release_mbid"], + "artist_mbids": self.recording["artist_mbids"], + "recording_name": self.recording["recording_data"]["name"], + "recording_mbid": self.recording["recording_mbid"], + "release_mbid": self.recording["release_data"]["mbid"], + "artists": [ + { + "artist_mbid": "d15721d8-56b4-453d-b506-fc915b14cba2", + "join_phrase": "", + "artist_credit_name": "The Black Keys" + } + ], + }, received["track_metadata"]["mbid_mapping"]) + else: + self.assertEqual(None, received["track_metadata"]["mbid_mapping"]) with export_zip.open("pinned_recording.jsonl", "r") as f: received_pins = []