Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LB-1687: Add mapping info to listen exports #3099

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 75 additions & 11 deletions listenbrainz/background/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,81 @@ def export_query_to_jsonl(conn, file_path, query, **kwargs):
def export_listens_for_time_range(ts_conn, file_path, user_id: int, start_time: datetime, end_time: datetime):
    """ Export user's listens for a given time period, including MBID mapping info.

    Each exported line is a JSON object with two keys:

    * ``listened_at``: the listen timestamp as seconds since the epoch.
    * ``track_metadata``: the stored listen ``data`` with ``recording_msid``
      added and an ``mbid_mapping`` entry. ``mbid_mapping`` is JSON ``null``
      when no row in ``mapping.mb_metadata_cache`` matches the resolved
      recording MBID; otherwise it holds the recording name, recording and
      release MBIDs, artist MBIDs, cover art ids and a per-artist list.

    The recording MBID of a listen is resolved with this precedence:
    ``additional_info.recording_mbid`` from the listen itself, the user's own
    row in ``mbid_manual_mapping``, ``mbid_mapping``, and finally
    ``mbid_manual_mapping_top``.

    Args:
        ts_conn: connection passed through to ``export_query_to_jsonl``.
        file_path: destination passed through to ``export_query_to_jsonl``.
        user_id: id of the user whose listens are exported.
        start_time: inclusive lower bound on ``listened_at``.
        end_time: inclusive upper bound on ``listened_at``.

    Returns:
        Whatever ``export_query_to_jsonl`` returns for this query.
    """
    # Only the CTE-based statement is kept: the previous plain SELECT must not
    # remain in the same string, otherwise the SQL is not a valid statement.
    query = """
        WITH selected_listens AS (
            SELECT l.listened_at
                 , l.data
                 , l.recording_msid
                 , COALESCE(
                       (l.data->'additional_info'->>'recording_mbid')::uuid
                     , user_mm.recording_mbid
                     , mm.recording_mbid
                     , other_mm.recording_mbid
                   ) AS recording_mbid
              FROM listen l
         LEFT JOIN mbid_mapping mm
                ON l.recording_msid = mm.recording_msid
         LEFT JOIN mbid_manual_mapping user_mm
                ON l.recording_msid = user_mm.recording_msid
               AND user_mm.user_id = l.user_id
         LEFT JOIN mbid_manual_mapping_top other_mm
                ON l.recording_msid = other_mm.recording_msid
             WHERE l.listened_at >= :start_time
               AND l.listened_at <= :end_time
               AND l.user_id = :user_id
        )
        SELECT jsonb_build_object(
                   'listened_at'
                 , extract(epoch from sl.listened_at)
                 , 'track_metadata'
                 , jsonb_set(
                       jsonb_set(sl.data, '{recording_msid}'::text[], to_jsonb(sl.recording_msid::text))
                     , '{mbid_mapping}'::text[]
                     , CASE
                       WHEN mbc.recording_mbid IS NULL
                       THEN 'null'::jsonb
                       ELSE
                           jsonb_build_object(
                               'recording_name'
                             , mbc.recording_data->>'name'
                             , 'recording_mbid'
                             , mbc.recording_mbid::text
                             , 'release_mbid'
                             , mbc.release_mbid::text
                             , 'artist_mbids'
                             , mbc.artist_mbids::TEXT[]
                             , 'caa_id'
                             , (mbc.release_data->>'caa_id')::bigint
                             , 'caa_release_mbid'
                             , (mbc.release_data->>'caa_release_mbid')::text
                             , 'artists'
                             , jsonb_agg(
                                   jsonb_build_object(
                                       'artist_credit_name'
                                     , artist->>'name'
                                     , 'join_phrase'
                                     , artist->>'join_phrase'
                                     , 'artist_mbid'
                                     , mbc.artist_mbids[position]
                                   )
                                   ORDER BY position
                               )
                           )
                       END
                   )
               )::text AS line
          FROM selected_listens sl
     LEFT JOIN mapping.mb_metadata_cache mbc
            ON sl.recording_mbid = mbc.recording_mbid
     LEFT JOIN LATERAL jsonb_array_elements(mbc.artist_data->'artists') WITH ORDINALITY artists(artist, position)
            ON TRUE
      GROUP BY sl.listened_at
             , sl.recording_msid
             , sl.data
             , mbc.recording_mbid
             , mbc.recording_data->>'name'
             , mbc.release_mbid
             , mbc.artist_mbids
             , mbc.release_data->>'caa_id'
             , mbc.release_data->>'caa_release_mbid'
      ORDER BY sl.listened_at
    """
    return export_query_to_jsonl(ts_conn, file_path, query, user_id=user_id, start_time=start_time, end_time=end_time)

Expand Down
101 changes: 92 additions & 9 deletions listenbrainz/tests/integration/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@
from brainzutils import cache
from sqlalchemy import text

import listenbrainz.db.user as db_user
import listenbrainz.db.pinned_recording as db_pinned_rec
import listenbrainz.db.feedback as db_feedback
import listenbrainz.db.pinned_recording as db_pinned_rec
import listenbrainz.db.user as db_user
from listenbrainz.background.export import cleanup_old_exports
from listenbrainz.db.model.feedback import Feedback

from listenbrainz.db.model.pinned_recording import WritablePinnedRecording
from listenbrainz.listenstore.timescale_utils import recalculate_all_user_data
from listenbrainz.tests.integration import ListenAPIIntegrationTestCase
Expand All @@ -26,6 +25,52 @@ def setUp(self):
self.user = db_user.get_or_create(self.db_conn, 1799, 'lucifer-export')
db_user.agree_to_gdpr(self.db_conn, self.user['musicbrainz_id'])
self.redis = cache._r
self.recording = {
"recording_mbid": "6b64a82d-0aa8-430e-bf25-26aa4c569af0",
"artist_mbids": ["d15721d8-56b4-453d-b506-fc915b14cba2"],
"release_mbid": "f10badef-094b-48b1-b345-cddfc3d41673",
"recording_data": {
"name": "Sister",
"rels": [
{
"type": "performer", "artist_mbid": "d15721d8-56b4-453d-b506-fc915b14cba2",
"artist_name": "The Black Keys"
}
],
"length": 205000
},
"artist_data": {
"name": "The Black Keys",
"artists": [
{
"area": "United States",
"name": "The Black Keys",
"rels": {
"lyrics": "https://www.musixmatch.com/artist/The-Black-Keys",
"youtube": "https://www.youtube.com/channel/UCJL3h2-wEOB6EigQOBZ3ryg",
"wikidata": "https://www.wikidata.org/wiki/Q606226",
"streaming": "https://tidal.com/artist/64643",
"free streaming": "https://www.deezer.com/artist/2483",
"social network": "https://www.instagram.com/theblackkeys/",
"official homepage": "https://www.theblackkeys.com/",
"purchase for download": "https://www.qobuz.com/us-en/interpreter/-/40589",
"purchase for mail-order": "https://www.cdjapan.co.jp/person/700225155"},
"type": "Group", "begin_year": 2001,
"join_phrase": ""}
],
"artist_credit_id": 59036
},
"tag_data": {},
"release_data": {
"mbid": "f10badef-094b-48b1-b345-cddfc3d41673",
"name": "El Camino",
"year": 2011,
"caa_id": 39543436175,
"caa_release_mbid": "2fa2b8f0-e4ab-46e8-8ab4-91d0e1aecfad",
"album_artist_name": "The Black Keys",
"release_group_mbid": "c2eed4c1-5cd9-469a-9075-b82077093967"
},
}

def tearDown(self):
with self.app.app_context():
Expand All @@ -34,6 +79,29 @@ def tearDown(self):
os.remove(os.path.join(self.app.config["USER_DATA_EXPORT_BASE_DIR"], archive))
super(ExportTestCase, self).tearDown()

def create_mapping_record(self, recording_msid):
    """ Store the fixture recording in the metadata cache and map ``recording_msid`` to it.

    Inserts one row built from ``self.recording`` into
    ``mapping.mb_metadata_cache``, then one ``exact_match`` row into
    ``mbid_mapping`` linking ``recording_msid`` to the fixture's recording
    MBID, and commits both on ``self.ts_conn``.
    """
    fixture = self.recording

    # Metadata cache row: the *_data columns are passed as serialized JSON.
    cache_insert = text(
        "\n"
        "INSERT INTO mapping.mb_metadata_cache\n"
        "(recording_mbid, artist_mbids, release_mbid, recording_data, artist_data, tag_data, release_data, dirty)\n"
        "VALUES (:recording_mbid ::UUID, :artist_mbids ::UUID[], :release_mbid ::UUID, :recording_data, :artist_data, :tag_data, :release_data, 'f')\n"
    )
    cache_params = {column: fixture[column] for column in ("recording_mbid", "artist_mbids", "release_mbid")}
    for column in ("recording_data", "artist_data", "release_data", "tag_data"):
        cache_params[column] = json.dumps(fixture[column])
    self.ts_conn.execute(cache_insert, cache_params)

    # Mapping row tying the listen's MSID to the cached recording.
    mapping_insert = text(
        "\n"
        "INSERT INTO mbid_mapping (recording_msid, recording_mbid, match_type)\n"
        "VALUES (:recording_msid, :recording_mbid, 'exact_match')\n"
    )
    mapping_params = {
        "recording_msid": recording_msid,
        "recording_mbid": fixture["recording_mbid"],
    }
    self.ts_conn.execute(mapping_insert, mapping_params)

    self.ts_conn.commit()

def send_listens(self):
with open(self.path_to_data_file('user_export_test.json')) as f:
payload = json.load(f)
Expand All @@ -52,9 +120,11 @@ def test_export(self):

self.send_listens()
url = self.custom_url_for('api_v1.get_listens', user_name=self.user['musicbrainz_id'])
response = self.wait_for_query_to_have_items(url, 1, query_string={'count': '1'})
response = self.wait_for_query_to_have_items(url, 1, query_string={'count': '3'})
data = json.loads(response.data)['payload']
self.assert200(response)
recording_msid = data["listens"][1]["recording_msid"]
self.create_mapping_record(recording_msid)

pinned_recordings = [
{
Expand Down Expand Up @@ -141,13 +211,8 @@ def test_export(self):
else:
break

with self.app.app_context():
for task in db_conn.execute(text("SELECT * FROM user_data_export")).all():
print(task)

self.assert200(response)
with zipfile.ZipFile(BytesIO(response.data), "r") as export_zip:
export_zip.printdir()
with export_zip.open("user.json", "r") as f:
data = json.load(f)
self.assertEqual(data, {
Expand All @@ -170,6 +235,24 @@ def test_export(self):
self.assertEqual(expected["track_metadata"]["artist_name"], received["track_metadata"]["artist_name"])
self.assertEqual(expected["track_metadata"]["release_name"], received["track_metadata"]["release_name"])
self.assertEqual(expected["listened_at"], received["listened_at"])
if received["track_metadata"]["track_name"] == "Sister":
self.assertEqual({
"caa_id": self.recording["release_data"]["caa_id"],
"caa_release_mbid": self.recording["release_data"]["caa_release_mbid"],
"artist_mbids": self.recording["artist_mbids"],
"recording_name": self.recording["recording_data"]["name"],
"recording_mbid": self.recording["recording_mbid"],
"release_mbid": self.recording["release_data"]["mbid"],
"artists": [
{
"artist_mbid": "d15721d8-56b4-453d-b506-fc915b14cba2",
"join_phrase": "",
"artist_credit_name": "The Black Keys"
}
],
}, received["track_metadata"]["mbid_mapping"])
else:
self.assertEqual(None, received["track_metadata"]["mbid_mapping"])

with export_zip.open("pinned_recording.jsonl", "r") as f:
received_pins = []
Expand Down
Loading