-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
42 changed files
with
1,682 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
name: Pypi publish | ||
|
||
on: | ||
push: | ||
branches: | ||
- main | ||
|
||
jobs: | ||
build-n-publish: | ||
name: Build and publish Python distributions to PyPI | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- name: Set up Python | ||
uses: actions/setup-python@v2 | ||
with: | ||
python-version: '3.8' | ||
|
||
- name: Check out code | ||
uses: actions/checkout@v2 | ||
|
||
- name: Upgrade pip | ||
run: python -m pip install --upgrade pip | ||
|
||
- name: Install poetry | ||
run: pip install poetry==1.5.1 | ||
|
||
- name: Install dependencies | ||
run: poetry install --no-root | ||
|
||
# - name: Build package | ||
# run: python3 -m build | ||
# | ||
# - name: Publish distribution to PyPI | ||
# uses: pypa/gh-action-pypi-publish@release/v1 | ||
# with: | ||
# password: ${{ secrets.PYPI_API_TOKEN }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
name: Unit tests | ||
|
||
on: | ||
push: | ||
branches: | ||
- main | ||
pull_request: | ||
branches: | ||
- main | ||
|
||
jobs: | ||
build: | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- name: Set up Python | ||
uses: actions/setup-python@v2 | ||
with: | ||
python-version: '3.8' | ||
|
||
- name: Check out code | ||
uses: actions/checkout@v2 | ||
|
||
- name: Upgrade pip | ||
run: python -m pip install --upgrade pip | ||
|
||
- name: Install poetry | ||
run: pip install poetry==1.5.1 | ||
|
||
- name: Install dependencies | ||
run: poetry install --no-root | ||
|
||
- name: Run unit tests | ||
run: PYTHONPATH=tidal_algorithmic_mixes:test poetry run pytest test/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,17 @@ | ||
# tidal-algorithmic-mixes | ||
Tidal algorithmic mixes | ||
# Tidal algorithmic mixes | ||
|
||
This repository contains the logic TIDAL uses to create its algorithmic offline mixes: | ||
how it combines different machine learning models | ||
with business rules to create different mixes for different use cases, | ||
including personalized mixes (like My Mix, My New Arrivals and Daily Discovery) | ||
and non-personalized mixes like Track Radio and Artist Radio. | ||
|
||
- Make sure you have [pyenv](https://github.com/pyenv/pyenv) and [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv) installed on your local environment. | ||
- Install python 3.8.16 with pyenv `pyenv install 3.8.16`. | ||
- Set up a new virtual env `pyenv virtualenv 3.8.16 mixes` | ||
- Set local pyenv version `pyenv local mixes` | ||
- Activate the virtual pyenv using `pyenv activate mixes` | ||
- Upgrade the pip package installer `pip install --upgrade pip` | ||
- Install poetry for package management `pip install poetry==1.5.1` | ||
- Install dependencies from the lock file `poetry install --no-root` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
[project] | ||
requires-python = ">=3.8" | ||
classifiers = [ | ||
"Programming Language :: Python :: 3", | ||
"License :: OSI Approved :: Apache Software License", | ||
"Operating System :: OS Independent", | ||
] | ||
|
||
[project.urls] | ||
"GitHub" = "https://github.com/tidal-music/tidal-algorithmic-mixes" | ||
|
||
[tool.poetry] | ||
name = "tidal_algorithmic_mixes" | ||
version = "0.0.1" | ||
description = "common transformers used by the tidal personalization team." | ||
authors = [ | ||
"Loay <[email protected]>", | ||
"Jing <[email protected]>", | ||
"Tao <[email protected]>", | ||
"Thomas <[email protected]>", | ||
"Yuhua <[email protected]>" | ||
] | ||
|
||
license = "Apache-2.0" | ||
readme = "README.md" | ||
|
||
[tool.poetry.dependencies] | ||
python = ">=3.8.0" | ||
pyspark = "3.4.0" | ||
numpy = ">=1.16.4" | ||
s3fs = "2022.11.0" | ||
boto3 = "1.24.59" | ||
pandas = ">=1.4.2" | ||
great-expectations = "0.16.15" | ||
scikit-learn = "1.1.1" | ||
alphabet-detector = "0.0.7" | ||
pyarrow = "7.0.0" | ||
tidal-per-transformers = "0.0.4" | ||
torch = "1.9.1" | ||
mlflow = "2.1.1" | ||
|
||
[tool.poetry.group.dev.dependencies] | ||
pytest = "6.1.2" | ||
coverage = ">=4.5.2" | ||
pytest-cov = ">=2.6.1" | ||
coveralls = ">=1.6.0" | ||
mock = ">=2.0.0" | ||
moto = ">=3.1.11" | ||
|
||
|
||
[build-system] | ||
requires = ["poetry-core"] | ||
build-backend = "poetry.core.masonry.api" |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from datetime import date | ||
|
||
import tidal_algorithmic_mixes.utils.constants as c | ||
|
||
from test.pyspark_test import PySparkTest | ||
from tidal_algorithmic_mixes.discovery_mix.daily_update_transformation import DiscoveryMixDailyUpdateTransformation, \ | ||
DiscoveryMixDailyUpdateTransformationData | ||
|
||
|
||
class DiscoveryMixDailyUpdateTransformationTestInterface(DiscoveryMixDailyUpdateTransformation):
    """Test double whose ``extract``, ``validate`` and ``load`` stages are no-ops.

    Everything else is inherited unchanged from
    ``DiscoveryMixDailyUpdateTransformation``; tests that need input data
    assign ``_data`` on the instance themselves.
    """

    def extract(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    def validate(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    def load(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None
|
||
|
||
class DiscoveryMixDailyUpdateTest(PySparkTest):
    """Tests for ``slicer`` and ``offset`` of the daily-update transformation."""

    def test_slicer(self):
        """The first track of a one-track slice depends on the date passed in."""
        track_ids = [10, 11, 12, 13, 14, 15, 16]
        # Four users (0..3) that all share the same track list.
        rows = [(user_id, track_ids) for user_id in range(4)]
        mixes = self.spark.createDataFrame(rows, [c.USER, c.TRACKS])

        runner = DiscoveryMixDailyUpdateTransformationTestInterface(self.spark)
        runner._data = DiscoveryMixDailyUpdateTransformationData(mixes)

        expected_first_track = (
            (date(2021, 2, 15), 10),
            (date(2021, 2, 18), 13),
            (date(2021, 2, 21), 16),
        )
        for day, first_track in expected_first_track:
            sliced_rows = runner.slicer(mixes, day, 1).collect()
            self.assertEqual(sliced_rows[0][c.TRACKS][0], first_track)

    def test_offset(self):
        """``offset`` with a size of 10 yields 0, 30 and 60 for the three dates."""
        runner = DiscoveryMixDailyUpdateTransformationTestInterface(self.spark)

        expected_offsets = (
            (date(2021, 2, 15), 0),
            (date(2021, 2, 18), 30),
            (date(2021, 2, 21), 60),
        )
        for day, expected in expected_offsets:
            self.assertEqual(runner.offset(day, 10), expected)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import tidal_algorithmic_mixes.utils.constants as c | ||
from pyspark_test import PySparkTest | ||
from tidal_algorithmic_mixes.discovery_mix.observed_tracks_aggregator_transformation import \ | ||
ObservedDiscoveryMixTracksAggregatorTransformation, ObservedDiscoveryMixTracksAggregatorTransformationData | ||
|
||
|
||
class ObservedDiscoveryMixTracksAggregatorTransformationTestInterface(
        ObservedDiscoveryMixTracksAggregatorTransformation):
    """Test double whose ``extract``, ``validate`` and ``load`` stages are no-ops.

    The remaining behaviour is inherited from
    ``ObservedDiscoveryMixTracksAggregatorTransformation``; the test assigns
    ``_data`` on the instance before calling ``transform``.
    """

    def extract(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    def validate(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    def load(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None
|
||
|
||
class ObservedDiscoveryMixTracksAggregatorTest(PySparkTest):
    """Tests ``transform`` of the observed discovery mix tracks aggregator."""

    def test_transform(self):
        """Only tracks from observed mixes listed in ``mixes`` reach the output.

        The observed-mixes input holds four mixes, but only two of them appear
        in the ``mixes`` table. The output is expected to contain one row per
        track of those two mixes, for exactly those two users.
        """
        user_1 = 26129743
        user_2 = 43727840

        user_1_mix = "5b5b0f74b66cbecf46de5f00297"
        user_2_mix = "c71e7c0b5f8daeaff1bdea48f9f"

        tracks_user_1 = [1, 2, 3, 4, 5, 6]
        tracks_user_2 = [3, 4, 5, 6, 7, 9]

        mixes = self.spark.createDataFrame([
            (user_1_mix,),
            (user_2_mix,),
        ], [c.MIX_ID])

        observed_mixes = self.spark.createDataFrame([
            (user_1_mix, user_1, tracks_user_1),
            (user_2_mix, user_2, tracks_user_2),
            # These two mixes are absent from `mixes` above.
            ("xvxfewfwsdf34r3sf3jfaae4tgs", 1664, [11, 22, 33, 44]),
            ("a71e7xffw4rzdzdf34zsz23ead3", 1984, [55, 66, 77, 11, 22]),
        ], [c.MIX_ID, c.USER, c.TRACKS])

        runner = ObservedDiscoveryMixTracksAggregatorTransformationTestInterface(self.spark)
        runner._data = ObservedDiscoveryMixTracksAggregatorTransformationData(observed_mixes=observed_mixes,
                                                                              mixes=mixes)
        runner.transform()
        res = runner.output.output

        self.assertEqual(res.columns, [c.USER, c.TRACK_GROUP])
        self.assertEqual(res.count(), len(tracks_user_1) + len(tracks_user_2))

        # Spark gives no ordering guarantee for the rows returned after
        # distinct(), so compare the users without regard to order.
        observed_users = [row[c.USER] for row in res.select(c.USER).distinct().collect()]
        self.assertCountEqual([user_1, user_2], observed_users)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
from datetime import datetime | ||
from pyspark.sql.types import Row | ||
from pyspark_test import PySparkTest | ||
from tidal_algorithmic_mixes.discovery_mix.post_processor_transformation import\ | ||
DiscoveryMixPostProcessorTransformation, DiscoveryMixPostProcessorTransformationData | ||
|
||
|
||
class DiscoveryMixPostProcessorTransformationTestInterface(DiscoveryMixPostProcessorTransformation):
    """Test double whose ``extract``, ``validate`` and ``load`` stages are no-ops.

    The remaining behaviour is inherited from
    ``DiscoveryMixPostProcessorTransformation``; the test assigns ``_data``
    on the instance before calling ``transform``.
    """

    def extract(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    def validate(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    def load(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None
|
||
|
||
class DiscoveryMixPostProcessorTest(PySparkTest):
    """Runs the discovery mix post-processor on single-row input tables."""

    def setUp(self):
        """Build the one-row input DataFrames and bundle them into ``self.data``."""
        # Chain to the base class so any per-test setup in PySparkTest
        # (or unittest.TestCase) still runs before the fixtures are built.
        super().setUp()

        tracks_metadata = self.spark.createDataFrame([
            Row(id=1,
                title='Chime again',
                popularityWW=0,
                trackNumber=16,
                volumeNumber=1,
                numAlbums=3,
                explicit=False,
                generatedFromVideo=False,
                trackGroup='xxx',
                audioQuality='LOSSLESS',
                available=True,
                version='x',
                duration=192,
                mixes={'x': 'y'},
                mainArtistsIds=[1],
                mainArtistsNames=['Me'],
                mainArtistId=1,
                mainArtistPicture='xxx',
                featuringArtistsIds=[''],
                albumId=1,
                masterBundleId='x',
                albumTitle='Victorian',
                albumCover='be7c307bc938',
                releaseDate=datetime(2010, 6, 8, 0, 0),
                albumReleaseDate=datetime(2010, 6, 8, 0, 0),
                creditsArtistId=[1],
                creditsName=['La La'],
                creditsRole=['Main Artist'],
                creditsRoleCategory=['HIDDEN'],
                numTrackStreams=0,
                numTrackStreamers=0,
                voicenessScore=0,
                voice=1,
                genre='Christmas',
                originalGenre='Christmas',
                AvailableCountryCodes=['AD', 'AE']),
        ])
        track_groups_metadata = self.spark.createDataFrame([
            Row(trackGroup='xxx',
                AvailableCountryCodes=['AD', 'AE']),
        ])

        # Two recommended track groups for user 1: 'xxx' has metadata above,
        # while 'xyz' appears in the user's track history below.
        precomputed_recs = self.spark.createDataFrame([
            Row(user=1,
                recommendations=['xxx', 'xyz']),
        ])

        user_history_tracks = self.spark.createDataFrame([
            Row(userId=1,
                productId=2,
                artistId=2,
                trackGroup='xyz',
                title="Don't Let The Sun Go Down On Me",
                cleanedTitle="dd",
                count=2,
                source='UserTracksHistory',
                dt=datetime(2020, 12, 21, 13, 3, 36, 534000)),
        ])
        user_history_artists = self.spark.createDataFrame([
            Row(userId=1,
                artistId=3,
                count=10,
                source='UserArtistsHistory',
                dt=datetime(2022, 5, 2, 20, 28, 23, 516000)),
        ])
        user_fav_tracks = self.spark.createDataFrame([
            Row(userId=1,
                productId=5,
                artistId=7,
                trackGroup='aaa',
                title='Breathing Underwater',
                cleanedTitle='aa',
                count=1,
                source='UserTracksFavourite',
                dt=datetime(2020, 10, 23, 6, 49, 33)),
        ])
        user_fav_artists = self.spark.createDataFrame([
            Row(userId=1,
                artistId=111,
                count=1,
                source='UserArtistsFavourite',
                dt=datetime(2019, 11, 21, 13, 31, 11)),
        ])

        artist_clusters = self.spark.createDataFrame([Row(artistId=1, cluster=42)])

        user_observed_tracks = self.spark.createDataFrame([
            Row(userId=1,
                productId=5,
                artistId=7,
                trackGroup='aaa',
                title='Breathing Underwater',
                cleanedTitle='aa',
                count=1,
                source='UserTracksDiscoveryObserved',
                dt=datetime(2020, 10, 23, 6, 49, 33)),
        ])

        user_table = self.spark.createDataFrame([Row(id=1, countrycode='AD')])

        user_blacklist_table = self.spark.createDataFrame([
            Row(artifactId='111',
                artifactType='TRACK',
                created=1568546619349,
                userId='3'),
        ])

        artist_compound_mapping_table = self.spark.createDataFrame([
            Row(id=4,
                artistid=5,
                artistcompoundid=6,
                priority=1,
                mainartist=False),
        ])

        # Arguments are passed positionally, so this order must match the
        # field order of DiscoveryMixPostProcessorTransformationData.
        self.data = DiscoveryMixPostProcessorTransformationData(tracks_metadata,
                                                                track_groups_metadata,
                                                                precomputed_recs,
                                                                user_history_tracks,
                                                                user_history_artists,
                                                                user_fav_tracks,
                                                                user_fav_artists,
                                                                artist_clusters,
                                                                user_observed_tracks,
                                                                user_table,
                                                                user_blacklist_table,
                                                                artist_compound_mapping_table
                                                                )

    def test_transform(self):
        """The first output row is user 1 with the single track group 'xxx'."""
        post_processor = DiscoveryMixPostProcessorTransformationTestInterface(self.spark,
                                                                              threshold_known_artists=1,
                                                                              mix_size=1,
                                                                              min_mix_size=0)
        post_processor._data = self.data
        post_processor.transform()
        res = post_processor.output.output.collect()[0]
        # atDate is copied from the result itself, so its value is not
        # checked here; only user, tracks and mixId are pinned.
        self.assertEqual(Row(user=1, tracks=['xxx'], mixId='1f1451b3b417516e9e4b4423958', atDate=res.atDate),
                         res)
Oops, something went wrong.