Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an onboarding topic-aware user embedder with various embedding sources and strategies #147

Merged
merged 39 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
8a48bef
Remove the zero-clicks guard from topic-based user embedder
karlhigley Dec 23, 2024
1595f23
Add a script to run recommenders locally without Serverless
karlhigley Dec 23, 2024
4e40767
Print a count of articles per topic in the handler
karlhigley Dec 23, 2024
ba92120
Add a second method of computing topic embeddings
karlhigley Dec 23, 2024
19c0a73
Print onboarding topics
karlhigley Dec 30, 2024
5657300
added pipeline for each topic embedding
zsristy43 Jan 7, 2025
94287a8
avg topic wise embedder
zsristy43 Jan 7, 2025
416196d
Update the Pixi lock file
karlhigley Jan 21, 2025
f308762
Adjust topic embedding computation to fall back to other methods
karlhigley Jan 21, 2025
3f1487a
Fix typo in embedding source name
karlhigley Jan 21, 2025
772d7e9
Add all articles to `past_history`
karlhigley Jan 21, 2025
1102044
Update the interest profile to include Technology and Sports
karlhigley Jan 21, 2025
9fb5111
Adjust the script to reorder it from most generic to most specific
karlhigley Jan 21, 2025
3908c8c
Apply `squeeze` to make padding check work for both `avg` and `nrms`
karlhigley Jan 21, 2025
b0378b8
topic embedder with rrf
zsristy43 Jan 21, 2025
26d2a7f
Fix the definition of "politics"
karlhigley Jan 21, 2025
be82b72
Update pixi.lock
karlhigley Jan 21, 2025
0bf1dec
Clean up code merging issues from rebase
karlhigley Jan 21, 2025
5471b3a
Make the linter happy
karlhigley Jan 21, 2025
df8fd4b
Rename files and classes to highlight "onboarding"
karlhigley Jan 21, 2025
e88d951
Move local_req.py to `scripts`
karlhigley Jan 21, 2025
78d184e
Rename onboarding test data file
karlhigley Jan 21, 2025
d0d8762
Remove stray TODO
karlhigley Jan 21, 2025
73698b1
Use preference (instead of squared) in virtual click creation
karlhigley Jan 21, 2025
148ce29
Pass candidate articles to topic-aware user embedder in test
karlhigley Jan 21, 2025
be721bd
Bump Serverless dependency to `^3.40.0` for Node 22 compatibility
karlhigley Jan 21, 2025
d9fe0f3
Fix user onboarding embedder test
karlhigley Jan 21, 2025
366ce29
Rename topic-aware embedder file back to clarify the diff
karlhigley Jan 21, 2025
3dd78af
Remove locality calibration and RRF from available pipelines
karlhigley Jan 21, 2025
3fe3df3
Disable locality calibration test
karlhigley Jan 21, 2025
ffaf80f
add logging to eval startup
mdekstrand Jan 23, 2025
bedf642
relock with updated pixi
mdekstrand Jan 23, 2025
93e1a25
Merge branch 'main' into karl/feature/local-recs
mdekstrand Jan 23, 2025
30228b8
turn on debug logging w/ github actions debug logs
mdekstrand Jan 23, 2025
f0c1557
Use the same article embedder across all methods/invocations
karlhigley Jan 23, 2025
a5ecc49
Pull the onboarding pipelines with RRF back in
karlhigley Jan 24, 2025
4d97b45
Update comments
karlhigley Jan 24, 2025
2c22532
Merge remote-tracking branch 'origin/main' into karl/feature/local-recs
karlhigley Jan 30, 2025
94cf961
Update pixi.lock
karlhigley Jan 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
"serverless-python-requirements": "^6.1.0"
},
"dependencies": {
"serverless": "^3.39.0"
"serverless": "^3.40.0"
}
}
27,372 changes: 14,205 additions & 13,167 deletions pixi.lock

Large diffs are not rendered by default.

128 changes: 128 additions & 0 deletions scripts/local_req.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Simulates a request to the recommender without requiring Serverless
import warnings

from poprox_concepts.api.recommendations import RecommendationResponse
from poprox_recommender.handler import generate_recs
from poprox_recommender.paths import project_root
from poprox_recommender.topics import extract_general_topics

warnings.filterwarnings("ignore")


# Pipelines to run, in the order their results are generated and printed.
PIPELINES = (
    "nrms",
    "nrms-topics-static",
    "nrms-topics-candidate",
    "nrms-topics-clicked",
    "nrms-topics-hybrid",
    "nrms_rrf_static_candidate",
    "nrms_rrf_static_clicked",
)


def build_event(raw_json, pipeline):
    """Return the request event passed to ``generate_recs`` for one pipeline.

    Args:
        raw_json: The request body, as the raw JSON text read from disk.
        pipeline: Name of the recommender pipeline to select through the
            ``pipeline`` query-string parameter.

    Returns:
        A new dict with the ``body``, ``queryStringParameters`` and
        ``isBase64Encoded`` keys the handler is given by this script.
    """
    return {
        "body": raw_json,
        "queryStringParameters": {"pipeline": pipeline},
        "isBase64Encoded": False,
    }


def print_recs(pipeline, response, *, show_profile=False):
    """Print the ranked headlines (with general topics) for each profile.

    Args:
        pipeline: Pipeline name printed as the heading of each listing.
        response: Validated ``RecommendationResponse`` whose
            ``recommendations`` maps profile ids to article lists.
        show_profile: When true, also print a ``Recs for <profile_id>:``
            line before the pipeline name.
    """
    for profile_id, recs in response.recommendations.items():
        print("\n")
        if show_profile:
            print(f"Recs for {profile_id}:")
        print(f"{pipeline}")

        for idx, article in enumerate(recs):
            article_topics = extract_general_topics(article)
            print(f"{idx + 1}. {article.headline} {article_topics}")


def main():
    """Run the onboarding request through every pipeline and print the results."""
    # JSON text is UTF-8; be explicit so decoding does not depend on the
    # platform's locale default.
    with open(project_root() / "tests/request_data/onboarding.json", "r", encoding="utf-8") as req_file:
        raw_json = req_file.read()

    # Generate every response before printing anything, so that output from
    # the recommender runs is not interleaved with the result listings.
    responses = {}
    for pipeline in PIPELINES:
        # The second argument is passed as an empty dict, exactly as before
        # (presumably the handler's context object -- confirm against handler).
        raw_response = generate_recs(build_event(raw_json, pipeline), {})
        responses[pipeline] = RecommendationResponse.model_validate_json(raw_response["body"])

    for position, (pipeline, response) in enumerate(responses.items()):
        # Only the first listing carries the "Recs for <profile>" header;
        # the remaining listings are headed by the pipeline name alone.
        print_recs(pipeline, response, show_profile=position == 0)


if __name__ == "__main__":
    main()
Loading
Loading