From 8a4aca2bce09dc91365d1bf08216cb3a36e353b9 Mon Sep 17 00:00:00 2001 From: Guillaume Hivert Date: Tue, 21 May 2024 20:21:49 +0200 Subject: [PATCH] feat: add github popularity fetching Signed-off-by: Guillaume Hivert --- ...525_alter_table_package_add_popularity.sql | 9 +++ apps/backend/db/schema.sql | 5 +- apps/backend/gleam.toml | 1 + apps/backend/manifest.toml | 5 +- apps/backend/src/api/github.gleam | 60 +++++++++++++++++++ .../src/api/github/stargazer_count.gleam | 21 +++++++ apps/backend/src/backend.gleam | 4 ++ apps/backend/src/backend/config.gleam | 6 +- .../src/backend/postgres/postgres.gleam | 8 ++- .../src/backend/postgres/queries.gleam | 27 ++++++++- apps/backend/src/tasks/popularity.gleam | 39 ++++++++++++ apps/frontend/manifest.toml | 2 +- 12 files changed, 179 insertions(+), 8 deletions(-) create mode 100644 apps/backend/db/migrations/20240521174525_alter_table_package_add_popularity.sql create mode 100644 apps/backend/src/api/github.gleam create mode 100644 apps/backend/src/api/github/stargazer_count.gleam create mode 100644 apps/backend/src/tasks/popularity.gleam diff --git a/apps/backend/db/migrations/20240521174525_alter_table_package_add_popularity.sql b/apps/backend/db/migrations/20240521174525_alter_table_package_add_popularity.sql new file mode 100644 index 0000000..b9f9f49 --- /dev/null +++ b/apps/backend/db/migrations/20240521174525_alter_table_package_add_popularity.sql @@ -0,0 +1,9 @@ +-- migrate:up +alter table package + add column popularity jsonb, + drop column favorites; + +-- migrate:down +alter table package + drop column popularity, + add column favorites int not null default 0; diff --git a/apps/backend/db/schema.sql b/apps/backend/db/schema.sql index 1051561..657c105 100644 --- a/apps/backend/db/schema.sql +++ b/apps/backend/db/schema.sql @@ -121,7 +121,7 @@ CREATE TABLE public.package ( created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, 
rank integer DEFAULT 0 NOT NULL, - favorites integer DEFAULT 0 NOT NULL + popularity jsonb ); @@ -485,4 +485,5 @@ INSERT INTO public.schema_migrations (version) VALUES ('20240512214036'), ('20240514214138'), ('20240517083006'), - ('20240518232212'); + ('20240518232212'), + ('20240521174525'); diff --git a/apps/backend/gleam.toml b/apps/backend/gleam.toml index 0e833da..8944e6b 100644 --- a/apps/backend/gleam.toml +++ b/apps/backend/gleam.toml @@ -24,6 +24,7 @@ tom = ">= 1.0.0 and < 2.0.0" verl = ">= 1.1.1 and < 2.0.0" wisp = "~> 0.14" cors_builder = ">= 1.0.0 and < 2.0.0" +decipher = ">= 1.2.0 and < 2.0.0" [dev-dependencies] gleeunit = "~> 1.0" diff --git a/apps/backend/manifest.toml b/apps/backend/manifest.toml index 05d9d57..e5357ab 100644 --- a/apps/backend/manifest.toml +++ b/apps/backend/manifest.toml @@ -6,6 +6,7 @@ packages = [ { name = "backoff", version = "1.1.6", build_tools = ["rebar3"], requirements = [], otp_app = "backoff", source = "hex", outer_checksum = "CF0CFFF8995FB20562F822E5CC47D8CCF664C5ECDC26A684CBE85C225F9D7C39" }, { name = "birl", version = "1.6.1", build_tools = ["gleam"], requirements = ["gleam_stdlib", "ranger"], otp_app = "birl", source = "hex", outer_checksum = "976CFF85D34D50F7775896615A71745FBE0C325E50399787088F941B539A0497" }, { name = "cors_builder", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_http", "gleam_stdlib", "mist", "wisp"], otp_app = "cors_builder", source = "hex", outer_checksum = "951B5B648E958BD6181A6EED98BCA4EEB302B83DC7DCE2954B3462114209EC43" }, + { name = "decipher", version = "1.2.0", build_tools = ["gleam"], requirements = ["birl", "gleam_json", "gleam_stdlib", "stoiridh_version"], otp_app = "decipher", source = "hex", outer_checksum = "9F1B5C6FF0D798046E4E0EF87D09DD729324CB72BD7F0D4152B797324D51223E" }, { name = "dot_env", version = "0.5.1", build_tools = ["gleam"], requirements = ["gleam_stdlib", "simplifile"], otp_app = "dot_env", source = "hex", outer_checksum = 
"AF5C972D6129F67AF3BB00134AB2808D37111A8D61686CFA86F3ADF652548982" }, { name = "exception", version = "2.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "exception", source = "hex", outer_checksum = "F5580D584F16A20B7FCDCABF9E9BE9A2C1F6AC4F9176FA6DD0B63E3B20D450AA" }, { name = "filepath", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "filepath", source = "hex", outer_checksum = "EFB6FF65C98B2A16378ABC3EE2B14124168C0CE5201553DE652E2644DCFDB594" }, @@ -39,6 +40,7 @@ packages = [ { name = "ranger", version = "1.2.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "ranger", source = "hex", outer_checksum = "1566C272B1D141B3BBA38B25CB761EF56E312E79EC0E2DFD4D3C19FB0CC1F98C" }, { name = "shellout", version = "1.6.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "shellout", source = "hex", outer_checksum = "E2FCD18957F0E9F67E1F497FC9FF57393392F8A9BAEAEA4779541DE7A68DD7E0" }, { name = "simplifile", version = "1.7.0", build_tools = ["gleam"], requirements = ["filepath", "gleam_stdlib"], otp_app = "simplifile", source = "hex", outer_checksum = "1D5DFA3A2F9319EC85825F6ED88B8E449F381B0D55A62F5E61424E748E7DDEB0" }, + { name = "stoiridh_version", version = "0.1.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "stoiridh_version", source = "hex", outer_checksum = "298ABEA44DF37764A34C2E9190A84BF2770BC59DD9397C6DC7708040E5A0142B" }, { name = "thoas", version = "1.2.1", build_tools = ["rebar3"], requirements = [], otp_app = "thoas", source = "hex", outer_checksum = "E38697EDFFD6E91BD12CEA41B155115282630075C2A727E7A6B2947F5408B86A" }, { name = "tom", version = "1.0.0", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "tom", source = "hex", outer_checksum = "A5364613E3DBF77F38EFF81DA9F99324086D029EC2B2D44348762FBE38602311" }, { name = "verl", version = "1.1.1", build_tools = ["rebar3"], requirements = [], otp_app = "verl", source = 
"hex", outer_checksum = "0925E51CD92A0A8BE271765B02430B2E2CFF8AC30EF24D123BD0D58511E8FB18" },
@@ -48,7 +50,8 @@ packages = [
 [requirements]
 aws4_request = { version = ">= 0.1.1 and < 1.0.0" }
 birl = { version = "~> 1.6" }
-cors_builder = { version = ">= 1.0.0 and < 2.0.0"}
+cors_builder = { version = ">= 1.0.0 and < 2.0.0" }
+decipher = { version = ">= 1.2.0 and < 2.0.0"}
 dot_env = { version = "~> 0.5" }
 gleam_erlang = { version = "~> 0.25" }
 gleam_hexpm = { version = "~> 1.0" }
diff --git a/apps/backend/src/api/github.gleam b/apps/backend/src/api/github.gleam
new file mode 100644
index 0000000..becbc3c
--- /dev/null
+++ b/apps/backend/src/api/github.gleam
@@ -0,0 +1,78 @@
+import api/github/stargazer_count
+import backend/error
+import gleam/dynamic
+import gleam/function
+import gleam/http
+import gleam/http/request
+import gleam/httpc
+import gleam/json
+import gleam/list
+import gleam/option.{type Option, Some}
+import gleam/regex
+import gleam/result
+
+/// Sends a GraphQL `query` to the GitHub API, authenticated with `token`, and
+/// decodes the JSON response body with `decoder`. `variables` are serialised
+/// as JSON `null` when absent. Transport failures are mapped to
+/// `error.FetchError`, decoding failures to `error.JsonError`.
+fn query(
+  token: String,
+  query: String,
+  variables: Option(json.Json),
+  decoder: dynamic.Decoder(a),
+) {
+  let body =
+    json.object([
+      #("query", json.string(query)),
+      #("variables", json.nullable(variables, function.identity)),
+    ])
+  use response <- result.try(
+    request.new()
+    |> request.set_header("authorization", "Bearer " <> token)
+    |> request.set_header("user-agent", "gloogle / 0.0.0")
+    |> request.set_method(http.Post)
+    |> request.set_scheme(http.Https)
+    |> request.set_host("api.github.com")
+    |> request.set_path("/graphql")
+    |> request.set_body(json.to_string(body))
+    |> httpc.send()
+    |> result.map_error(error.FetchError),
+  )
+
+  response.body
+  |> json.decode(using: decoder)
+  |> result.map_error(error.JsonError)
+}
+
+/// Finds the first GitHub `owner/name` pair in `repo_url`, exposed as the two
+/// submatches of the returned match. Returns `error.UnknownError` when
+/// `repo_url` does not contain a GitHub repository URL.
+fn match_repository_name(repo_url: String) {
+  // Owner and name are single path segments: the name stops at `/`, `?` or `#`
+  // and drops a `.git` suffix, so deep links and trailing slashes do not leak
+  // into the captured groups.
+  let assert Ok(owner_name) =
+    regex.from_string(
+      "https?://(?:www\\.)?github\\.com/([^/]+)/([^/?#]+?)(?:\\.git)?(?:[/?#].*)?$",
+    )
+  regex.scan(with: owner_name, content: repo_url)
+  |> list.first()
+  |> result.replace_error(error.UnknownError(
+    "No repository match for " <> repo_url,
+  ))
+}
+
+/// Fetches the stargazer count of the GitHub repository found at `repo_url`,
+/// using `token` to authenticate against the GitHub GraphQL API.
+pub fn get_stargazer_count(token: String, repo_url: String) {
+  use match <- result.try(match_repository_name(repo_url))
+  case match.submatches {
+    [Some(owner), Some(name)] ->
+      stargazer_count.variables(name, owner)
+      |> query(token, stargazer_count.query, _, stargazer_count.decoder)
+    _ ->
+      Error(error.UnknownError(
+        "Unable to extract owner and name from " <> repo_url,
+      ))
+  }
+}
diff --git a/apps/backend/src/api/github/stargazer_count.gleam b/apps/backend/src/api/github/stargazer_count.gleam
new file mode 100644
index 0000000..9cfc58d
--- /dev/null
+++ b/apps/backend/src/api/github/stargazer_count.gleam
@@ -0,0 +1,24 @@
+import decipher
+import gleam/dynamic
+import gleam/json
+import gleam/option.{Some}
+
+/// GraphQL query selecting the `stargazerCount` of one repository.
+pub const query = "
+query getStargazers($name: String!, $owner: String!) {
+  repository(owner: $owner, name: $name) {
+    stargazerCount
+  }
+}"
+
+/// Decodes the integer found at `data.repository.stargazerCount`.
+pub fn decoder(dyn) {
+  decipher.at(["data", "repository", "stargazerCount"], dynamic.int)(dyn)
+}
+
+/// Builds the `variables` object expected by `query`.
+pub fn variables(name: String, owner: String) {
+  Some(
+    json.object([#("name", json.string(name)), #("owner", json.string(owner))]),
+  )
+}
diff --git a/apps/backend/src/backend.gleam b/apps/backend/src/backend.gleam
index b5b7add..a82204b 100644
--- a/apps/backend/src/backend.gleam
+++ b/apps/backend/src/backend.gleam
@@ -9,6 +9,7 @@ import mist
 import periodic
 import setup
 import tasks/hex
+import tasks/popularity
 import tasks/ranking
 import wisp
 import wisp/logger
@@ -42,6 +43,9 @@ pub fn main() {
       add_periodic_worker(periodic_children, waiting: 86_400_000, do: fn() {
         ranking.compute_ranking(ctx)
       })
+      add_periodic_worker(periodic_children, waiting: 86_400_000, do: fn() {
+        popularity.compute_popularity(ctx)
+      })
     })
   })
 
diff --git a/apps/backend/src/backend/config.gleam b/apps/backend/src/backend/config.gleam
index 7cf4640..e51a1fc 100644
--- a/apps/backend/src/backend/config.gleam
+++ b/apps/backend/src/backend/config.gleam
@@ -6,7 +6,7 @@ import wisp
 import wisp/logger
 
 pub type Context {
-  Context(db: pgo.Connection, hex_api_key: String)
+  
Context(db: pgo.Connection, hex_api_key: String, github_token: String) } pub type Config { @@ -15,12 +15,14 @@ pub type Config { hex_api_key: String, port: Int, level: logger.Level, + github_token: String, ) } pub fn read_config() { let assert Ok(database_url) = os.get_env("DATABASE_URL") let assert Ok(hex_api_key) = os.get_env("HEX_API_KEY") + let assert Ok(github_token) = os.get_env("GITHUB_TOKEN") let assert Ok(port) = os.get_env("PORT") |> result.try(int.parse) @@ -28,7 +30,7 @@ pub fn read_config() { os.get_env("LOG_LEVEL") |> result.try(logger.parse) |> result.unwrap(logger.Info) - Config(database_url, hex_api_key, port, level) + Config(database_url, hex_api_key, port, level, github_token) } pub fn get_secret_key_base() { diff --git a/apps/backend/src/backend/postgres/postgres.gleam b/apps/backend/src/backend/postgres/postgres.gleam index edfe3d3..cff020f 100644 --- a/apps/backend/src/backend/postgres/postgres.gleam +++ b/apps/backend/src/backend/postgres/postgres.gleam @@ -13,7 +13,13 @@ pub fn connect(cnf: Config) { let assert Ok(config) = parse_database_url(cnf.database_url) config |> pgo.connect() - |> fn(db) { Context(db: db, hex_api_key: cnf.hex_api_key) } + |> fn(db) { + Context( + db: db, + hex_api_key: cnf.hex_api_key, + github_token: cnf.github_token, + ) + } } fn parse_database_url(database_url: String) { diff --git a/apps/backend/src/backend/postgres/queries.gleam b/apps/backend/src/backend/postgres/queries.gleam index f3fdb8a..4a2a548 100644 --- a/apps/backend/src/backend/postgres/queries.gleam +++ b/apps/backend/src/backend/postgres/queries.gleam @@ -4,13 +4,14 @@ import backend/error import backend/gleam/context import birl.{type Time} import gleam/bool -import gleam/dict +import gleam/dict.{type Dict} import gleam/dynamic import gleam/hexpm import gleam/json import gleam/list import gleam/option.{type Option} import gleam/package_interface +import gleam/pair import gleam/pgo import gleam/result import gleam/string @@ -592,3 +593,27 @@ pub fn 
update_package_rank(db: pgo.Connection, package: String, rank: Int) {
   |> pgo.execute(db, [pgo.text(package), pgo.int(rank)], dynamic.dynamic)
   |> result.map_error(error.DatabaseError)
 }
+
+pub fn select_package_repository_address(db: pgo.Connection, offset: Int) {
+  let decoder = dynamic.element(0, dynamic.optional(dynamic.string))
+  "SELECT repository FROM package ORDER BY repository LIMIT 100 OFFSET $1"
+  |> pgo.execute(db, [pgo.int(offset)], decoder)
+  |> result.map_error(error.DatabaseError)
+  |> result.map(fn(r) { r.rows })
+}
+
+pub fn update_package_popularity(
+  db: pgo.Connection,
+  url: String,
+  popularity: Dict(String, Int),
+) {
+  let popularity =
+    dict.to_list(popularity)
+    |> list.map(pair.map_second(_, json.int))
+    |> json.object()
+    |> json.to_string()
+    |> pgo.text()
+  "UPDATE package SET popularity = $2 WHERE repository = $1"
+  |> pgo.execute(db, [pgo.text(url), popularity], dynamic.dynamic)
+  |> result.map_error(error.DatabaseError)
+}
diff --git a/apps/backend/src/tasks/popularity.gleam b/apps/backend/src/tasks/popularity.gleam
new file mode 100644
index 0000000..6f13aff
--- /dev/null
+++ b/apps/backend/src/tasks/popularity.gleam
@@ -0,0 +1,51 @@
+import api/github
+import backend/config.{type Context}
+import backend/postgres/queries
+import gleam/bool
+import gleam/dict
+import gleam/function
+import gleam/list
+import gleam/option
+import gleam/result
+import wisp
+
+/// Refreshes the stored popularity of every package that declares a
+/// repository, walking the package table page by page from offset 0.
+pub fn compute_popularity(ctx: Context) {
+  wisp.log_info("Syncing popularity")
+  do_compute_popularity(ctx, offset: 0)
+  |> function.tap(fn(_) { wisp.log_info("Syncing popularity finished!") })
+}
+
+/// Syncs the page of repositories starting at `offset`, then recurses on the
+/// next page (pages are 100 rows wide) until an empty page is returned.
+/// A failure on a single repository is logged and skipped so that one bad
+/// URL cannot abort the whole run; only a failing page query stops it.
+fn do_compute_popularity(ctx: Context, offset offset: Int) {
+  let db = ctx.db
+  use repos <- result.try(queries.select_package_repository_address(db, offset))
+  use <- bool.guard(when: list.is_empty(repos), return: Ok(Nil))
+  list.each(repos, fn(repo) {
+    case repo {
+      // Packages without a repository have nothing to sync.
+      option.None -> Nil
+      option.Some(repo) ->
+        case update_repo_popularity(ctx, repo) {
+          Ok(_) -> Nil
+          Error(_) -> wisp.log_warning("Unable to sync popularity for " <> repo)
+        }
+    }
+  })
+  do_compute_popularity(ctx, offset: offset + 100)
+}
+
+/// Fetches the GitHub stargazer count of `repo` and stores it under the
+/// `github` key of the popularity of every package pointing to that repository.
+fn update_repo_popularity(ctx: Context, repo: String) {
+  wisp.log_debug("Syncing " <> repo)
+  use count <- result.try(github.get_stargazer_count(ctx.github_token, repo))
+  // Only report success when the update actually went through.
+  dict.from_list([#("github", count)])
+  |> queries.update_package_popularity(ctx.db, repo, _)
+  |> result.map(fn(_) { wisp.log_debug("Synced " <> repo) })
+}
diff --git a/apps/frontend/manifest.toml b/apps/frontend/manifest.toml
index 8ebc707..0306a3a 100644
--- a/apps/frontend/manifest.toml
+++ b/apps/frontend/manifest.toml
@@ -27,7 +27,7 @@ gleam_javascript = { version = "~> 0.8" }
 gleam_json = { version = ">= 1.0.1 and < 2.0.0" }
 gleam_stdlib = { version = "~> 0.34 or ~> 1.0" }
 gleeunit = { version = "~> 1.0" }
-grille_pain = { version = ">= 1.0.0 and < 2.0.0"}
+grille_pain = { version = ">= 1.0.0 and < 2.0.0" }
 lustre = { version = ">= 4.2.0 and < 5.0.0" }
 lustre_http = { version = "~> 0.5" }
 modem = { version = ">= 1.1.0 and < 2.0.0" }