Skip to content

Commit

Permalink
Post all repos of a crawled batch to Airsequel at once
Browse files (browse the repository at this point in the history)
  • Loading branch information
ad-si committed Jan 8, 2024
1 parent 2673e34 commit b63a05e
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 172 deletions.
206 changes: 88 additions & 118 deletions app/Airsequel.hs
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,17 +5,20 @@

{-# HLINT ignore "Use maybe" #-}
{-# HLINT ignore "Use unless" #-}
{-# HLINT ignore "Use fmap" #-}

module Airsequel where

import Protolude (
Either (..),
IO,
Int,
Integer,
Maybe (..),
Text,
encodeUtf8,
filter,
fromMaybe,
isJust,
print,
pure,
putErrLn,
Expand Down Expand Up @@ -64,6 +67,7 @@ import Network.HTTP.Client.TLS (tlsManagerSettings)
import Network.HTTP.Types (statusCode)
import Text.RawString.QQ (r)

import Numeric (showInt)
import Types (GqlRes (..), Repo (..), SaveStrategy (..))
import Utils (loadAirsWriteToken, loadDbEndpoint)

Expand All @@ -90,39 +94,9 @@ setRequestFields airseqWriteToken query variables req =
upsertRepoQuery :: Text
upsertRepoQuery = do
[r|
mutation InsertRepo (
$rowid: Int
$github_id: Int!
$owner: String!
$name: String!
$description: String
$homepage: String
$language: String
$stargazers_count: Int
$open_issues_count: Int
$commits_count: Int
$is_archived: Boolean
$created_utc: String
$updated_utc: String
$crawled_utc: String!
) {
mutation InsertRepo( $objects: [repos_insert_input!]! ) {
insert_repos(
objects: [{
rowid: $rowid
github_id: $github_id
owner: $owner
name: $name
description: $description
homepage: $homepage
language: $language
stargazers_count: $stargazers_count
open_issues_count: $open_issues_count
commits_count: $commits_count
is_archived: $is_archived
created_utc: $created_utc
updated_utc: $updated_utc
crawled_utc: $crawled_utc
}]
objects: $objects
on_conflict: {
constraint: rowid
update_columns: [
Expand All @@ -149,125 +123,121 @@ upsertRepoQuery = do
|]


-- | Get rowid of a repo with the specified GitHub ID
getRowid :: Manager -> Text -> Text -> Repo -> IO (Maybe Int)
getRowid manager dbEndpoint airseqWriteToken repo = do
-- | Load Airsequel rowid of repos by their GitHub ID
loadRowids :: Manager -> Text -> Text -> [Repo] -> IO [Repo]
loadRowids manager dbEndpoint airseqWriteToken repos = do
let
githubId = repo.githubId

getRowidQuery :: Text
getRowidQuery =
getReposWithRowidQuery :: Text
getReposWithRowidQuery =
[r|
query GetRowid($github_id: Int!) {
repos(
filter: { github_id: { eq: $github_id } }
) {
query GetRowids ($githubIds: [Int]) {
repos(filter: { github_id: { in: $githubIds } }) {
databaseId: github_id
rowid
}
}
|]

initialGetRowidRequest <- parseRequest $ T.unpack dbEndpoint
initialGetRowidsRequest <- parseRequest $ T.unpack dbEndpoint

let getRowidRequest =
let getRowidsRequest =
setRequestFields
airseqWriteToken
getRowidQuery
(KeyMap.fromList ["github_id" .= githubId])
initialGetRowidRequest
getRowidResponse <- httpLbs getRowidRequest manager
getReposWithRowidQuery
(KeyMap.fromList ["githubIds" .= (repos <&> githubId)])
initialGetRowidsRequest

getReposWithRowidResponse <- httpLbs getRowidsRequest manager

when
(getRowidResponse.responseStatus.statusCode /= 200)
(putErrText $ show getRowidResponse.responseBody)
(getReposWithRowidResponse.responseStatus.statusCode /= 200)
(putErrText $ show getReposWithRowidResponse.responseBody)

let
repoSlug = repo.owner <> "/" <> repo.name
msgBase =
"Repo \"" <> repoSlug <> "\" is not"

rowidResult :: Either [P.Char] Int =
( getRowidResponse.responseBody
& eitherDecode
:: Either [P.Char] Object
)
>>= ( \gqlRes ->
P.flip parseEither gqlRes $ \gqlResObj -> do
gqlData <- gqlResObj .: "data"
gqlData .: "repos"
)
>>= ( \case
[] -> Left $ T.unpack $ msgBase <> " in Airsequel yet"
[repoObj :: Object] -> parseEither (.: "rowid") repoObj
_ ->
Left $
T.unpack $
"Error: " <> msgBase <> " unique in Airsequel"
)

case rowidResult of
ghIdsWithRowid
:: Either [P.Char] [(Integer {- githubId -}, Integer {- rowid -})] =
(getReposWithRowidResponse.responseBody & eitherDecode)
>>= ( \gqlRes ->
P.flip parseEither gqlRes $ \gqlResObj -> do
gqlData <- gqlResObj .: "data"
gqlData .: "repos"
)
<&> ( \(reposWithRowid :: [Repo]) ->
reposWithRowid
& filter (\repoWithRowid -> isJust repoWithRowid.rowid)
<&> ( \repoWithRowid ->
( repoWithRowid.githubId
, repoWithRowid.rowid & fromMaybe 0
)
)
)

case ghIdsWithRowid of
Left err -> do
putErrLn err
pure Nothing
Right rowid -> do
P.putText $
"Repo \""
<> repoSlug
<> "\" is already in Airsequel "
<> ("(rowid " <> show rowid <> ") ")
<> "and will be updated."
pure $ Just rowid


{-| Insert or upsert the repo in Airsequel
pure repos
Right rowids -> do
P.putStrLn $
showInt (P.length rowids) " of "
<> showInt (P.length repos) " repos already exist in Airsequel"

pure $
repos <&> \repo ->
repo
{ rowid =
rowids
& filter (\(ghId, _) -> ghId == repo.githubId)
& P.head
<&> P.snd
}


{-| Insert or upsert repos in Airsequel
via a POST request executed by http-client
-}
saveRepoInAirsequel :: SaveStrategy -> Repo -> IO ()
saveRepoInAirsequel saveStrategy repo = do
saveReposInAirsequel :: SaveStrategy -> [Repo] -> IO ()
saveReposInAirsequel saveStrategy repos = do
P.putText $ "⏳ Saving " <> show (P.length repos) <> " repos in Airsequel …"

dbEndpoint <- loadDbEndpoint
airseqWriteToken <- loadAirsWriteToken

manager <- newManager tlsManagerSettings

now <- getCurrentTime <&> (iso8601Show >>> T.pack)

-- Get rowid for the repo to execute an upsert
-- if the save strategy is to overwrite
rowidMb <-
if saveStrategy == OverwriteRepo
then
getRowid
manager
dbEndpoint
airseqWriteToken
repo
else pure Nothing
-- Get rowid for repos if repos shall be overwritten
reposNorm <- case saveStrategy of
OverwriteRepo -> loadRowids manager dbEndpoint airseqWriteToken repos
AddRepo -> pure repos

initialInsertRequest <- parseRequest $ T.unpack dbEndpoint

let
variables =
[ "rowid" .= rowidMb
, "github_id" .= repo.githubId
, "owner" .= repo.owner
, "name" .= repo.name
, "description" .= repo.description
, "homepage" .= repo.homepageUrl
, "language" .= repo.primaryLanguage
, "stargazers_count" .= repo.stargazerCount
, "open_issues_count" .= repo.openIssuesCount
, "commits_count" .= (repo.commitsCount & fromMaybe 0)
, "is_archived" .= repo.isArchived
, "created_utc" .= iso8601Show repo.createdAt
, "updated_utc" .= iso8601Show repo.updatedAt
, "crawled_utc" .= now
]
objects =
reposNorm <&> \repo ->
object
[ "rowid" .= repo.rowid
, "github_id" .= repo.githubId
, "owner" .= repo.owner
, "name" .= repo.name
, "description" .= repo.description
, "homepage" .= repo.homepageUrl
, "language" .= repo.primaryLanguage
, "stargazers_count" .= repo.stargazerCount
, "open_issues_count" .= repo.openIssuesCount
, "commits_count" .= (repo.commitsCount & fromMaybe 0)
, "is_archived" .= repo.isArchived
, "created_utc" .= (repo.createdAt <&> iso8601Show)
, "updated_utc" .= (repo.updatedAt <&> iso8601Show)
, "crawled_utc" .= now
]

insertRequest =
setRequestFields
airseqWriteToken
upsertRepoQuery
(KeyMap.fromList variables)
(KeyMap.fromList ["objects" .= objects])
initialInsertRequest

insertResponse <- httpLbs insertRequest manager
Expand Down
37 changes: 21 additions & 16 deletions app/Main.hs
Original file line number | Diff line number | Diff line change
Expand Up @@ -23,6 +23,7 @@ import Protolude (
mempty,
pure,
putErrText,
putStrLn,
putText,
show,
when,
Expand Down Expand Up @@ -75,7 +76,8 @@ import Options.Applicative (
)
import Text.RawString.QQ (r)

import Airsequel (saveRepoInAirsequel)
import Airsequel (saveReposInAirsequel)
import Numeric (showInt)
import Options.Applicative.Help.Pretty (vsep)
import Types (GqlRepoRes (..), Repo (..), SaveStrategy (..))
import Utils (loadGitHubToken)
Expand Down Expand Up @@ -127,9 +129,14 @@ commands = do


formatRepo :: Repo -> Text
formatRepo repo =
formatRepo repo = do
let repoSlug =
(repo.owner & fromMaybe "")
<> "/"
<> (repo.name & fromMaybe "")

"\n\n"
<> ("repo_url: github.com/" <> repo.owner <> "/" <> repo.name <> "\n")
<> ("repo_url: github.com/" <> repoSlug <> "\n")
<> ("description: " <> (repo.description & fromMaybe "") <> "\n")
<> ("homepage: " <> (repo.homepageUrl & fromMaybe "") <> "\n")
<> ("language: " <> (repo.primaryLanguage & fromMaybe "") <> "\n")
Expand Down Expand Up @@ -221,6 +228,8 @@ getGhHeaders tokenMb =

execGithubGqlQuery :: Maybe Text -> Text -> KeyMap Value -> [Repo] -> IO [Repo]
execGithubGqlQuery ghTokenMb query variables initialRepos = do
putText "\n▶️ Query a batch of repos from GitHub …"

manager <- newManager tlsManagerSettings

initialRequest <- parseRequest $ T.unpack "https://api.github.com/graphql"
Expand Down Expand Up @@ -254,16 +263,16 @@ execGithubGqlQuery ghTokenMb query variables initialRepos = do

let repos :: [Repo] = gqlResponse.repos

putText $
putStrLn $
"✅ Received "
<> show (P.length repos)
<> " repos from GitHub"
<> showInt (P.length repos) " repos "
<> "from GitHub"

repos
<&> ( \repo ->
repo.owner
(repo.owner & fromMaybe "")
<> ("/" :: Text)
<> repo.name
<> (repo.name & fromMaybe "")
<> (" | stars: " :: Text)
<> show repo.stargazerCount
<> (" | commits: " :: Text)
Expand All @@ -275,13 +284,7 @@ execGithubGqlQuery ghTokenMb query variables initialRepos = do
& mapM_ putText

when (P.not $ P.null repos) $ do
putText $
"⏳ Save "
<> show (P.length repos)
<> " repos to Airsequel …"
-- TODO: Save all repos in one request
repos
& mapM_ (saveRepoInAirsequel OverwriteRepo)
saveReposInAirsequel OverwriteRepo repos

case gqlResponse.nextCursorMb of
Nothing -> pure $ initialRepos <> repos
Expand Down Expand Up @@ -394,7 +397,9 @@ run cliCmd = do

repos <- loadAndSaveReposViaSearch ghTokenMb searchQueryNorm 20 Nothing

putText $ "🏁 Total number of crawled repos: " <> show (P.length repos)
putStrLn $
"🏁 Total number of crawled repos: "
<> showInt (P.length repos) ""

pure ()

Expand Down
Loading

0 comments on commit b63a05e

Please sign in to comment.