Skip to content

Commit

Permalink
Save all relevant fields in Airsequel
Browse files Browse the repository at this point in the history
  • Loading branch information
ad-si committed Nov 21, 2023
1 parent 5a031da commit 67e2c43
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 65 deletions.
101 changes: 40 additions & 61 deletions app/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@ import Protolude (
Eq,
Generic,
IO,
Int,
Integer,
Maybe (..),
Proxy (Proxy),
Show,
Text,
elem,
Expand Down Expand Up @@ -80,7 +78,7 @@ import Network.URI (URI)
import System.Environment (lookupEnv)
import Text.RawString.QQ (r)

import Utils (emptyOwner, emptyRepo, mapMSequentially)
import Utils (RepoObject, mapMSequentially, repoObjectToRepo)

-- | Replaces a variable in a string with a value
var :: Text -> Text -> Text -> Text
Expand Down Expand Up @@ -145,22 +143,6 @@ formatRepo extendedRepo =
<> "\n"
)

-- queryRepos :: Text
-- queryRepos =
-- [r|
-- query reposQuery {
-- repos( limit: 100 ) {
-- rowid
-- id
-- name
-- language
-- url
-- stars
-- updated_utc
-- }
-- }
-- |]

-- | Query @Link@ header with @rel=last@ from the request headers
getLastUrl :: Response a -> Maybe URI
getLastUrl req = do
Expand Down Expand Up @@ -275,7 +257,7 @@ upsertRepoQuery utc extendedRepo =
& var
"language"
(repo & GH.repoLanguage <&> GH.getLanguage & fromMaybe "")
& var "stargazers_count" (repo & GH.repoWatchersCount & show)
& var "stargazers_count" (repo & GH.repoStargazersCount & show)
& var "open_issues_count" (repo & GH.repoOpenIssuesCount & show)
& var "commits_count" (commitsCount & fromMaybe 0 & show)
& var "created_utc" (getTimestamp GH.repoCreatedAt)
Expand Down Expand Up @@ -325,7 +307,7 @@ insertRepoQuery utc extendedRepo =
& var
"language"
(repo & GH.repoLanguage <&> GH.getLanguage & fromMaybe "")
& var "stargazers_count" (repo & GH.repoWatchersCount & show)
& var "stargazers_count" (repo & GH.repoStargazersCount & show)
& var "open_issues_count" (repo & GH.repoOpenIssuesCount & show)
& var "commits_count" (commitsCount & fromMaybe 0 & show)
& var "created_utc" (getTimestamp GH.repoCreatedAt)
Expand Down Expand Up @@ -400,23 +382,8 @@ loadAndSaveRepo saveStrategy owner name = do
putText $ formatRepo extendedRepo
saveRepoInAirsequel saveStrategy extendedRepo

-- | Minimal repository record decoded from one node of a GraphQL response.
data RepoObject = RepoObject
{ owner :: Text -- ^ Owner login, read from the nested @owner.login@ value
, name :: Text -- ^ Repository name, read from the @name@ key
, githubId :: Int -- ^ Numeric id, read from the @databaseId@ key
}
deriving (Show, Eq, Generic)

-- | Decode a 'RepoObject' from a JSON object.
--
-- The owner login is taken from the nested @owner@ object and the id from
-- the @databaseId@ key.
instance FromJSON RepoObject where
  parseJSON = withObject "RepoObject" $ \node -> do
    ownerNode <- node .: "owner"
    ownerLogin <- ownerNode .: "login"
    repoName <- node .: "name"
    databaseId <- node .: "databaseId"
    pure
      RepoObject
        { owner = ownerLogin
        , name = repoName
        , githubId = databaseId
        }

data GqlResponse = GqlResponse
{ repos :: [RepoObject]
{ repos :: [Repo]
, errorsMb :: Maybe Value
, nextCursorMb :: Maybe Text
}
Expand All @@ -428,12 +395,17 @@ instance FromJSON GqlResponse where
errorsMb <- o .:? "errors"
search <- data_ .: "search"
edges <- search .: "edges"
repos <- edges & mapM (.: "node")
repos :: [RepoObject] <- edges & mapM (.: "node")

pageInfo <- search .: "pageInfo"
nextCursorMb <- pageInfo .:? "endCursor"

pure GqlResponse{repos, errorsMb, nextCursorMb}
pure
GqlResponse
{ repos = repos <&> repoObjectToRepo
, errorsMb
, nextCursorMb
}

execGqlQuery ::
Text ->
Expand Down Expand Up @@ -485,22 +457,15 @@ execGqlQuery apiEndpoint tokenMb query nextCursorMb initialRepos = do
Just errors -> putErrText $ "GraphQL Errors:\n" <> show errors
Nothing -> pure ()

let repos :: [GH.Repo] =
gqlResponse.repos <&> \repoObj ->
emptyRepo
{ GH.repoOwner =
emptyOwner
{ GH.simpleOwnerLogin =
GH.mkOwnerName repoObj.owner
}
, GH.repoName = GH.mkRepoName repoObj.name
, GH.repoId =
GH.mkId
(Proxy :: Proxy GH.Repo)
repoObj.githubId
}

commitsCounts <- mapMSequentially 1000 getNumberOfCommits repos
let
repos :: [GH.Repo] = gqlResponse.repos
-- Number must be quite high to avoid rate limiting
delayBetweenRequests = 20000 -- ms
commitsCounts <-
mapMSequentially
delayBetweenRequests
getNumberOfCommits
repos

let extendedRepos =
P.zipWith
Expand All @@ -513,6 +478,12 @@ execGqlQuery apiEndpoint tokenMb query nextCursorMb initialRepos = do
repos
commitsCounts

putText
$ "⏳ Save "
<> show (P.length repos)
<> " repos to Airsequel …"
extendedRepos & mapM_ (saveRepoInAirsequel OverwriteRepo)

case gqlResponse.nextCursorMb of
Nothing -> pure $ initialRepos <> extendedRepos
Just nextCursor -> do
Expand All @@ -531,7 +502,7 @@ getReposViaSearch githubToken searchQuery = do
search(
query: "<<searchQuery>>",
type: REPOSITORY,
first: 100
first: 20
<<optionalAfter>>
) {
edges {
Expand All @@ -540,6 +511,16 @@ getReposViaSearch githubToken searchQuery = do
owner { login }
name
databaseId
stargazerCount
createdAt
description
homepageUrl
name
issues (states: [OPEN]) {
totalCount
}
createdAt
updatedAt
}
}
}
Expand All @@ -565,10 +546,12 @@ main = do
-- TODO: Add CLI flag to load and save a single repo
-- loadAndSaveRepo OverwriteRepo "Airsequel" "SQLiteDAV"

-- TODO: Add CLI flag to choose between OverwriteRepo and AddRepo

repos <-
getReposViaSearch
githubTokenMb
"language:haskell stars:>500 sort:stars-desc"
"language:haskell stars:>200 sort:updated-desc"

putText $ "Found " <> show (P.length repos) <> " repos:"
repos
Expand All @@ -581,8 +564,4 @@ main = do
)
& mapM_ putText

putText $ "⏳ Save " <> show (P.length repos) <> " repos to Airsequel …"
-- TODO: Add CLI flag to choose between OverwriteRepo and AddRepo
repos & mapM_ (saveRepoInAirsequel OverwriteRepo)

pure ()
73 changes: 70 additions & 3 deletions app/Utils.hs
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
module Utils (emptyOwner, emptyRepo, mapMSequentially)
module Utils (
emptyOwner,
emptyRepo,
mapMSequentially,
RepoObject (..),
repoObjectToRepo,
)
where

import Protolude (

Check warning on line 10 in app/Utils.hs

View workflow job for this annotation

GitHub Actions / build

The import of ‘&, fromMaybe’ from module ‘Protolude’ is redundant
Bool (False),
Eq,
Generic,
IO,
Int,
Maybe (Nothing),
Maybe (Just, Nothing),
Proxy (Proxy),
Show,
Text,
fromMaybe,
liftIO,
mapM,
pure,
($),
(&),
(*),
(<*),
(>>=),
)

import Control.Concurrent (threadDelay)
import Data.Aeson (FromJSON, withObject, (.:))
import Data.Aeson.Types (parseJSON)
import Data.Text qualified as T

Check warning on line 34 in app/Utils.hs

View workflow job for this annotation

GitHub Actions / build

The qualified import of ‘Data.Text’ is redundant
import Data.Time (UTCTime)
import Data.Time.Format.ISO8601 (iso8601Show)

Check warning on line 36 in app/Utils.hs

View workflow job for this annotation

GitHub Actions / build

The import of ‘Data.Time.Format.ISO8601’ is redundant
import GitHub.Data qualified as GH

emptyOwner :: GH.SimpleOwner
Expand Down Expand Up @@ -66,5 +85,53 @@ emptyRepo =

-- | Run an action on every element of a list, one after another,
-- pausing after each call.
--
-- The first argument is the pause in milliseconds; it is multiplied by 1000
-- before being handed to 'threadDelay', which expects microseconds.
-- The pause also happens after the last element.
mapMSequentially :: Int -> (a -> IO b) -> [a] -> IO [b]
mapMSequentially delayInMs f =
  mapM runThenPause
  where
    pauseInMicros = delayInMs * 1000

    runThenPause x = do
      result <- f x
      liftIO $ threadDelay pauseInMicros
      pure result

{- | Simplified stand-in for 'GH.Repo' that makes it easier
to load data from the GitHub GraphQL API.
Each field is filled from one key of the decoded JSON object.
-}
data RepoObject = RepoObject
{ owner :: Text -- ^ Owner login, read from the nested @owner.login@ value
, name :: Text -- ^ Repository name, read from @name@
, githubId :: Int -- ^ Numeric id, read from @databaseId@
, stargazerCount :: Int -- ^ Read from @stargazerCount@
, description :: Maybe Text -- ^ Read from @description@
, homepageUrl :: Maybe Text -- ^ Read from @homepageUrl@
, issuesCount :: Int -- ^ Read from the nested @issues.totalCount@ value
, createdAt :: UTCTime -- ^ Read from @createdAt@
, updatedAt :: UTCTime -- ^ Read from @updatedAt@
}
deriving (Show, Eq, Generic)

-- | Decode a 'RepoObject' from a JSON object.
--
-- Most fields map directly to a key of the same name. The owner login and
-- the issue count are nested one level deep (@owner.login@ and
-- @issues.totalCount@).
instance FromJSON RepoObject where
  parseJSON = withObject "RepoObject" $ \node -> do
    ownerLogin <- nested node "owner" "login"
    repoName <- node .: "name"
    databaseId <- node .: "databaseId"
    stars <- node .: "stargazerCount"
    summary <- node .: "description"
    homepage <- node .: "homepageUrl"
    openIssues <- nested node "issues" "totalCount"
    created <- node .: "createdAt"
    updated <- node .: "updatedAt"

    pure
      RepoObject
        { owner = ownerLogin
        , name = repoName
        , githubId = databaseId
        , stargazerCount = stars
        , description = summary
        , homepageUrl = homepage
        , issuesCount = openIssues
        , createdAt = created
        , updatedAt = updated
        }
    where
      -- Look up @outer@ in the object, then @inner@ in the object found there
      nested obj outer inner = obj .: outer >>= (.: inner)

-- | Build a 'GH.Repo' from a 'RepoObject'.
--
-- Starts from 'emptyRepo' and overrides only the fields that 'RepoObject'
-- carries; every other field keeps the value it has in 'emptyRepo'.
repoObjectToRepo :: RepoObject -> GH.Repo
repoObjectToRepo obj =
  emptyRepo
    { GH.repoOwner = ownerRecord
    , GH.repoName = GH.mkRepoName obj.name
    , GH.repoId = repoIdent
    , GH.repoHomepage = obj.homepageUrl
    , GH.repoDescription = obj.description
    , GH.repoStargazersCount = obj.stargazerCount
    , GH.repoOpenIssuesCount = obj.issuesCount
    , GH.repoCreatedAt = Just obj.createdAt
    , GH.repoUpdatedAt = Just obj.updatedAt
    }
  where
    -- Owner record with only the login filled in
    ownerRecord =
      emptyOwner
        { GH.simpleOwnerLogin = GH.mkOwnerName obj.owner
        }

    repoIdent = GH.mkId (Proxy :: Proxy GH.Repo) obj.githubId
1 change: 1 addition & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ default-extensions:
- NoImplicitPrelude
- OverloadedRecordDot
- OverloadedStrings
- RecordWildCards

ghc-options:
- -Wall
Expand Down
4 changes: 3 additions & 1 deletion repos-uploader.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ cabal-version: 2.2
--
-- see: https://github.com/sol/hpack
--
-- hash: d8c7ea98d1b7d2a39392e05c7048b9f8f54bf3b52a01a408ef7b0a12540cb7ad
-- hash: 490342bfbe157db93933ef67d7545824abe9227ba1f9a84434f7fe37c1a70072

name: repos-uploader
version: 0.0.0.0
Expand Down Expand Up @@ -35,6 +35,7 @@ library
NoImplicitPrelude
OverloadedRecordDot
OverloadedStrings
RecordWildCards
ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints -fno-warn-orphans
build-depends:
aeson
Expand Down Expand Up @@ -64,6 +65,7 @@ executable repos-uploader
NoImplicitPrelude
OverloadedRecordDot
OverloadedStrings
RecordWildCards
ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints -fno-warn-orphans
build-depends:
aeson
Expand Down

0 comments on commit 67e2c43

Please sign in to comment.