commune: Introduce concept of `App` monad

By the very nature of the `commune` project, almost every piece of code needs
to be able to run SQL and to be able to make API requests described as Servant
type. It is just easier to do it using `ReaderT` monad than passing components
explicitly.

I considered passing the environment explicitly, but GHC's support for
polymorphic field values is sketchy, so it is better that `servant` and
`sqlite` functions are defined at top-level of `Commune.Main`.

Entry point in `main` will still have to deal with packing polymorphic function
into `Hoist` newtype, but that will be one-time pain.

 * package.yaml: Add dependency on `transformers` for `ReaderT` type.
 * src/Commune/Main.hs: Defined `App` monad and `servant`/`sqlite` functions
   that return values in it.
 * src/Commune/Main/Command/Fetch.hs: Rewrite code in terms of `App` monad instead
   of passing `conn` and `servant` explicitly.
This commit is contained in:
Dmitry Bogatov 2023-09-10 13:51:09 -04:00
parent 6aee19ba76
commit b930e87ab8
3 changed files with 90 additions and 46 deletions

View File

@ -90,3 +90,4 @@ library:
- file-embed
- split
- http-media
- transformers

View File

@ -0,0 +1,41 @@
module Commune.Main
( App
, App'(..)
, Env
, Env'(..)
, Hoist(..)
, servant
, sqlite
) where
import Commune.Beam.Prelude
import Control.Monad.Trans.Reader
import Servant.Client (ClientM)
import Control.Monad.Trans.Class
newtype Hoist m n = Hoist { hoist :: forall a. m a -> n a }
type Env' m = Record
'[ "sqlite" := Hoist SqliteM m
, "servant" := Hoist ClientM m
]
type Env = Env' IO
{- It is just less typing to work in ReaderT than pass components of the
- environment explicitly.
-
- In some cases that means that sub-command handler will receive more of
- context than it really needs, but I think this is acceptable sacrifice for
- unifying types of the "process" function for different sub-commands.
-}
type App' m a = ReaderT (Env' m) m a
type App a = App' IO a
servant :: Monad m => ClientM a -> App' m a
servant m = do
env <- ask
lift $ hoist env.servant m
sqlite :: Monad m => SqliteM a -> App' m a
sqlite m = do
env <- ask
lift $ hoist env.sqlite m

View File

@ -1,4 +1,7 @@
module Commune.Main.Command.Fetch (Options, options, process) where
import Control.Monad.Trans.Reader (runReaderT)
import Commune.Beam.Prelude
import Commune.Main
import Commune.API.GitHub qualified as GitHub
import Commune.API.GitHub.Routes.Repos (Repo, RepoName(..), ListForUserOpts)
import Commune.API.GitHub.Routes.Users (UserLogin(..))
@ -14,16 +17,11 @@ import Commune.SQL.GitHub qualified as SQL
import Commune.Types.Page (Page (..))
import Control.Monad (when, forM_, forM, unless)
import Data.Map qualified as Map
import Data.Int (Int64)
import Data.List.NonEmpty as NE
import Data.Maybe.Extended (isJust, fromJust)
import Data.Set qualified as Set
import Data.Set (Set)
import Data.Text (Text)
import Data.Text qualified as T
import Data.Text.IO qualified as TIO
import Data.Word (Word64)
import Database.Beam.Sqlite (runBeamSqlite)
import Database.SQLite.Simple qualified as SQL
import Options.Applicative qualified as O
import Servant.Client
@ -41,9 +39,9 @@ iterateOver field init fetch savepoint cont = go init
savepoint next
when (next > init) $ go next
processIssues :: SQL.Connection -> (forall a. ClientM a -> IO a) -> UserLogin -> RepoName -> IO ()
processIssues conn servant owner name = do
Just (RepoT.RepoId repo, since, savepoint) <- runBeamSqlite conn $ CheckPointT.get database owner name #issue
processIssues :: UserLogin -> RepoName -> App ()
processIssues owner name = do
Just (RepoT.RepoId repo, since, savepoint) <- sqlite $ CheckPointT.get database owner name #issue
let opts = ANON { state = Just #all
, sort = Just #updated
, direction = Just #asc
@ -55,7 +53,7 @@ processIssues conn servant owner name = do
-- This is excessive, but will work for now. Ideally pull requests would be
-- listed separately and diff downloaded only if sha1 of the head changed.
timestamps <- forM issues $ \issue -> do
saveUser conn issue.user.id.unwrapUserId issue.user.login
sqlite $ saveUser issue.user.id.unwrapUserId issue.user.login
let new = IssueT.Issue
{ IssueT.id = issue.id.unwrapIssueId
, IssueT.repo = RepoT.RepoId repo
@ -68,15 +66,13 @@ processIssues conn servant owner name = do
, IssueT.updatedAt = issue.updated_at
, IssueT.createdAt = issue.created_at
}
runBeamSqlite conn $ IssueT.upsert database new
sqlite $ IssueT.upsert database new
pure issue.updated_at
-- Save the progress.
let latest = maximum timestamps
runBeamSqlite conn $ savepoint latest
sqlite $ savepoint latest
pure (True, ())
paginate :: Monad m => (Page -> m [a]) -> v -> (v -> NonEmpty a -> m (Bool, v)) -> m v
paginate fetch init handle = go 1 init
where
@ -88,11 +84,11 @@ paginate fetch init handle = go 1 init
(continue, acc') <- handle acc e
if continue then go (page + 1) acc' else pure acc'
saveUser :: SQL.Connection -> Int64 -> UserLogin -> IO ()
saveUser conn id login = runBeamSqlite conn $ SQL.upsertUser (UserT.User id login.unwrapUserLogin)
saveUser :: Int64 -> UserLogin -> SqliteM ()
saveUser id login = SQL.upsertUser (UserT.User id login.unwrapUserLogin)
processIssueComments :: SQL.Connection -> (forall a. ClientM a -> IO a) -> UserLogin -> RepoName -> IO ()
processIssueComments conn servant owner name =
processIssueComments :: UserLogin -> RepoName -> App ()
processIssueComments owner name =
let fetch ts = servant $ GitHub.api.issues.listCommentsForRepo owner name opts (Page 1)
where opts = ANON { since = Just ts, sort = Just #updated, direction = Just #asc, per_page = Just 100 }
use repo comments = do
@ -107,7 +103,7 @@ processIssueComments conn servant owner name =
users = Map.elems . Map.fromList . NE.toList $ fmap entry comments
numbers = Set.toList . Set.fromList . NE.toList $ fmap (\c -> issue_number c.html_url) comments
issueIds <- runBeamSqlite conn $ IssueT.idMap database (RepoT.RepoId repo) numbers
issueIds <- sqlite $ IssueT.idMap database (RepoT.RepoId repo) numbers
let issue c = IssueCommentT.IssueComment
{ IssueCommentT.id = c.id.unwrapIssueCommentId
, IssueCommentT.user = UserT.UserId c.user.id.unwrapUserId
@ -118,11 +114,11 @@ processIssueComments conn servant owner name =
}
issues = NE.toList $ fmap issue comments
runBeamSqlite conn $ UserT.upsert database users
runBeamSqlite conn $ IssueCommentT.upsert database issues
sqlite $ UserT.upsert database users
sqlite $ IssueCommentT.upsert database issues
in do
Just (RepoT.RepoId repo, since, savepoint) <- runBeamSqlite conn $ CheckPointT.get database owner name #issue_comment
iterateOver (.updated_at) since fetch (runBeamSqlite conn . savepoint) (use repo)
Just (RepoT.RepoId repo, since, savepoint) <- sqlite $ CheckPointT.get database owner name #issue_comment
iterateOver (.updated_at) since fetch (sqlite . savepoint) (use repo)
data Options = Options
{ login :: UserLogin
@ -139,24 +135,24 @@ options = O.info parser opts
-- Fetch all repositories of the user, save them into the database and return
-- set of their names for further processing.
processRepos :: UserLogin -> (forall a. ClientM a -> IO a) -> SQL.Connection -> IO (Set RepoName)
processRepos login servant conn = do
processRepos :: UserLogin -> App (Set RepoName)
processRepos login = do
-- TODO: Use some logging framework instead of conditionally cluttering stdout.
TIO.putStrLn $ "Fetching list of repositories of user " <> login.unwrapUserLogin <> "..."
liftIO $ TIO.putStrLn $ "Fetching list of repositories of user " <> login.unwrapUserLogin <> "..."
paginate fetch Set.empty merge
where
-- In their infinite wisdom, GitHub did not implement `since` parameter
-- to this API, so we can't use more efficient `iterateOver` function.
fetch :: Page -> IO [Repo]
fetch :: Page -> App [Repo]
fetch page = servant $ GitHub.api.repos.listForUser login opts
where
opts :: ListForUserOpts
opts = ANON { page = page, per_page = 100 }
merge :: Set RepoName -> NonEmpty Repo -> IO (Bool, Set RepoName)
merge :: Set RepoName -> NonEmpty Repo -> App (Bool, Set RepoName)
merge acc repos@(r :| _) = do
when (Set.null acc) $ do
saveUser conn r.owner.id.unwrapUserId r.owner.login
sqlite $ saveUser r.owner.id.unwrapUserId r.owner.login
let repoT x = RepoT.Repo
{ RepoT.id = x.id.unwrapRepoId
@ -165,33 +161,39 @@ processRepos login servant conn = do
}
names = fmap (.name) repos
runBeamSqlite conn $ RepoT.upsert database $ NE.toList (fmap repoT repos)
sqlite $ RepoT.upsert database $ NE.toList (fmap repoT repos)
pure (True, acc `Set.union` Set.fromList (NE.toList names))
downloadDiffs :: SQL.Connection -> (forall a. ClientM a -> IO a) -> UserLogin -> RepoName -> IO ()
downloadDiffs conn servant login repo = go
downloadDiffs :: UserLogin -> RepoName -> App ()
downloadDiffs login repo = go
where
go = do
missing <- runBeamSqlite conn $ PullRequestT.withMissingDiff database login repo
missing <- sqlite $ PullRequestT.withMissingDiff database login repo
unless (null missing) $ do
pulls <- forM missing $ \(issueNumber, issueId) -> do
TIO.putStrLn $ "Downloading diff of " <> login.unwrapUserLogin <> "/" <> repo.unwrapRepoName <> "#" <> T.pack (show issueNumber.unwrapIssueNumber)
liftIO $ TIO.putStrLn $ "Downloading diff of " <> login.unwrapUserLogin <> "/" <> repo.unwrapRepoName <> "#" <> T.pack (show issueNumber.unwrapIssueNumber)
Diff diff <- servant $ GitHub.api.pulls.get.diff login repo issueNumber
pure $ PullRequestT.PullRequest issueId diff
runBeamSqlite conn $ PullRequestT.upsert database pulls
sqlite $ PullRequestT.upsert database pulls
go
process' :: Options -> App ()
process' (Options { login, repo }) = do
repoNames <- case repo of
Just name -> sqlite (SQL.existsRepo login.unwrapUserLogin name) >>= \case
Just _ -> pure $ Set.singleton (RepoName name)
Nothing -> processRepos login
Nothing -> processRepos login
forM_ (Set.toList repoNames) $ \r -> do
liftIO $ TIO.putStrLn $ "Fetching issues for " <> login.unwrapUserLogin <> "/" <> r.unwrapRepoName
processIssues login r
processIssueComments login r
downloadDiffs login r
process :: Options -> (forall a. ClientM a -> IO a) -> SQL.Connection -> IO ()
process (Options { login, repo }) servant conn = do
repoNames <- case repo of
Just name -> runBeamSqlite conn (SQL.existsRepo login.unwrapUserLogin name) >>= \case
Just _ -> pure $ Set.singleton (RepoName name)
Nothing -> processRepos login servant conn
Nothing -> processRepos login servant conn
forM_ (Set.toList repoNames) $ \r -> do
TIO.putStrLn $ "Fetching issues for " <> login.unwrapUserLogin <> "/" <> r.unwrapRepoName
processIssues conn servant login r
processIssueComments conn servant login r
downloadDiffs conn servant login r
process opts servant conn =
let
env = ANON { sqlite = Hoist $ runBeamSqlite conn, servant = Hoist servant }
in runReaderT (process' opts) env