Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent double cloning of git repos when fetching them in parallel #1291

Merged
merged 8 commits into from
Sep 28, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion src/Spago/Command/Fetch.purs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,14 @@ import Data.Either as Either
import Data.FunctorWithIndex (mapWithIndex)
import Data.HTTP.Method as Method
import Data.Int as Int
import Data.List as List
import Data.Map as Map
import Data.Newtype (wrap)
import Data.Set as Set
import Data.String (joinWith)
import Data.Traversable (sequence)
import Effect.Aff as Aff
import Effect.Aff.AVar as AVar
import Effect.Ref as Ref
import Node.Buffer as Buffer
import Node.Encoding as Encoding
Expand Down Expand Up @@ -204,11 +206,27 @@ run { packages: packagesRequestedToInstall, ensureRanges, isTest, isRepl } = do
fetchPackagesToLocalCache :: ∀ a. Map PackageName Package -> Spago (FetchEnv a) Unit
fetchPackagesToLocalCache packages = do
{ offline } <- ask
-- Before starting to fetch packages we build a Map of AVars to act as locks for each git location.
-- This is so we don't have two threads trying to clone the same repo at the same time.
gitLocks <- liftAff $ map (Map.fromFoldable <<< List.catMaybes) $ for (Map.values packages) case _ of
GitPackage gitPackage -> (Just <<< Tuple gitPackage.git) <$> AVar.new unit
_ -> pure Nothing
parallelise $ packages # Map.toUnfoldable <#> \(Tuple name package) -> do
let localPackageLocation = Config.getPackageLocation name package
-- first of all, we check if we have the package in the local cache. If so, we don't even do the work
unlessM (FS.exists localPackageLocation) case package of
GitPackage gitPackage -> getGitPackageInLocalCache name gitPackage
GitPackage gitPackage -> do
-- for git repos it's a little more involved since cloning them takes a while and we risk race conditions
-- and possibly cloning the same repo multiple times - so we use a lock on the git url to prevent that
case Map.lookup gitPackage.git gitLocks of
Nothing -> do
-- This is not supposed to happen but we just move on if it does
getGitPackageInLocalCache name gitPackage
Just lock -> do
-- Take the lock, do the git thing, release the lock
liftAff $ AVar.take lock
getGitPackageInLocalCache name gitPackage
liftAff $ AVar.put unit lock
f-f marked this conversation as resolved.
Show resolved Hide resolved
RegistryVersion v -> do
-- if the version comes from the registry then we have a longer list of things to do
let versionString = Registry.Version.print v
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Reading Spago workspace configuration...

✓ Selecting package to build: consumer

Downloading dependencies...
Cloning <library-repo-path>
Building...
40 changes: 26 additions & 14 deletions test/Spago/Build/Monorepo.purs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import Test.Prelude
import Data.Array as Array
import Data.String (Pattern(..))
import Data.String as String
import Data.String.Regex as Regex
import Data.String.Regex.Flags as Regex.Flags
import Effect.Aff (bracket)
import Node.Path as Path
import Node.Process as Process
Expand Down Expand Up @@ -254,19 +256,20 @@ spec = Spec.describe "monorepo" do

Spec.it "#1208: clones a monorepo only once, even if multiple packages from it are needed" \{ spago, fixture, testCwd } -> do
-- A local file system Git repo to use as a remote for Spago to clone from
let createLibraryRepo = do
let libRepo = Path.concat [ Paths.paths.temp, "spago-1208" ]
whenM (FS.exists libRepo) $ rmRf libRepo
FS.copyTree { src: fixture "monorepo/1208-no-double-cloning/library", dst: libRepo }
git_ libRepo [ "init" ]
git_ libRepo [ "add", "." ]
git_ libRepo [ "config", "--global", "core.longpaths", "true" ]
git_ libRepo [ "config", "user.name", "test-user" ]
git_ libRepo [ "config", "user.email", "[email protected]" ]
git_ libRepo [ "commit", "-m", "Initial commit" ]
git_ libRepo [ "tag", "v1" ]
git_ libRepo [ "tag", "v2" ]
pure libRepo
let
createLibraryRepo = do
let libRepo = Path.concat [ Paths.paths.temp, "spago-1208" ]
whenM (FS.exists libRepo) $ rmRf libRepo
FS.copyTree { src: fixture "monorepo/1208-no-double-cloning/library", dst: libRepo }
git_ libRepo [ "init" ]
git_ libRepo [ "add", "." ]
git_ libRepo [ "config", "--global", "core.longpaths", "true" ]
git_ libRepo [ "config", "user.name", "test-user" ]
git_ libRepo [ "config", "user.email", "[email protected]" ]
git_ libRepo [ "commit", "-m", "Initial commit" ]
git_ libRepo [ "tag", "v1" ]
git_ libRepo [ "tag", "v2" ]
pure libRepo

bracket createLibraryRepo rmRf \libRepo -> do
let
Expand Down Expand Up @@ -302,7 +305,10 @@ spec = Spec.describe "monorepo" do
{ stdoutFile: Nothing
, stderrFile: Just $ fixture expectedFixture
, result
, sanitize: String.trim >>> String.replaceAll (String.Pattern libRepo) (String.Replacement "<library-repo-path>")
, sanitize:
String.replaceAll (String.Pattern libRepo) (String.Replacement "<library-repo-path>")
>>> Regex.replace (unsafeFromRight $ Regex.regex "Building...[\\s\\S]*" (Regex.Flags.global <> Regex.Flags.multiline)) "Building..."
>>> String.trim
}

-- First run `spago install` to make sure global cache is populated,
Expand Down Expand Up @@ -348,6 +354,12 @@ spec = Spec.describe "monorepo" do
shouldBeSuccessErr' "monorepo/1208-no-double-cloning/expected-stderr/four-deps.txt"
assertRefCheckedOut "lib4" "v4"

-- Lockfile test: when it's up to date but the cache is not populated (i.e. a fresh clone)
-- then there are no double clones. This is a regression test for #1206
spago [ "build" ] >>= shouldBeSuccess
rmRf ".spago"
spago [ "build" ] >>= shouldBeSuccessErr' "monorepo/1208-no-double-cloning/expected-stderr/lockfile-up-to-date.txt"

where
git_ cwd = void <<< git cwd

Expand Down
Loading