From 1a6fe59db19217e3f561a4b4d515511ec83a452a Mon Sep 17 00:00:00 2001 From: Jason Funk Date: Tue, 19 Apr 2022 10:23:48 +0300 Subject: [PATCH 1/7] Reimplement a basic version of streaming large files. Based on PR#93 --- src/Commands/Load.php | 10 ++- src/Exceptions/CannotLoadSnapshot.php | 13 +++ src/Snapshot.php | 122 +++++++++++++++++++++++++- tests/Commands/LoadTest.php | 34 +++++++ 4 files changed, 174 insertions(+), 5 deletions(-) create mode 100644 src/Exceptions/CannotLoadSnapshot.php diff --git a/src/Commands/Load.php b/src/Commands/Load.php index 2726014..7f1e9ff 100644 --- a/src/Commands/Load.php +++ b/src/Commands/Load.php @@ -12,7 +12,7 @@ class Load extends Command use AsksForSnapshotName; use ConfirmableTrait; - protected $signature = 'snapshot:load {name?} {--connection=} {--force} --disk {--latest} {--drop-tables=1}'; + protected $signature = 'snapshot:load {name?} {--connection=} {--force} {--stream} {--progress} --disk {--latest} {--drop-tables=1}'; protected $description = 'Load up a snapshot.'; @@ -45,6 +45,14 @@ public function handle() return; } + if ($this->option('stream') ?: false) { + $snapshot->useStream(); + } + + if ($this->option('progress') ?: false) { + $snapshot->showProgressBar(); + } + $snapshot->load($this->option('connection'), (bool) $this->option('drop-tables')); $this->info("Snapshot `{$name}` loaded!"); diff --git a/src/Exceptions/CannotLoadSnapshot.php b/src/Exceptions/CannotLoadSnapshot.php new file mode 100644 index 0000000..abc8cb9 --- /dev/null +++ b/src/Exceptions/CannotLoadSnapshot.php @@ -0,0 +1,13 @@ +disk = $disk; @@ -36,6 +46,20 @@ public function __construct(Disk $disk, string $fileName) $this->name = pathinfo($fileName, PATHINFO_FILENAME); } + public function useStream() + { + $this->useStream = true; + + return $this; + } + + public function showProgressBar() + { + $this->showProgress = true; + + return $this; + } + public function load(string $connectionName = null, bool $dropTables = true): void { event(new 
LoadingSnapshot($this)); @@ -48,15 +72,105 @@ public function load(string $connectionName = null, bool $dropTables = true): vo $this->dropAllCurrentTables(); } + $this->useStream ? $this->loadStream($connectionName) : $this->loadAsync($connectionName); + + event(new LoadedSnapshot($this)); + } + + protected function getUncompressedLocalSnapshotPath(): string + { + $stream = $this->disk->readStream($this->fileName); + + $uncompressedFilePath = (new TemporaryDirectory(config('db-snapshots.temporary_directory_path'))) + ->create() + ->path('snapshot.sql'); + + $fileDest = fopen($uncompressedFilePath, 'w'); + + while (feof($stream) !== true) { + fwrite($fileDest, gzdecode(gzread($stream, self::STREAM_BUFFER_SIZE))); + } + + fclose($fileDest); + + $this->disk = Storage::disk('local'); + return $uncompressedFilePath; + } + + protected function loadAsync(string $connectionName = null) + { $dbDumpContents = $this->disk->get($this->fileName); if ($this->compressionExtension === 'gz') { $dbDumpContents = gzdecode($dbDumpContents); } + if ($this->showProgress) { + $bar = $this->output->createProgressBar(1); + $bar->start(); + } + DB::connection($connectionName)->unprepared($dbDumpContents); - event(new LoadedSnapshot($this)); + if ($this->showProgress) { + $bar->progress(); + $bar->finish(); + } + } + + protected function isASqlComment(string $line): bool + { + return substr($line, 0, 2) === '--'; + } + + protected function shouldIgnoreLine(string $line): bool + { + $line = trim($line); + return empty($line) || $this->isASqlComment($line); + } + + protected function loadStream(string $connectionName = null) + { + $snapshotFilePath = $this->compressionExtension === 'gz' + ? 
$this->getUncompressedLocalSnapshotPath() + : $this->disk->path($this->fileName); + + if (!is_readable($snapshotFilePath)) { + throw CannotLoadSnapshot::fileNotReadable($snapshotFilePath); + } + + if ($this->showProgress) { + $bar = $this->output->createProgressBar(filesize($snapshotFilePath)); + $bar->start(); + } + + LazyCollection::make(function() use ($snapshotFilePath) { + $handle = fopen($snapshotFilePath, 'r'); + + $statement = ''; + while (($line = fgets($handle)) !== false) { + if ($this->showProgress) { + $bar->advance(strlen($line)); + } + + if ($this->shouldIgnoreLine($line)) { + continue; + } + + $statement .= $line; + + if (substr(trim($statement), -1, 1) === ';') { + yield $statement; + $statement = ''; + } + } + })->each(function (string $statement) use($connectionName) { + DB::connection($connectionName)->unprepared($statement); + }); + + if ($this->showProgress) { + $bar->finish(); + } } public function delete(): void diff --git a/tests/Commands/LoadTest.php b/tests/Commands/LoadTest.php index ec513ff..9798338 100644 --- a/tests/Commands/LoadTest.php +++ b/tests/Commands/LoadTest.php @@ -38,6 +38,40 @@ public function it_can_load_a_snapshot() $this->assertSnapshotLoaded('snapshot2'); } + /** @test */ + public function it_can_load_a_snapshot_via_streaming() + { + $this->assertSnapshotNotLoaded('snapshot2'); + + $this->command + ->shouldReceive('choice') + ->once() + ->andReturn('snapshot2'); + + Artisan::call('snapshot:load', [ + '--stream' => true + ]); + + $this->assertSnapshotLoaded('snapshot2'); + } + + /** @test */ + public function it_can_load_a_compressed_snapshot_via_streaming() + { + $this->assertSnapshotNotLoaded('snapshot4'); + + $this->command + ->shouldReceive('choice') + ->once() + ->andReturn('snapshot4'); + + Artisan::call('snapshot:load', [ + '--stream' => true + ]); + + $this->assertSnapshotLoaded('snapshot4'); + } + /** @test */ public function it_drops_tables_when_loading_a_snapshot() { From 
91c11225906483495fd7ce288423cd03084d2a1a Mon Sep 17 00:00:00 2001 From: Jason Funk Date: Tue, 19 Apr 2022 13:56:30 +0300 Subject: [PATCH 2/7] Remove the progress bar. KISS --- src/Commands/Load.php | 6 +----- src/Snapshot.php | 35 ++--------------------------------- 2 files changed, 3 insertions(+), 38 deletions(-) diff --git a/src/Commands/Load.php b/src/Commands/Load.php index 7f1e9ff..9d12104 100644 --- a/src/Commands/Load.php +++ b/src/Commands/Load.php @@ -12,7 +12,7 @@ class Load extends Command use AsksForSnapshotName; use ConfirmableTrait; - protected $signature = 'snapshot:load {name?} {--connection=} {--force} {--stream} {--progress} --disk {--latest} {--drop-tables=1}'; + protected $signature = 'snapshot:load {name?} {--connection=} {--force} {--stream} --disk {--latest} {--drop-tables=1}'; protected $description = 'Load up a snapshot.'; @@ -49,10 +49,6 @@ public function handle() $snapshot->useStream(); } - if ($this->option('progress') ?: false) { - $snapshot->showProgressBar(); - } - $snapshot->load($this->option('connection'), (bool) $this->option('drop-tables')); $this->info("Snapshot `{$name}` loaded!"); diff --git a/src/Snapshot.php b/src/Snapshot.php index 65c9d3b..d9d8ea1 100644 --- a/src/Snapshot.php +++ b/src/Snapshot.php @@ -4,6 +4,7 @@ use Carbon\Carbon; use Illuminate\Support\Facades\DB; +use Illuminate\Console\OutputStyle; use Illuminate\Support\LazyCollection; use Illuminate\Support\Facades\Storage; use Spatie\DbSnapshots\Events\LoadedSnapshot; @@ -26,8 +27,6 @@ class Snapshot private bool $useStream = false; - private bool $showProgress = false; - const STREAM_BUFFER_SIZE = 16384; public function __construct(Disk $disk, string $fileName) @@ -53,13 +52,6 @@ public function useStream() return $this; } - public function showProgressBar() - { - $this->showProgress = true; - - return $this; - } - public function load(string $connectionName = null, bool $dropTables = true): void { event(new LoadingSnapshot($this)); @@ -105,17 +97,7 @@ 
protected function loadAsync(string $connectionName = null) $dbDumpContents = gzdecode($dbDumpContents); } - if ($this->showProgress) { - $bar = $this->output->createProgressBar(1); - $bar->start(); - } - DB::connection($connectionName)->unprepared($dbDumpContents); - - if ($this->showProgress) { - $bar->progress(); - $bar->finish(); - } } protected function isASqlComment(string $line): bool @@ -139,20 +121,11 @@ protected function loadStream(string $connectionName = null) throw CannotLoadSnapshot::fileNotReadable($snapshotFilePath); } - if ($this->showProgress) { - $bar = $this->output->createProgressBar(filesize($snapshotFilePath)); - $bar->start(); - } - - LazyCollection::make(function() use ($snapshotFilePath) { + LazyCollection::make(function() use ($snapshotFilePath, $bar) { $handle = fopen($snapshotFilePath, 'r'); $statement = ''; while (($line = fgets($handle)) !== false) { - if ($this->showProgress) { - $bar->advance(strlen($line)); - } - if ($this->shouldIgnoreLine($line)) { continue; } @@ -167,10 +140,6 @@ protected function loadStream(string $connectionName = null) })->each(function (string $statement) use($connectionName) { DB::connection($connectionName)->unprepared($statement); }); - - if ($this->showProgress) { - $bar->finish(); - } } public function delete(): void From 7ea13ba80062a60bf2625993c4c126bb6347c917 Mon Sep 17 00:00:00 2001 From: Jason Funk Date: Tue, 19 Apr 2022 14:10:29 +0300 Subject: [PATCH 3/7] No longer need this exception. 
--- src/Exceptions/CannotLoadSnapshot.php | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 src/Exceptions/CannotLoadSnapshot.php diff --git a/src/Exceptions/CannotLoadSnapshot.php b/src/Exceptions/CannotLoadSnapshot.php deleted file mode 100644 index abc8cb9..0000000 --- a/src/Exceptions/CannotLoadSnapshot.php +++ /dev/null @@ -1,13 +0,0 @@ - Date: Tue, 19 Apr 2022 14:11:17 +0300 Subject: [PATCH 4/7] Move the gzdecoding inline instead of downloading and decoding the whole file --- src/Snapshot.php | 70 ++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/src/Snapshot.php b/src/Snapshot.php index d9d8ea1..d0fe450 100644 --- a/src/Snapshot.php +++ b/src/Snapshot.php @@ -4,7 +4,6 @@ use Carbon\Carbon; use Illuminate\Support\Facades\DB; -use Illuminate\Console\OutputStyle; use Illuminate\Support\LazyCollection; use Illuminate\Support\Facades\Storage; use Spatie\DbSnapshots\Events\LoadedSnapshot; @@ -13,7 +12,6 @@ use Spatie\DbSnapshots\Events\DeletingSnapshot; use Spatie\TemporaryDirectory\TemporaryDirectory; use Illuminate\Filesystem\FilesystemAdapter as Disk; -use Spatie\DbSnapshots\Exceptions\CannotLoadSnapshot; class Snapshot { @@ -69,26 +67,6 @@ public function load(string $connectionName = null, bool $dropTables = true): vo event(new LoadedSnapshot($this)); } - protected function getUncompressedLocalSnapshotPath(): string - { - $stream = $this->disk->readStream($this->fileName); - - $uncompressedFilePath = (new TemporaryDirectory(config('db-snapshots.temporary_directory_path'))) - ->create() - ->path('snapshot.sql'); - - $fileDest = fopen($uncompressedFilePath, 'w'); - - while (feof($stream) !== true) { - fwrite($fileDest, gzdecode(gzread($stream, self::STREAM_BUFFER_SIZE))); - } - - fclose($fileDest); - - $this->disk = Storage::disk('local'); - return $uncompressedFilePath; - } - protected function loadAsync(string $connectionName = null) { $dbDumpContents = 
$this->disk->get($this->fileName); @@ -113,29 +91,39 @@ protected function shouldIgnoreLine(string $line): bool protected function loadStream(string $connectionName = null) { - $snapshotFilePath = $this->compressionExtension === 'gz' - ? $this->getUncompressedLocalSnapshotPath() - : $this->disk->path($this->fileName); - - if (!is_readable($snapshotFilePath)) { - throw CannotLoadSnapshot::fileNotReadable($snapshotFilePath); - } - - LazyCollection::make(function() use ($snapshotFilePath, $bar) { - $handle = fopen($snapshotFilePath, 'r'); + LazyCollection::make(function() { + $stream = $this->disk->readStream($this->fileName); $statement = ''; - while (($line = fgets($handle)) !== false) { - if ($this->shouldIgnoreLine($line)) { - continue; + while(!feof($stream)) { + $chunk = $this->compressionExtension === 'gz' + ? gzdecode(gzread($stream, self::STREAM_BUFFER_SIZE)) + : fread($stream, self::STREAM_BUFFER_SIZE); + + $lines = explode("\n", $chunk); + foreach($lines as $idx => $line) { + if ($this->shouldIgnoreLine($line)) { + continue; + } + + $statement .= $line; + + // Carry-over the last line to the next chunk since it + // is possible that this chunk finished mid-line right on + // a semi-colon. 
+ if (count($lines) == $idx + 1) { + break; + } + + if (substr(trim($statement), -1, 1) === ';') { + yield $statement; + $statement = ''; + } } + } - $statement .= $line; - - if (substr(trim($statement), -1, 1) === ';') { - yield $statement; - $statement = ''; - } + if (substr(trim($statement), -1, 1) === ';') { + yield $statement; } })->each(function (string $statement) use($connectionName) { DB::connection($connectionName)->unprepared($statement); From f25d0c087bf79cd3bf0b16490b265257b3bccf3e Mon Sep 17 00:00:00 2001 From: Jason Funk Date: Tue, 19 Apr 2022 14:15:13 +0300 Subject: [PATCH 5/7] Updated the readme --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index b0646a9..97c24be 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,12 @@ By default, `snapshot:load` will drop all existing tables in the database. If yo php artisan snapshot:load my-first-dump --drop-tables=0 ``` +By default, `snapshot:load` will load the entire snapshot into memory which may cause memory problems on large files. To get avoid this, you can pass the `--stream` option to stream the snapshot to the database one statement at a time: + +```bash +php artisan snapshot:load my-first-dump --stream +``` + To list all the dumps run: ```bash From 18e11f3dd56c00dcf182fbc836f099772925d91b Mon Sep 17 00:00:00 2001 From: Jason Funk Date: Tue, 19 Apr 2022 14:16:11 +0300 Subject: [PATCH 6/7] Updated the readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 97c24be..1ce8b7e 100644 --- a/README.md +++ b/README.md @@ -168,7 +168,7 @@ By default, `snapshot:load` will drop all existing tables in the database. If yo php artisan snapshot:load my-first-dump --drop-tables=0 ``` -By default, `snapshot:load` will load the entire snapshot into memory which may cause memory problems on large files. 
To get avoid this, you can pass the `--stream` option to stream the snapshot to the database one statement at a time: +By default, `snapshot:load` will load the entire snapshot into memory which may cause problems when using large files. To get avoid this, you can pass the `--stream` option to stream the snapshot to the database one statement at a time: ```bash php artisan snapshot:load my-first-dump --stream From 1bc963254074c0723471a43b0d7acc4d8c2e66f3 Mon Sep 17 00:00:00 2001 From: Jason Funk Date: Tue, 19 Apr 2022 14:17:53 +0300 Subject: [PATCH 7/7] Updated the readme (typo) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1ce8b7e..37f3b65 100644 --- a/README.md +++ b/README.md @@ -168,7 +168,7 @@ By default, `snapshot:load` will drop all existing tables in the database. If yo php artisan snapshot:load my-first-dump --drop-tables=0 ``` -By default, `snapshot:load` will load the entire snapshot into memory which may cause problems when using large files. To get avoid this, you can pass the `--stream` option to stream the snapshot to the database one statement at a time: +By default, `snapshot:load` will load the entire snapshot into memory which may cause problems when using large files. To avoid this, you can pass the `--stream` option to stream the snapshot to the database one statement at a time: ```bash php artisan snapshot:load my-first-dump --stream