From 392b7be665ee33d9c1069ce64acf20aac2e5ba9b Mon Sep 17 00:00:00 2001 From: flynsarmy Date: Sun, 21 Jun 2015 00:02:59 +1000 Subject: [PATCH] Add tests, offset and column mapping support --- readme.md | 2 +- src/Flynsarmy/CsvSeeder/CsvSeeder.php | 235 ++++++++++++------ tests/CsvTest.php | 205 +++++++++++++++ tests/csvs/test.csv | 6 + .../2014_10_12_000000_create_users_table.php | 32 +++ 5 files changed, 398 insertions(+), 82 deletions(-) create mode 100644 tests/CsvTest.php create mode 100644 tests/csvs/test.csv create mode 100644 tests/migrations/2014_10_12_000000_create_users_table.php diff --git a/readme.md b/readme.md index 8f5a500..d412904 100644 --- a/readme.md +++ b/readme.md @@ -53,7 +53,7 @@ In addition to setting the database table and CSV filename, two other configurat - `insert_chunk_size` (int 500) An SQL insert statement will trigger every `insert_chunk_size` number of rows while reading the CSV - `csv_delimiter` (string ,) The CSV field delimiter. - - `hashable` (string password) Hash the hashable field, useful if you are importing users and need their passwords hashed. Uses `Hash::make()` + - `hashable` (string password) Hash the hashable field, useful if you are importing users and need their passwords hashed. Uses `Hash::make()`. Note: This is EXTREMELY SLOW. If you have a lot of rows in your CSV your import will take quite a long time. 
For example if you have a CSV with pipe delimited values, your constructor seed constructor will look like so: diff --git a/src/Flynsarmy/CsvSeeder/CsvSeeder.php b/src/Flynsarmy/CsvSeeder/CsvSeeder.php index 55a82bc..3be7c67 100644 --- a/src/Flynsarmy/CsvSeeder/CsvSeeder.php +++ b/src/Flynsarmy/CsvSeeder/CsvSeeder.php @@ -1,9 +1,10 @@ id, + * 2 => name, + * 3 => description, + * ) + * + * @var array + */ + public $mapping = []; /** @@ -59,111 +82,161 @@ class CsvSeeder extends Seeder */ public function run() { - $this->seedFromCSV($this->filename, $this->csv_delimiter); + $this->seedFromCSV($this->filename, $this->csv_delimiter); } /** * Strip UTF-8 BOM characters from the start of a string * * @param string $text - * * @return string String with BOM stripped */ - private function strip_utf8_bom( $text ) + public function stripUtf8Bom( $text ) { $bom = pack('H*','EFBBBF'); $text = preg_replace("/^$bom/", '', $text); - return $text; + + return $text; } + /** + * Opens a CSV file and returns it as a resource + * + * @param $filename + * @return FALSE|resource + */ + public function openCSV($filename) + { + if ( !file_exists($filename) || !is_readable($filename) ) + { + Log::error("CSV insert failed: CSV " . $filename . " does not exist or is not readable."); + return FALSE; + } + + // check if file is gzipped + $finfo = finfo_open(FILEINFO_MIME_TYPE); + $file_mime_type = finfo_file($finfo, $filename); + finfo_close($finfo); + $gzipped = strcmp($file_mime_type, "application/x-gzip") == 0; + + $handle = $gzipped ? 
gzopen($filename, 'r') : fopen($filename, 'r'); + + return $handle; + } + /** * Collect data from a given CSV file and return as array * - * @param $filename + * @param string $filename * @param string $deliminator * @return array|bool */ - private function seedFromCSV($filename, $deliminator = ",") + public function seedFromCSV($filename, $deliminator = ",") { - if ( !file_exists($filename) || !is_readable($filename) ) - { - Log::error("CSV insert failed: CSV " . $filename . " does not exist or is not readable."); - return FALSE; - } + $handle = $this->openCSV($filename); - // check if file is gzipped - $finfo = finfo_open(FILEINFO_MIME_TYPE); - $file_mime_type = finfo_file($finfo, $filename); - finfo_close($finfo); - $gzipped = strcmp($file_mime_type, "application/x-gzip") == 0; + // CSV doesn't exist or couldn't be read from. + if ( $handle === FALSE ) + return []; $header = NULL; $row_count = 0; - $data = array(); - $handle = $gzipped ? popen("gzip -cd " . $filename, 'r') : fopen($filename, 'r'); - - if ( $handle !== FALSE ) - { - while ( ($row = fgetcsv($handle, 0, $deliminator)) !== FALSE ) - { - - if ( !$header ) - { - $header = $row; - $header[0] = $this->strip_utf8_bom($header[0]); - } - else - { - // insert only non-empty fields from the csv file - $i = 0; - $row_values = []; - - foreach ($header as $key) { - if ($row[$i] === '') { - $row_values[$key] = NULL; - } - else { - $row_values[$key] = $row[$i]; - } - $i++; - } - - if(isset($row_values[$this->hashable])){ - $row_values[$this->hashable] = Hash::make($row_values[$this->hashable]); - } - - $data[$row_count] = $row_values; - - // Chunk size reached, insert - if ( ++$row_count == $this->insert_chunk_size ) - { - $this->run_insert($data); - $row_count = 0; - //clear the data array explicitly when it was inserted so that nothing is left, otherwise a leftover scenario can cause duplicate inserts - $data = array(); - } - } - } - - // Insert any leftover rows - //check if the data array explicitly if there 
are any values left to be inserted, if insert them - if ( count($data) ) - $this->run_insert($data); - - fclose($handle); - } + $data = []; + $mapping = $this->mapping ?: []; + $offset = $this->offset_rows; + + while ( ($row = fgetcsv($handle, 0, $deliminator)) !== FALSE ) + { + // Offset the specified number of rows + + while ( $offset > 0 ) + { + $offset--; + continue 2; + } + + // No mapping specified - grab the first CSV row and use it + if ( !$mapping ) + { + $mapping = $row; + $mapping[0] = $this->stripUtf8Bom($mapping[0]); + } + else + { + $row = $this->readRow($row, $mapping); + + // insert only non-empty rows from the csv file + if ( !$row ) + continue; + + $data[$row_count] = $row; + + // Chunk size reached, insert + if ( ++$row_count == $this->insert_chunk_size ) + { + $this->insert($data); + $row_count = 0; + // clear the data array explicitly when it was inserted so + // that nothing is left, otherwise a leftover scenario can + // cause duplicate inserts + $data = array(); + } + } + } + + // Insert any leftover rows + // Explicitly check whether any rows are still buffered and, if so, insert them + if ( count($data) ) + $this->insert($data); + + fclose($handle); return $data; } - private function run_insert( array $seedData ) + /** + * Read a CSV row into a DB insertable array + * + * @param array $row List of CSV columns + * @param array $mapping Array of csvCol => dbCol + * @return array + */ + public function readRow( array $row, array $mapping ) + { + $row_values = []; + + foreach ($mapping as $csvCol => $dbCol) { + if (!isset($row[$csvCol]) || $row[$csvCol] === '') { + $row_values[$dbCol] = NULL; + } + else { + $row_values[$dbCol] = $row[$csvCol]; + } + } + + if ($this->hashable && isset($row_values[$this->hashable])) { + $row_values[$this->hashable] = Hash::make($row_values[$this->hashable]); + } + + return $row_values; + } + + /** + * Seed a given set of data to the DB + * + * @param array $seedData + * @return bool TRUE on 
success else FALSE + */ + public function insert( array $seedData ) { try { - DB::table($this->table)->insert($seedData); + DB::table($this->table)->insert($seedData); } catch (\Exception $e) { - Log::error("CSV insert failed: " . $e->getMessage() . " - CSV " . $this->filename); + Log::error("CSV insert failed: " . $e->getMessage() . " - CSV " . $this->filename); + return FALSE; } + return TRUE; } } diff --git a/tests/CsvTest.php b/tests/CsvTest.php new file mode 100644 index 0000000..474e02d --- /dev/null +++ b/tests/CsvTest.php @@ -0,0 +1,205 @@ +artisan('migrate', [ + '--path' => 'vendor/flynsarmy/csv-seeder/tests/migrations', + ]); + + $this->beforeApplicationDestroyed(function () { + $this->artisan('migrate:rollback'); + }); + } + + /** + * Setup the test environment. + * + * @return void + */ + public function setUp() + { + parent::setUp(); + + // Use an in-memory DB + $this->app['config']->set('database.default', 'csvSeederTest'); + $this->app['config']->set('database.connections.csvSeederTest', [ + 'driver' => 'sqlite', + 'database' => ':memory:', + 'prefix' => '', + ]); + } + + public function testBOMIsStripped() + { + $seeder = new \Flynsarmy\CsvSeeder\CsvSeeder; + + $bomString = chr(239) . chr(187) . chr(191) . 
"foo"; + $nonBomString = "my non bom string"; + + // Test a BOM string + $expected = "foo"; + $actual = $seeder->stripUtf8Bom($bomString); + $this->assertEquals($expected, $actual); + + // Test a non BOM string + $expected = $nonBomString; + $actual = $seeder->stripUtf8Bom($nonBomString); + $this->assertEquals($expected, $actual); + } + + public function testMappings() + { + $seeder = new \Flynsarmy\CsvSeeder\CsvSeeder; + $row = [1, 'ignored', 'first', 'last']; + + // Test no skipped columns + $mapping = [ + 0 => 'id', + 1 => 'ignored', + 2 => 'first_name', + 3 => 'last_name', + ]; + $actual = $seeder->readRow($row, $mapping); + $expected = [ + 'id' => 1, + 'ignored' => 'ignored', + 'first_name' => 'first', + 'last_name' => 'last', + ]; + $this->assertEquals($expected, $actual); + + // Test a skipped column + $mapping = [ + 0 => 'id', + 2 => 'first_name', + 3 => 'last_name', + ]; + $actual = $seeder->readRow($row, $mapping); + $expected = [ + 'id' => 1, + 'first_name' => 'first', + 'last_name' => 'last', + ]; + $this->assertEquals($expected, $actual); + + // Test a non-existant column + $mapping = [ + 0 => 'id', + 2 => 'first_name', + 99 => 'last_name', + ]; + $actual = $seeder->readRow($row, $mapping); + $expected = [ + 'id' => 1, + 'first_name' => 'first', + 'last_name' => null, + ]; + $this->assertEquals($expected, $actual); + } + + public function testCanOpenCSV() + { + $seeder = new \Flynsarmy\CsvSeeder\CsvSeeder; + + // Test an openable CSV + $expected = "resource"; + $actual = $seeder->openCSV(__DIR__.'/csvs/test.csv'); + $this->assertInternalType($expected, $actual); + + // Test a non-openable CSV + $expected = FALSE; + $actual = $seeder->openCSV(__DIR__.'/csvs/test_that_does_not_exist.csv'); + $this->assertEquals($expected, $actual); + } + + public function testImport() + { + $seeder = new \Flynsarmy\CsvSeeder\CsvSeeder; + $seeder->table = 'users'; + $seeder->filename = __DIR__.'/csvs/test.csv'; + $seeder->hashable = ''; + $seeder->run(); + + // Make sure 
the rows imported + $this->seeInDatabase('users', [ + 'id' => 1, + 'first_name' => 'Abe', + 'last_name' => 'Abeson', + 'email' => 'abe.abeson@foo.com', + 'age' => 50, + ]); + $this->seeInDatabase('users', [ + 'id' => 3, + 'first_name' => 'Charly', + 'last_name' => 'Charlyson', + 'email' => 'charly.charlyson@foo.com', + 'age' => 52, + ]); + } + + public function testHash() + { + $seeder = new \Flynsarmy\CsvSeeder\CsvSeeder; + $seeder->table = 'users'; + $seeder->filename = __DIR__.'/csvs/test.csv'; + + // Assert unhashed passwords + $seeder->hashable = ''; + $seeder->run(); + $this->seeInDatabase('users', [ + 'id' => 1, + 'password' => 'abeabeson', + ]); + + // Reset users table + DB::table('users')->truncate(); + + // Assert hashed passwords + var_dump('running again'); + $seeder->hashable = 'password'; + $seeder->run(); + // Row 1 should still be in DB... + $this->seeInDatabase('users', [ + 'id' => 1, + ]); + // ... But passwords were hashed + $this->missingFromDatabase('users', [ + 'id' => 1, + 'password' => 'abeabeson', + ]); + } + + public function testOffset() + { + $seeder = new \Flynsarmy\CsvSeeder\CsvSeeder; + $seeder->table = 'users'; + $seeder->filename = __DIR__.'/csvs/test.csv'; + $seeder->hashable = ''; + $seeder->offset_rows = 4; + $seeder->mapping = [ + 0 => 'id', + 1 => 'first_name', + 6 => 'age', + ]; + $seeder->run(); + + // Assert offset occurred + $this->missingFromDatabase('users', [ + 'id' => 1, + ]); + + // Assert mapping worked + $this->seeInDatabase('users', [ + 'id' => 5, + 'first_name' => 'Echo', + 'last_name' => '', + 'age' => 54 + ]); + } +} \ No newline at end of file diff --git a/tests/csvs/test.csv b/tests/csvs/test.csv new file mode 100644 index 0000000..904d59e --- /dev/null +++ b/tests/csvs/test.csv @@ -0,0 +1,6 @@ +id,first_name,last_name,email,password,address,age +1,Abe,Abeson,abe.abeson@foo.com,abeabeson,123 Abe street,50 +2,Betty,Bettyson,betty.bettyson@foo.com,bettybettyson,123 Betty street,51 
+3,Charly,Charlyson,charly.charlyson@foo.com,charlycharlyson,123 Charly street,52 +4,Delta,Deltason,delta.deltason@foo.com,deltadeltason,123 Delta street,53 +5,Echo,Echoson,echo.echoson@foo.com,echoechoson,123 Echo street,54 \ No newline at end of file diff --git a/tests/migrations/2014_10_12_000000_create_users_table.php b/tests/migrations/2014_10_12_000000_create_users_table.php new file mode 100644 index 0000000..73d0204 --- /dev/null +++ b/tests/migrations/2014_10_12_000000_create_users_table.php @@ -0,0 +1,32 @@ +increments('id'); + $table->string('first_name')->default(''); + $table->string('last_name')->default(''); + $table->string('email')->default(''); + $table->string('password')->default(''); + $table->string('address')->default(''); + $table->integer('age')->default(0); + }); + } + /** + * Reverse the migrations. + * + * @return void + */ + public function down() + { + Schema::drop('users'); + } +} \ No newline at end of file