Skip to content

Commit 392b7be

Browse files
committed
Add tests, offset and column mapping support
1 parent 9c33bbe commit 392b7be

File tree

5 files changed

+398
-82
lines changed

5 files changed

+398
-82
lines changed

readme.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ In addition to setting the database table and CSV filename, two other configurat
5353

5454
- `insert_chunk_size` (int 500) An SQL insert statement will trigger every `insert_chunk_size` number of rows while reading the CSV
5555
- `csv_delimiter` (string ,) The CSV field delimiter.
56-
- `hashable` (string password) Hash the hashable field, useful if you are importing users and need their passwords hashed. Uses `Hash::make()`
56+
- `hashable` (string password) Hash the hashable field, useful if you are importing users and need their passwords hashed. Uses `Hash::make()`. Note: This is EXTREMELY SLOW. If you have a lot of rows in your CSV your import will take quite a long time.
5757

5858
For example, if you have a CSV with pipe-delimited values, your seed constructor will look like so:
5959

src/Flynsarmy/CsvSeeder/CsvSeeder.php

Lines changed: 154 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
<?php namespace Flynsarmy\CsvSeeder;
22

3+
use App;
34
use Log;
5+
use DB;
6+
use Hash;
47
use Illuminate\Database\Seeder;
5-
use Illuminate\Support\Facades\DB;
6-
use Illuminate\Support\Facades\Hash;
78

89
/**
910
* Taken from http://laravelsnippets.com/snippets/seeding-database-with-csv-files-cleanly
@@ -17,14 +18,14 @@ class CsvSeeder extends Seeder
1718
*
1819
* @var string
1920
*/
20-
protected $table;
21+
public $table;
2122

2223
/**
2324
* CSV filename
2425
*
2526
* @var string
2627
*/
27-
protected $filename;
28+
public $filename;
2829

2930
/**
3031
* DB field to be hashed, most likely a password field.
@@ -34,136 +35,208 @@ class CsvSeeder extends Seeder
3435
* @var string
3536
*/
3637

37-
protected $hashable = 'password';
38+
public $hashable = 'password';
3839

3940
/**
4041
* An SQL INSERT query will execute every time this number of rows
4142
* are read from the CSV. Without this, large INSERTS will silently
4243
* fail.
4344
*
44-
* @var integer
45+
* @var int
4546
*/
46-
protected $insert_chunk_size = 50;
47+
public $insert_chunk_size = 50;
4748

4849
/**
4950
* CSV delimiter (defaults to ,)
5051
*
5152
* @var string
5253
*/
53-
protected $csv_delimiter = ',';
54-
54+
public $csv_delimiter = ',';
55+
56+
/**
57+
* Number of rows to skip at the start of the CSV
58+
*
59+
* @var int
60+
*/
61+
public $offset_rows = 0;
62+
63+
64+
/**
65+
* The mapping of CSV to DB column. If not specified manually, the first
66+
* row (after offset_rows) of your CSV will be read as your DB columns.
67+
*
68+
* E.g. to read only the first, third and fourth columns of your CSV, use:
69+
* array(
70+
* 0 => 'id',
71+
* 2 => 'name',
72+
* 3 => 'description',
73+
* )
74+
*
75+
* @var array
76+
*/
77+
public $mapping = [];
5578

5679

5780
/**
5881
* Run DB seed
5982
*/
6083
public function run()
6184
{
62-
$this->seedFromCSV($this->filename, $this->csv_delimiter);
85+
$this->seedFromCSV($this->filename, $this->csv_delimiter);
6386
}
6487

6588
/**
6689
* Strip UTF-8 BOM characters from the start of a string
6790
*
6891
* @param string $text
69-
*
7092
* @return string String with BOM stripped
7193
*/
72-
private function strip_utf8_bom( $text )
94+
public function stripUtf8Bom( $text )
7395
{
7496
$bom = pack('H*','EFBBBF');
7597
$text = preg_replace("/^$bom/", '', $text);
76-
return $text;
98+
99+
return $text;
77100
}
78101

102+
/**
103+
* Opens a CSV file and returns it as a resource
104+
*
105+
* @param $filename
106+
* @return FALSE|resource
107+
*/
108+
public function openCSV($filename)
109+
{
110+
if ( !file_exists($filename) || !is_readable($filename) )
111+
{
112+
Log::error("CSV insert failed: CSV " . $filename . " does not exist or is not readable.");
113+
return FALSE;
114+
}
115+
116+
// check if file is gzipped
117+
$finfo = finfo_open(FILEINFO_MIME_TYPE);
118+
$file_mime_type = finfo_file($finfo, $filename);
119+
finfo_close($finfo);
120+
$gzipped = strcmp($file_mime_type, "application/x-gzip") == 0;
121+
122+
$handle = $gzipped ? gzopen($filename, 'r') : fopen($filename, 'r');
123+
124+
return $handle;
125+
}
126+
79127
/**
80128
* Collect data from a given CSV file and return as array
81129
*
82-
* @param $filename
130+
* @param string $filename
83131
* @param string $deliminator
84132
* @return array|bool
85133
*/
86-
private function seedFromCSV($filename, $deliminator = ",")
134+
public function seedFromCSV($filename, $deliminator = ",")
87135
{
88-
if ( !file_exists($filename) || !is_readable($filename) )
89-
{
90-
Log::error("CSV insert failed: CSV " . $filename . " does not exist or is not readable.");
91-
return FALSE;
92-
}
136+
$handle = $this->openCSV($filename);
93137

94-
// check if file is gzipped
95-
$finfo = finfo_open(FILEINFO_MIME_TYPE);
96-
$file_mime_type = finfo_file($finfo, $filename);
97-
finfo_close($finfo);
98-
$gzipped = strcmp($file_mime_type, "application/x-gzip") == 0;
138+
// CSV doesn't exist or couldn't be read from.
139+
if ( $handle === FALSE )
140+
return [];
99141

100142
$header = NULL;
101143
$row_count = 0;
102-
$data = array();
103-
$handle = $gzipped ? popen("gzip -cd " . $filename, 'r') : fopen($filename, 'r');
104-
105-
if ( $handle !== FALSE )
106-
{
107-
while ( ($row = fgetcsv($handle, 0, $deliminator)) !== FALSE )
108-
{
109-
110-
if ( !$header )
111-
{
112-
$header = $row;
113-
$header[0] = $this->strip_utf8_bom($header[0]);
114-
}
115-
else
116-
{
117-
// insert only non-empty fields from the csv file
118-
$i = 0;
119-
$row_values = [];
120-
121-
foreach ($header as $key) {
122-
if ($row[$i] === '') {
123-
$row_values[$key] = NULL;
124-
}
125-
else {
126-
$row_values[$key] = $row[$i];
127-
}
128-
$i++;
129-
}
130-
131-
if(isset($row_values[$this->hashable])){
132-
$row_values[$this->hashable] = Hash::make($row_values[$this->hashable]);
133-
}
134-
135-
$data[$row_count] = $row_values;
136-
137-
// Chunk size reached, insert
138-
if ( ++$row_count == $this->insert_chunk_size )
139-
{
140-
$this->run_insert($data);
141-
$row_count = 0;
142-
//clear the data array explicitly when it was inserted so that nothing is left, otherwise a leftover scenario can cause duplicate inserts
143-
$data = array();
144-
}
145-
}
146-
}
147-
148-
// Insert any leftover rows
149-
//check if the data array explicitly if there are any values left to be inserted, if insert them
150-
if ( count($data) )
151-
$this->run_insert($data);
152-
153-
fclose($handle);
154-
}
144+
$data = [];
145+
$mapping = $this->mapping ?: [];
146+
$offset = $this->offset_rows;
147+
148+
while ( ($row = fgetcsv($handle, 0, $deliminator)) !== FALSE )
149+
{
150+
// Offset the specified number of rows
151+
152+
while ( $offset > 0 )
153+
{
154+
$offset--;
155+
continue 2;
156+
}
157+
158+
// No mapping specified - grab the first CSV row and use it
159+
if ( !$mapping )
160+
{
161+
$mapping = $row;
162+
$mapping[0] = $this->stripUtf8Bom($mapping[0]);
163+
}
164+
else
165+
{
166+
$row = $this->readRow($row, $mapping);
167+
168+
// insert only non-empty rows from the csv file
169+
if ( !$row )
170+
continue;
171+
172+
$data[$row_count] = $row;
173+
174+
// Chunk size reached, insert
175+
if ( ++$row_count == $this->insert_chunk_size )
176+
{
177+
$this->insert($data);
178+
$row_count = 0;
179+
// Reset the buffer explicitly once its rows have been inserted;
180+
// otherwise the final "leftover rows" insert below would insert
181+
// the same rows a second time.
182+
$data = array();
183+
}
184+
}
185+
}
186+
187+
// Insert any leftover rows
188+
// Explicitly check whether the data array still holds rows to be inserted; if so, insert them
189+
if ( count($data) )
190+
$this->insert($data);
191+
192+
fclose($handle);
155193

156194
return $data;
157195
}
158196

159-
private function run_insert( array $seedData )
197+
/**
198+
* Read a CSV row into a DB insertable array
199+
*
200+
* @param array $row List of CSV columns
201+
* @param array $mapping Array of csvCol => dbCol
202+
* @return array
203+
*/
204+
public function readRow( array $row, array $mapping )
205+
{
206+
$row_values = [];
207+
208+
foreach ($mapping as $csvCol => $dbCol) {
209+
if (!isset($row[$csvCol]) || $row[$csvCol] === '') {
210+
$row_values[$dbCol] = NULL;
211+
}
212+
else {
213+
$row_values[$dbCol] = $row[$csvCol];
214+
}
215+
}
216+
217+
if ($this->hashable && isset($row_values[$this->hashable])) {
218+
$row_values[$this->hashable] = Hash::make($row_values[$this->hashable]);
219+
}
220+
221+
return $row_values;
222+
}
223+
224+
/**
225+
* Seed a given set of data to the DB
226+
*
227+
* @param array $seedData
228+
* @return bool TRUE on success else FALSE
229+
*/
230+
public function insert( array $seedData )
160231
{
161232
try {
162-
DB::table($this->table)->insert($seedData);
233+
DB::table($this->table)->insert($seedData);
163234
} catch (\Exception $e) {
164-
Log::error("CSV insert failed: " . $e->getMessage() . " - CSV " . $this->filename);
235+
Log::error("CSV insert failed: " . $e->getMessage() . " - CSV " . $this->filename);
236+
return FALSE;
165237
}
166238

239+
return TRUE;
167240
}
168241

169242
}

0 commit comments

Comments
 (0)