From 218557b86f4b06705a69c7b1c8a9edfce43f14e9 Mon Sep 17 00:00:00 2001 From: jbrule Date: Wed, 1 Jul 2015 16:21:12 -0500 Subject: [PATCH 1/3] Update fix-serialization.php --- fix-serialization.php | 117 ++++-------------------------------------- 1 file changed, 10 insertions(+), 107 deletions(-) diff --git a/fix-serialization.php b/fix-serialization.php index 2795801..3807e42 100755 --- a/fix-serialization.php +++ b/fix-serialization.php @@ -12,9 +12,10 @@ * http://davidcoveney.com/575/php-serialization-fix-for-wordpress-migrations/ * * Usage: -* -* /usr/bin/php fix-serialization.php my-sql-file.sql -* +* Uncompressed Files +* cat dump.sql | php fix-serialization.php > fixed-dump.sql +* Compressed Files +* gunzip -c dump.sql.gz | php fix-serialization.php | gzip > fixed-dump.sql.gz * Versions: * * 1.0 2011-08-03 Initial release @@ -37,8 +38,6 @@ * */ - - // Unescape to avoid dump-text issues function unescape_mysql($value) { return str_replace(array("\\\\", "\\0", "\\n", "\\r", "\Z", "\'", '\"'), @@ -46,116 +45,20 @@ function unescape_mysql($value) { $value); } - - // Fix strange behaviour if you have escaped quotes in your replacement function unescape_quotes($value) { return str_replace('\"', '"', $value); } +while (false !== ($line = fgets(STDIN))) { + $do_preg_replace = true; - -// Check command line arguments -if (!(isset($argv) && isset($argv[1]))) { - - // Error - echo 'Error: no input file specified'."\n\n"; - -// With arguments -} else { - - // Compose path from argument - $path = dirname(__FILE__).'/'.$argv[1]; - if (!file_exists($path)) { + // Replace serialized string values + $data = preg_replace('!s:(\d+):([\\\\]?"[\\\\]?"|[\\\\]?"((.*?)[^\\\\])[\\\\]?");!e', "'s:'.strlen(unescape_mysql('$3')).':\"'.unescape_quotes('$3').'\";'", $line); - // Error - echo 'Error: input file does not exists'."\n"; - echo $path."\n\n"; - - // File exists - } else { - - // Get file contents - if (!($fp = fopen($path, 'r'))) { - - // Error - echo 'Error: can`t open input file for read'."\n"; - echo $path."\n\n"; - - // File opened for read - } else { - - // Initializations - $do_preg_replace = false; - - // Copy data - if (!($data = fread($fp, filesize($path)))) { - - // Error - echo 'Error: can`t read entire data from input file'."\n"; - echo $path."\n\n"; - - // Check data - } elseif (!(isset($data) && strlen($data) > 0)) { - - // Warning - echo "Warning: the file is empty or can't read contents\n"; - echo $path."\n\n"; - - // Data ok - } else { - - // Tag context - $do_preg_replace = true; - - // Replace serialized string values - $data = preg_replace('!s:(\d+):([\\\\]?"[\\\\]?"|[\\\\]?"((.*?)[^\\\\])[\\\\]?");!e', "'s:'.strlen(unescape_mysql('$3')).':\"'.unescape_quotes('$3').'\";'", $data); - } - - // Close file - fclose($fp); - - // Check data - if (!(isset($data) && strlen($data) > 0)) { - - // Check origin - if ($do_preg_replace) { - - // Error - echo "Error: preg_replace returns nothing\n"; - if (function_exists('preg_last_error')) echo "preg_last_error() = ".preg_last_error()."\n"; - echo $path."\n\n"; - } - - // Data Ok - } else { - - // And finally write data - if (!($fp = fopen($path, 'w'))) { - - // Error - echo "Error: can't open input file for writing\n"; - echo $path."\n\n"; - - // Open for write - } else { - - // Write file data - if (!fwrite($fp, $data)) { - - // Error - echo "Error: can't write input file\n"; - echo $path."\n\n"; - } - - // Close file - fclose($fp); - } - } - } - } + fwrite(STDOUT,$data); } -?> \ No newline at end of file +?> From 30ce8c7111324707920ecb9fb5b975e051ebede6 Mon Sep 17 00:00:00 2001 From: jbrule Date: Thu, 2 Jul 2015 10:33:26 -0500 Subject: [PATCH 2/3] Update README Added stream processing usage examples --- README | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README b/README index 8dca8a7..8e1ab3a 100644 --- a/README +++ b/README @@ -9,7 +9,20 @@ How to use: - Execute directly this script in the way: - /usr/bin/php fix-serialization.php my-sql-file.sql + - In place file processing + /usr/bin/php fix-serialization.php my-sql-file.sql + + - Stream processing Examples + + -- Uncompressed to uncompressed + cat my-sql-file.sql | /usr/bin/php fix-serialization.php --stream > my-sql-fixed-file.sql + + -- Compressed to compressed + gunzip < my-sql-file.sql.gz | /usr/bin/php fix-serialization.php --stream | gzip > my-sql-fixed-file.sql.gz + + -- Direct import into mysql from compressed + gunzip < my-sql-file.sql.gz | /usr/bin/php fix-serialization.php --stream | mysql -uuname -p "database name" + - Or use the shell script replace.sh that replaces with sed command each sql file in directory and call fix-serialization script From a121b2240abef337c6bfdede81c2d5568e1cc602 Mon Sep 17 00:00:00 2001 From: jbrule Date: Thu, 2 Jul 2015 10:34:26 -0500 Subject: [PATCH 3/3] Update fix-serialization.php Added stream processing option --- fix-serialization.php | 151 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 138 insertions(+), 13 deletions(-) diff --git a/fix-serialization.php b/fix-serialization.php index 3807e42..76d64d0 100755 --- a/fix-serialization.php +++ b/fix-serialization.php @@ -2,7 +2,7 @@ fixed-dump.sql -* Compressed Files -* gunzip -c dump.sql.gz | php fix-serialization.php | gzip > fixed-dump.sql.gz +* +* -In place file processing +* /usr/bin/php fix-serialization.php my-sql-file.sql +* +* -Stream processing Examples +* +* --Uncompressed to uncompressed +* cat my-sql-file.sql | /usr/bin/php fix-serialization.php --stream > my-sql-fixed-file.sql +* +* --Compressed to compressed +* gunzip < my-sql-file.sql.gz | /usr/bin/php fix-serialization.php --stream | gzip > my-sql-fixed-file.sql.gz +* +* --Direct import into mysql from compressed +* gunzip < my-sql-file.sql.gz | /usr/bin/php fix-serialization.php --stream | mysql -uuname -p "database name" +* * Versions: * * 1.0 2011-08-03 Initial release * 1.1 2011-08-18 Support for backslashed quotes, added some code warnings * 1.2 2011-09-29 Support for null or zero length strings after preg_replace is called, and explain how to handle these errors +* 1.2-1 2015-07-02 (jbrule) Added stream processing support to avoid the memory allocation issue when working with large database files (tested up to 6GB). * * Knowed errors: * @@ -48,17 +60,130 @@ function unescape_mysql($value) { // Fix strange behaviour if you have escaped quotes in your replacement function unescape_quotes($value) { return str_replace('\"', '"', $value); -} +} -while (false !== ($line = fgets(STDIN))) { - $do_preg_replace = true; +function replace_serialized_string_values($data){ + // Replace serialized string values + + return preg_replace('!s:(\d+):([\\\\]?"[\\\\]?"|[\\\\]?"((.*?)[^\\\\])[\\\\]?");!e', "'s:'.strlen(unescape_mysql('$3')).':\"'.unescape_quotes('$3').'\";'", $data); +} + +//Setup arguments to flag stream processing +$options = getopt("s::",array("stream::")); - // Replace serialized string values - $data = preg_replace('!s:(\d+):([\\\\]?"[\\\\]?"|[\\\\]?"((.*?)[^\\\\])[\\\\]?");!e', "'s:'.strlen(unescape_mysql('$3')).':\"'.unescape_quotes('$3').'\";'", $line); +//Check if data is being feed in via a pipe. Process line by line and output via stdout if that is the case. +if((key_exists("s",$options) || key_exists("stream",$options)) && $fh = fopen('php://stdin','r')) { + stream_set_blocking($fh, false); - fwrite(STDOUT,$data); -} + $stdin = ''; + + while (false !== ($stdin = fgets($fh))) { + //Note. $stdin is a single line of the dumpfile. In testing this appears to be fine as the mysqldump field values do not appear to span past a single line. + + // Replace serialized string values + $data = replace_serialized_string_values($stdin); + + fwrite(STDOUT,$data); + + } + fclose($fh); +} elseif (!(isset($argv) && isset($argv[1]))) { // Check command line arguments + + // Error + echo 'Error: no input file specified'."\n\n"; +// With arguments +} else { + + // Compose path from argument + $path = dirname(__FILE__).'/'.$argv[1]; + if (!file_exists($path)) { + + // Error + echo 'Error: input file does not exists'."\n"; + echo $path."\n\n"; + + // File exists + } else { + + // Get file contents + if (!($fp = fopen($path, 'r'))) { + + // Error + echo 'Error: can`t open input file for read'."\n"; + echo $path."\n\n"; + + // File opened for read + } else { + + // Initializations + $do_preg_replace = false; + + // Copy data + if (!($data = fread($fp, filesize($path)))) { + + // Error + echo 'Error: can`t read entire data from input file'."\n"; + echo $path."\n\n"; + + // Check data + } elseif (!(isset($data) && strlen($data) > 0)) { + + // Warning + echo "Warning: the file is empty or can't read contents\n"; + echo $path."\n\n"; + + // Data ok + } else { + + // Tag context + $do_preg_replace = true; -?> + // Replace serialized string values + $data = replace_serialized_string_values($data); + } + + // Close file + fclose($fp); + + // Check data + if (!(isset($data) && strlen($data) > 0)) { + + // Check origin + if ($do_preg_replace) { + + // Error + echo "Error: preg_replace returns nothing\n"; + if (function_exists('preg_last_error')) echo "preg_last_error() = ".preg_last_error()."\n"; + echo $path."\n\n"; + } + + // Data Ok + } else { + + // And finally write data + if (!($fp = fopen($path, 'w'))) { + + // Error + echo "Error: can't open input file for writing\n"; + echo $path."\n\n"; + + // Open for write + } else { + + // Write file data + if (!fwrite($fp, $data)) { + + // Error + echo "Error: can't write input file\n"; + echo $path."\n\n"; + } + + // Close file + fclose($fp); + } + } + } + } +}