diff --git a/CREDITS b/CREDITS index f0e6de7f08fae..35ab4d48a8f79 100644 --- a/CREDITS +++ b/CREDITS @@ -10,6 +10,7 @@ Visma http://visma.com (2015 - 2016) Acronis http://acronis.com (2016) Nexedi https://www.nexedi.com (2016) Automattic https://automattic.com (2014 - 2016) +Tencent Game DBA http://tencentdba.com/about (2016) Verkkokauppa.com https://www.verkkokauppa.com (2015 - 2016) Virtuozzo https://virtuozzo.com (2016) diff --git a/VERSION b/VERSION index e748b1bda54d4..a82a4e4d77d54 100644 --- a/VERSION +++ b/VERSION @@ -1,3 +1,3 @@ MYSQL_VERSION_MAJOR=10 MYSQL_VERSION_MINOR=0 -MYSQL_VERSION_PATCH=27 +MYSQL_VERSION_PATCH=28 diff --git a/client/mysql.cc b/client/mysql.cc index 89f9a75ec11f8..4b20e4d98cb40 100644 --- a/client/mysql.cc +++ b/client/mysql.cc @@ -245,7 +245,8 @@ static void end_pager(); static void init_tee(const char *); static void end_tee(); static const char* construct_prompt(); -static char *get_arg(char *line, my_bool get_next_arg); +enum get_arg_mode { CHECK, GET, GET_NEXT}; +static char *get_arg(char *line, get_arg_mode mode); static void init_username(); static void add_int_to_prompt(int toadd); static int get_result_width(MYSQL_RES *res); @@ -2257,7 +2258,7 @@ static COMMANDS *find_command(char *name) if (!my_strnncoll(&my_charset_latin1, (uchar*) name, len, (uchar*) commands[i].name, len) && (commands[i].name[len] == '\0') && - (!end || commands[i].takes_params)) + (!end || (commands[i].takes_params && get_arg(name, CHECK)))) { index= i; break; @@ -3177,7 +3178,7 @@ com_charset(String *buffer __attribute__((unused)), char *line) char buff[256], *param; CHARSET_INFO * new_cs; strmake_buf(buff, line); - param= get_arg(buff, 0); + param= get_arg(buff, GET); if (!param || !*param) { return put_info("Usage: \\C charset_name | charset charset_name", @@ -4263,12 +4264,12 @@ com_connect(String *buffer, char *line) #ifdef EXTRA_DEBUG tmp[1]= 0; #endif - tmp= get_arg(buff, 0); + tmp= get_arg(buff, GET); if (tmp && *tmp) { my_free(current_db); current_db= my_strdup(tmp, MYF(MY_WME)); - tmp= get_arg(buff, 1); + tmp= get_arg(buff, GET_NEXT); if (tmp) { my_free(current_host); @@ -4371,7 +4372,7 @@ com_delimiter(String *buffer __attribute__((unused)), char *line) char buff[256], *tmp; strmake_buf(buff, line); - tmp= get_arg(buff, 0); + tmp= get_arg(buff, GET); if (!tmp || !*tmp) { @@ -4402,7 +4403,7 @@ com_use(String *buffer __attribute__((unused)), char *line) bzero(buff, sizeof(buff)); strmake_buf(buff, line); - tmp= get_arg(buff, 0); + tmp= get_arg(buff, GET); if (!tmp || !*tmp) { put_info("USE must be followed by a database name", INFO_ERROR); @@ -4487,23 +4488,22 @@ com_nowarnings(String *buffer __attribute__((unused)), } /* - Gets argument from a command on the command line. If get_next_arg is - not defined, skips the command and returns the first argument. The - line is modified by adding zero to the end of the argument. If - get_next_arg is defined, then the function searches for end of string - first, after found, returns the next argument and adds zero to the - end. If you ever wish to use this feature, remember to initialize all - items in the array to zero first. + Gets argument from a command on the command line. If mode is not GET_NEXT, + skips the command and returns the first argument. The line is modified by + adding zero to the end of the argument. If mode is GET_NEXT, then the + function searches for end of string first, after found, returns the next + argument and adds zero to the end. If you ever wish to use this feature, + remember to initialize all items in the array to zero first. */ -char *get_arg(char *line, my_bool get_next_arg) +static char *get_arg(char *line, get_arg_mode mode) { char *ptr, *start; - my_bool quoted= 0, valid_arg= 0; + bool short_cmd= false; char qtype= 0; ptr= line; - if (get_next_arg) + if (mode == GET_NEXT) { for (; *ptr; ptr++) ; if (*(ptr + 1)) @@ -4514,7 +4514,7 @@ char *get_arg(char *line, my_bool get_next_arg) /* skip leading white spaces */ while (my_isspace(charset_info, *ptr)) ptr++; - if (*ptr == '\\') // short command was used + if ((short_cmd= *ptr == '\\')) // short command was used ptr+= 2; else while (*ptr &&!my_isspace(charset_info, *ptr)) // skip command @@ -4527,24 +4527,28 @@ char *get_arg(char *line, my_bool get_next_arg) if (*ptr == '\'' || *ptr == '\"' || *ptr == '`') { qtype= *ptr; - quoted= 1; ptr++; } for (start=ptr ; *ptr; ptr++) { - if (*ptr == '\\' && ptr[1]) // escaped character + if ((*ptr == '\\' && ptr[1]) || // escaped character + (!short_cmd && qtype && *ptr == qtype && ptr[1] == qtype)) // quote { - // Remove the backslash - strmov_overlapp(ptr, ptr+1); + // Remove (or skip) the backslash (or a second quote) + if (mode != CHECK) + strmov_overlapp(ptr, ptr+1); + else + ptr++; } - else if ((!quoted && *ptr == ' ') || (quoted && *ptr == qtype)) + else if (*ptr == (qtype ? qtype : ' ')) { - *ptr= 0; + qtype= 0; + if (mode != CHECK) + *ptr= 0; break; } } - valid_arg= ptr != start; - return valid_arg ? start : NullS; + return ptr != start && !qtype ? start : NullS; } diff --git a/client/mysqldump.c b/client/mysqldump.c index 153761ed5102c..64ed21ac7fca8 100644 --- a/client/mysqldump.c +++ b/client/mysqldump.c @@ -575,9 +575,7 @@ static int dump_all_tablespaces(); static int dump_tablespaces_for_tables(char *db, char **table_names, int tables); static int dump_tablespaces_for_databases(char** databases); static int dump_tablespaces(char* ts_where); -static void print_comment(FILE *sql_file, my_bool is_error, const char *format, - ...); - +static void print_comment(FILE *, my_bool, const char *, ...); /* Print the supplied message if in verbose mode @@ -655,6 +653,30 @@ static void short_usage(FILE *f) } +/** returns a string fixed to be safely printed inside a -- comment + + that is, any new line in it gets prefixed with -- +*/ +static const char *fix_for_comment(const char *ident) +{ + static char buf[1024]; + char c, *s= buf; + + while ((c= *s++= *ident++)) + { + if (s >= buf + sizeof(buf) - 10) + { + strmov(s, "..."); + break; + } + if (c == '\n') + s= strmov(s, "-- "); + } + + return buf; +} + + static void write_header(FILE *sql_file, char *db_name) { if (opt_xml) @@ -677,8 +699,8 @@ static void write_header(FILE *sql_file, char *db_name) DUMP_VERSION, MYSQL_SERVER_VERSION, SYSTEM_TYPE, MACHINE_TYPE); print_comment(sql_file, 0, "-- Host: %s Database: %s\n", - current_host ? current_host : "localhost", - db_name ? db_name : ""); + fix_for_comment(current_host ? current_host : "localhost"), + fix_for_comment(db_name ? db_name : "")); print_comment(sql_file, 0, "-- ------------------------------------------------------\n" ); @@ -2224,7 +2246,8 @@ static uint dump_events_for_db(char *db) /* nice comments */ print_comment(sql_file, 0, - "\n--\n-- Dumping events for database '%s'\n--\n", db); + "\n--\n-- Dumping events for database '%s'\n--\n", + fix_for_comment(db)); /* not using "mysql_query_with_error_report" because we may have not @@ -2436,7 +2459,8 @@ static uint dump_routines_for_db(char *db) /* nice comments */ print_comment(sql_file, 0, - "\n--\n-- Dumping routines for database '%s'\n--\n", db); + "\n--\n-- Dumping routines for database '%s'\n--\n", + fix_for_comment(db)); /* not using "mysql_query_with_error_report" because we may have not @@ -2731,11 +2755,11 @@ static uint get_table_structure(char *table, char *db, char *table_type, if (strcmp (table_type, "VIEW") == 0) /* view */ print_comment(sql_file, 0, "\n--\n-- Temporary table structure for view %s\n--\n\n", - result_table); + fix_for_comment(result_table)); else print_comment(sql_file, 0, "\n--\n-- Table structure for table %s\n--\n\n", - result_table); + fix_for_comment(result_table)); if (opt_drop) { @@ -2977,7 +3001,7 @@ static uint get_table_structure(char *table, char *db, char *table_type, print_comment(sql_file, 0, "\n--\n-- Table structure for table %s\n--\n\n", - result_table); + fix_for_comment(result_table)); if (opt_drop) fprintf(sql_file, "DROP TABLE IF EXISTS %s;\n", result_table); if (!opt_xml) @@ -3684,21 +3708,21 @@ static void dump_table(char *table, char *db) { print_comment(md_result_file, 0, "\n--\n-- Dumping data for table %s\n--\n", - result_table); + fix_for_comment(result_table)); dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ * FROM "); dynstr_append_checked(&query_string, result_table); if (where) { - print_comment(md_result_file, 0, "-- WHERE: %s\n", where); + print_comment(md_result_file, 0, "-- WHERE: %s\n", fix_for_comment(where)); dynstr_append_checked(&query_string, " WHERE "); dynstr_append_checked(&query_string, where); } if (order_by) { - print_comment(md_result_file, 0, "-- ORDER BY: %s\n", order_by); + print_comment(md_result_file, 0, "-- ORDER BY: %s\n", fix_for_comment(order_by)); dynstr_append_checked(&query_string, " ORDER BY "); dynstr_append_checked(&query_string, order_by); @@ -4208,7 +4232,7 @@ static int dump_tablespaces(char* ts_where) if (first) { print_comment(md_result_file, 0, "\n--\n-- Logfile group: %s\n--\n", - row[0]); + fix_for_comment(row[0])); fprintf(md_result_file, "\nCREATE"); } @@ -4277,7 +4301,8 @@ static int dump_tablespaces(char* ts_where) first= 1; if (first) { - print_comment(md_result_file, 0, "\n--\n-- Tablespace: %s\n--\n", row[0]); + print_comment(md_result_file, 0, "\n--\n-- Tablespace: %s\n--\n", + fix_for_comment(row[0])); fprintf(md_result_file, "\nCREATE"); } else @@ -4481,7 +4506,8 @@ static int init_dumping(char *database, int init_func(char*)) char *qdatabase= quote_name(database,quoted_database_buf,opt_quoted); print_comment(md_result_file, 0, - "\n--\n-- Current Database: %s\n--\n", qdatabase); + "\n--\n-- Current Database: %s\n--\n", + fix_for_comment(qdatabase)); /* Call the view or table specific function */ init_func(qdatabase); @@ -5672,7 +5698,7 @@ static my_bool get_view_structure(char *table, char* db) print_comment(sql_file, 0, "\n--\n-- Final view structure for view %s\n--\n\n", - result_table); + fix_for_comment(result_table)); /* Table might not exist if this view was dumped with --tab. */ fprintf(sql_file, "/*!50001 DROP TABLE IF EXISTS %s*/;\n", opt_quoted_table); diff --git a/client/mysqltest.cc b/client/mysqltest.cc index 66bcb6462e7ec..dede6527d11ac 100644 --- a/client/mysqltest.cc +++ b/client/mysqltest.cc @@ -3373,10 +3373,6 @@ void do_exec(struct st_command *command) #endif #endif - /* exec command is interpreted externally and will not take newlines */ - while(replace(&ds_cmd, "\n", 1, " ", 1) == 0) - ; - DBUG_PRINT("info", ("Executing '%s' as '%s'", command->first_argument, ds_cmd.str)); diff --git a/cmake/cpack_rpm.cmake b/cmake/cpack_rpm.cmake index 174548502d819..00f21c1cd8bef 100644 --- a/cmake/cpack_rpm.cmake +++ b/cmake/cpack_rpm.cmake @@ -221,6 +221,9 @@ SETA(CPACK_RPM_test_PACKAGE_PROVIDES "perl(mtr_io.pl)" "perl(mtr_match)" "perl(mtr_misc.pl)" + "perl(mtr_gcov.pl)" + "perl(mtr_gprof.pl)" + "perl(mtr_process.pl)" "perl(mtr_report)" "perl(mtr_results)" "perl(mtr_unique)") diff --git a/cmake/package_name.cmake b/cmake/package_name.cmake index 87db39d68d458..30f5199441fcb 100644 --- a/cmake/package_name.cmake +++ b/cmake/package_name.cmake @@ -30,6 +30,10 @@ IF(NOT VERSION) SET(64BIT 1) ENDIF() + IF(NOT 64BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^mips64") + SET(DEFAULT_MACHINE "mips") + ENDIF() + IF(CMAKE_SYSTEM_NAME MATCHES "Windows") SET(NEED_DASH_BETWEEN_PLATFORM_AND_MACHINE 0) SET(DEFAULT_PLATFORM "win") diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc index 6018a4884ea5b..c09458630c834 100644 --- a/extra/innochecksum.cc +++ b/extra/innochecksum.cc @@ -243,10 +243,9 @@ int main(int argc, char **argv) time_t lastt; /* last time */ ulint oldcsum, oldcsumfield, csum, csumfield, crc32, logseq, logseqfield; /* ulints for checksum storage */ - struct stat st; /* for stat, if you couldn't guess */ unsigned long long int size; /* size of file (has to be 64 bits) */ ulint pages; /* number of pages in file */ - off_t offset= 0; + long long offset= 0; int fd; printf("InnoDB offline file checksum utility.\n"); @@ -269,6 +268,47 @@ int main(int argc, char **argv) goto error; } +#ifdef _WIN32 + /* Switch off OS file buffering for the file. */ + + HANDLE h = CreateFile(filename, GENERIC_READ, + FILE_SHARE_READ|FILE_SHARE_WRITE, 0, + OPEN_EXISTING, FILE_FLAG_NO_BUFFERING, 0); + + if (!h) + { + fprintf(stderr, "Error; cant open file\n"); + goto error; + } + + if (!GetFileSizeEx(h, (LARGE_INTEGER *)&size)) + { + fprintf(stderr, "Error; GetFileSize() failed\n"); + goto error; + } + + fd = _open_osfhandle ((intptr_t) h, _O_RDONLY); + if (fd < 0) + { + fprintf(stderr, "Error; _open_osfhandle() failed\n"); + goto error; + } + + f = _fdopen(fd, "rb"); + if (!f) + { + fprintf(stderr, "Error; fdopen() failed\n"); + goto error; + } + + /* + Disable stdio buffering (FILE_FLAG_NO_BUFFERING requires properly IO buffers + which stdio does not guarantee. + */ + setvbuf(f, NULL, _IONBF, 0); + +#else + struct stat st; /* stat the file to get size and page count */ if (stat(filename, &st)) { @@ -279,6 +319,8 @@ int main(int argc, char **argv) /* Open the file for reading */ f= fopen(filename, "rb"); +#endif + if (f == NULL) { fprintf(stderr, "Error; %s cannot be opened", filename); @@ -323,7 +365,7 @@ int main(int argc, char **argv) } else if (verbose) { - printf("file %s = %llu bytes (%lu pages)...\n", filename, size, pages); + printf("file %s = %llu bytes (%lu pages)...\n", filename, size, (ulong)pages); if (do_one_page) printf("InnoChecksum; checking page %lu\n", do_page); else @@ -348,9 +390,12 @@ int main(int argc, char **argv) goto error; } - offset= (off_t)start_page * (off_t)physical_page_size; - + offset= (longlong)start_page * (longlong)physical_page_size; +#ifdef _WIN32 + if (_lseeki64(fd, offset, SEEK_SET) != offset) +#else if (lseek(fd, offset, SEEK_SET) != offset) +#endif { perror("Error; Unable to seek to necessary offset"); goto error; diff --git a/extra/yassl/README b/extra/yassl/README index b5eb88824fb0d..a3d4f60f56128 100644 --- a/extra/yassl/README +++ b/extra/yassl/README @@ -12,6 +12,24 @@ before calling SSL_new(); *** end Note *** +yaSSL Release notes, version 2.4.2 (9/22/2016) + This release of yaSSL fixes a medium security vulnerability. A fix for + potential AES side channel leaks is included that a local user monitoring + the same CPU core cache could exploit. VM users, hyper-threading users, + and users where potential attackers have access to the CPU cache will need + to update if they utilize AES. + + DSA padding fixes for unusual sizes is included as well. Users with DSA + certficiates should update. + +yaSSL Release notes, version 2.4.0 (5/20/2016) + This release of yaSSL fixes the OpenSSL compatibility function + SSL_CTX_load_verify_locations() when using the path directory to allow + unlimited path sizes. Minor Windows build fixes are included. + No high level security fixes in this version but we always recommend + updating. + + yaSSL Release notes, version 2.3.9b (2/03/2016) This release of yaSSL fixes the OpenSSL compatibility function X509_NAME_get_index_by_NID() to use the actual index of the common name diff --git a/extra/yassl/certs/dsa-cert.pem b/extra/yassl/certs/dsa-cert.pem index 10d533edc88b0..10794cbee7313 100644 --- a/extra/yassl/certs/dsa-cert.pem +++ b/extra/yassl/certs/dsa-cert.pem @@ -1,22 +1,22 @@ -----BEGIN CERTIFICATE----- -MIIDqzCCA2ugAwIBAgIJAMGqrgDU6DyhMAkGByqGSM44BAMwgY4xCzAJBgNVBAYT +MIIDrzCCA2+gAwIBAgIJAK1zRM7YFcNjMAkGByqGSM44BAMwgZAxCzAJBgNVBAYT AlVTMQ8wDQYDVQQIDAZPcmVnb24xETAPBgNVBAcMCFBvcnRsYW5kMRAwDgYDVQQK -DAd3b2xmU1NMMRAwDgYDVQQLDAd0ZXN0aW5nMRYwFAYDVQQDDA13d3cueWFzc2wu -Y29tMR8wHQYJKoZIhvcNAQkBFhBpbmZvQHdvbGZzc2wuY29tMB4XDTEzMDQyMjIw -MDk0NFoXDTE2MDExNzIwMDk0NFowgY4xCzAJBgNVBAYTAlVTMQ8wDQYDVQQIDAZP -cmVnb24xETAPBgNVBAcMCFBvcnRsYW5kMRAwDgYDVQQKDAd3b2xmU1NMMRAwDgYD -VQQLDAd0ZXN0aW5nMRYwFAYDVQQDDA13d3cueWFzc2wuY29tMR8wHQYJKoZIhvcN -AQkBFhBpbmZvQHdvbGZzc2wuY29tMIIBuDCCASwGByqGSM44BAEwggEfAoGBAL1R -7koy4IrH6sbh6nDEUUPPKgfhxxLCWCVexF2+qzANEr+hC9M002haJXFOfeS9DyoO -WFbL0qMZOuqv+22CaHnoUWl7q3PjJOAI3JH0P54ZyUPuU1909RzgTdIDp5+ikbr7 -KYjnltL73FQVMbjTZQKthIpPn3MjYcF+4jp2W2zFAhUAkcntYND6MGf+eYzIJDN2 -L7SonHUCgYEAklpxErfqznIZjVvqqHFaq+mgAL5J8QrKVmdhYZh/Y8z4jCjoCA8o -TDoFKxf7s2ZzgaPKvglaEKiYqLqic9qY78DYJswzQMLFvjsF4sFZ+pYCBdWPQI4N -PgxCiznK6Ce+JH9ikSBvMvG+tevjr2UpawDIHX3+AWYaZBZwKADAaboDgYUAAoGB -AJ3LY89yHyvQ/TsQ6zlYbovjbk/ogndsMqPdNUvL4RuPTgJP/caaDDa0XJ7ak6A7 -TJ+QheLNwOXoZPYJC4EGFSDAXpYniGhbWIrVTCGe6lmZDfnx40WXS0kk3m/DHaC0 -3ElLAiybxVGxyqoUfbT3Zv1JwftWMuiqHH5uADhdXuXVo1AwTjAdBgNVHQ4EFgQU -IJjk416o4v8qpH9LBtXlR9v8gccwHwYDVR0jBBgwFoAUIJjk416o4v8qpH9LBtXl -R9v8gccwDAYDVR0TBAUwAwEB/zAJBgcqhkjOOAQDAy8AMCwCFCjGKIdOSV12LcTu -k08owGM6YkO1AhQe+K173VuaO/OsDNsxZlKpyH8+1g== +DAd3b2xmU1NMMRAwDgYDVQQLDAd0ZXN0aW5nMRgwFgYDVQQDDA93d3cud29sZnNz +bC5jb20xHzAdBgkqhkiG9w0BCQEWEGluZm9Ad29sZnNzbC5jb20wHhcNMTYwOTIy +MjEyMzA0WhcNMjIwMzE1MjEyMzA0WjCBkDELMAkGA1UEBhMCVVMxDzANBgNVBAgM +Bk9yZWdvbjERMA8GA1UEBwwIUG9ydGxhbmQxEDAOBgNVBAoMB3dvbGZTU0wxEDAO +BgNVBAsMB3Rlc3RpbmcxGDAWBgNVBAMMD3d3dy53b2xmc3NsLmNvbTEfMB0GCSqG +SIb3DQEJARYQaW5mb0B3b2xmc3NsLmNvbTCCAbgwggEsBgcqhkjOOAQBMIIBHwKB +gQC9Ue5KMuCKx+rG4epwxFFDzyoH4ccSwlglXsRdvqswDRK/oQvTNNNoWiVxTn3k +vQ8qDlhWy9KjGTrqr/ttgmh56FFpe6tz4yTgCNyR9D+eGclD7lNfdPUc4E3SA6ef +opG6+ymI55bS+9xUFTG402UCrYSKT59zI2HBfuI6dltsxQIVAJHJ7WDQ+jBn/nmM +yCQzdi+0qJx1AoGBAJJacRK36s5yGY1b6qhxWqvpoAC+SfEKylZnYWGYf2PM+Iwo +6AgPKEw6BSsX+7Nmc4Gjyr4JWhComKi6onPamO/A2CbMM0DCxb47BeLBWfqWAgXV +j0CODT4MQos5yugnviR/YpEgbzLxvrXr469lKWsAyB19/gFmGmQWcCgAwGm6A4GF +AAKBgQCdy2PPch8r0P07EOs5WG6L425P6IJ3bDKj3TVLy+Ebj04CT/3Gmgw2tFye +2pOgO0yfkIXizcDl6GT2CQuBBhUgwF6WJ4hoW1iK1UwhnupZmQ358eNFl0tJJN5v +wx2gtNxJSwIsm8VRscqqFH2092b9ScH7VjLoqhx+bgA4XV7l1aNQME4wHQYDVR0O +BBYEFCCY5ONeqOL/KqR/SwbV5Ufb/IHHMB8GA1UdIwQYMBaAFCCY5ONeqOL/KqR/ +SwbV5Ufb/IHHMAwGA1UdEwQFMAMBAf8wCQYHKoZIzjgEAwMvADAsAhQRYSCVN/Ge +agV3mffU3qNZ92fI0QIUPH7Jp+iASI7U1ocaYDc10qXGaGY= -----END CERTIFICATE----- diff --git a/extra/yassl/include/openssl/ssl.h b/extra/yassl/include/openssl/ssl.h index c95eb1ed88705..9ec99b46c1f00 100644 --- a/extra/yassl/include/openssl/ssl.h +++ b/extra/yassl/include/openssl/ssl.h @@ -34,7 +34,7 @@ #include "rsa.h" -#define YASSL_VERSION "2.3.9b" +#define YASSL_VERSION "2.4.2" #if defined(__cplusplus) diff --git a/extra/yassl/src/ssl.cpp b/extra/yassl/src/ssl.cpp index 57542f174c942..7069140dcda02 100644 --- a/extra/yassl/src/ssl.cpp +++ b/extra/yassl/src/ssl.cpp @@ -162,7 +162,7 @@ int read_file(SSL_CTX* ctx, const char* file, int format, CertType type) TaoCrypt::DSA_PrivateKey dsaKey; dsaKey.Initialize(dsaSource); - if (rsaSource.GetError().What()) { + if (dsaSource.GetError().What()) { // neither worked ret = SSL_FAILURE; } @@ -785,40 +785,67 @@ int SSL_CTX_load_verify_locations(SSL_CTX* ctx, const char* file, WIN32_FIND_DATA FindFileData; HANDLE hFind; - char name[MAX_PATH + 1]; // directory specification - strncpy(name, path, MAX_PATH - 3); - strncat(name, "\\*", 3); + const int DELIMITER_SZ = 2; + const int DELIMITER_STAR_SZ = 3; + int pathSz = (int)strlen(path); + int nameSz = pathSz + DELIMITER_STAR_SZ + 1; // plus 1 for terminator + char* name = NEW_YS char[nameSz]; // directory specification + memset(name, 0, nameSz); + strncpy(name, path, nameSz - DELIMITER_STAR_SZ - 1); + strncat(name, "\\*", DELIMITER_STAR_SZ); hFind = FindFirstFile(name, &FindFileData); - if (hFind == INVALID_HANDLE_VALUE) return SSL_BAD_PATH; + if (hFind == INVALID_HANDLE_VALUE) { + ysArrayDelete(name); + return SSL_BAD_PATH; + } do { - if (FindFileData.dwFileAttributes != FILE_ATTRIBUTE_DIRECTORY) { - strncpy(name, path, MAX_PATH - 2 - HALF_PATH); - strncat(name, "\\", 2); - strncat(name, FindFileData.cFileName, HALF_PATH); + if (!(FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { + int curSz = (int)strlen(FindFileData.cFileName); + if (pathSz + curSz + DELIMITER_SZ + 1 > nameSz) { + ysArrayDelete(name); + // plus 1 for terminator + nameSz = pathSz + curSz + DELIMITER_SZ + 1; + name = NEW_YS char[nameSz]; + } + memset(name, 0, nameSz); + strncpy(name, path, nameSz - curSz - DELIMITER_SZ - 1); + strncat(name, "\\", DELIMITER_SZ); + strncat(name, FindFileData.cFileName, + nameSz - pathSz - DELIMITER_SZ - 1); ret = read_file(ctx, name, SSL_FILETYPE_PEM, CA); } } while (ret == SSL_SUCCESS && FindNextFile(hFind, &FindFileData)); + ysArrayDelete(name); FindClose(hFind); #else // _WIN32 - - const int MAX_PATH = 260; - DIR* dir = opendir(path); if (!dir) return SSL_BAD_PATH; struct dirent* entry; struct stat buf; - char name[MAX_PATH + 1]; + const int DELIMITER_SZ = 1; + int pathSz = (int)strlen(path); + int nameSz = pathSz + DELIMITER_SZ + 1; //plus 1 for null terminator + char* name = NEW_YS char[nameSz]; // directory specification while (ret == SSL_SUCCESS && (entry = readdir(dir))) { - strncpy(name, path, MAX_PATH - 1 - HALF_PATH); - strncat(name, "/", 1); - strncat(name, entry->d_name, HALF_PATH); + int curSz = (int)strlen(entry->d_name); + if (pathSz + curSz + DELIMITER_SZ + 1 > nameSz) { + ysArrayDelete(name); + nameSz = pathSz + DELIMITER_SZ + curSz + 1; + name = NEW_YS char[nameSz]; + } + memset(name, 0, nameSz); + strncpy(name, path, nameSz - curSz - 1); + strncat(name, "/", DELIMITER_SZ); + strncat(name, entry->d_name, nameSz - pathSz - DELIMITER_SZ - 1); + if (stat(name, &buf) < 0) { + ysArrayDelete(name); closedir(dir); return SSL_BAD_STAT; } @@ -827,6 +854,7 @@ int SSL_CTX_load_verify_locations(SSL_CTX* ctx, const char* file, ret = read_file(ctx, name, SSL_FILETYPE_PEM, CA); } + ysArrayDelete(name); closedir(dir); #endif diff --git a/extra/yassl/taocrypt/include/aes.hpp b/extra/yassl/taocrypt/include/aes.hpp index 017630331560b..bccf6e73fc720 100644 --- a/extra/yassl/taocrypt/include/aes.hpp +++ b/extra/yassl/taocrypt/include/aes.hpp @@ -60,6 +60,7 @@ class AES : public Mode_BASE { static const word32 Te[5][256]; static const word32 Td[5][256]; + static const byte CTd4[256]; static const word32* Te0; static const word32* Te1; @@ -80,11 +81,68 @@ class AES : public Mode_BASE { void ProcessAndXorBlock(const byte*, const byte*, byte*) const; + word32 PreFetchTe() const; + word32 PreFetchTd() const; + word32 PreFetchCTd4() const; + AES(const AES&); // hide copy AES& operator=(const AES&); // and assign }; +#if defined(__x86_64__) || defined(_M_X64) || \ + (defined(__ILP32__) && (__ILP32__ >= 1)) + #define TC_CACHE_LINE_SZ 64 +#else + /* default cache line size */ + #define TC_CACHE_LINE_SZ 32 +#endif + +inline word32 AES::PreFetchTe() const +{ + word32 x = 0; + + /* 4 tables of 256 entries */ + for (int i = 0; i < 4; i++) { + /* each entry is 4 bytes */ + for (int j = 0; j < 256; j += TC_CACHE_LINE_SZ/4) { + x &= Te[i][j]; + } + } + + return x; +} + + +inline word32 AES::PreFetchTd() const +{ + word32 x = 0; + + /* 4 tables of 256 entries */ + for (int i = 0; i < 4; i++) { + /* each entry is 4 bytes */ + for (int j = 0; j < 256; j += TC_CACHE_LINE_SZ/4) { + x &= Td[i][j]; + } + } + + return x; +} + + +inline word32 AES::PreFetchCTd4() const +{ + word32 x = 0; + int i; + + for (i = 0; i < 256; i += TC_CACHE_LINE_SZ) { + x &= CTd4[i]; + } + + return x; +} + + typedef BlockCipher AES_ECB_Encryption; typedef BlockCipher AES_ECB_Decryption; diff --git a/extra/yassl/taocrypt/include/integer.hpp b/extra/yassl/taocrypt/include/integer.hpp index 75a3ee3d3df80..05fe189fd585f 100644 --- a/extra/yassl/taocrypt/include/integer.hpp +++ b/extra/yassl/taocrypt/include/integer.hpp @@ -119,6 +119,9 @@ namespace TaoCrypt { +#ifdef _WIN32 + #undef max // avoid name clash +#endif // general MAX template inline const T& max(const T& a, const T& b) diff --git a/extra/yassl/taocrypt/src/aes.cpp b/extra/yassl/taocrypt/src/aes.cpp index e47765b87d0ef..2321c72554cdf 100644 --- a/extra/yassl/taocrypt/src/aes.cpp +++ b/extra/yassl/taocrypt/src/aes.cpp @@ -109,10 +109,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/) { temp = rk[3]; rk[4] = rk[0] ^ - (Te4[GETBYTE(temp, 2)] & 0xff000000) ^ - (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^ - (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^ + (Te2[GETBYTE(temp, 2)] & 0xff000000) ^ + (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^ rcon_[i]; rk[5] = rk[1] ^ rk[4]; rk[6] = rk[2] ^ rk[5]; @@ -128,10 +128,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/) { temp = rk[ 5]; rk[ 6] = rk[ 0] ^ - (Te4[GETBYTE(temp, 2)] & 0xff000000) ^ - (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^ - (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^ + (Te2[GETBYTE(temp, 2)] & 0xff000000) ^ + (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^ rcon_[i]; rk[ 7] = rk[ 1] ^ rk[ 6]; rk[ 8] = rk[ 2] ^ rk[ 7]; @@ -149,10 +149,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/) { temp = rk[ 7]; rk[ 8] = rk[ 0] ^ - (Te4[GETBYTE(temp, 2)] & 0xff000000) ^ - (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^ - (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^ + (Te2[GETBYTE(temp, 2)] & 0xff000000) ^ + (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^ rcon_[i]; rk[ 9] = rk[ 1] ^ rk[ 8]; rk[10] = rk[ 2] ^ rk[ 9]; @@ -161,10 +161,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/) break; temp = rk[11]; rk[12] = rk[ 4] ^ - (Te4[GETBYTE(temp, 3)] & 0xff000000) ^ - (Te4[GETBYTE(temp, 2)] & 0x00ff0000) ^ - (Te4[GETBYTE(temp, 1)] & 0x0000ff00) ^ - (Te4[GETBYTE(temp, 0)] & 0x000000ff); + (Te2[GETBYTE(temp, 3)] & 0xff000000) ^ + (Te3[GETBYTE(temp, 2)] & 0x00ff0000) ^ + (Te0[GETBYTE(temp, 1)] & 0x0000ff00) ^ + (Te1[GETBYTE(temp, 0)] & 0x000000ff); rk[13] = rk[ 5] ^ rk[12]; rk[14] = rk[ 6] ^ rk[13]; rk[15] = rk[ 7] ^ rk[14]; @@ -191,25 +191,25 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/) for (i = 1; i < rounds_; i++) { rk += 4; rk[0] = - Td0[Te4[GETBYTE(rk[0], 3)] & 0xff] ^ - Td1[Te4[GETBYTE(rk[0], 2)] & 0xff] ^ - Td2[Te4[GETBYTE(rk[0], 1)] & 0xff] ^ - Td3[Te4[GETBYTE(rk[0], 0)] & 0xff]; + Td0[Te1[GETBYTE(rk[0], 3)] & 0xff] ^ + Td1[Te1[GETBYTE(rk[0], 2)] & 0xff] ^ + Td2[Te1[GETBYTE(rk[0], 1)] & 0xff] ^ + Td3[Te1[GETBYTE(rk[0], 0)] & 0xff]; rk[1] = - Td0[Te4[GETBYTE(rk[1], 3)] & 0xff] ^ - Td1[Te4[GETBYTE(rk[1], 2)] & 0xff] ^ - Td2[Te4[GETBYTE(rk[1], 1)] & 0xff] ^ - Td3[Te4[GETBYTE(rk[1], 0)] & 0xff]; + Td0[Te1[GETBYTE(rk[1], 3)] & 0xff] ^ + Td1[Te1[GETBYTE(rk[1], 2)] & 0xff] ^ + Td2[Te1[GETBYTE(rk[1], 1)] & 0xff] ^ + Td3[Te1[GETBYTE(rk[1], 0)] & 0xff]; rk[2] = - Td0[Te4[GETBYTE(rk[2], 3)] & 0xff] ^ - Td1[Te4[GETBYTE(rk[2], 2)] & 0xff] ^ - Td2[Te4[GETBYTE(rk[2], 1)] & 0xff] ^ - Td3[Te4[GETBYTE(rk[2], 0)] & 0xff]; + Td0[Te1[GETBYTE(rk[2], 3)] & 0xff] ^ + Td1[Te1[GETBYTE(rk[2], 2)] & 0xff] ^ + Td2[Te1[GETBYTE(rk[2], 1)] & 0xff] ^ + Td3[Te1[GETBYTE(rk[2], 0)] & 0xff]; rk[3] = - Td0[Te4[GETBYTE(rk[3], 3)] & 0xff] ^ - Td1[Te4[GETBYTE(rk[3], 2)] & 0xff] ^ - Td2[Te4[GETBYTE(rk[3], 1)] & 0xff] ^ - Td3[Te4[GETBYTE(rk[3], 0)] & 0xff]; + Td0[Te1[GETBYTE(rk[3], 3)] & 0xff] ^ + Td1[Te1[GETBYTE(rk[3], 2)] & 0xff] ^ + Td2[Te1[GETBYTE(rk[3], 1)] & 0xff] ^ + Td3[Te1[GETBYTE(rk[3], 0)] & 0xff]; } } } @@ -244,6 +244,7 @@ void AES::encrypt(const byte* inBlock, const byte* xorBlock, s2 ^= rk[2]; s3 ^= rk[3]; + s0 |= PreFetchTe(); /* * Nr - 1 full rounds: */ @@ -312,28 +313,28 @@ void AES::encrypt(const byte* inBlock, const byte* xorBlock, */ s0 = - (Te4[GETBYTE(t0, 3)] & 0xff000000) ^ - (Te4[GETBYTE(t1, 2)] & 0x00ff0000) ^ - (Te4[GETBYTE(t2, 1)] & 0x0000ff00) ^ - (Te4[GETBYTE(t3, 0)] & 0x000000ff) ^ + (Te2[GETBYTE(t0, 3)] & 0xff000000) ^ + (Te3[GETBYTE(t1, 2)] & 0x00ff0000) ^ + (Te0[GETBYTE(t2, 1)] & 0x0000ff00) ^ + (Te1[GETBYTE(t3, 0)] & 0x000000ff) ^ rk[0]; s1 = - (Te4[GETBYTE(t1, 3)] & 0xff000000) ^ - (Te4[GETBYTE(t2, 2)] & 0x00ff0000) ^ - (Te4[GETBYTE(t3, 1)] & 0x0000ff00) ^ - (Te4[GETBYTE(t0, 0)] & 0x000000ff) ^ + (Te2[GETBYTE(t1, 3)] & 0xff000000) ^ + (Te3[GETBYTE(t2, 2)] & 0x00ff0000) ^ + (Te0[GETBYTE(t3, 1)] & 0x0000ff00) ^ + (Te1[GETBYTE(t0, 0)] & 0x000000ff) ^ rk[1]; s2 = - (Te4[GETBYTE(t2, 3)] & 0xff000000) ^ - (Te4[GETBYTE(t3, 2)] & 0x00ff0000) ^ - (Te4[GETBYTE(t0, 1)] & 0x0000ff00) ^ - (Te4[GETBYTE(t1, 0)] & 0x000000ff) ^ + (Te2[GETBYTE(t2, 3)] & 0xff000000) ^ + (Te3[GETBYTE(t3, 2)] & 0x00ff0000) ^ + (Te0[GETBYTE(t0, 1)] & 0x0000ff00) ^ + (Te1[GETBYTE(t1, 0)] & 0x000000ff) ^ rk[2]; s3 = - (Te4[GETBYTE(t3, 3)] & 0xff000000) ^ - (Te4[GETBYTE(t0, 2)] & 0x00ff0000) ^ - (Te4[GETBYTE(t1, 1)] & 0x0000ff00) ^ - (Te4[GETBYTE(t2, 0)] & 0x000000ff) ^ + (Te2[GETBYTE(t3, 3)] & 0xff000000) ^ + (Te3[GETBYTE(t0, 2)] & 0x00ff0000) ^ + (Te0[GETBYTE(t1, 1)] & 0x0000ff00) ^ + (Te1[GETBYTE(t2, 0)] & 0x000000ff) ^ rk[3]; @@ -358,6 +359,8 @@ void AES::decrypt(const byte* inBlock, const byte* xorBlock, s2 ^= rk[2]; s3 ^= rk[3]; + s0 |= PreFetchTd(); + /* * Nr - 1 full rounds: */ @@ -423,29 +426,32 @@ void AES::decrypt(const byte* inBlock, const byte* xorBlock, * apply last round and * map cipher state to byte array block: */ + + t0 |= PreFetchCTd4(); + s0 = - (Td4[GETBYTE(t0, 3)] & 0xff000000) ^ - (Td4[GETBYTE(t3, 2)] & 0x00ff0000) ^ - (Td4[GETBYTE(t2, 1)] & 0x0000ff00) ^ - (Td4[GETBYTE(t1, 0)] & 0x000000ff) ^ + ((word32)CTd4[GETBYTE(t0, 3)] << 24) ^ + ((word32)CTd4[GETBYTE(t3, 2)] << 16) ^ + ((word32)CTd4[GETBYTE(t2, 1)] << 8) ^ + ((word32)CTd4[GETBYTE(t1, 0)]) ^ rk[0]; s1 = - (Td4[GETBYTE(t1, 3)] & 0xff000000) ^ - (Td4[GETBYTE(t0, 2)] & 0x00ff0000) ^ - (Td4[GETBYTE(t3, 1)] & 0x0000ff00) ^ - (Td4[GETBYTE(t2, 0)] & 0x000000ff) ^ + ((word32)CTd4[GETBYTE(t1, 3)] << 24) ^ + ((word32)CTd4[GETBYTE(t0, 2)] << 16) ^ + ((word32)CTd4[GETBYTE(t3, 1)] << 8) ^ + ((word32)CTd4[GETBYTE(t2, 0)]) ^ rk[1]; s2 = - (Td4[GETBYTE(t2, 3)] & 0xff000000) ^ - (Td4[GETBYTE(t1, 2)] & 0x00ff0000) ^ - (Td4[GETBYTE(t0, 1)] & 0x0000ff00) ^ - (Td4[GETBYTE(t3, 0)] & 0x000000ff) ^ + ((word32)CTd4[GETBYTE(t2, 3)] << 24 ) ^ + ((word32)CTd4[GETBYTE(t1, 2)] << 16 ) ^ + ((word32)CTd4[GETBYTE(t0, 1)] << 8 ) ^ + ((word32)CTd4[GETBYTE(t3, 0)]) ^ rk[2]; s3 = - (Td4[GETBYTE(t3, 3)] & 0xff000000) ^ - (Td4[GETBYTE(t2, 2)] & 0x00ff0000) ^ - (Td4[GETBYTE(t1, 1)] & 0x0000ff00) ^ - (Td4[GETBYTE(t0, 0)] & 0x000000ff) ^ + ((word32)CTd4[GETBYTE(t3, 3)] << 24) ^ + ((word32)CTd4[GETBYTE(t2, 2)] << 16) ^ + ((word32)CTd4[GETBYTE(t1, 1)] << 8) ^ + ((word32)CTd4[GETBYTE(t0, 0)]) ^ rk[3]; gpBlock::Put(xorBlock, outBlock)(s0)(s1)(s2)(s3); @@ -1826,18 +1832,52 @@ const word32 AES::Td[5][256] = { } }; +const byte AES::CTd4[256] = +{ + 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U, + 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU, + 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U, + 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU, + 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU, + 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU, + 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U, + 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U, + 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U, + 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U, + 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU, + 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U, + 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU, + 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U, + 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U, + 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU, + 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU, + 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U, + 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U, + 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU, + 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U, + 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU, + 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U, + 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U, + 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U, + 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU, + 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU, + 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU, + 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U, + 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U, + 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U, + 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU, +}; + const word32* AES::Te0 = AES::Te[0]; const word32* AES::Te1 = AES::Te[1]; const word32* AES::Te2 = AES::Te[2]; const word32* AES::Te3 = AES::Te[3]; -const word32* AES::Te4 = AES::Te[4]; const word32* AES::Td0 = AES::Td[0]; const word32* AES::Td1 = AES::Td[1]; const word32* AES::Td2 = AES::Td[2]; const word32* AES::Td3 = AES::Td[3]; -const word32* AES::Td4 = AES::Td[4]; diff --git a/extra/yassl/taocrypt/src/asn.cpp b/extra/yassl/taocrypt/src/asn.cpp index 0474e7c21d596..80bcd612d27fc 100644 --- a/extra/yassl/taocrypt/src/asn.cpp +++ b/extra/yassl/taocrypt/src/asn.cpp @@ -1219,17 +1219,17 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz) } word32 rLen = GetLength(source); if (rLen != 20) { - if (rLen == 21) { // zero at front, eat + while (rLen > 20 && source.remaining() > 0) { // zero's at front, eat source.next(); --rLen; } - else if (rLen == 19) { // add zero to front so 20 bytes + if (rLen < 20) { // add zero's to front so 20 bytes + word32 tmpLen = rLen; + while (tmpLen < 20) { decoded[0] = 0; decoded++; + tmpLen++; } - else { - source.SetError(DSA_SZ_E); - return 0; } } memcpy(decoded, source.get_buffer() + source.get_index(), rLen); @@ -1242,17 +1242,17 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz) } word32 sLen = GetLength(source); if (sLen != 20) { - if (sLen == 21) { - source.next(); // zero at front, eat + while (sLen > 20 && source.remaining() > 0) { + source.next(); // zero's at front, eat --sLen; } - else if (sLen == 19) { - decoded[rLen] = 0; // add zero to front so 20 bytes + if (sLen < 20) { // add zero's to front so 20 bytes + word32 tmpLen = sLen; + while (tmpLen < 20) { + decoded[rLen] = 0; decoded++; + tmpLen++; } - else { - source.SetError(DSA_SZ_E); - return 0; } } memcpy(decoded + rLen, source.get_buffer() + source.get_index(), sLen); diff --git a/extra/yassl/taocrypt/src/dsa.cpp b/extra/yassl/taocrypt/src/dsa.cpp index 72221441b2bdd..fda01881df5b2 100644 --- a/extra/yassl/taocrypt/src/dsa.cpp +++ b/extra/yassl/taocrypt/src/dsa.cpp @@ -172,6 +172,7 @@ word32 DSA_Signer::Sign(const byte* sha_digest, byte* sig, const Integer& q = key_.GetSubGroupOrder(); const Integer& g = key_.GetSubGroupGenerator(); const Integer& x = key_.GetPrivatePart(); + byte* tmpPtr = sig; // initial signature output Integer k(rng, 1, q - 1); @@ -187,22 +188,23 @@ word32 DSA_Signer::Sign(const byte* sha_digest, byte* sig, return (word32) -1; int rSz = r_.ByteCount(); + int tmpSz = rSz; - if (rSz == 19) { - sig[0] = 0; - sig++; + while (tmpSz++ < SHA::DIGEST_SIZE) { + *sig++ = 0; } r_.Encode(sig, rSz); + sig = tmpPtr + SHA::DIGEST_SIZE; // advance sig output to s int sSz = s_.ByteCount(); + tmpSz = sSz; - if (sSz == 19) { - sig[rSz] = 0; - sig++; + while (tmpSz++ < SHA::DIGEST_SIZE) { + *sig++ = 0; } - s_.Encode(sig + rSz, sSz); + s_.Encode(sig, sSz); return 40; } diff --git a/extra/yassl/taocrypt/src/integer.cpp b/extra/yassl/taocrypt/src/integer.cpp index fb8d9276bd9f8..dd8425396eda2 100644 --- a/extra/yassl/taocrypt/src/integer.cpp +++ b/extra/yassl/taocrypt/src/integer.cpp @@ -193,8 +193,9 @@ DWord() {} "a" (a), "rm" (b) : "cc"); #elif defined(__mips64) - __asm__("dmultu %2,%3" : "=d" (r.halfs_.high), "=l" (r.halfs_.low) - : "r" (a), "r" (b)); + unsigned __int128 t = (unsigned __int128) a * b; + r.halfs_.high = t >> 64; + r.halfs_.low = (word) t; #elif defined(_M_IX86) // for testing diff --git a/extra/yassl/taocrypt/test/test.cpp b/extra/yassl/taocrypt/test/test.cpp index c23d981924d8a..b07a9eb9f29d5 100644 --- a/extra/yassl/taocrypt/test/test.cpp +++ b/extra/yassl/taocrypt/test/test.cpp @@ -1281,6 +1281,9 @@ int dsa_test() if (!verifier.Verify(digest, decoded)) return -90; + if (!verifier.Verify(digest, signature)) + return -91; + return 0; } diff --git a/extra/yassl/testsuite/test.hpp b/extra/yassl/testsuite/test.hpp index 5374edd0e2ad2..a65a212cf995d 100644 --- a/extra/yassl/testsuite/test.hpp +++ b/extra/yassl/testsuite/test.hpp @@ -22,7 +22,6 @@ #define yaSSL_TEST_HPP #include "runtime.hpp" -#include "openssl/ssl.h" /* openssl compatibility test */ #include "error.hpp" #include #include @@ -56,6 +55,7 @@ #endif #define SOCKET_T int #endif /* _WIN32 */ +#include "openssl/ssl.h" /* openssl compatibility test */ #ifdef _MSC_VER diff --git a/include/byte_order_generic_x86.h b/include/byte_order_generic_x86.h index 0a71a17829b70..a97dd0f43a37f 100644 --- a/include/byte_order_generic_x86.h +++ b/include/byte_order_generic_x86.h @@ -27,19 +27,9 @@ ((uint32) (uchar) (A)[0]))) #define sint4korr(A) (*((const long *) (A))) #define uint2korr(A) (*((const uint16 *) (A))) - -/* - Attention: Please, note, uint3korr reads 4 bytes (not 3)! - It means, that you have to provide enough allocated space. -*/ -#if defined(HAVE_valgrind) && !defined(_WIN32) #define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\ (((uint32) ((uchar) (A)[1])) << 8) +\ (((uint32) ((uchar) (A)[2])) << 16)) -#else -#define uint3korr(A) (long) (*((const unsigned int *) (A)) & 0xFFFFFF) -#endif - #define uint4korr(A) (*((const uint32 *) (A))) #define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\ (((uint32) ((uchar) (A)[1])) << 8) +\ diff --git a/include/byte_order_generic_x86_64.h b/include/byte_order_generic_x86_64.h index b6b0c5d8ea582..8c7493965a996 100644 --- a/include/byte_order_generic_x86_64.h +++ b/include/byte_order_generic_x86_64.h @@ -27,17 +27,9 @@ ((uint32) (uchar) (A)[0]))) #define sint4korr(A) (int32) (*((int32 *) (A))) #define uint2korr(A) (uint16) (*((uint16 *) (A))) -/* - Attention: Please, note, uint3korr reads 4 bytes (not 3)! - It means, that you have to provide enough allocated space. -*/ -#if defined(HAVE_valgrind) && !defined(_WIN32) #define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\ (((uint32) ((uchar) (A)[1])) << 8) +\ (((uint32) ((uchar) (A)[2])) << 16)) -#else -#define uint3korr(A) (uint32) (*((unsigned int *) (A)) & 0xFFFFFF) -#endif #define uint4korr(A) (uint32) (*((uint32 *) (A))) #define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\ (((uint32) ((uchar) (A)[1])) << 8) +\ diff --git a/include/my_global.h b/include/my_global.h index 191e08d9218ca..6222467901e67 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -888,8 +888,7 @@ typedef long long my_ptrdiff_t; and related routines are refactored. */ -#define my_offsetof(TYPE, MEMBER) \ - ((size_t)((char *)&(((TYPE *)0x10)->MEMBER) - (char*)0x10)) +#define my_offsetof(TYPE, MEMBER) PTR_BYTE_DIFF(&((TYPE *)0x10)->MEMBER, 0x10) #define NullS (char *) 0 diff --git a/include/my_sys.h b/include/my_sys.h index a0b7f4cc554e0..f6bf57e50a4e8 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2010, 2013, Monty Program Ab. + Copyright (c) 2010, 2016, Monty Program Ab. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -271,7 +271,7 @@ extern my_bool my_use_symdir; extern ulong my_default_record_cache_size; extern my_bool my_disable_locking, my_disable_async_io, my_disable_flush_key_blocks, my_disable_symlinks; -extern my_bool my_disable_sync; +extern my_bool my_disable_sync, my_disable_copystat_in_redel; extern char wild_many,wild_one,wild_prefix; extern const char *charsets_dir; extern my_bool timed_mutexes; diff --git a/libmysql/libmysql.c b/libmysql/libmysql.c index 446f1da0b0c7d..3a08ea26b1d57 100644 --- a/libmysql/libmysql.c +++ b/libmysql/libmysql.c @@ -450,8 +450,9 @@ void read_user_name(char *name) void read_user_name(char *name) { - char *str=getenv("USER"); /* ODBC will send user variable */ - strmake(name,str ? str : "ODBC", USERNAME_LENGTH); + DWORD len= USERNAME_LENGTH; + if (!GetUserName(name, &len)) + strmov(name,"UNKNOWN_USER"); } #endif diff --git a/mysql-test/extra/binlog_tests/database.test b/mysql-test/extra/binlog_tests/database.test index 82e8b3963570d..17f8e069fa3ce 100644 --- a/mysql-test/extra/binlog_tests/database.test +++ b/mysql-test/extra/binlog_tests/database.test @@ -52,7 +52,7 @@ eval SELECT 'hello' INTO OUTFILE 'fake_file.$prefix'; # Use '/' instead of '\' in the error message. On windows platform, dir is # formed with '\'. ---replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /File exists/Directory not empty/ +--replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /93/39/ /17/39/ /247/39/ /File exists/Directory not empty/ --error 1010 DROP DATABASE testing_1; let $wait_binlog_event= DROP TABLE IF EXIST; diff --git a/mysql-test/include/index_merge2.inc b/mysql-test/include/index_merge2.inc index c50a45a9923d9..03afa49d323f0 100644 --- a/mysql-test/include/index_merge2.inc +++ b/mysql-test/include/index_merge2.inc @@ -341,6 +341,7 @@ while ($1) alter table t1 add index i2(key2); alter table t1 add index i3(key3); update t1 set key2=key1,key3=key1; +analyze table t1; # to test the bug, the following must use "sort_union": --replace_column 9 REF diff --git a/mysql-test/include/search_pattern_in_file.inc b/mysql-test/include/search_pattern_in_file.inc index 0d09cdcd36efb..84237026ed06e 100644 --- a/mysql-test/include/search_pattern_in_file.inc +++ b/mysql-test/include/search_pattern_in_file.inc @@ -60,12 +60,12 @@ perl; use strict; - my $search_file= $ENV{'SEARCH_FILE'} or die "SEARCH_FILE not set"; - my $search_pattern= $ENV{'SEARCH_PATTERN'} or die "SEARCH_PATTERN not set"; - my $search_range= $ENV{'SEARCH_RANGE'}; + my $search_file= $ENV{'SEARCH_FILE'} or die "SEARCH_FILE not set"; + my $search_pattern= $ENV{'SEARCH_PATTERN'} or die "SEARCH_PATTERN not set"; + my $search_range= $ENV{'SEARCH_RANGE'}; my $file_content; $search_range= 50000 unless $search_range =~ /-?[0-9]+/; - open(FILE, "$search_file") or die("Unable to open '$search_file': $!\n"); + open(FILE, '<', $search_file) or die("Unable to open '$search_file': $!\n"); if ($search_range >= 0) { read(FILE, $file_content, $search_range, 0); } else { @@ -75,7 +75,10 @@ perl; read(FILE, $file_content, -$search_range, 0); } close(FILE); - if ( not $file_content =~ m{$search_pattern} ) { - die("# ERROR: The file '$search_file' does not contain the expected pattern $search_pattern\n->$file_content<-\n"); + $search_file =~ s{^.*?([^/\\]+)$}{$1}; + if ($file_content =~ m{$search_pattern}) { + print "FOUND /$search_pattern/ in $search_file\n" + } else { + print "NOT FOUND /$search_pattern/ in $search_file\n" } EOF diff --git a/mysql-test/lib/My/CoreDump.pm b/mysql-test/lib/My/CoreDump.pm index 0e90967ef9545..f9f7b3d8d4b23 100644 --- a/mysql-test/lib/My/CoreDump.pm +++ b/mysql-test/lib/My/CoreDump.pm @@ -261,11 +261,7 @@ sub show { # On Windows, rely on cdb to be there... if (IS_WINDOWS) { - # Starting cdb is unsafe when used with --parallel > 1 option - if ( $parallel < 2 ) - { - _cdb($core_name); - } + _cdb($core_name); return; } diff --git a/mysql-test/lib/My/Platform.pm b/mysql-test/lib/My/Platform.pm index 1776f1008daa6..110cf8a20e0ed 100644 --- a/mysql-test/lib/My/Platform.pm +++ b/mysql-test/lib/My/Platform.pm @@ -24,7 +24,7 @@ use File::Path; use base qw(Exporter); our @EXPORT= qw(IS_CYGWIN IS_WINDOWS IS_WIN32PERL native_path posix_path mixed_path - check_socket_path_length process_alive); + check_socket_path_length process_alive open_for_append); BEGIN { if ($^O eq "cygwin") { @@ -161,4 +161,51 @@ sub process_alive { } + +use Symbol qw( gensym ); + +use if $^O eq 'MSWin32', 'Win32API::File', qw( CloseHandle CreateFile GetOsFHandle OsFHandleOpen OPEN_ALWAYS FILE_APPEND_DATA + FILE_SHARE_READ FILE_SHARE_WRITE FILE_SHARE_DELETE ); +use if $^O eq 'MSWin32', 'Win32::API'; + +use constant WIN32API_FILE_NULL => []; + +# Open a file for append +# On Windows we use CreateFile with FILE_APPEND_DATA +# to insure that writes are atomic, not interleaved +# with writes by another processes. +sub open_for_append +{ + my ($file) = @_; + my $fh = gensym(); + + if (IS_WIN32PERL) + { + my $handle; + if (!($handle = CreateFile( + $file, + FILE_APPEND_DATA(), + FILE_SHARE_READ()|FILE_SHARE_WRITE()|FILE_SHARE_DELETE(), + WIN32API_FILE_NULL, + OPEN_ALWAYS(),# Create if doesn't exist. + 0, + WIN32API_FILE_NULL, + ))) + { + return undef; + } + + if (!OsFHandleOpen($fh, $handle, 'wat')) + { + CloseHandle($handle); + return undef; + } + return $fh; + } + + open($fh,">>",$file) or return undef; + return $fh; +} + + 1; diff --git a/mysql-test/lib/mtr_cases.pm b/mysql-test/lib/mtr_cases.pm index 124aff92895ed..5ec7553674c00 100644 --- a/mysql-test/lib/mtr_cases.pm +++ b/mysql-test/lib/mtr_cases.pm @@ -60,8 +60,6 @@ use My::Test; use My::Find; use My::Suite; -require "mtr_misc.pl"; - # locate plugin suites, depending on whether it's a build tree or installed my @plugin_suitedirs; my $plugin_suitedir_regex; @@ -1122,7 +1120,7 @@ sub get_tags_from_file($$) { $file_to_tags{$file}= $tags; $file_to_master_opts{$file}= $master_opts; $file_to_slave_opts{$file}= $slave_opts; - $file_combinations{$file}= [ uniq(@combinations) ]; + $file_combinations{$file}= [ ::uniq(@combinations) ]; $file_in_overlay{$file} = 1 if $in_overlay; return @{$tags}; } diff --git a/mysql-test/lib/mtr_io.pl b/mysql-test/lib/mtr_io.pl index 8c2803f042794..0de4d9612acee 100644 --- a/mysql-test/lib/mtr_io.pl +++ b/mysql-test/lib/mtr_io.pl @@ -21,6 +21,7 @@ use strict; use Carp; +use My::Platform; sub mtr_fromfile ($); sub mtr_tofile ($@); @@ -45,10 +46,10 @@ ($) sub mtr_tofile ($@) { my $file= shift; - - open(FILE,">>",$file) or mtr_error("can't open file \"$file\": $!"); - print FILE join("", @_); - close FILE; + my $fh= open_for_append $file; + mtr_error("can't open file \"$file\": $!") unless defined($fh); + print $fh join("", @_); + close $fh; } diff --git a/mysql-test/lib/mtr_report.pm b/mysql-test/lib/mtr_report.pm index 9ab82c454ed49..97ace54f0fbcc 100644 --- a/mysql-test/lib/mtr_report.pm +++ b/mysql-test/lib/mtr_report.pm @@ -34,7 +34,6 @@ use mtr_match; use My::Platform; use POSIX qw[ _exit ]; use IO::Handle qw[ flush ]; -require "mtr_io.pl"; use mtr_results; my $tot_real_time= 0; @@ -92,7 +91,7 @@ sub mtr_report_test_passed ($) { my $timer_str= ""; if ( $timer and -f "$::opt_vardir/log/timer" ) { - $timer_str= mtr_fromfile("$::opt_vardir/log/timer"); + $timer_str= ::mtr_fromfile("$::opt_vardir/log/timer"); $tinfo->{timer}= $timer_str; resfile_test_info('duration', $timer_str) if $::opt_resfile; } diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 752b62fb915bc..b6263415d9d5f 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -102,11 +102,11 @@ BEGIN use IO::Socket::INET; use IO::Select; -require "lib/mtr_process.pl"; -require "lib/mtr_io.pl"; -require "lib/mtr_gcov.pl"; -require "lib/mtr_gprof.pl"; -require "lib/mtr_misc.pl"; +require "mtr_process.pl"; +require "mtr_io.pl"; +require "mtr_gcov.pl"; +require "mtr_gprof.pl"; +require "mtr_misc.pl"; $SIG{INT}= sub { mtr_error("Got ^C signal"); }; $SIG{HUP}= sub { mtr_error("Hangup detected on controlling terminal"); }; diff --git a/mysql-test/r/alter_table.result b/mysql-test/r/alter_table.result index e572fdb197cc8..2e371ac6ae6e3 100644 --- a/mysql-test/r/alter_table.result +++ b/mysql-test/r/alter_table.result @@ -2021,3 +2021,58 @@ ALTER TABLE t1 ADD PRIMARY KEY IF NOT EXISTS event_id (event_id,market_id); Warnings: Note 1061 Multiple primary key defined DROP TABLE t1; +# +# MDEV-11126 Crash while altering persistent virtual column +# +CREATE TABLE `tab1` ( +`id` bigint(20) NOT NULL AUTO_INCREMENT, +`field2` set('option1','option2','option3','option4') NOT NULL, +`field3` set('option1','option2','option3','option4','option5') NOT NULL, +`field4` set('option1','option2','option3','option4') NOT NULL, +`field5` varchar(32) NOT NULL, +`field6` varchar(32) NOT NULL, +`field7` varchar(32) NOT NULL, +`field8` varchar(32) NOT NULL, +`field9` int(11) NOT NULL DEFAULT '1', +`field10` varchar(16) NOT NULL, +`field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1', +`v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT, +PRIMARY KEY (`id`) +) DEFAULT CHARSET=latin1; +ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128); +SHOW CREATE TABLE `tab1`; +Table Create Table +tab1 CREATE TABLE `tab1` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT, + `field2` set('option1','option2','option3','option4') NOT NULL, + `field3` set('option1','option2','option3','option4','option5') NOT NULL, + `field4` set('option1','option2','option3','option4') NOT NULL, + `field5` varchar(32) NOT NULL, + `field6` varchar(32) NOT NULL, + `field7` varchar(32) NOT NULL, + `field8` varchar(32) NOT NULL, + `field9` int(11) NOT NULL DEFAULT '1', + `field10` varchar(16) NOT NULL, + `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1', + `v_col` varchar(128) DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT; +SHOW CREATE TABLE `tab1`; +Table Create Table +tab1 CREATE TABLE `tab1` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT, + `field2` set('option1','option2','option3','option4') NOT NULL, + `field3` set('option1','option2','option3','option4','option5') NOT NULL, + `field4` set('option1','option2','option3','option4') NOT NULL, + `field5` varchar(32) NOT NULL, + `field6` varchar(32) NOT NULL, + `field7` varchar(32) NOT NULL, + `field8` varchar(32) NOT NULL, + `field9` int(11) NOT NULL DEFAULT '1', + `field10` varchar(16) NOT NULL, + `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1', + `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT, + PRIMARY KEY (`id`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE `tab1`; diff --git a/mysql-test/r/contributors.result b/mysql-test/r/contributors.result index 918ceaa496fe1..f3f5e227d3a98 100644 --- a/mysql-test/r/contributors.result +++ b/mysql-test/r/contributors.result @@ -9,6 +9,7 @@ Acronis http://www.acronis.com Silver Sponsor of the MariaDB Foundation Auttomattic https://automattic.com Bronze Sponsor of the MariaDB Foundation Verkkokauppa.com https://virtuozzo.com Bronze Sponsor of the MariaDB Foundation Virtuozzo https://virtuozzo.com/ Bronze Sponsor of the MariaDB Foundation +Tencent Game DBA http://tencentdba.com/about/ Bronze Sponsor of the MariaDB Foundation Google USA Sponsoring encryption, parallel replication and GTID Facebook USA Sponsoring non-blocking API, LIMIT ROWS EXAMINED etc Ronald Bradford Brisbane, Australia EFF contribution for UC2006 Auction diff --git a/mysql-test/r/create_or_replace.result b/mysql-test/r/create_or_replace.result index 3a894e9fcb140..a43dc2eaca447 100644 --- a/mysql-test/r/create_or_replace.result +++ b/mysql-test/r/create_or_replace.result @@ -442,3 +442,14 @@ KILL QUERY con_id; ERROR 70100: Query execution was interrupted drop table t1; DROP TABLE t2; +# +# MDEV-10824 - Crash in CREATE OR REPLACE TABLE t1 AS SELECT spfunc() +# +CREATE TABLE t1(a INT); +CREATE FUNCTION f1() RETURNS VARCHAR(16383) RETURN 'test'; +CREATE OR REPLACE TABLE t1 AS SELECT f1(); +LOCK TABLE t1 WRITE; +CREATE OR REPLACE TABLE t1 AS SELECT f1(); +UNLOCK TABLES; +DROP FUNCTION f1; +DROP TABLE t1; diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result index 0ec89a50c0f1a..3269e6c900eba 100644 --- a/mysql-test/r/ctype_utf32.result +++ b/mysql-test/r/ctype_utf32.result @@ -1658,6 +1658,9 @@ CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061)) SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061)); CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061)) 1 +select hex(lower(cast(0xffff0000 as char character set utf32))) as c; +c +FFFF0000 # # End of 5.5 tests # diff --git a/mysql-test/r/drop.result b/mysql-test/r/drop.result index c23ffbe327b63..c25ae9e305517 100644 --- a/mysql-test/r/drop.result +++ b/mysql-test/r/drop.result @@ -209,3 +209,9 @@ INSERT INTO table1 VALUES (1); ERROR 42S02: Unknown table 't.notable' DROP TABLE table1,table2; # End BUG#34750 +# +# MDEV-11105 Table named 'db' has weird side effect. +# +CREATE DATABASE mysqltest; +CREATE TABLE mysqltest.db(id INT); +DROP DATABASE mysqltest; diff --git a/mysql-test/r/group_min_max_innodb.result b/mysql-test/r/group_min_max_innodb.result index 77c74fbc041f9..2803107b97ed6 100644 --- a/mysql-test/r/group_min_max_innodb.result +++ b/mysql-test/r/group_min_max_innodb.result @@ -286,3 +286,19 @@ F 28 28 F 29 29 F 30 30 DROP TABLE t0,t1,t2; +# +# MDEV-MariaDB daemon leaks memory with specific query +# +CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL, +`language_id` int(11) unsigned NOT NULL DEFAULT '1' +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0', +`serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8; +insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000)); +SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0; +translation_resources serialized_c +NULL cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +NULL bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb +NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +drop table t1,t2; diff --git a/mysql-test/r/index_merge_innodb.result b/mysql-test/r/index_merge_innodb.result index 5202c79f3c749..5bf56e213abbb 100644 --- a/mysql-test/r/index_merge_innodb.result +++ b/mysql-test/r/index_merge_innodb.result @@ -311,6 +311,9 @@ set @d=@d*2; alter table t1 add index i2(key2); alter table t1 add index i3(key3); update t1 set key2=key1,key3=key1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40); id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where diff --git a/mysql-test/r/index_merge_myisam.result b/mysql-test/r/index_merge_myisam.result index fcd5eebefa482..c63ed13266255 100644 --- a/mysql-test/r/index_merge_myisam.result +++ b/mysql-test/r/index_merge_myisam.result @@ -1146,6 +1146,9 @@ set @d=@d*2; alter table t1 add index i2(key2); alter table t1 add index i3(key3); update t1 set key2=key1,key3=key1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40); id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where diff --git a/mysql-test/r/information_schema.result b/mysql-test/r/information_schema.result index 9d61850fae674..852b520d1fc4e 100644 --- a/mysql-test/r/information_schema.result +++ b/mysql-test/r/information_schema.result @@ -1004,19 +1004,19 @@ show grants; Grants for user3@localhost GRANT USAGE ON *.* TO 'user3'@'localhost' GRANT SELECT ON `mysqltest`.* TO 'user3'@'localhost' -select * from information_schema.column_privileges where grantee like '%user%' +select * from information_schema.column_privileges where grantee like '\'user%' order by grantee; GRANTEE TABLE_CATALOG TABLE_SCHEMA TABLE_NAME COLUMN_NAME PRIVILEGE_TYPE IS_GRANTABLE 'user1'@'localhost' def mysqltest t1 f1 SELECT NO -select * from information_schema.table_privileges where grantee like '%user%' +select * from information_schema.table_privileges where grantee like '\'user%' order by grantee; GRANTEE TABLE_CATALOG TABLE_SCHEMA TABLE_NAME PRIVILEGE_TYPE IS_GRANTABLE 'user2'@'localhost' def mysqltest t2 SELECT NO -select * from information_schema.schema_privileges where grantee like '%user%' +select * from information_schema.schema_privileges where grantee like '\'user%' order by grantee; GRANTEE TABLE_CATALOG TABLE_SCHEMA PRIVILEGE_TYPE IS_GRANTABLE 'user3'@'localhost' def mysqltest SELECT NO -select * from information_schema.user_privileges where grantee like '%user%' +select * from information_schema.user_privileges where grantee like '\'user%' order by grantee; GRANTEE TABLE_CATALOG PRIVILEGE_TYPE IS_GRANTABLE 'user1'@'localhost' def USAGE NO diff --git a/mysql-test/r/lowercase_fs_on.result b/mysql-test/r/lowercase_fs_on.result index a090f46cfbf8b..b844b3f77dde4 100644 --- a/mysql-test/r/lowercase_fs_on.result +++ b/mysql-test/r/lowercase_fs_on.result @@ -1,3 +1,4 @@ # # Bug#20198490 : LOWER_CASE_TABLE_NAMES=0 ON WINDOWS LEADS TO PROBLEMS # +FOUND /\[ERROR\] The server option \'lower_case_table_names\' is configured to use case sensitive table names/ in my_restart.err diff --git a/mysql-test/r/merge.result b/mysql-test/r/merge.result index 89aaf48219e48..31edbc0fdcee7 100644 --- a/mysql-test/r/merge.result +++ b/mysql-test/r/merge.result @@ -3832,6 +3832,23 @@ test.m1 repair error Corrupt # Clean-up. drop tables m1, t1, t4; drop view t3; +# +# MDEV-10424 - Assertion `ticket == __null' failed in +# MDL_request::set_type +# +CREATE TABLE t1 (f1 INT) ENGINE=MyISAM; +CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1); +PREPARE stmt FROM "ANALYZE TABLE tmerge, t1"; +EXECUTE stmt; +Table Op Msg_type Msg_text +test.tmerge analyze note The storage engine for the table doesn't support analyze +test.t1 analyze status Table is already up to date +EXECUTE stmt; +Table Op Msg_type Msg_text +test.tmerge analyze note The storage engine for the table doesn't support analyze +test.t1 analyze status Table is already up to date +DEALLOCATE PREPARE stmt; +DROP TABLE t1, tmerge; End of 5.5 tests # # Additional coverage for refactoring which is made as part diff --git a/mysql-test/r/mysql.result b/mysql-test/r/mysql.result index cb705d285fe3d..dd0129df0d9fe 100644 --- a/mysql-test/r/mysql.result +++ b/mysql-test/r/mysql.result @@ -512,6 +512,14 @@ DROP DATABASE connected_db; create database `aa``bb````cc`; DATABASE() aa`bb``cc +DATABASE() +test +DATABASE() +aa`bb``cc +DATABASE() +test +DATABASE() +aa`bb``cc drop database `aa``bb````cc`; a >>\ndelimiter\n<< diff --git a/mysql-test/r/mysql_not_windows.result b/mysql-test/r/mysql_not_windows.result index d5670a1a9ca38..1df62d9a12dcf 100644 --- a/mysql-test/r/mysql_not_windows.result +++ b/mysql-test/r/mysql_not_windows.result @@ -3,3 +3,9 @@ a 1 End of tests +1 +1 +2 +2 +X +3 diff --git a/mysql-test/r/mysqldump-nl.result b/mysql-test/r/mysqldump-nl.result new file mode 100644 index 0000000000000..6de439bdf3c61 --- /dev/null +++ b/mysql-test/r/mysqldump-nl.result @@ -0,0 +1,126 @@ +create database `mysqltest1 +1tsetlqsym`; +use `mysqltest1 +1tsetlqsym`; +create table `t1 +1t` (`foobar +raboof` int); +create view `v1 +1v` as select * from `t1 +1t`; +create procedure sp() select * from `v1 +1v`; +flush tables; +use test; + +-- +-- Current Database: `mysqltest1 +-- 1tsetlqsym` +-- + +/*!40000 DROP DATABASE IF EXISTS `mysqltest1 +1tsetlqsym`*/; + +CREATE DATABASE /*!32312 IF NOT EXISTS*/ `mysqltest1 +1tsetlqsym` /*!40100 DEFAULT CHARACTER SET latin1 */; + +USE `mysqltest1 +1tsetlqsym`; + +-- +-- Table structure for table `t1 +-- 1t` +-- + +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `t1 +1t` ( + `foobar +raboof` int(11) DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `t1 +-- 1t` +-- + +-- +-- Temporary table structure for view `v1 +-- 1v` +-- + +SET @saved_cs_client = @@character_set_client; +SET character_set_client = utf8; +/*!50001 CREATE TABLE `v1 +1v` ( + `foobar +raboof` tinyint NOT NULL +) ENGINE=MyISAM */; +SET character_set_client = @saved_cs_client; + +-- +-- Dumping routines for database 'mysqltest1 +-- 1tsetlqsym' +-- +/*!50003 SET @saved_cs_client = @@character_set_client */ ; +/*!50003 SET @saved_cs_results = @@character_set_results */ ; +/*!50003 SET @saved_col_connection = @@collation_connection */ ; +/*!50003 SET character_set_client = latin1 */ ; +/*!50003 SET character_set_results = latin1 */ ; +/*!50003 SET collation_connection = latin1_swedish_ci */ ; +/*!50003 SET @saved_sql_mode = @@sql_mode */ ; +/*!50003 SET sql_mode = '' */ ; +DELIMITER ;; +CREATE DEFINER=`root`@`localhost` PROCEDURE `sp`() +select * from `v1 +1v` ;; +DELIMITER ; +/*!50003 SET sql_mode = @saved_sql_mode */ ; +/*!50003 SET character_set_client = @saved_cs_client */ ; +/*!50003 SET character_set_results = @saved_cs_results */ ; +/*!50003 SET collation_connection = @saved_col_connection */ ; + +-- +-- Current Database: `mysqltest1 +-- 1tsetlqsym` +-- + +USE `mysqltest1 +1tsetlqsym`; + +-- +-- Final view structure for view `v1 +-- 1v` +-- + +/*!50001 DROP TABLE IF EXISTS `v1 +1v`*/; +/*!50001 SET @saved_cs_client = @@character_set_client */; +/*!50001 SET @saved_cs_results = @@character_set_results */; +/*!50001 SET @saved_col_connection = @@collation_connection */; +/*!50001 SET character_set_client = latin1 */; +/*!50001 SET character_set_results = latin1 */; +/*!50001 SET collation_connection = latin1_swedish_ci */; +/*!50001 CREATE ALGORITHM=UNDEFINED */ +/*!50013 DEFINER=`root`@`localhost` SQL SECURITY DEFINER */ +/*!50001 VIEW `v1 +1v` AS select `t1 +1t`.`foobar +raboof` AS `foobar +raboof` from `t1 +1t` */; +/*!50001 SET character_set_client = @saved_cs_client */; +/*!50001 SET character_set_results = @saved_cs_results */; +/*!50001 SET collation_connection = @saved_col_connection */; +show tables from `mysqltest1 +1tsetlqsym`; +Tables_in_mysqltest1 +1tsetlqsym +t1 +1t +v1 +1v +drop database `mysqltest1 +1tsetlqsym`; diff --git a/mysql-test/r/mysqldump.result b/mysql-test/r/mysqldump.result index b6de51c8b03f2..cb3c28f42cdf2 100644 --- a/mysql-test/r/mysqldump.result +++ b/mysql-test/r/mysqldump.result @@ -5236,9 +5236,6 @@ SET @@global.log_output="TABLE"; SET @@global.general_log='OFF'; SET @@global.slow_query_log='OFF'; DROP DATABASE mysql; -Warnings: -Error 1146 Table 'mysql.proc' doesn't exist -Error 1146 Table 'mysql.event' doesn't exist SHOW CREATE TABLE mysql.general_log; Table Create Table general_log CREATE TABLE `general_log` ( diff --git a/mysql-test/r/mysqltest.result b/mysql-test/r/mysqltest.result index e258b1d156fd1..fa054d457f966 100644 --- a/mysql-test/r/mysqltest.result +++ b/mysql-test/r/mysqltest.result @@ -269,12 +269,6 @@ source database echo message echo message mysqltest: At line 1: Missing argument in exec -1 -1 -2 -2 -X -3 MySQL "MySQL" MySQL: The diff --git a/mysql-test/r/named_pipe.result b/mysql-test/r/named_pipe.result index ddd48f0ba9166..43fb44beece1d 100644 --- a/mysql-test/r/named_pipe.result +++ b/mysql-test/r/named_pipe.result @@ -2154,3 +2154,4 @@ Privat (Private Nutzung) Mobilfunk Warnings: Warning 1052 Column 'kundentyp' in group statement is ambiguous drop table t1; +FOUND /\[ERROR\] Create named pipe failed/ in second-mysqld.err diff --git a/mysql-test/r/ps.result b/mysql-test/r/ps.result index 517e2d23915fd..eb5c8ca9377ac 100644 --- a/mysql-test/r/ps.result +++ b/mysql-test/r/ps.result @@ -4076,4 +4076,35 @@ id value deallocate prepare stmt; SET SESSION sql_mode = @save_sql_mode; DROP TABLE t1,t2; -# End of 10.0 tests +# +# MDEV-8833: Crash of server on prepared statement with +# conversion to semi-join +# +CREATE TABLE t1 (column1 INT); +INSERT INTO t1 VALUES (3),(9); +CREATE TABLE t2 (column2 INT); +INSERT INTO t2 VALUES (1),(4); +CREATE TABLE t3 (column3 INT); +INSERT INTO t3 VALUES (6),(8); +CREATE TABLE t4 (column4 INT); +INSERT INTO t4 VALUES (2),(5); +PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1 +FROM t1 AS table1 +WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 ) +) AS sq +FROM t3 AS table3, t4 AS table4"; +EXECUTE stmt; +sq +NULL +NULL +NULL +NULL +EXECUTE stmt; +sq +NULL +NULL +NULL +NULL +deallocate prepare stmt; +drop table t1,t2,t3,t4; +# End of 5.5 tests diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result index 620bdc6bd50c2..8fb5cd17c518a 100644 --- a/mysql-test/r/selectivity.result +++ b/mysql-test/r/selectivity.result @@ -1446,3 +1446,74 @@ a b i set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; DROP TABLE t1,t2; set use_stat_tables=@save_use_stat_tables; +# +# Bug mdev-11096: range condition over column without statistical data +# +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1 persistent for columns () indexes (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +explain extended +select * from t1 where col1 > 'b' and col1 < 'e'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 8 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e')) +select * from t1 where col1 > 'b' and col1 < 'e'; +col1 +c +d +drop table t1; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; +# +# Bug mdev-9628: unindexed blob column without min-max statistics +# with optimizer_use_condition_selectivity=3 +# +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +create table t2(col1 text); +insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t2; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +select * from t1 where col1 > 'b' and col1 < 'd'; +col1 +c +explain extended +select * from t1 where col1 > 'b' and col1 < 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 8 28.57 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'd')) +select * from t2 where col1 > 'b' and col1 < 'd'; +col1 +c +explain extended +select * from t2 where col1 > 'b' and col1 < 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 8 100.00 Using where +Warnings: +Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where ((`test`.`t2`.`col1` > 'b') and (`test`.`t2`.`col1` < 'd')) +select * from t2 where col1 < 'b' and col1 > 'd'; +col1 +explain extended +select * from t2 where col1 < 'b' and col1 > 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +Warnings: +Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where 0 +drop table t1,t2; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result index 0acbb465ba852..3d15131dbb55e 100644 --- a/mysql-test/r/selectivity_innodb.result +++ b/mysql-test/r/selectivity_innodb.result @@ -802,9 +802,9 @@ insert into t2 values (2),(3); explain extended select * from t1 where a in ( select b from t2 ) AND ( a > 3 ); id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY t1 ALL NULL NULL NULL NULL 1 0.00 Using where +1 PRIMARY t1 ALL NULL NULL NULL NULL 1 100.00 Using where 1 PRIMARY eq_ref distinct_key distinct_key 4 func 1 100.00 -2 MATERIALIZED t2 ALL NULL NULL NULL NULL 2 0.00 +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 2 100.00 Warnings: Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` > 3)) select * from t1 where a in ( select b from t2 ) AND ( a > 3 ); @@ -1450,6 +1450,77 @@ a b i set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; DROP TABLE t1,t2; set use_stat_tables=@save_use_stat_tables; +# +# Bug mdev-11096: range condition over column without statistical data +# +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1 persistent for columns () indexes (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +explain extended +select * from t1 where col1 > 'b' and col1 < 'e'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 8 100.00 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e')) +select * from t1 where col1 > 'b' and col1 < 'e'; +col1 +c +d +drop table t1; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; +# +# Bug mdev-9628: unindexed blob column without min-max statistics +# with optimizer_use_condition_selectivity=3 +# +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +create table t2(col1 text); +insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t2; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +select * from t1 where col1 > 'b' and col1 < 'd'; +col1 +c +explain extended +select * from t1 where col1 > 'b' and col1 < 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 8 28.57 Using where +Warnings: +Note 1003 select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'd')) +select * from t2 where col1 > 'b' and col1 < 'd'; +col1 +c +explain extended +select * from t2 where col1 > 'b' and col1 < 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 8 100.00 Using where +Warnings: +Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where ((`test`.`t2`.`col1` > 'b') and (`test`.`t2`.`col1` < 'd')) +select * from t2 where col1 < 'b' and col1 > 'd'; +col1 +explain extended +select * from t2 where col1 < 'b' and col1 > 'd'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +Warnings: +Note 1003 select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where 0 +drop table t1,t2; +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; set optimizer_switch=@save_optimizer_switch_for_selectivity_test; set @tmp_ust= @@use_stat_tables; set @tmp_oucs= @@optimizer_use_condition_selectivity; @@ -1536,6 +1607,44 @@ where t1.child_user_id=t3.id and t1.child_group_id is null and t2.lower_group_na parent_id child_group_id child_user_id id lower_group_name directory_id id drop table t1,t2,t3; # +# MDEV-9187: duplicate of bug mdev-9628 +# +set use_stat_tables = preferably; +set optimizer_use_condition_selectivity=3; +CREATE TABLE t1 (f1 char(32)) ENGINE=InnoDB; +INSERT INTO t1 VALUES ('foo'),('bar'),('qux'); +ANALYZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +SELECT * FROM t1 WHERE f1 < 'm'; +f1 +foo +bar +EXPLAIN EXTENDED +SELECT * FROM t1 WHERE f1 < 'm'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 3 72.09 Using where +Warnings: +Note 1003 select `test`.`t1`.`f1` AS `f1` from `test`.`t1` where (`test`.`t1`.`f1` < 'm') +CREATE TABLE t2 (f1 TEXT) ENGINE=InnoDB; +INSERT INTO t2 VALUES ('foo'),('bar'),('qux'); +ANALYZE TABLE t2; +Table Op Msg_type Msg_text +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +SELECT * FROM t2 WHERE f1 <> 'qux'; +f1 +foo +bar +EXPLAIN EXTENDED +SELECT * FROM t2 WHERE f1 <> 'qux'; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 3 100.00 Using where +Warnings: +Note 1003 select `test`.`t2`.`f1` AS `f1` from `test`.`t2` where (`test`.`t2`.`f1` <> 'qux') +DROP TABLE t1,t2; +# # End of 10.0 tests # set use_stat_tables= @tmp_ust; diff --git a/mysql-test/r/type_uint.result b/mysql-test/r/type_uint.result index 10aa2f2f39396..c970f2ff8969c 100644 --- a/mysql-test/r/type_uint.result +++ b/mysql-test/r/type_uint.result @@ -14,6 +14,25 @@ this 0 4294967295 drop table t1; +create table t1 (a bigint unsigned, b mediumint unsigned); +insert t1 values (1,2),(0xffffffffffffffff,0xffffff); +select coalesce(a,b), coalesce(b,a) from t1; +coalesce(a,b) coalesce(b,a) +1 2 +18446744073709551615 16777215 +create table t2 as select a from t1 union select b from t1; +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `a` bigint(20) unsigned DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +select * from t2; +a +1 +18446744073709551615 +2 +16777215 +drop table t1, t2; # # Start of 10.0 tests # diff --git a/mysql-test/r/view.result b/mysql-test/r/view.result index 52c379d03aff6..924b3a11fef1c 100644 --- a/mysql-test/r/view.result +++ b/mysql-test/r/view.result @@ -5432,6 +5432,7 @@ DROP FUNCTION f1; DROP VIEW v1; DROP TABLE t1, t2; create view v1 as select 1; +FOUND /mariadb-version/ in v1.frm drop view v1; # # MDEV-7260: Crash in get_best_combination when executing multi-table diff --git a/mysql-test/r/wait_timeout_not_windows.result b/mysql-test/r/wait_timeout_not_windows.result index df70aa9922160..867787a8ed340 100644 --- a/mysql-test/r/wait_timeout_not_windows.result +++ b/mysql-test/r/wait_timeout_not_windows.result @@ -1,3 +1,4 @@ set global log_warnings=2; set @@wait_timeout=1; +FOUND /Aborted.*Got timeout reading communication packets/ in mysqld.1.err set global log_warnings=@@log_warnings; diff --git a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result index cc2a0373444f8..07e13008e2718 100644 --- a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result +++ b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result @@ -33,6 +33,7 @@ INSERT INTO t1 VALUES(1,'X',1); SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace'; SELECT b FROM t1 LIMIT 3; ERROR HY000: Lost connection to MySQL server during query +FOUND /Wrote log record for ibuf update in place operation/ in my_restart.err CHECK TABLE t1; Table Op Msg_type Msg_text test.t1 check status OK diff --git a/mysql-test/suite/innodb/r/innodb_bug54044.result b/mysql-test/suite/innodb/r/innodb_bug54044.result index d80c451c84128..7d6133adb74e8 100644 --- a/mysql-test/suite/innodb/r/innodb_bug54044.result +++ b/mysql-test/suite/innodb/r/innodb_bug54044.result @@ -6,7 +6,8 @@ table_54044 CREATE TEMPORARY TABLE `table_54044` ( `IF(NULL IS NOT NULL, NULL, NULL)` binary(0) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=latin1 DROP TABLE table_54044; -CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL; +CREATE TABLE tmp ENGINE = INNODB +AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL; SHOW CREATE TABLE tmp; Table Create Table tmp CREATE TABLE `tmp` ( diff --git a/mysql-test/suite/innodb/r/system_tables.result b/mysql-test/suite/innodb/r/system_tables.result new file mode 100644 index 0000000000000..79a24f7e4556e --- /dev/null +++ b/mysql-test/suite/innodb/r/system_tables.result @@ -0,0 +1,8 @@ +alter table mysql.time_zone_name engine=InnoDB; +create table envois3 (starttime datetime) engine=InnoDB; +insert envois3 values ('2008-08-11 22:43:00'); +select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3; +starttime +2008-08-12 02:43:00 +drop table envois3; +alter table mysql.time_zone_name engine=MyISAM; diff --git a/mysql-test/suite/innodb/t/innodb_bug54044.test b/mysql-test/suite/innodb/t/innodb_bug54044.test index aa19c51018cad..61a09375ae174 100644 --- a/mysql-test/suite/innodb/t/innodb_bug54044.test +++ b/mysql-test/suite/innodb/t/innodb_bug54044.test @@ -10,7 +10,10 @@ CREATE TEMPORARY TABLE table_54044 ENGINE = INNODB SHOW CREATE TABLE table_54044; DROP TABLE table_54044; -CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL; +# This 'create table' should pass since it uses a Field_string of size 0. + +CREATE TABLE tmp ENGINE = INNODB + AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL; SHOW CREATE TABLE tmp; DROP TABLE tmp; @@ -23,4 +26,3 @@ FLUSH TABLES; --error 1005 CREATE TEMPORARY TABLE tmp ENGINE=InnoDB AS SELECT VALUES(a) FROM t1; DROP TABLE t1; - diff --git a/mysql-test/suite/innodb/t/system_tables.test b/mysql-test/suite/innodb/t/system_tables.test new file mode 100644 index 0000000000000..90cb8c59fbd1d --- /dev/null +++ b/mysql-test/suite/innodb/t/system_tables.test @@ -0,0 +1,12 @@ +--source include/have_innodb.inc + +# +# MDEV-10775 System table in InnoDB format allowed in MariaDB could lead to crash +# +alter table mysql.time_zone_name engine=InnoDB; +create table envois3 (starttime datetime) engine=InnoDB; +insert envois3 values ('2008-08-11 22:43:00'); +--source include/restart_mysqld.inc +select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3; +drop table envois3; +alter table mysql.time_zone_name engine=MyISAM; diff --git a/mysql-test/suite/perfschema/r/aggregate.result b/mysql-test/suite/perfschema/r/aggregate.result deleted file mode 100644 index c8fa1cc2b2412..0000000000000 --- a/mysql-test/suite/perfschema/r/aggregate.result +++ /dev/null @@ -1,121 +0,0 @@ -"General cleanup" -set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval; -set @@global.aria_checkpoint_interval= 0; -drop table if exists t1; -update performance_schema.setup_instruments set enabled = 'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; -truncate table performance_schema.file_summary_by_event_name; -truncate table performance_schema.file_summary_by_instance; -truncate table performance_schema.socket_summary_by_event_name; -truncate table performance_schema.socket_summary_by_instance; -truncate table performance_schema.events_waits_summary_global_by_event_name; -truncate table performance_schema.events_waits_summary_by_instance; -truncate table performance_schema.events_waits_summary_by_thread_by_event_name; -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments -set enabled = 'YES', timed = 'YES'; -create table t1 ( -id INT PRIMARY KEY, -b CHAR(100) DEFAULT 'initial value') -ENGINE=MyISAM; -insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8); -update performance_schema.setup_instruments SET enabled = 'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; -set @dump_all=FALSE; -"Verifying file aggregate consistency" -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; -EVENT_NAME COUNT_READ SUM(i.COUNT_READ) -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; -EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE) -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; -EVENT_NAME COUNT_READ SUM(i.COUNT_READ) -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; -EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE) -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ)) -OR @dump_all; -EVENT_NAME SUM_NUMBER_OF_BYTES_READ SUM(i.SUM_NUMBER_OF_BYTES_READ) -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE)) -OR @dump_all; -EVENT_NAME SUM_NUMBER_OF_BYTES_WRITE SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -"Verifying waits aggregate consistency (instance)" -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME SUM_TIMER_WAIT SUM(i.SUM_TIMER_WAIT) -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT)) -AND (MIN(i.MIN_TIMER_WAIT) != 0) -OR @dump_all; -EVENT_NAME MIN_TIMER_WAIT MIN(i.MIN_TIMER_WAIT) -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME MAX_TIMER_WAIT MAX(i.MAX_TIMER_WAIT) -"Verifying waits aggregate consistency (thread)" -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME SUM_TIMER_WAIT SUM(t.SUM_TIMER_WAIT) -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT)) -AND (MIN(t.MIN_TIMER_WAIT) != 0) -OR @dump_all; -EVENT_NAME MIN_TIMER_WAIT MIN(t.MIN_TIMER_WAIT) -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT)) -OR @dump_all; -EVENT_NAME MAX_TIMER_WAIT MAX(t.MAX_TIMER_WAIT) -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments -set enabled = 'YES', timed = 'YES'; -drop table test.t1; -set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save; diff --git a/mysql-test/suite/perfschema/t/aggregate.test b/mysql-test/suite/perfschema/t/aggregate.test deleted file mode 100644 index fe30a7b869765..0000000000000 --- a/mysql-test/suite/perfschema/t/aggregate.test +++ /dev/null @@ -1,197 +0,0 @@ -# Tests for PERFORMANCE_SCHEMA -# Verify that statistics aggregated by different criteria are consistent. - ---source include/not_embedded.inc ---source include/have_perfschema.inc - ---echo "General cleanup" - -# MDEV-7187 - test fails sporadically in buildbot -set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval; -set @@global.aria_checkpoint_interval= 0; - ---disable_warnings -drop table if exists t1; ---enable_warnings - -update performance_schema.setup_instruments set enabled = 'NO'; -update performance_schema.setup_consumers set enabled = 'NO'; - -# Cleanup statistics -truncate table performance_schema.file_summary_by_event_name; -truncate table performance_schema.file_summary_by_instance; -truncate table performance_schema.socket_summary_by_event_name; -truncate table performance_schema.socket_summary_by_instance; -truncate table performance_schema.events_waits_summary_global_by_event_name; -truncate table performance_schema.events_waits_summary_by_instance; -truncate table performance_schema.events_waits_summary_by_thread_by_event_name; - -# Start recording data -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments - set enabled = 'YES', timed = 'YES'; - - -create table t1 ( - id INT PRIMARY KEY, - b CHAR(100) DEFAULT 'initial value') - ENGINE=MyISAM; - -insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8); - -# Stop recording data, so the select below don't add noise. -update performance_schema.setup_instruments SET enabled = 'NO'; -# Disable all consumers, for long standing waits -update performance_schema.setup_consumers set enabled = 'NO'; - -# Helper to debug -set @dump_all=FALSE; - -# Note that in general: -# - COUNT/SUM/MAX(file_summary_by_event_name) >= -# COUNT/SUM/MAX(file_summary_by_instance). -# - MIN(file_summary_by_event_name) <= -# MIN(file_summary_by_instance). -# There will be equality only when file instances are not removed, -# aka when a file is not deleted from the file system, -# because doing so removes a row in file_summary_by_instance. - -# Likewise: -# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >= -# COUNT/SUM/MAX(events_waits_summary_by_instance) -# - MIN(events_waits_summary_global_by_event_name) <= -# MIN(events_waits_summary_by_instance) -# There will be equality only when an instrument instance -# is not removed, which is next to impossible to predictably guarantee -# in the server. -# For example, a MyISAM table removed from the table cache -# will cause a mysql_mutex_destroy on myisam/MYISAM_SHARE::intern_lock. -# Another example, a thread terminating will cause a mysql_mutex_destroy -# on sql/LOCK_delete -# Both cause a row to be deleted from events_waits_summary_by_instance. - -# Likewise: -# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >= -# COUNT/SUM/MAX(events_waits_summary_by_thread_by_event_name) -# - MIN(events_waits_summary_global_by_event_name) <= -# MIN(events_waits_summary_by_thread_by_event_name) -# There will be equality only when no thread is removed, -# that is if no thread disconnects, or no sub thread (for example insert -# delayed) ever completes. -# A thread completing will cause rows in -# events_waits_summary_by_thread_by_event_name to be removed. - ---echo "Verifying file aggregate consistency" - -# Since the code generating the load in this test does: -# - create table -# - insert -# - does not cause temporary tables to be used -# we can test for equality here for file aggregates. - -# If any of these queries returns data, the test failed. - -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; - -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; - -SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_READ <> SUM(i.COUNT_READ)) -OR @dump_all; - -SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE) -FROM performance_schema.socket_summary_by_event_name AS e -JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE)) -OR @dump_all; - -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ)) -OR @dump_all; - -SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE) -FROM performance_schema.file_summary_by_event_name AS e -JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE)) -OR @dump_all; - ---echo "Verifying waits aggregate consistency (instance)" - -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT)) -OR @dump_all; - -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT)) -AND (MIN(i.MIN_TIMER_WAIT) != 0) -OR @dump_all; - -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT)) -OR @dump_all; - ---echo "Verifying waits aggregate consistency (thread)" - -SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT)) -OR @dump_all; - -SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT)) -AND (MIN(t.MIN_TIMER_WAIT) != 0) -OR @dump_all; - -SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT) -FROM performance_schema.events_waits_summary_global_by_event_name AS e -JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t -USING (EVENT_NAME) -GROUP BY EVENT_NAME -HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT)) -OR @dump_all; - - -# Cleanup - -update performance_schema.setup_consumers set enabled = 'YES'; -update performance_schema.setup_instruments - set enabled = 'YES', timed = 'YES'; - -drop table test.t1; - -set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save; - diff --git a/mysql-test/suite/plugins/r/server_audit.result b/mysql-test/suite/plugins/r/server_audit.result index 83b88ed048059..ceb75176b4357 100644 --- a/mysql-test/suite/plugins/r/server_audit.result +++ b/mysql-test/suite/plugins/r/server_audit.result @@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users -server_audit_loc_info server_audit_logging OFF server_audit_mode 0 server_audit_output_type file @@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 0 server_audit_output_type file @@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 1 server_audit_output_type file diff --git a/mysql-test/suite/plugins/r/thread_pool_server_audit.result b/mysql-test/suite/plugins/r/thread_pool_server_audit.result index 83b88ed048059..ceb75176b4357 100644 --- a/mysql-test/suite/plugins/r/thread_pool_server_audit.result +++ b/mysql-test/suite/plugins/r/thread_pool_server_audit.result @@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users -server_audit_loc_info server_audit_logging OFF server_audit_mode 0 server_audit_output_type file @@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 0 server_audit_output_type file @@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF server_audit_file_rotate_size 1000000 server_audit_file_rotations 9 server_audit_incl_users odin, root, dva, tri -server_audit_loc_info server_audit_logging ON server_audit_mode 1 server_audit_output_type file diff --git a/mysql-test/suite/rpl/r/rpl_checksum.result b/mysql-test/suite/rpl/r/rpl_checksum.result index 94d215e596a92..9e37fbf40b18e 100644 --- a/mysql-test/suite/rpl/r/rpl_checksum.result +++ b/mysql-test/suite/rpl/r/rpl_checksum.result @@ -143,6 +143,7 @@ SET debug_dbug= @old_dbug; INSERT INTO t4 VALUES (2); include/wait_for_slave_sql_error.inc [errno=1590] Last_SQL_Error = 'The incident LOST_EVENTS occurred on the master. Message: error writing to the binary log' +FOUND /Slave SQL: The incident LOST_EVENTS occurred on the master\. Message: error writing to the binary log, Internal MariaDB error code: 1590/ in mysqld.2.err SELECT * FROM t4 ORDER BY a; a 1 diff --git a/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result b/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result index 204615201d95d..e247ea9c2a714 100644 --- a/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result +++ b/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result @@ -38,5 +38,7 @@ a 3 4 5 +FOUND /Slave SQL: Error 'Duplicate entry .* on query\. .*Query: '.*', Gtid 0-1-100, Internal MariaDB error code:|Slave SQL: Could not execute Write_rows.*table test.t1; Duplicate entry.*, Gtid 0-1-100, Internal MariaDB error/ in mysqld.2.err +FOUND /Slave SQL: The incident LOST_EVENTS occurred on the master\. Message: , Internal MariaDB error code: 1590/ in mysqld.2.err DROP TABLE t1; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_stop_slave_error.result b/mysql-test/suite/rpl/r/rpl_stop_slave_error.result new file mode 100644 index 0000000000000..2bd372a9a915b --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_stop_slave_error.result @@ -0,0 +1,6 @@ +include/master-slave.inc +[connection master] +include/stop_slave.inc +NOT FOUND /Error reading packet from server: Lost connection/ in slave_log.err +include/start_slave.inc +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_drop_db.test b/mysql-test/suite/rpl/t/rpl_drop_db.test index a67850a66ddbf..f66187b12f515 100644 --- a/mysql-test/suite/rpl/t/rpl_drop_db.test +++ b/mysql-test/suite/rpl/t/rpl_drop_db.test @@ -13,7 +13,7 @@ insert into mysqltest1.t1 values (1); select * from mysqltest1.t1 into outfile 'mysqltest1/f1.txt'; create table mysqltest1.t2 (n int); create table mysqltest1.t3 (n int); ---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty" +--replace_result \\ / 66 39 93 39 17 39 247 39 "File exists" "Directory not empty" --error 1010 drop database mysqltest1; use mysqltest1; @@ -30,7 +30,7 @@ while ($1) } --enable_query_log ---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty" +--replace_result \\ / 66 39 93 39 17 39 247 39 "File exists" "Directory not empty" --error 1010 drop database mysqltest1; use mysqltest1; diff --git a/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt b/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt new file mode 100644 index 0000000000000..32c4527a91575 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt @@ -0,0 +1 @@ +--log-error=$MYSQLTEST_VARDIR/tmp/slave_log.err diff --git a/mysql-test/suite/rpl/t/rpl_stop_slave_error.test b/mysql-test/suite/rpl/t/rpl_stop_slave_error.test new file mode 100644 index 0000000000000..a88981c15c4fd --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_stop_slave_error.test @@ -0,0 +1,17 @@ +# +# MDEV-8345 STOP SLAVE should not cause an ERROR to be logged to the error log +# +source include/have_binlog_format_mixed.inc; # don't repeat the test three times +source include/master-slave.inc; + +connection master; +sync_slave_with_master; +source include/stop_slave.inc; +let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/slave_log.err; +let SEARCH_PATTERN=Error reading packet from server: Lost connection; +let SEARCH_RANGE= -50000; +source include/search_pattern_in_file.inc; + +source include/start_slave.inc; +source include/rpl_end.inc; + diff --git a/mysql-test/t/alter_table.test b/mysql-test/t/alter_table.test index 05d915ec478b5..d2b8a6082a623 100644 --- a/mysql-test/t/alter_table.test +++ b/mysql-test/t/alter_table.test @@ -1712,3 +1712,28 @@ CREATE TABLE t1 ( ALTER TABLE t1 ADD PRIMARY KEY IF NOT EXISTS event_id (event_id,market_id); DROP TABLE t1; +--echo # +--echo # MDEV-11126 Crash while altering persistent virtual column +--echo # + +CREATE TABLE `tab1` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT, + `field2` set('option1','option2','option3','option4') NOT NULL, + `field3` set('option1','option2','option3','option4','option5') NOT NULL, + `field4` set('option1','option2','option3','option4') NOT NULL, + `field5` varchar(32) NOT NULL, + `field6` varchar(32) NOT NULL, + `field7` varchar(32) NOT NULL, + `field8` varchar(32) NOT NULL, + `field9` int(11) NOT NULL DEFAULT '1', + `field10` varchar(16) NOT NULL, + `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1', + `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT, + PRIMARY KEY (`id`) +) DEFAULT CHARSET=latin1; + +ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128); +SHOW CREATE TABLE `tab1`; +ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT; +SHOW CREATE TABLE `tab1`; +DROP TABLE `tab1`; diff --git a/mysql-test/t/create_or_replace.test b/mysql-test/t/create_or_replace.test index 7bba2b341c043..b37417f39d0c8 100644 --- a/mysql-test/t/create_or_replace.test +++ b/mysql-test/t/create_or_replace.test @@ -386,3 +386,15 @@ drop table t1; # Cleanup # DROP TABLE t2; + +--echo # +--echo # MDEV-10824 - Crash in CREATE OR REPLACE TABLE t1 AS SELECT spfunc() +--echo # +CREATE TABLE t1(a INT); +CREATE FUNCTION f1() RETURNS VARCHAR(16383) RETURN 'test'; +CREATE OR REPLACE TABLE t1 AS SELECT f1(); +LOCK TABLE t1 WRITE; +CREATE OR REPLACE TABLE t1 AS SELECT f1(); +UNLOCK TABLES; +DROP FUNCTION f1; +DROP TABLE t1; diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test index e6583f990cafb..600880d6be59a 100644 --- a/mysql-test/t/ctype_utf32.test +++ b/mysql-test/t/ctype_utf32.test @@ -889,6 +889,11 @@ SELECT CHAR_LENGTH(TRIM(BOTH 0x0001 FROM _utf32 0x00000061)); SELECT CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061)); SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061)); +# +# potential signedness issue +# +select hex(lower(cast(0xffff0000 as char character set utf32))) as c; + --echo # --echo # End of 5.5 tests --echo # diff --git a/mysql-test/t/drop.test b/mysql-test/t/drop.test index d9784bc819a86..a3e96953bac02 100644 --- a/mysql-test/t/drop.test +++ b/mysql-test/t/drop.test @@ -313,3 +313,12 @@ INSERT INTO table1 VALUES (1); DROP TABLE table1,table2; --echo # End BUG#34750 + +--echo # +--echo # MDEV-11105 Table named 'db' has weird side effect. +--echo # + +CREATE DATABASE mysqltest; +CREATE TABLE mysqltest.db(id INT); +DROP DATABASE mysqltest; + diff --git a/mysql-test/t/group_min_max_innodb.test b/mysql-test/t/group_min_max_innodb.test index 6967f84714729..91e0bd3279fb4 100644 --- a/mysql-test/t/group_min_max_innodb.test +++ b/mysql-test/t/group_min_max_innodb.test @@ -230,3 +230,16 @@ eval EXPLAIN $query; eval $query; DROP TABLE t0,t1,t2; + +--echo # +--echo # MDEV-MariaDB daemon leaks memory with specific query +--echo # + +CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL, + `language_id` int(11) unsigned NOT NULL DEFAULT '1' +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0', + `serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8; +insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000)); +SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0; +drop table t1,t2; diff --git a/mysql-test/t/information_schema.test b/mysql-test/t/information_schema.test index 30ae29844c223..ecfaa6510762a 100644 --- a/mysql-test/t/information_schema.test +++ b/mysql-test/t/information_schema.test @@ -612,13 +612,13 @@ select * from information_schema.schema_privileges order by grantee; select * from information_schema.user_privileges order by grantee; show grants; connection con4; -select * from information_schema.column_privileges where grantee like '%user%' +select * from information_schema.column_privileges where grantee like '\'user%' order by grantee; -select * from information_schema.table_privileges where grantee like '%user%' +select * from information_schema.table_privileges where grantee like '\'user%' order by grantee; -select * from information_schema.schema_privileges where grantee like '%user%' +select * from information_schema.schema_privileges where grantee like '\'user%' order by grantee; -select * from information_schema.user_privileges where grantee like '%user%' +select * from information_schema.user_privileges where grantee like '\'user%' order by grantee; show grants; connection default; diff --git a/mysql-test/t/merge.test b/mysql-test/t/merge.test index 77e896c7c05bc..0cf37a24f8ebe 100644 --- a/mysql-test/t/merge.test +++ b/mysql-test/t/merge.test @@ -2880,6 +2880,19 @@ drop tables m1, t1, t4; drop view t3; +--echo # +--echo # MDEV-10424 - Assertion `ticket == __null' failed in +--echo # MDL_request::set_type +--echo # +CREATE TABLE t1 (f1 INT) ENGINE=MyISAM; +CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1); +PREPARE stmt FROM "ANALYZE TABLE tmerge, t1"; +EXECUTE stmt; +EXECUTE stmt; +DEALLOCATE PREPARE stmt; +DROP TABLE t1, tmerge; + + --echo End of 5.5 tests diff --git a/mysql-test/t/mysql.test b/mysql-test/t/mysql.test index 2b4b1e69ab6d7..263e1103e8b8a 100644 --- a/mysql-test/t/mysql.test +++ b/mysql-test/t/mysql.test @@ -586,8 +586,16 @@ DROP DATABASE connected_db; # USE and names with backticks # --write_file $MYSQLTEST_VARDIR/tmp/backticks.sql +\u aa`bb``cc +SELECT DATABASE(); +USE test +SELECT DATABASE(); USE aa`bb``cc SELECT DATABASE(); +USE test +SELECT DATABASE(); +USE `aa``bb````cc` +SELECT DATABASE(); EOF create database `aa``bb````cc`; --exec $MYSQL < $MYSQLTEST_VARDIR/tmp/backticks.sql diff --git a/mysql-test/t/mysql_not_windows.test b/mysql-test/t/mysql_not_windows.test index 66853677f7bb6..591de74cbbf47 100644 --- a/mysql-test/t/mysql_not_windows.test +++ b/mysql-test/t/mysql_not_windows.test @@ -13,3 +13,12 @@ --echo --echo End of tests + +# Multi-line exec +exec $MYSQL \ + test -e "select 1"; +exec $MYSQL test -e "select + 2"; +let $query = select 3 + as X; +exec $MYSQL test -e "$query"; diff --git a/mysql-test/t/mysqldump-nl.test b/mysql-test/t/mysqldump-nl.test new file mode 100644 index 0000000000000..311996e77c305 --- /dev/null +++ b/mysql-test/t/mysqldump-nl.test @@ -0,0 +1,38 @@ +# +# New lines in identifiers +# + +# embedded server doesn't support external clients +--source include/not_embedded.inc +# cmd.exe doesn't like new lines on the command line +--source include/not_windows.inc + +create database `mysqltest1 +1tsetlqsym`; +use `mysqltest1 +1tsetlqsym`; + +create table `t1 +1t` (`foobar +raboof` int); +create view `v1 +1v` as select * from `t1 +1t`; + +create procedure sp() select * from `v1 +1v`; + +flush tables; +use test; + +exec $MYSQL_DUMP --compact --comment --routines --add-drop-database --databases 'mysqltest1 +1tsetlqsym'; + +exec $MYSQL_DUMP --compact --comment --routines --add-drop-database --databases 'mysqltest1 +1tsetlqsym' | $MYSQL; + +show tables from `mysqltest1 +1tsetlqsym`; + +drop database `mysqltest1 +1tsetlqsym`; diff --git a/mysql-test/t/mysqltest.test b/mysql-test/t/mysqltest.test index ae59c713c3d04..e85d793b6282c 100644 --- a/mysql-test/t/mysqltest.test +++ b/mysql-test/t/mysqltest.test @@ -741,15 +741,6 @@ echo ; --error 1 --exec echo "--exec " | $MYSQL_TEST 2>&1 -# Multi-line exec -exec $MYSQL - test -e "select 1"; -exec $MYSQL test -e "select - 2"; -let $query = select 3 - as X; -exec $MYSQL test -e "$query"; - # ---------------------------------------------------------------------------- # Test let command # ---------------------------------------------------------------------------- diff --git a/mysql-test/t/ps.test b/mysql-test/t/ps.test index c45b27281a79e..a6b238b84c66b 100644 --- a/mysql-test/t/ps.test +++ b/mysql-test/t/ps.test @@ -3653,5 +3653,32 @@ deallocate prepare stmt; SET SESSION sql_mode = @save_sql_mode; DROP TABLE t1,t2; +--echo # +--echo # MDEV-8833: Crash of server on prepared statement with +--echo # conversion to semi-join +--echo # + +CREATE TABLE t1 (column1 INT); +INSERT INTO t1 VALUES (3),(9); + +CREATE TABLE t2 (column2 INT); +INSERT INTO t2 VALUES (1),(4); + +CREATE TABLE t3 (column3 INT); +INSERT INTO t3 VALUES (6),(8); + +CREATE TABLE t4 (column4 INT); +INSERT INTO t4 VALUES (2),(5); + +PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1 +FROM t1 AS table1 +WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 ) +) AS sq +FROM t3 AS table3, t4 AS table4"; +EXECUTE stmt; +EXECUTE stmt; +deallocate prepare stmt; +drop table t1,t2,t3,t4; + ---echo # End of 10.0 tests +--echo # End of 5.5 tests diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test index c46ff69295fa7..8efc5216ba09c 100644 --- a/mysql-test/t/selectivity.test +++ b/mysql-test/t/selectivity.test @@ -970,6 +970,58 @@ set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivit DROP TABLE t1,t2; +set use_stat_tables=@save_use_stat_tables; + +--echo # +--echo # Bug mdev-11096: range condition over column without statistical data +--echo # + +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; + +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1 persistent for columns () indexes (); + +explain extended +select * from t1 where col1 > 'b' and col1 < 'e'; +select * from t1 where col1 > 'b' and col1 < 'e'; + +drop table t1; + +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; +set use_stat_tables=@save_use_stat_tables; + +--echo # +--echo # Bug mdev-9628: unindexed blob column without min-max statistics +--echo # with optimizer_use_condition_selectivity=3 +--echo # + +set use_stat_tables='preferably'; +set optimizer_use_condition_selectivity=3; +create table t1(col1 char(32)); +insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t1; + +create table t2(col1 text); +insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h'); +analyze table t2; + +select * from t1 where col1 > 'b' and col1 < 'd'; +explain extended +select * from t1 where col1 > 'b' and col1 < 'd'; + +select * from t2 where col1 > 'b' and col1 < 'd'; +explain extended +select * from t2 where col1 > 'b' and col1 < 'd'; + +select * from t2 where col1 < 'b' and col1 > 'd'; +explain extended +select * from t2 where col1 < 'b' and col1 > 'd'; + +drop table t1,t2; + +set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity; set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/t/selectivity_innodb.test b/mysql-test/t/selectivity_innodb.test index d6a77eac60004..25aa0abbc3b45 100644 --- a/mysql-test/t/selectivity_innodb.test +++ b/mysql-test/t/selectivity_innodb.test @@ -109,6 +109,31 @@ where t1.child_user_id=t3.id and t1.child_group_id is null and t2.lower_group_na drop table t1,t2,t3; +--echo # +--echo # MDEV-9187: duplicate of bug mdev-9628 +--echo # + +set use_stat_tables = preferably; +set optimizer_use_condition_selectivity=3; + +CREATE TABLE t1 (f1 char(32)) ENGINE=InnoDB; +INSERT INTO t1 VALUES ('foo'),('bar'),('qux'); +ANALYZE TABLE t1; + +SELECT * FROM t1 WHERE f1 < 'm'; +EXPLAIN EXTENDED +SELECT * FROM t1 WHERE f1 < 'm'; + +CREATE TABLE t2 (f1 TEXT) ENGINE=InnoDB; +INSERT INTO t2 VALUES ('foo'),('bar'),('qux'); +ANALYZE TABLE t2; + +SELECT * FROM t2 WHERE f1 <> 'qux'; +EXPLAIN EXTENDED +SELECT * FROM t2 WHERE f1 <> 'qux'; + +DROP TABLE t1,t2; + --echo # --echo # End of 10.0 tests --echo # diff --git a/mysql-test/t/type_uint.test b/mysql-test/t/type_uint.test index 3a949c5c47a93..84fca993d09eb 100644 --- a/mysql-test/t/type_uint.test +++ b/mysql-test/t/type_uint.test @@ -16,6 +16,13 @@ drop table t1; # End of 4.1 tests +create table t1 (a bigint unsigned, b mediumint unsigned); +insert t1 values (1,2),(0xffffffffffffffff,0xffffff); +select coalesce(a,b), coalesce(b,a) from t1; +create table t2 as select a from t1 union select b from t1; +show create table t2; +select * from t2; +drop table t1, t2; --echo # --echo # Start of 10.0 tests diff --git a/mysql-test/unstable-tests b/mysql-test/unstable-tests index 6a46602eb0795..3e25115599fdc 100644 --- a/mysql-test/unstable-tests +++ b/mysql-test/unstable-tests @@ -23,77 +23,66 @@ # ############################################################################## -main.bootstrap : Modified on 2016-06-18 (MDEV-9969) main.create_delayed : MDEV-10605 - failed with timeout -main.create_or_replace : Modified on 2016-06-23 (MDEV-9728) -main.ctype_recoding : Modified on 2016-06-10 (MDEV-10181) -main.ctype_utf8 : Modified on 2016-06-21 (merge) -main.ctype_utf8mb4 : Modified on 2016-06-21 (merge) -main.events_1 : Modified on 2016-06-21 (MDEV-9524) +main.ctype_utf32 : Modified on 2016-09-27 (merge) main.func_group : Modified on 2016-08-08 (MDEV-10468) -main.func_in : Modified on 2016-06-20 (MDEV-10020) main.func_math : Modified on 2016-08-10 (merge) main.func_misc : Modified on 2016-08-10 (merge) -main.grant2 : Modified on 2016-07-18 (MDEV-8569) -main.help : Modified on 2016-06-21 (MDEV-9524) +main.group_min_max_innodb : Modified on 2016-08-25 (MDEV-10595) main.host_cache_size_functionality : MDEV-10606 - sporadic failure on shutdown main.index_intersect_innodb : MDEV-10643 - failed with timeout -main.index_merge_innodb : MDEV-7142 - sporadic wrong execution plan +main.index_merge_myisam : Modified on 2016-09-05 (include file changed) +main.index_merge_innodb : Modified on 2016-09-05 (MDEV-7142) main.information_schema_stats : Modified on 2016-07-25 (MDEV-10428) main.innodb_mysql_lock : MDEV-7861 - sporadic lock detection failure -main.insert_innodb : Modified on 2016-06-14 (merge from upstream) main.loaddata : Modified on 2016-08-10 (merge) -main.locale : Modified on 2016-06-21 (merge) main.mdev-504 : MDEV-10607 - sporadic "can't connect" main.mdev375 : MDEV-10607 - sporadic "can't connect" main.merge : MDEV-10607 - sporadic "can't connect" -main.multi_update : Modified on 2016-06-20 (MDEV-5973) main.myisam_enable_keys-10506 : New test, added on 2016-08-10 (MDEV-10506) main.mysqlcheck : Modified on 2016-08-10 (merge) main.mysqldump : MDEV-10512 - sporadic assertion failure +main.mysqlhotcopy_myisam : MDEV-10995 - test hangs on debug build main.mysqltest : MDEV-9269 - fails on Alpha main.named_pipe : Modified on 2016-08-02 (MDEV-10383) -main.openssl_1 : Modified on 2016-07-11 (MDEV-10211) -main.parser : Modified on 2016-06-21 (merge) main.pool_of_threads : MDEV-10100 - sporadic error on detecting max connections -main.ps_1general : Modified on 2016-07-12 (merge) +main.ps : MDEV-11017 - sporadic wrong Prepared_stmt_count main.range : Modified on 2016-08-10 (merge) main.range_mrr_icp : Modified on 2016-08-10 (merge) main.query_cache : MDEV-10611 - sporadic mutex problem -main.shutdown : MDEV-10612 - sporadic crashes +main.shutdown : MDEV-10563 - sporadic crashes main.sp-prelocking : Modified on 2016-08-10 (merge) main.sp-security : MDEV-10607 - sporadic "can't connect" -main.ssl : MDEV-10211 - different ciphers on some platforms -main.ssl_ca : Modified on 2016-07-11 (MDEV-10211) -main.ssl_compress : Modified on 2016-07-11 (MDEV-10211) -main.ssl_timeout : Modified on 2016-07-11 (MDEV-10211) +main.ssl_compress : MDEV-11110 - valgrind failures main.stat_tables_par_innodb : MDEV-10515 - sporadic wrong results -main.status_user : Modified on 2016-06-20 (MDEV-8633) main.subselect_innodb : MDEV-10614 - sporadic wrong results -main.temp_table : Modified on 2016-06-18 (MDEV-8569) main.type_date : Modified on 2016-08-10 (merge) -main.type_datetime : Modified on 2016-06-16 (MDEV-9374) +main.type_uint : Modified on 2016-09-27 (merge) main.view : Modified on 2016-08-10 (merge) main.xtradb_mrr : Modified on 2016-08-04 (MDEV-9946) #---------------------------------------------------------------- -archive.archive-big : MDEV-10615 - table is marked as crashed -archive.discover : MDEV-10510 - table is marked as crashed +archive.archive-big : MDEV-10615 - table is marked as crashed +archive.discover : MDEV-10510 - table is marked as crashed +archive.mysqlhotcopy_archive : MDEV-10995 - test hangs on debug build #---------------------------------------------------------------- binlog.binlog_commit_wait : MDEV-10150 - Error: too much time elapsed -binlog.binlog_dmls_on_tmp_tables_readonly : New test, added on 2016-05-04 (upstream) binlog.binlog_xa_recover : MDEV-8517 - Extra checkpoint #---------------------------------------------------------------- connect.tbl : MDEV-9844, MDEV-10179 - sporadic crashes, valgrind warnings, wrong results -connect.jdbc : New test, added on 2016-07-15 -connect.jdbc-new : New test, added on 2016-07-14 -connect.jdbc-oracle : New test, added on 2016-07-13 -connect.jdbc-postgresql : New test, added on 2016-07-13 + +#---------------------------------------------------------------- + +engines/rr_trx.* : MDEV-10998 - tests not maintained + +#---------------------------------------------------------------- + +extra/binlog_tests.database : Modified on 2016-10-21 (Upstream MIPS test fixes) #---------------------------------------------------------------- @@ -104,20 +93,19 @@ federated.federated_transactions : MDEV-10617, MDEV-10417 - Wrong checksum, time #---------------------------------------------------------------- -funcs_1.processlist_priv_no_prot : Include file modified on 2016-07-12 (merge) -funcs_1.processlist_priv_ps : Include file modified on 2016-07-12 (merge) +funcs_2/charset.* : MDEV-10999 - test not maintained #---------------------------------------------------------------- innodb.binlog_consistent : MDEV-10618 - Server fails to start innodb.innodb-alter-table : MDEV-10619 - Testcase timeout innodb.innodb-alter-tempfile : Modified on 2016-08-09 (MDEV-10469) -innodb.innodb_corrupt_bit : Modified on 2016-06-21 (merge) innodb.innodb_bug30423 : MDEV-7311 - Wrong number of rows in the plan -innodb.innodb-fk-warnings : Modified on 2016-07-18 (MDEV-8569) -innodb.innodb-fkcheck : Modified on 2016-06-13 (MDEV-10083) +innodb.innodb_bug54044 : Modified on 2016-09-27 (merge) +innodb.innodb_monitor : MDEV-10939 - Testcase timeout innodb.innodb-wl5522 : rdiff file modified on 2016-08-10 (merge) innodb.innodb-wl5522-debug-zip : MDEV-10427 - Warning: database page corruption +innodb.system_tables : Added on 2016-09-23 (MDEV-10775) #---------------------------------------------------------------- @@ -142,21 +130,16 @@ parts.partition_int_myisam : MDEV-10621 - Testcase timeout #---------------------------------------------------------------- -perfschema.digest_table_full : Modified on 2016-06-21 (merge) perfschema.func_file_io : MDEV-5708 - fails for s390x perfschema.func_mutex : MDEV-5708 - fails for s390x -perfschema.rpl_gtid_func : Modified on 2016-06-21 (merge) -perfschema.sizing_low : Modified on 2016-04-26 (5.6.30 merge) +perfschema.hostcache_ipv6_ssl : MDEV-10696 - crash on shutdown perfschema.socket_summary_by_event_name_func : MDEV-10622 - Socket summary tables do not match -perfschema.start_server_low_digest : Modified on 2016-06-21 (merge) -perfschema.statement_digest : Modified on 2016-06-21 (merge) -perfschema.statement_digest_consumers : Modified on 2016-06-21 (merge) -perfschema.statement_digest_long_query : Modified on 2016-06-21 (merge) -perfschema.table_name : New test, added on 2016-04-26 (5.6.30 merge) + +perfschema_stress.* : MDEV-10996 - tests not maintained #---------------------------------------------------------------- -plugins.feedback_plugin_send : MDEV-7932 - ssl failed for url +plugins.feedback_plugin_send : MDEV-7932 - ssl failed for url, MDEV-11112 - valgrind warnings plugins.pam : Modified on 2016-08-03 (MDEV-7329) plugins.pam_cleartext : Modified on 2016-08-03 plugins.server_audit : MDEV-9562 - crashes on sol10-sparc @@ -164,11 +147,6 @@ plugins.thread_pool_server_audit : MDEV-9562 - crashes on sol10-sparc #---------------------------------------------------------------- -roles.rpl_grant_revoke_current_role-8638 : New test, added on 2016-06-20 (MDEV-8638) -roles.set_role-9614 : New test, added on 2016-05-30 (MDEV-9614) - -#---------------------------------------------------------------- - rpl.last_insert_id : MDEV-10625 - warnings in error log rpl.rpl_auto_increment : MDEV-10417 - Fails on Mips rpl.rpl_auto_increment_bug45679 : MDEV-10417 - Fails on Mips @@ -177,11 +155,11 @@ rpl.rpl_binlog_index : MDEV-9501 - Warning: failed registering rpl.rpl_checksum_cache : MDEV-10626 - Testcase timeout rpl.rpl_circular_for_4_hosts : MDEV-10627 - Testcase timeout rpl.rpl_ddl : MDEV-10417 - Fails on Mips +rpl.rpl_drop_db : Modified on 2016-10-21 (Upstream MIPS test fixes) rpl.rpl_gtid_crash : MDEV-9501 - Warning: failed registering on master rpl.rpl_gtid_master_promote : MDEV-10628 - Timeout in sync_with_master rpl.rpl_gtid_stop_start : MDEV-10629 - Crash on shutdown rpl.rpl_gtid_until : MDEV-10625 - warnings in error log -rpl.rpl_ignore_table : Modified on 2016-06-22 rpl.rpl_innodb_bug30888 : MDEV-10417 - Fails on Mips rpl.rpl_insert : MDEV-9329 - Fails on Ubuntu/s390x rpl.rpl_insert_delayed : MDEV-9329 - Fails on Ubuntu/s390x @@ -201,6 +179,8 @@ rpl.rpl_temporary_error2 : MDEV-10634 - Wrong number of retries rpl.sec_behind_master-5114 : MDEV-8518 - Wrong value of Seconds_Behind_Master rpl.rpl_skip_replication : MDEV-9268 - Fails with timeout in sync_slave_with_master on Alpha +rpl/extra/rpl_tests.* : MDEV-10994 - tests not maintained + #---------------------------------------------------------------- spider.* : MDEV-9329 - tests are too memory-consuming @@ -214,6 +194,10 @@ spider/bg.vp_fixes : MDEV-9329 - Fails on Ubuntu/s390x #---------------------------------------------------------------- +sphinx.* : MDEV-10747 - tests are not run in buildbot, they can't be stable + +#---------------------------------------------------------------- + stress.ddl_innodb : MDEV-10635 - Testcase timeout #---------------------------------------------------------------- @@ -229,11 +213,14 @@ tokudb.background_job_manager : MDEV-10327 - Assertion failure on server tokudb.cluster_filter_unpack_varchar : MDEV-10636 - Wrong execution plan tokudb.* : MDEV-9891 - massive crashes on shutdown tokudb_alter_table.* : MDEV-9891 - massive crashes on shutdown +tokudb_backup.* : MDEV-11001 - tests don't work tokudb_bugs.checkpoint_lock : MDEV-10637 - Wrong processlist output tokudb_bugs.checkpoint_lock_3 : MDEV-10637 - Wrong processlist output tokudb_bugs.* : MDEV-9891 - massive crashes on shutdown tokudb_parts.* : MDEV-9891 - massive crashes on shutdown -rpl-tokudb.* : MDEV-9891 - massive crashes on shutdown, also modified on 2016-06-10 (Merge) +tokudb_rpl_suites.* : MDEV-11001 - tests don't work +tokudb_sys_vars.* : MDEV-11001 - tests don't work +rpl-tokudb.* : MDEV-9891 - massive crashes on shutdown tokudb/tokudb_add_index.* : MDEV-9891 - massive crashes on shutdown tokudb/tokudb_backup.* : MDEV-9891 - massive crashes on shutdown tokudb/tokudb_mariadb.* : MDEV-9891 - massive crashes on shutdown @@ -247,7 +234,6 @@ unit.ma_test_loghandler : MDEV-10638 - record read not ok #---------------------------------------------------------------- -vcol.charsets : Added on 2016-06-23 vcol.not_supported : MDEV-10639 - Testcase timeout vcol.vcol_keys_innodb : MDEV-10639 - Testcase timeout diff --git a/mysql-test/valgrind.supp b/mysql-test/valgrind.supp index 1cc5d1779720c..5f1af1a09053c 100644 --- a/mysql-test/valgrind.supp +++ b/mysql-test/valgrind.supp @@ -1228,6 +1228,125 @@ fun:dlopen@@GLIBC_2.2.5 } +# +# MDEV-11061: OpenSSL 0.9.8 problems +# + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Cond + obj:*/libz.so* + ... + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Value8 + obj:*/libz.so* + ... + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Cond + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Value8 + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Cond + obj:*/libssl.so.0.9.8 + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Value8 + obj:*/libssl.so.0.9.8 + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Cond + fun:memcpy + obj:*/libcrypto.so.0.9.8 + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Value8 + fun:memcpy + obj:*/libcrypto.so.0.9.8 + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Cond + fun:is_overlap + fun:memcpy + obj:*/libcrypto.so.0.9.8 + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Cond + fun:memset + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Value8 + fun:memset + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... +} + +{ + MDEV-11061: OpenSSL 0.9.8 + Memcheck:Param + write(buf) + obj:*/libpthread-2.9.so* + obj:*/libcrypto.so.0.9.8 + ... + obj:*/libssl.so.0.9.8 + ... +} + { GitHub codership/galera#330 Memcheck:Leak @@ -1316,7 +1435,7 @@ } { -g codership/mysql-wsrep/issues#176 + codership/mysql-wsrep/issues#176 Memcheck:Leak fun:_Z16wsrep_set_paramsRN6galera10ReplicatorEPKc } diff --git a/mysys/my_fopen.c b/mysys/my_fopen.c index 409f1bfc9dd81..a24f516116826 100644 --- a/mysys/my_fopen.c +++ b/mysys/my_fopen.c @@ -102,6 +102,7 @@ static FILE *my_win_freopen(const char *path, const char *mode, FILE *stream) HANDLE osfh; DBUG_ASSERT(path && stream); + DBUG_ASSERT(strchr(mode, 'a')); /* We use FILE_APPEND_DATA below */ /* Services don't have stdout/stderr on Windows, so _fileno returns -1. */ if (fd < 0) @@ -112,15 +113,14 @@ static FILE *my_win_freopen(const char *path, const char *mode, FILE *stream) fd= _fileno(stream); } - if ((osfh= CreateFile(path, GENERIC_READ | GENERIC_WRITE, + if ((osfh= CreateFile(path, GENERIC_READ | FILE_APPEND_DATA, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) return NULL; - if ((handle_fd= _open_osfhandle((intptr_t)osfh, - _O_APPEND | _O_TEXT)) == -1) + if ((handle_fd= _open_osfhandle((intptr_t)osfh, _O_TEXT)) == -1) { CloseHandle(osfh); return NULL; diff --git a/mysys/my_redel.c b/mysys/my_redel.c index 61e61b40791d7..976fc5a18c345 100644 --- a/mysys/my_redel.c +++ b/mysys/my_redel.c @@ -1,5 +1,5 @@ -/* - Copyright (c) 2000, 2010, Oracle and/or its affiliates +/* Copyright (c) 2000, 2010, Oracle and/or its affiliates + Copyright (c) 2009, 2016, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -49,7 +49,8 @@ int my_redel(const char *org_name, const char *tmp_name, DBUG_PRINT("my",("org_name: '%s' tmp_name: '%s' MyFlags: %lu", org_name,tmp_name,MyFlags)); - if (my_copystat(org_name,tmp_name,MyFlags) < 0) + if (!my_disable_copystat_in_redel && + my_copystat(org_name,tmp_name,MyFlags) < 0) goto end; if (MyFlags & MY_REDEL_MAKE_BACKUP) { diff --git a/mysys/my_static.c b/mysys/my_static.c index 4aca78e30a9ab..9236c1395fb86 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -98,3 +98,4 @@ my_bool my_disable_sync=0; my_bool my_disable_async_io=0; my_bool my_disable_flush_key_blocks=0; my_bool my_disable_symlinks=0; +my_bool my_disable_copystat_in_redel=0; diff --git a/plugin/feedback/utils.cc b/plugin/feedback/utils.cc index b83b69be0ce17..dad3d59e76dcc 100644 --- a/plugin/feedback/utils.cc +++ b/plugin/feedback/utils.cc @@ -43,7 +43,11 @@ static const char *get_os_version_name(OSVERSIONINFOEX *ver) { DWORD major = ver->dwMajorVersion; DWORD minor = ver->dwMinorVersion; - + if (major == 10 && minor == 0) + { + return (ver->wProductType == VER_NT_WORKSTATION) ? + "Windows 10" : "Windows Server 2016"; + } if (major == 6 && minor == 3) { return (ver->wProductType == VER_NT_WORKSTATION)? @@ -102,7 +106,12 @@ static int uname(struct utsname *buf) if(version_str && version_str[0]) sprintf(buf->version, "%s %s",version_str, ver.szCSDVersion); else - sprintf(buf->version, "%s", ver.szCSDVersion); + { + /* Fallback for unknown versions, e.g "Windows ." */ + sprintf(buf->version, "Windows %d.%d%s", + ver.dwMajorVersion, ver.dwMinorVersion, + (ver.wProductType == VER_NT_WORKSTATION ? "" : " Server")); + } #ifdef _WIN64 strcpy(buf->machine, "x64"); diff --git a/plugin/server_audit/server_audit.c b/plugin/server_audit/server_audit.c index b84f2b9480648..d48b6c37728db 100644 --- a/plugin/server_audit/server_audit.c +++ b/plugin/server_audit/server_audit.c @@ -427,9 +427,8 @@ static MYSQL_SYSVAR_UINT(query_log_limit, query_log_limit, char locinfo_ini_value[sizeof(struct connection_info)+4]; static MYSQL_THDVAR_STR(loc_info, - PLUGIN_VAR_READONLY | PLUGIN_VAR_MEMALLOC, - "Auxiliary info.", NULL, NULL, - locinfo_ini_value); + PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_MEMALLOC, + "Internal info", NULL, NULL, locinfo_ini_value); static const char *syslog_facility_names[]= { diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh index b8df320bf9325..9d8bbce48604a 100644 --- a/scripts/mysqld_safe.sh +++ b/scripts/mysqld_safe.sh @@ -717,6 +717,10 @@ else logging=syslog fi +# close stdout and stderr, everything goes to $logging now +exec 1>&- +exec 2>&- + USER_OPTION="" if test -w / -o "$USER" = "root" then @@ -747,7 +751,7 @@ if [ ! -d $mysql_unix_port_dir ] then if ! `mkdir -p $mysql_unix_port_dir` then - echo "Fatal error Can't create database directory '$mysql_unix_port'" + log_error "Fatal error Can't create database directory '$mysql_unix_port'" exit 1 fi chown $user $mysql_unix_port_dir diff --git a/sql/contributors.h b/sql/contributors.h index f52d3243453ae..0359ec5402289 100644 --- a/sql/contributors.h +++ b/sql/contributors.h @@ -46,6 +46,7 @@ struct show_table_contributors_st show_table_contributors[]= { {"Auttomattic", "https://automattic.com", "Bronze Sponsor of the MariaDB Foundation"}, {"Verkkokauppa.com", "https://virtuozzo.com", "Bronze Sponsor of the MariaDB Foundation"}, {"Virtuozzo", "https://virtuozzo.com/", "Bronze Sponsor of the MariaDB Foundation"}, + {"Tencent Game DBA", "http://tencentdba.com/about/", "Bronze Sponsor of the MariaDB Foundation"}, /* Sponsors of important features */ {"Google", "USA", "Sponsoring encryption, parallel replication and GTID"}, diff --git a/sql/field.cc b/sql/field.cc index eb017802da3bd..49989a4a4ed8a 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -355,7 +355,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]= //MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP MYSQL_TYPE_LONGLONG, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24 - MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONG, + MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG, //MYSQL_TYPE_DATE MYSQL_TYPE_TIME MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR, //MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR diff --git a/sql/item.cc b/sql/item.cc index 6cdbccde72927..21c8b3f701e0f 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -2743,9 +2743,28 @@ void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref) if (context) { Name_resolution_context *ctx= new Name_resolution_context(); - ctx->outer_context= NULL; // We don't build a complete name resolver - ctx->table_list= NULL; // We rely on first_name_resolution_table instead + if (context->select_lex == new_parent) + { + /* + This field was pushed in then pulled out + (for example left part of IN) + */ + ctx->outer_context= context->outer_context; + } + else if (context->outer_context) + { + /* just pull to the upper context */ + ctx->outer_context= context->outer_context->outer_context; + } + else + { + /* No upper context (merging Derived/VIEW where context chain ends) */ + ctx->outer_context= NULL; + } + ctx->table_list= context->first_name_resolution_table; ctx->select_lex= new_parent; + if (context->select_lex == NULL) + ctx->select_lex= NULL; ctx->first_name_resolution_table= context->first_name_resolution_table; ctx->last_name_resolution_table= context->last_name_resolution_table; ctx->error_processor= context->error_processor; diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 5cdfa427997bc..e70922bb5d3ca 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -2620,8 +2620,8 @@ static bool check_equality_for_exist2in(Item_func *func, args[0]->all_used_tables() == OUTER_REF_TABLE_BIT) { /* It is Item_field or Item_direct_view_ref) */ - DBUG_ASSERT(args[0]->type() == Item::FIELD_ITEM || - args[0]->type() == Item::REF_ITEM); + DBUG_ASSERT(args[1]->type() == Item::FIELD_ITEM || + args[1]->type() == Item::REF_ITEM); *local_field= (Item_ident *)args[1]; *outer_exp= args[0]; return TRUE; diff --git a/sql/log.cc b/sql/log.cc index 2479208b395e2..7fd185aa5d3d7 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -3144,7 +3144,7 @@ bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time, if (! write_error) { write_error= 1; - sql_print_error(ER(ER_ERROR_ON_WRITE), name, error); + sql_print_error(ER(ER_ERROR_ON_WRITE), name, tmp_errno); } } } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 9d33d822961a6..d43f14c176ba6 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -4084,6 +4084,7 @@ static int init_common_variables() max_system_variables.pseudo_thread_id= (ulong)~0; server_start_time= flush_status_time= my_time(0); + my_disable_copystat_in_redel= 1; global_rpl_filter= new Rpl_filter; binlog_filter= new Rpl_filter; diff --git a/sql/net_serv.cc b/sql/net_serv.cc index 3e17ced92baed..f52549bfd0b5f 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -1,5 +1,5 @@ -/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2010, 2014, SkySQL Ab. +/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. + Copyright (c) 2012, 2016, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/sql/opt_range.cc b/sql/opt_range.cc index e0ca43e6d726f..5d6891a1edf53 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3345,9 +3345,16 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param, { Field *field= *field_ptr; uint16 store_length; + uint16 max_key_part_length= (uint16) table->file->max_key_part_length(); key_part->key= keys; key_part->part= 0; - key_part->length= (uint16) field->key_length(); + if (field->flags & BLOB_FLAG) + key_part->length= max_key_part_length; + else + { + key_part->length= (uint16) field->key_length(); + set_if_smaller(key_part->length, max_key_part_length); + } store_length= key_part->length; if (field->real_maybe_null()) store_length+= HA_KEY_NULL_LENGTH; diff --git a/sql/parse_file.h b/sql/parse_file.h index 2a0266e98b7eb..83a8eabcf5f72 100644 --- a/sql/parse_file.h +++ b/sql/parse_file.h @@ -42,9 +42,9 @@ enum file_opt_type { struct File_option { - LEX_STRING name; /**< Name of the option */ - int offset; /**< offset to base address of value */ - file_opt_type type; /**< Option type */ + LEX_STRING name; /**< Name of the option */ + my_ptrdiff_t offset; /**< offset to base address of value */ + file_opt_type type; /**< Option type */ }; diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc index fd6f62fa100f9..c3f25848e8a16 100644 --- a/sql/signal_handler.cc +++ b/sql/signal_handler.cc @@ -64,13 +64,13 @@ extern "C" sig_handler handle_fatal_signal(int sig) struct tm tm; #ifdef HAVE_STACKTRACE THD *thd; -#endif /* This flag remembers if the query pointer was found invalid. We will try and print the query at the end of the signal handler, in case we're wrong. */ bool print_invalid_query_pointer= false; +#endif if (segfaulted) { @@ -265,6 +265,7 @@ extern "C" sig_handler handle_fatal_signal(int sig) "\"mlockall\" bugs.\n"); } +#ifdef HAVE_STACKTRACE if (print_invalid_query_pointer) { my_safe_printf_stderr( @@ -274,6 +275,7 @@ extern "C" sig_handler handle_fatal_signal(int sig) my_write_stderr(thd->query(), MY_MIN(65536U, thd->query_length())); my_safe_printf_stderr("\n\n"); } +#endif #ifdef HAVE_WRITE_CORE if (test_flags & TEST_CORE_ON_SIGNAL) diff --git a/sql/slave.cc b/sql/slave.cc index db6e9cbf0aa7c..359f4f8af9ce0 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -3126,8 +3126,13 @@ static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings) *suppress_warnings= TRUE; } else - sql_print_error("Error reading packet from server: %s ( server_errno=%d)", - mysql_error(mysql), mysql_errno(mysql)); + { + if (!mi->rli.abort_slave) + { + sql_print_error("Error reading packet from server: %s (server_errno=%d)", + mysql_error(mysql), mysql_errno(mysql)); + } + } DBUG_RETURN(packet_error); } diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index d32b213a83874..e1c66bceedc0f 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -455,7 +455,19 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, } thd->prepare_derived_at_open= FALSE; - table->next_global= save_next_global; + /* + MERGE engine may adjust table->next_global chain, thus we have to + append save_next_global after merge children. + */ + if (save_next_global) + { + TABLE_LIST *table_list_iterator= table; + while (table_list_iterator->next_global) + table_list_iterator= table_list_iterator->next_global; + table_list_iterator->next_global= save_next_global; + save_next_global->prev_global= &table_list_iterator->next_global; + } + table->next_local= save_next_local; thd->open_options&= ~extra_open_options; diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 4ff6903991768..efe9ac6f7f45d 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -9279,6 +9279,7 @@ open_system_tables_for_read(THD *thd, TABLE_LIST *table_list, */ lex->reset_n_backup_query_tables_list(&query_tables_list_backup); thd->reset_n_backup_open_tables_state(backup); + thd->lex->sql_command= SQLCOM_SELECT; if (open_and_lock_tables(thd, table_list, FALSE, MYSQL_OPEN_IGNORE_FLUSH | diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 10779aebbda55..b210a4d32dc4f 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -5666,9 +5666,11 @@ int THD::decide_logging_format(TABLE_LIST *tables) { static const char *prelocked_mode_name[] = { "NON_PRELOCKED", + "LOCK_TABLES", "PRELOCKED", "PRELOCKED_UNDER_LOCK_TABLES", }; + compile_time_assert(array_elements(prelocked_mode_name) == LTM_always_last); DBUG_PRINT("debug", ("prelocked_mode: %s", prelocked_mode_name[locked_tables_mode])); } diff --git a/sql/sql_class.h b/sql/sql_class.h index 08f014d7f4806..56ae659797ca8 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1203,7 +1203,8 @@ enum enum_locked_tables_mode LTM_NONE= 0, LTM_LOCK_TABLES, LTM_PRELOCKED, - LTM_PRELOCKED_UNDER_LOCK_TABLES + LTM_PRELOCKED_UNDER_LOCK_TABLES, + LTM_always_last }; @@ -4373,6 +4374,11 @@ class TMP_TABLE_PARAM :public Sql_alloc save_copy_field_end= copy_field_end= NULL; } } + void free_copy_field_data() + { + for (Copy_field *ptr= copy_field ; ptr != copy_field_end ; ptr++) + ptr->tmp.free(); + } }; class select_union :public select_result_interceptor diff --git a/sql/sql_db.cc b/sql/sql_db.cc index e89c3d9e74566..0a3ff64113f04 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -784,7 +784,7 @@ bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create_info) bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) { ulong deleted_tables= 0; - bool error= true; + bool error= true, rm_mysql_schema; char path[FN_REFLEN + 16]; MY_DIR *dirp; uint length; @@ -809,6 +809,18 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) length= build_table_filename(path, sizeof(path) - 1, db, "", "", 0); strmov(path+length, MY_DB_OPT_FILE); // Append db option file name del_dbopt(path); // Remove dboption hash entry + /* + Now remove the db.opt file. + The 'find_db_tables_and_rm_known_files' doesn't remove this file + if there exists a table with the name 'db', so let's just do it + separately. We know this file exists and needs to be deleted anyway. + */ + if (my_delete_with_symlink(path, MYF(0)) && my_errno != ENOENT) + { + my_error(EE_DELETE, MYF(0), path, my_errno); + DBUG_RETURN(true); + } + path[length]= '\0'; // Remove file name /* See if the directory exists */ @@ -835,7 +847,8 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) Disable drop of enabled log tables, must be done before name locking. This check is only needed if we are dropping the "mysql" database. */ - if ((my_strcasecmp(system_charset_info, MYSQL_SCHEMA_NAME.str, db) == 0)) + if ((rm_mysql_schema= + (my_strcasecmp(system_charset_info, MYSQL_SCHEMA_NAME.str, db) == 0))) { for (table= tables; table; table= table->next_local) if (check_if_log_table(table, TRUE, "DROP")) @@ -848,7 +861,7 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) lock_db_routines(thd, dbnorm)) goto exit; - if (!in_bootstrap) + if (!in_bootstrap && !rm_mysql_schema) { for (table= tables; table; table= table->next_local) { @@ -893,10 +906,13 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) ha_drop_database(path); tmp_disable_binlog(thd); query_cache_invalidate1(thd, dbnorm); - (void) sp_drop_db_routines(thd, dbnorm); /* @todo Do not ignore errors */ + if (!rm_mysql_schema) + { + (void) sp_drop_db_routines(thd, dbnorm); /* @todo Do not ignore errors */ #ifdef HAVE_EVENT_SCHEDULER - Events::drop_schema_events(thd, dbnorm); + Events::drop_schema_events(thd, dbnorm); #endif + } reenable_binlog(thd); /* diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index c1402b36737a4..4ed1b7a5323b0 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -3135,12 +3135,6 @@ case SQLCOM_PREPARE: create_info.table_charset= 0; } - /* - For CREATE TABLE we should not open the table even if it exists. - If the table exists, we should either not create it or replace it - */ - lex->query_tables->open_strategy= TABLE_LIST::OPEN_STUB; - /* If we are a slave, we should add OR REPLACE if we don't have IF EXISTS. This will help a slave to recover from @@ -8887,12 +8881,6 @@ bool create_table_precheck(THD *thd, TABLE_LIST *tables, if (check_fk_parent_table_access(thd, &lex->create_info, &lex->alter_info, create_table->db)) goto err; - /* - For CREATE TABLE we should not open the table even if it exists. - If the table exists, we should either not create it or replace it - */ - lex->query_tables->open_strategy= TABLE_LIST::OPEN_STUB; - error= FALSE; err: diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index 956e3c9e57f80..14f1107be3360 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -2810,6 +2810,22 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name, } +static size_t var_storage_size(int flags) +{ + switch (flags & PLUGIN_VAR_TYPEMASK) { + case PLUGIN_VAR_BOOL: return sizeof(my_bool); + case PLUGIN_VAR_INT: return sizeof(int); + case PLUGIN_VAR_LONG: return sizeof(long); + case PLUGIN_VAR_ENUM: return sizeof(long); + case PLUGIN_VAR_LONGLONG: return sizeof(ulonglong); + case PLUGIN_VAR_SET: return sizeof(ulonglong); + case PLUGIN_VAR_STR: return sizeof(char*); + case PLUGIN_VAR_DOUBLE: return sizeof(double); + default: DBUG_ASSERT(0); return 0; + } +} + + /* returns a bookmark for thd-local variables, creating if neccessary. returns null for non thd-local variables. @@ -2818,39 +2834,13 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name, static st_bookmark *register_var(const char *plugin, const char *name, int flags) { - uint length= strlen(plugin) + strlen(name) + 3, size= 0, offset, new_size; + uint length= strlen(plugin) + strlen(name) + 3, size, offset, new_size; st_bookmark *result; char *varname, *p; - if (!(flags & PLUGIN_VAR_THDLOCAL)) - return NULL; - - switch (flags & PLUGIN_VAR_TYPEMASK) { - case PLUGIN_VAR_BOOL: - size= sizeof(my_bool); - break; - case PLUGIN_VAR_INT: - size= sizeof(int); - break; - case PLUGIN_VAR_LONG: - case PLUGIN_VAR_ENUM: - size= sizeof(long); - break; - case PLUGIN_VAR_LONGLONG: - case PLUGIN_VAR_SET: - size= sizeof(ulonglong); - break; - case PLUGIN_VAR_STR: - size= sizeof(char*); - break; - case PLUGIN_VAR_DOUBLE: - size= sizeof(double); - break; - default: - DBUG_ASSERT(0); - return NULL; - }; + DBUG_ASSERT(flags & PLUGIN_VAR_THDLOCAL); + size= var_storage_size(flags); varname= ((char*) my_alloca(length)); strxmov(varname + 1, plugin, "_", name, NullS); for (p= varname + 1; *p; p++) @@ -3052,25 +3042,17 @@ void sync_dynamic_session_variables(THD* thd, bool global_lock) */ for (idx= 0; idx < bookmark_hash.records; idx++) { - sys_var_pluginvar *pi; - sys_var *var; st_bookmark *v= (st_bookmark*) my_hash_element(&bookmark_hash,idx); if (v->version <= thd->variables.dynamic_variables_version) continue; /* already in thd->variables */ - if (!(var= intern_find_sys_var(v->key + 1, v->name_len)) || - !(pi= var->cast_pluginvar()) || - v->key[0] != plugin_var_bookmark_key(pi->plugin_var->flags)) - continue; - /* Here we do anything special that may be required of the data types */ - if ((pi->plugin_var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR && - pi->plugin_var->flags & PLUGIN_VAR_MEMALLOC) + if ((v->key[0] & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR && + v->key[0] & BOOKMARK_MEMALLOC) { - int offset= ((thdvar_str_t *)(pi->plugin_var))->offset; - char **pp= (char**) (thd->variables.dynamic_variables_ptr + offset); + char **pp= (char**) (thd->variables.dynamic_variables_ptr + v->offset); if (*pp) *pp= my_strdup(*pp, MYF(MY_WME|MY_FAE)); } @@ -3331,69 +3313,58 @@ bool sys_var_pluginvar::session_update(THD *thd, set_var *var) return false; } -bool sys_var_pluginvar::global_update(THD *thd, set_var *var) +static const void *var_def_ptr(st_mysql_sys_var *pv) { - DBUG_ASSERT(!is_readonly()); - mysql_mutex_assert_owner(&LOCK_global_system_variables); - - void *tgt= real_value_ptr(thd, var->type); - const void *src= &var->save_result; - - if (!var->value) - { - switch (plugin_var->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) { + switch (pv->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) { case PLUGIN_VAR_INT: - src= &((sysvar_uint_t*) plugin_var)->def_val; - break; + return &((sysvar_uint_t*) pv)->def_val; case PLUGIN_VAR_LONG: - src= &((sysvar_ulong_t*) plugin_var)->def_val; - break; + return &((sysvar_ulong_t*) pv)->def_val; case PLUGIN_VAR_LONGLONG: - src= &((sysvar_ulonglong_t*) plugin_var)->def_val; - break; + return &((sysvar_ulonglong_t*) pv)->def_val; case PLUGIN_VAR_ENUM: - src= &((sysvar_enum_t*) plugin_var)->def_val; - break; + return &((sysvar_enum_t*) pv)->def_val; case PLUGIN_VAR_SET: - src= &((sysvar_set_t*) plugin_var)->def_val; - break; + return &((sysvar_set_t*) pv)->def_val; case PLUGIN_VAR_BOOL: - src= &((sysvar_bool_t*) plugin_var)->def_val; - break; + return &((sysvar_bool_t*) pv)->def_val; case PLUGIN_VAR_STR: - src= &((sysvar_str_t*) plugin_var)->def_val; - break; + return &((sysvar_str_t*) pv)->def_val; case PLUGIN_VAR_DOUBLE: - src= &((sysvar_double_t*) plugin_var)->def_val; - break; + return &((sysvar_double_t*) pv)->def_val; case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_uint_t*) plugin_var)->def_val; - break; + return &((thdvar_uint_t*) pv)->def_val; case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_ulong_t*) plugin_var)->def_val; - break; + return &((thdvar_ulong_t*) pv)->def_val; case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_ulonglong_t*) plugin_var)->def_val; - break; + return &((thdvar_ulonglong_t*) pv)->def_val; case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_enum_t*) plugin_var)->def_val; - break; + return &((thdvar_enum_t*) pv)->def_val; case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_set_t*) plugin_var)->def_val; - break; + return &((thdvar_set_t*) pv)->def_val; case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_bool_t*) plugin_var)->def_val; - break; + return &((thdvar_bool_t*) pv)->def_val; case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_str_t*) plugin_var)->def_val; - break; + return &((thdvar_str_t*) pv)->def_val; case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL: - src= &((thdvar_double_t*) plugin_var)->def_val; - break; + return &((thdvar_double_t*) pv)->def_val; default: DBUG_ASSERT(0); + return NULL; } - } +} + + +bool sys_var_pluginvar::global_update(THD *thd, set_var *var) +{ + DBUG_ASSERT(!is_readonly()); + mysql_mutex_assert_owner(&LOCK_global_system_variables); + + void *tgt= real_value_ptr(thd, var->type); + const void *src= &var->save_result; + + if (!var->value) + src= var_def_ptr(plugin_var); plugin_var->update(thd, plugin_var, tgt, src); return false; @@ -3749,7 +3720,18 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp, *(int*)(opt + 1)= offset= v->offset; if (opt->flags & PLUGIN_VAR_NOCMDOPT) + { + char *val= global_system_variables.dynamic_variables_ptr + offset; + if (((opt->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR) && + (opt->flags & PLUGIN_VAR_MEMALLOC)) + { + char *def_val= *(char**)var_def_ptr(opt); + *(char**)val= def_val ? my_strdup(def_val, MYF(0)) : NULL; + } + else + memcpy(val, var_def_ptr(opt), var_storage_size(opt->flags)); continue; + } optname= (char*) memdup_root(mem_root, v->key + 1, (optnamelen= v->name_len) + 1); @@ -3957,10 +3939,11 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp, *str->value= strdup_root(mem_root, *str->value); } + var= find_bookmark(plugin_name.str, o->name, o->flags); if (o->flags & PLUGIN_VAR_NOSYSVAR) continue; tmp_backup[tmp->nbackups++].save(&o->name); - if ((var= find_bookmark(plugin_name.str, o->name, o->flags))) + if (var) v= new (mem_root) sys_var_pluginvar(&chain, var->key + 1, o, tmp); else { diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 063fe22140386..0cfb964307d2a 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -9004,9 +9004,26 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table) We need to destruct the copy_field (allocated in create_tmp_table()) before setting it to 0 if the join is not "reusable". */ - if (!tmp_join || tmp_join != this) - tmp_table_param.cleanup(); - tmp_table_param.copy_field= tmp_table_param.copy_field_end=0; + if (!tmp_join || tmp_join != this) + tmp_table_param.cleanup(); + else + { + /* + Free data buffered in copy_fields, but keep data pointed by copy_field + around for next iteration (possibly stored in save_copy_fields). + + It would be logically simpler to not clear copy_field + below, but as we have loops that runs over copy_field to + copy_field_end that should not be done anymore, it's simpler to + just clear the pointers. + + Another option would be to just clear copy_field_end and not run + the loops if this is not set or to have tmp_table_param.cleanup() + to run cleanup on save_copy_field if copy_field is not set. + */ + tmp_table_param.free_copy_field_data(); + tmp_table_param.copy_field= tmp_table_param.copy_field_end=0; + } first_record= sort_and_group=0; send_records= (ha_rows) 0; @@ -11687,7 +11704,7 @@ void JOIN::join_free() /** Free resources of given join. - @param fill true if we should free all resources, call with full==1 + @param full true if we should free all resources, call with full==1 should be last, before it this function can be called with full==0 @@ -11806,7 +11823,7 @@ void JOIN::cleanup(bool full) /* If we have tmp_join and 'this' JOIN is not tmp_join and tmp_table_param.copy_field's of them are equal then we have to remove - pointer to tmp_table_param.copy_field from tmp_join, because it qill + pointer to tmp_table_param.copy_field from tmp_join, because it will be removed in tmp_table_param.cleanup(). */ if (tmp_join && @@ -15710,6 +15727,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type, case Item::VARBIN_ITEM: case Item::CACHE_ITEM: case Item::EXPR_CACHE_ITEM: + case Item::PARAM_ITEM: if (make_copy_field) { DBUG_ASSERT(((Item_result_field*)item)->result_field); @@ -22240,7 +22258,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, err: if (copy) delete [] param->copy_field; // This is never 0 - param->copy_field=0; + param->copy_field= 0; err2: DBUG_RETURN(TRUE); } diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 47a5a40ebeb44..70080a6b4f162 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1003,11 +1003,13 @@ class Column_stat: public Stat_table switch (i) { case COLUMN_STAT_MIN_VALUE: + table_field->read_stats->min_value->set_notnull(); stat_field->val_str(&val); table_field->read_stats->min_value->store(val.ptr(), val.length(), &my_charset_bin); break; case COLUMN_STAT_MAX_VALUE: + table_field->read_stats->max_value->set_notnull(); stat_field->val_str(&val); table_field->read_stats->max_value->store(val.ptr(), val.length(), &my_charset_bin); @@ -3659,17 +3661,8 @@ double get_column_range_cardinality(Field *field, { double avg_frequency= col_stats->get_avg_frequency(); res= avg_frequency; - /* - psergey-todo: what does check for min_value, max_value mean? - min/max_value are set to NULL in alloc_statistics_for_table() and - alloc_statistics_for_table_share(). Both functions will immediately - call create_min_max_statistical_fields_for_table and - create_min_max_statistical_fields_for_table_share() respectively, - which will set min/max_value to be valid pointers, unless OOM - occurs. - */ if (avg_frequency > 1.0 + 0.000001 && - col_stats->min_value && col_stats->max_value) + col_stats->min_max_values_are_provided()) { Histogram *hist= &col_stats->histogram; if (hist->is_available()) @@ -3692,7 +3685,7 @@ double get_column_range_cardinality(Field *field, } else { - if (col_stats->min_value && col_stats->max_value) + if (col_stats->min_max_values_are_provided()) { double sel, min_mp_pos, max_mp_pos; diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 46e5cef22d1ac..8e5f8107849a7 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -388,6 +388,11 @@ class Column_statistics avg_frequency= (ulong) (val * Scale_factor_avg_frequency); } + bool min_max_values_are_provided() + { + return !is_null(COLUMN_STAT_MIN_VALUE) && + !is_null(COLUMN_STAT_MIN_VALUE); + } }; diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 736827d378262..a2e51ca8e935e 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -2464,7 +2464,8 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, if (table_type && table_type != view_pseudo_hton) ha_lock_engine(thd, table_type); - if (thd->locked_tables_mode) + if (thd->locked_tables_mode == LTM_LOCK_TABLES || + thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES) { if (wait_while_table_is_used(thd, table->table, HA_EXTRA_NOT_USED)) { @@ -6336,6 +6337,7 @@ static bool fill_alter_inplace_info(THD *thd, (field->stored_in_db || field->vcol_info->is_in_partitioning_expr())) { if (is_equal == IS_EQUAL_NO || + !new_field->vcol_info || !field->vcol_info->is_equal(new_field->vcol_info)) ha_alter_info->handler_flags|= Alter_inplace_info::ALTER_COLUMN_VCOL; else diff --git a/sql/table_cache.cc b/sql/table_cache.cc index 097f37d26d835..bdb7914c32b97 100644 --- a/sql/table_cache.cc +++ b/sql/table_cache.cc @@ -876,6 +876,8 @@ void tdc_release_share(TABLE_SHARE *share) } if (--share->tdc.ref_count) { + if (!share->is_view) + mysql_cond_broadcast(&share->tdc.COND_release); mysql_mutex_unlock(&share->tdc.LOCK_table_share); mysql_mutex_unlock(&LOCK_unused_shares); DBUG_VOID_RETURN; diff --git a/sql/threadpool_common.cc b/sql/threadpool_common.cc index ae8a81b1bcd7e..9d263038bc95f 100644 --- a/sql/threadpool_common.cc +++ b/sql/threadpool_common.cc @@ -73,17 +73,16 @@ struct Worker_thread_context void save() { -#ifdef HAVE_PSI_INTERFACE - psi_thread= PSI_server?PSI_server->get_thread():0; +#ifdef HAVE_PSI_THREAD_INTERFACE + psi_thread = PSI_THREAD_CALL(get_thread)(); #endif mysys_var= (st_my_thread_var *)pthread_getspecific(THR_KEY_mysys); } void restore() { -#ifdef HAVE_PSI_INTERFACE - if (PSI_server) - PSI_server->set_thread(psi_thread); +#ifdef HAVE_PSI_THREAD_INTERFACE + PSI_THREAD_CALL(set_thread)(psi_thread); #endif pthread_setspecific(THR_KEY_mysys,mysys_var); pthread_setspecific(THR_THD, 0); @@ -92,6 +91,41 @@ struct Worker_thread_context }; +#ifdef HAVE_PSI_INTERFACE + +/* + The following fixes PSI "idle" psi instrumentation. + The server assumes that connection becomes idle + just before net_read_packet() and switches to active after it. + In out setup, server becomes idle when async socket io is made. +*/ + +extern void net_before_header_psi(struct st_net *net, void *user_data, size_t); + +static void dummy_before_header(struct st_net *, void *, size_t) +{ +} + +static void re_init_net_server_extension(THD *thd) +{ + thd->m_net_server_extension.m_before_header = dummy_before_header; +} + +#else + +#define re_init_net_server_extension(thd) + +#endif /* HAVE_PSI_INTERFACE */ + + +static inline void set_thd_idle(THD *thd) +{ + thd->net.reading_or_writing= 1; +#ifdef HAVE_PSI_INTERFACE + net_before_header_psi(&thd->net, thd, 0); +#endif +} + /* Attach/associate the connection with the OS thread, */ @@ -100,10 +134,10 @@ static bool thread_attach(THD* thd) pthread_setspecific(THR_KEY_mysys,thd->mysys_var); thd->thread_stack=(char*)&thd; thd->store_globals(); -#ifdef HAVE_PSI_INTERFACE - if (PSI_server) - PSI_server->set_thread(thd->event_scheduler.m_psi); +#ifdef HAVE_PSI_THREAD_INTERFACE + PSI_THREAD_CALL(set_thread)(thd->event_scheduler.m_psi); #endif + mysql_socket_set_thread_owner(thd->net.vio->mysql_socket); return 0; } @@ -130,39 +164,38 @@ int threadpool_add_connection(THD *thd) } /* Create new PSI thread for use with the THD. */ -#ifdef HAVE_PSI_INTERFACE - if (PSI_server) - { - thd->event_scheduler.m_psi = - PSI_server->new_thread(key_thread_one_connection, thd, thd->thread_id); - } +#ifdef HAVE_PSI_THREAD_INTERFACE + thd->event_scheduler.m_psi= + PSI_THREAD_CALL(new_thread)(key_thread_one_connection, thd, thd->thread_id); #endif /* Login. */ thread_attach(thd); + re_init_net_server_extension(thd); ulonglong now= microsecond_interval_timer(); thd->prior_thr_create_utime= now; thd->start_utime= now; thd->thr_create_utime= now; - if (!setup_connection_thread_globals(thd)) - { - if (!thd_prepare_connection(thd)) - { - - /* - Check if THD is ok, as prepare_new_connection_state() - can fail, for example if init command failed. - */ - if (thd_is_connection_alive(thd)) - { - retval= 0; - thd->net.reading_or_writing= 1; - thd->skip_wait_timeout= true; - } - } - } + if (setup_connection_thread_globals(thd)) + goto end; + + if (thd_prepare_connection(thd)) + goto end; + + /* + Check if THD is ok, as prepare_new_connection_state() + can fail, for example if init command failed. + */ + if (!thd_is_connection_alive(thd)) + goto end; + + retval= 0; + thd->skip_wait_timeout= true; + set_thd_idle(thd); + +end: worker_context.restore(); return retval; } @@ -244,12 +277,13 @@ int threadpool_process_request(THD *thd) goto end; } + set_thd_idle(thd); + vio= thd->net.vio; if (!vio->has_data(vio)) { /* More info on this debug sync is in sql_parse.cc*/ DEBUG_SYNC(thd, "before_do_command_net_read"); - thd->net.reading_or_writing= 1; goto end; } } diff --git a/storage/connect/JdbcInterface.java b/storage/connect/JdbcInterface.java index f765052915d72..34af8c4e013c2 100644 --- a/storage/connect/JdbcInterface.java +++ b/storage/connect/JdbcInterface.java @@ -340,6 +340,18 @@ public int GetMaxValue(int n) { return m; } // end of GetMaxValue + public String GetQuoteString() { + String qs = null; + + try { + qs = dbmd.getIdentifierQuoteString(); + } catch(SQLException se) { + SetErrmsg(se); + } // end try/catch + + return qs; + } // end of GetQuoteString + public int GetColumns(String[] parms) { int ncol = -1; @@ -680,11 +692,11 @@ public int TimestampField(int n, String name) { return 0; } // end of TimestampField - public String ObjectField(int n, String name) { + public Object ObjectField(int n, String name) { if (rs == null) { System.out.println("No result set"); } else try { - return (n > 0) ? rs.getObject(n).toString() : rs.getObject(name).toString(); + return (n > 0) ? rs.getObject(n) : rs.getObject(name); } catch (SQLException se) { SetErrmsg(se); } //end try/catch diff --git a/storage/connect/filamdbf.cpp b/storage/connect/filamdbf.cpp index 8afda72357809..a4557facbd869 100644 --- a/storage/connect/filamdbf.cpp +++ b/storage/connect/filamdbf.cpp @@ -383,7 +383,7 @@ DBFBASE::DBFBASE(DBFBASE *txfp) /* and header length. Set Records, check that Reclen is equal to lrecl and */ /* return the header length or 0 in case of error. */ /****************************************************************************/ -int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath) +int DBFBASE::ScanHeader(PGLOBAL g, PSZ fn, int lrecl, int *rln, char *defpath) { int rc; char filename[_MAX_PATH]; @@ -393,7 +393,7 @@ int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath) /************************************************************************/ /* Open the input file. */ /************************************************************************/ - PlugSetPath(filename, fname, defpath); + PlugSetPath(filename, fn, defpath); if (!(infile= global_fopen(g, MSGID_CANNOT_OPEN, filename, "rb"))) return 0; // Assume file does not exist @@ -410,11 +410,7 @@ int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath) } else if (rc == RC_FX) return -1; - if ((int)header.Reclen() != lrecl) { - sprintf(g->Message, MSG(BAD_LRECL), lrecl, header.Reclen()); - return -1; - } // endif Lrecl - + *rln = (int)header.Reclen(); Records = (int)header.Records(); return (int)header.Headlen(); } // end of ScanHeader @@ -431,9 +427,27 @@ int DBFFAM::Cardinality(PGLOBAL g) if (!g) return 1; - if (!Headlen) - if ((Headlen = ScanHeader(g, To_File, Lrecl, Tdbp->GetPath())) < 0) - return -1; // Error in ScanHeader + if (!Headlen) { + int rln = 0; // Record length in the file header + + Headlen = ScanHeader(g, To_File, Lrecl, &rln, Tdbp->GetPath()); + + if (Headlen < 0) + return -1; // Error in ScanHeader + + if (rln && Lrecl != rln) { + // This happens always on some Linux platforms + sprintf(g->Message, MSG(BAD_LRECL), Lrecl, rln); + + if (Accept) { + Lrecl = rln; + PushWarning(g, Tdbp); + } else + return -1; + + } // endif rln + + } // endif Headlen // Set number of blocks for later use Block = (Records > 0) ? (Records + Nrec - 1) / Nrec : 0; @@ -565,7 +579,13 @@ bool DBFFAM::AllocateBuffer(PGLOBAL g) if (Lrecl != reclen) { sprintf(g->Message, MSG(BAD_LRECL), Lrecl, reclen); - return true; + + if (Accept) { + Lrecl = reclen; + PushWarning(g, Tdbp); + } else + return true; + } // endif Lrecl hlen = HEADLEN * (n + 1) + 2; @@ -641,8 +661,14 @@ bool DBFFAM::AllocateBuffer(PGLOBAL g) if ((rc = dbfhead(g, Stream, Tdbp->GetFile(g), &header)) == RC_OK) { if (Lrecl != (int)header.Reclen()) { sprintf(g->Message, MSG(BAD_LRECL), Lrecl, header.Reclen()); - return true; - } // endif Lrecl + + if (Accept) { + Lrecl = header.Reclen(); + PushWarning(g, Tdbp); + } else + return true; + + } // endif Lrecl Records = (int)header.Records(); Headlen = (int)header.Headlen(); @@ -916,9 +942,27 @@ int DBMFAM::Cardinality(PGLOBAL g) if (!g) return 1; - if (!Headlen) - if ((Headlen = ScanHeader(g, To_File, Lrecl, Tdbp->GetPath())) < 0) - return -1; // Error in ScanHeader + if (!Headlen) { + int rln = 0; // Record length in the file header + + Headlen = ScanHeader(g, To_File, Lrecl, &rln, Tdbp->GetPath()); + + if (Headlen < 0) + return -1; // Error in ScanHeader + + if (rln && Lrecl != rln) { + // This happens always on some Linux platforms + sprintf(g->Message, MSG(BAD_LRECL), Lrecl, rln); + + if (Accept) { + Lrecl = rln; + PushWarning(g, Tdbp); + } else + return -1; + + } // endif rln + + } // endif Headlen // Set number of blocks for later use Block = (Records > 0) ? (Records + Nrec - 1) / Nrec : 0; @@ -961,8 +1005,14 @@ bool DBMFAM::AllocateBuffer(PGLOBAL g) if (Lrecl != (int)hp->Reclen()) { sprintf(g->Message, MSG(BAD_LRECL), Lrecl, hp->Reclen()); - return true; - } // endif Lrecl + + if (Accept) { + Lrecl = hp->Reclen(); + PushWarning(g, Tdbp); + } else + return true; + + } // endif Lrecl Records = (int)hp->Records(); Headlen = (int)hp->Headlen(); diff --git a/storage/connect/filamdbf.h b/storage/connect/filamdbf.h index da84d7685a8dd..66458a10eaad9 100644 --- a/storage/connect/filamdbf.h +++ b/storage/connect/filamdbf.h @@ -31,7 +31,7 @@ class DllExport DBFBASE { DBFBASE(PDBF txfp); // Implementation - int ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath); + int ScanHeader(PGLOBAL g, PSZ fname, int lrecl, int *rlen, char *defpath); protected: // Default constructor, not to be used diff --git a/storage/connect/ha_connect.cc b/storage/connect/ha_connect.cc index ea6fb1b08c16d..cf945a73f4610 100644 --- a/storage/connect/ha_connect.cc +++ b/storage/connect/ha_connect.cc @@ -224,6 +224,7 @@ uint GetWorkSize(void); void SetWorkSize(uint); extern "C" const char *msglang(void); +static void PopUser(PCONNECT xp); static PCONNECT GetUser(THD *thd, PCONNECT xp); static PGLOBAL GetPlug(THD *thd, PCONNECT& lxp); @@ -831,34 +832,43 @@ ha_connect::~ha_connect(void) table ? table->s->table_name.str : "", xp, xp ? xp->count : 0); - if (xp) { - PCONNECT p; + PopUser(xp); +} // end of ha_connect destructor - xp->count--; - for (p= user_connect::to_users; p; p= p->next) - if (p == xp) - break; +/****************************************************************************/ +/* Check whether this user can be removed. */ +/****************************************************************************/ +static void PopUser(PCONNECT xp) +{ + if (xp) { + xp->count--; - if (p && !p->count) { - if (p->next) - p->next->previous= p->previous; + if (!xp->count) { + PCONNECT p; - if (p->previous) - p->previous->next= p->next; - else - user_connect::to_users= p->next; + for (p= user_connect::to_users; p; p= p->next) + if (p == xp) + break; - } // endif p + if (p) { + if (p->next) + p->next->previous= p->previous; - if (!xp->count) { - PlugCleanup(xp->g, true); - delete xp; - } // endif count + if (p->previous) + p->previous->next= p->next; + else + user_connect::to_users= p->next; - } // endif xp + } // endif p -} // end of ha_connect destructor + PlugCleanup(xp->g, true); + delete xp; + } // endif count + + } // endif xp + +} // end of PopUser /****************************************************************************/ @@ -866,7 +876,7 @@ ha_connect::~ha_connect(void) /****************************************************************************/ static PCONNECT GetUser(THD *thd, PCONNECT xp) { - if (!thd) + if (!thd) return NULL; if (xp && thd == xp->thdp) @@ -890,7 +900,6 @@ static PCONNECT GetUser(THD *thd, PCONNECT xp) return xp; } // end of GetUser - /****************************************************************************/ /* Get the global pointer of the user of this handler. */ /****************************************************************************/ @@ -5261,7 +5270,18 @@ static int connect_assisted_discovery(handlerton *, THD* thd, if (!(shm= (char*)db)) db= table_s->db.str; // Default value - // Check table type + // Save stack and allocation environment and prepare error return + if (g->jump_level == MAX_JUMP) { + strcpy(g->Message, MSG(TOO_MANY_JUMPS)); + goto jer; + } // endif jump_level + + if ((rc= setjmp(g->jumper[++g->jump_level])) != 0) { + my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0)); + goto err; + } // endif rc + + // Check table type if (ttp == TAB_UNDEF) { topt->type= (src) ? "MYSQL" : (tab) ? "PROXY" : "DOS"; ttp= GetTypeID(topt->type); @@ -5270,20 +5290,9 @@ static int connect_assisted_discovery(handlerton *, THD* thd, } else if (ttp == TAB_NIY) { sprintf(g->Message, "Unsupported table type %s", topt->type); my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0)); - return HA_ERR_INTERNAL_ERROR; + goto err; } // endif ttp - // Save stack and allocation environment and prepare error return - if (g->jump_level == MAX_JUMP) { - strcpy(g->Message, MSG(TOO_MANY_JUMPS)); - return HA_ERR_INTERNAL_ERROR; - } // endif jump_level - - if ((rc= setjmp(g->jumper[++g->jump_level])) != 0) { - my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0)); - goto err; - } // endif rc - if (!tab) { if (ttp == TAB_TBL) { // Make tab the first table of the list @@ -5843,6 +5852,7 @@ static int connect_assisted_discovery(handlerton *, THD* thd, rc= init_table_share(thd, table_s, create_info, &sql); g->jump_level--; + PopUser(xp); return rc; } // endif ok @@ -5850,7 +5860,9 @@ static int connect_assisted_discovery(handlerton *, THD* thd, err: g->jump_level--; - return HA_ERR_INTERNAL_ERROR; + jer: + PopUser(xp); + return HA_ERR_INTERNAL_ERROR; } // end of connect_assisted_discovery /** diff --git a/storage/connect/jdbconn.cpp b/storage/connect/jdbconn.cpp index 3b8de3e975bfb..dca9bd0eac4e5 100644 --- a/storage/connect/jdbconn.cpp +++ b/storage/connect/jdbconn.cpp @@ -498,145 +498,6 @@ PQRYRES JDBCDrivers(PGLOBAL g, int maxres, bool info) return qrp; } // end of JDBCDrivers -#if 0 -/*************************************************************************/ -/* JDBCDataSources: constructs the result blocks containing all JDBC */ -/* data sources available on the local host. */ -/* Called with info=true to have result column names. */ -/*************************************************************************/ -PQRYRES JDBCDataSources(PGLOBAL g, int maxres, bool info) -{ - int buftyp[] ={ TYPE_STRING, TYPE_STRING }; - XFLD fldtyp[] ={ FLD_NAME, FLD_REM }; - unsigned int length[] ={ 0, 256 }; - bool b[] ={ false, true }; - int i, n = 0, ncol = 2; - PCOLRES crp; - PQRYRES qrp; - JDBConn *jcp = NULL; - - /************************************************************************/ - /* Do an evaluation of the result size. */ - /************************************************************************/ - if (!info) { - jcp = new(g)JDBConn(g, NULL); - n = jcp->GetMaxValue(SQL_MAX_DSN_LENGTH); - length[0] = (n) ? (n + 1) : 256; - - if (!maxres) - maxres = 512; // Estimated max number of data sources - - } else { - length[0] = 256; - maxres = 0; - } // endif info - - if (trace) - htrc("JDBCDataSources: max=%d len=%d\n", maxres, length[0]); - - /************************************************************************/ - /* Allocate the structures used to refer to the result set. */ - /************************************************************************/ - qrp = PlgAllocResult(g, ncol, maxres, IDS_DSRC, - buftyp, fldtyp, length, false, true); - - for (i = 0, crp = qrp->Colresp; crp; i++, crp = crp->Next) - if (b[i]) - crp->Kdata->SetNullable(true); - - /************************************************************************/ - /* Now get the results into blocks. */ - /************************************************************************/ - if (!info && qrp && jcp->GetDataSources(qrp)) - qrp = NULL; - - /************************************************************************/ - /* Return the result pointer for use by GetData routines. */ - /************************************************************************/ - return qrp; -} // end of JDBCDataSources - -/**************************************************************************/ -/* PrimaryKeys: constructs the result blocks containing all the */ -/* JDBC catalog information concerning primary keys. */ -/**************************************************************************/ -PQRYRES JDBCPrimaryKeys(PGLOBAL g, JDBConn *op, char *dsn, char *table) -{ - static int buftyp[] ={ TYPE_STRING, TYPE_STRING, TYPE_STRING, - TYPE_STRING, TYPE_SHORT, TYPE_STRING }; - static unsigned int length[] ={ 0, 0, 0, 0, 6, 128 }; - int n, ncol = 5; - int maxres; - PQRYRES qrp; - JCATPARM *cap; - JDBConn *jcp = op; - - if (!op) { - /**********************************************************************/ - /* Open the connection with the JDBC data source. */ - /**********************************************************************/ - jcp = new(g)JDBConn(g, NULL); - - if (jcp->Open(dsn, 2) < 1) // 2 is openReadOnly - return NULL; - - } // endif op - - /************************************************************************/ - /* Do an evaluation of the result size. */ - /************************************************************************/ - n = jcp->GetMaxValue(SQL_MAX_COLUMNS_IN_TABLE); - maxres = (n) ? (int)n : 250; - n = jcp->GetMaxValue(SQL_MAX_CATALOG_NAME_LEN); - length[0] = (n) ? (n + 1) : 128; - n = jcp->GetMaxValue(SQL_MAX_SCHEMA_NAME_LEN); - length[1] = (n) ? (n + 1) : 128; - n = jcp->GetMaxValue(SQL_MAX_TABLE_NAME_LEN); - length[2] = (n) ? (n + 1) : 128; - n = jcp->GetMaxValue(SQL_MAX_COLUMN_NAME_LEN); - length[3] = (n) ? (n + 1) : 128; - - if (trace) - htrc("JDBCPrimaryKeys: max=%d len=%d,%d,%d\n", - maxres, length[0], length[1], length[2]); - - /************************************************************************/ - /* Allocate the structure used to refer to the result set. */ - /************************************************************************/ - qrp = PlgAllocResult(g, ncol, maxres, IDS_PKEY, - buftyp, NULL, length, false, true); - - if (trace) - htrc("Getting pkey results ncol=%d\n", qrp->Nbcol); - - cap = AllocCatInfo(g, CAT_KEY, NULL, table, qrp); - - /************************************************************************/ - /* Now get the results into blocks. */ - /************************************************************************/ - if ((n = jcp->GetCatInfo(cap)) >= 0) { - qrp->Nblin = n; - // ResetNullValues(cap); - - if (trace) - htrc("PrimaryKeys: NBCOL=%d NBLIN=%d\n", qrp->Nbcol, qrp->Nblin); - - } else - qrp = NULL; - - /************************************************************************/ - /* Close any local connection. */ - /************************************************************************/ - if (!op) - jcp->Close(); - - /************************************************************************/ - /* Return the result pointer for use by GetData routines. */ - /************************************************************************/ - return qrp; -} // end of JDBCPrimaryKeys -#endif // 0 - /***********************************************************************/ /* JDBConn construction/destruction. */ /***********************************************************************/ @@ -651,7 +512,7 @@ JDBConn::JDBConn(PGLOBAL g, TDBJDBC *tdbp) xqid = xuid = xid = grs = readid = fetchid = typid = errid = nullptr; prepid = xpid = pcid = nullptr; chrfldid = intfldid = dblfldid = fltfldid = bigfldid = nullptr; - datfldid = timfldid = tspfldid = nullptr; + objfldid = datfldid = timfldid = tspfldid = nullptr; //m_LoginTimeout = DEFAULT_LOGIN_TIMEOUT; //m_QueryTimeout = DEFAULT_QUERY_TIMEOUT; //m_UpdateOptions = 0; @@ -739,60 +600,6 @@ bool JDBConn::gmID(PGLOBAL g, jmethodID& mid, const char *name, const char *sig } // end of gmID -#if 0 -/***********************************************************************/ -/* Utility routine. */ -/***********************************************************************/ -PSZ JDBConn::GetStringInfo(ushort infotype) -{ - //ASSERT(m_hdbc != SQL_NULL_HDBC); - char *p, buffer[MAX_STRING_INFO]; - SWORD result; - RETCODE rc; - - rc = SQLGetInfo(m_hdbc, infotype, buffer, sizeof(buffer), &result); - - if (!Check(rc)) { - ThrowDJX(rc, "SQLGetInfo"); // Temporary - // *buffer = '\0'; - } // endif rc - - p = PlugDup(m_G, buffer); - return p; -} // end of GetStringInfo - -/***********************************************************************/ -/* Utility routines. */ -/***********************************************************************/ -void JDBConn::OnSetOptions(HSTMT hstmt) -{ - RETCODE rc; - ASSERT(m_hdbc != SQL_NULL_HDBC); - - if ((signed)m_QueryTimeout != -1) { - // Attempt to set query timeout. Ignore failure - rc = SQLSetStmtOption(hstmt, SQL_QUERY_TIMEOUT, m_QueryTimeout); - - if (!Check(rc)) - // don't attempt it again - m_QueryTimeout = (DWORD)-1; - - } // endif m_QueryTimeout - - if (m_RowsetSize > 0) { - // Attempt to set rowset size. - // In case of failure reset it to 0 to use Fetch. - rc = SQLSetStmtOption(hstmt, SQL_ROWSET_SIZE, m_RowsetSize); - - if (!Check(rc)) - // don't attempt it again - m_RowsetSize = 0; - - } // endif m_RowsetSize - -} // end of OnSetOptions -#endif // 0 - /***********************************************************************/ /* Utility routine. */ /***********************************************************************/ @@ -1007,7 +814,7 @@ int JDBConn::Open(PJPARM sop) #define N 1 #endif - // Java source will be compiled as ajar file installed in the plugin dir + // Java source will be compiled as a jar file installed in the plugin dir jpop->Append(sep); jpop->Append(GetPluginDir()); jpop->Append("JdbcInterface.jar"); @@ -1204,6 +1011,21 @@ int JDBConn::Open(PJPARM sop) return RC_FX; } // endif Msg + jmethodID qcid = nullptr; + + if (!gmID(g, qcid, "GetQuoteString", "()Ljava/lang/String;")) { + jstring s = (jstring)env->CallObjectMethod(job, qcid); + + if (s != nullptr) { + char *qch = (char*)env->GetStringUTFChars(s, (jboolean)false); + m_IDQuoteChar[0] = *qch; + } else { + s = (jstring)env->CallObjectMethod(job, errid); + Msg = (char*)env->GetStringUTFChars(s, (jboolean)false); + } // endif s + + } // endif qcid + if (gmID(g, typid, "ColumnType", "(ILjava/lang/String;)I")) return RC_FX; else @@ -1345,9 +1167,10 @@ void JDBConn::Close() /***********************************************************************/ void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val) { - PGLOBAL& g = m_G; - jint ctyp; - jstring cn, jn = nullptr; + PGLOBAL& g = m_G; + jint ctyp; + jstring cn, jn = nullptr; + jobject jb = nullptr; if (rank == 0) if (!name || (jn = env->NewStringUTF(name)) == nullptr) { @@ -1363,21 +1186,32 @@ void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val) longjmp(g->jumper[g->jump_level], TYPE_AM_JDBC); } // endif Check + if (val->GetNullable()) + if (!gmID(g, objfldid, "ObjectField", "(ILjava/lang/String;)Ljava/lang/Object;")) { + jb = env->CallObjectMethod(job, objfldid, (jint)rank, jn); + + if (jb == nullptr) { + val->Reset(); + val->SetNull(true); + goto chk; + } // endif job + + } // endif objfldid + switch (ctyp) { case 12: // VARCHAR case -1: // LONGVARCHAR case 1: // CHAR - if (!gmID(g, chrfldid, "StringField", "(ILjava/lang/String;)Ljava/lang/String;")) { + if (jb) + cn = (jstring)jb; + else if (!gmID(g, chrfldid, "StringField", "(ILjava/lang/String;)Ljava/lang/String;")) cn = (jstring)env->CallObjectMethod(job, chrfldid, (jint)rank, jn); + else + cn = nullptr; - if (cn) { - const char *field = env->GetStringUTFChars(cn, (jboolean)false); - val->SetValue_psz((PSZ)field); - } else { - val->Reset(); - val->SetNull(true); - } // endif cn - + if (cn) { + const char *field = env->GetStringUTFChars(cn, (jboolean)false); + val->SetValue_psz((PSZ)field); } else val->Reset(); @@ -1449,6 +1283,7 @@ void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val) val->Reset(); } // endswitch Type + chk: if (Check()) { if (rank == 0) env->DeleteLocalRef(jn); diff --git a/storage/connect/jdbconn.h b/storage/connect/jdbconn.h index 095b1565bd276..0a1c52d4576d1 100644 --- a/storage/connect/jdbconn.h +++ b/storage/connect/jdbconn.h @@ -165,6 +165,7 @@ class JDBConn : public BLOCK { jmethodID xpid; // The ExecutePrep method ID jmethodID pcid; // The ClosePrepStmt method ID jmethodID errid; // The GetErrmsg method ID + jmethodID objfldid; // The ObjectField method ID jmethodID chrfldid; // The StringField method ID jmethodID intfldid; // The IntField method ID jmethodID dblfldid; // The DoubleField method ID diff --git a/storage/connect/json.cpp b/storage/connect/json.cpp index 3558c5762bba2..c45630129f120 100644 --- a/storage/connect/json.cpp +++ b/storage/connect/json.cpp @@ -595,7 +595,7 @@ PSZ Serialize(PGLOBAL g, PJSON jsp, char *fn, int pretty) fputs(EL, fs); fclose(fs); str = (err) ? NULL : strcpy(g->Message, "Ok"); - } else if (!err) { + } else if (!err) { str = ((JOUTSTR*)jp)->Strp; jp->WriteChr('\0'); PlugSubAlloc(g, NULL, ((JOUTSTR*)jp)->N); diff --git a/storage/connect/reldef.cpp b/storage/connect/reldef.cpp index 2c8ada52e6f5c..ac2327212e0fa 100644 --- a/storage/connect/reldef.cpp +++ b/storage/connect/reldef.cpp @@ -294,7 +294,7 @@ int TABDEF::GetColCatInfo(PGLOBAL g) nlg+= nof; case TAB_DIR: case TAB_XML: - poff= loff + 1; + poff= loff + (pcf->Flags & U_VIRTUAL ? 0 : 1); break; case TAB_INI: case TAB_MAC: @@ -440,7 +440,11 @@ int TABDEF::GetColCatInfo(PGLOBAL g) } // endswitch tc // lrecl must be at least recln to avoid buffer overflow - recln= MY_MAX(recln, Hc->GetIntegerOption("Lrecl")); + if (trace) + htrc("Lrecl: Calculated=%d defined=%d\n", + recln, Hc->GetIntegerOption("Lrecl")); + + recln = MY_MAX(recln, Hc->GetIntegerOption("Lrecl")); Hc->SetIntegerOption("Lrecl", recln); ((PDOSDEF)this)->SetLrecl(recln); } // endif Lrecl diff --git a/storage/connect/tabjdbc.cpp b/storage/connect/tabjdbc.cpp index 86fd831b26201..e398523892f2e 100644 --- a/storage/connect/tabjdbc.cpp +++ b/storage/connect/tabjdbc.cpp @@ -686,6 +686,9 @@ bool TDBJDBC::MakeInsert(PGLOBAL g) else Prepared = true; + if (trace) + htrc("Insert=%s\n", Query->GetStr()); + return false; } // end of MakeInsert @@ -733,17 +736,18 @@ bool TDBJDBC::MakeCommand(PGLOBAL g) // If so, it must be quoted in the original query strlwr(strcat(strcat(strcpy(name, " "), Name), " ")); - if (!strstr(" update delete low_priority ignore quick from ", name)) - strlwr(strcpy(name, Name)); // Not a keyword - else + if (strstr(" update delete low_priority ignore quick from ", name)) { strlwr(strcat(strcat(strcpy(name, qc), Name), qc)); + k += 2; + } else + strlwr(strcpy(name, Name)); // Not a keyword if ((p = strstr(qrystr, name))) { for (i = 0; i < p - qrystr; i++) stmt[i] = (Qrystr[i] == '`') ? *qc : Qrystr[i]; stmt[i] = 0; - k = i + (int)strlen(Name); + k += i + (int)strlen(Name); if (qtd && *(p-1) == ' ') strcat(strcat(strcat(stmt, qc), TableName), qc); @@ -765,6 +769,9 @@ bool TDBJDBC::MakeCommand(PGLOBAL g) return NULL; } // endif p + if (trace) + htrc("Command=%s\n", stmt); + Query = new(g)STRING(g, 0, stmt); return (!Query->GetSize()); } // end of MakeCommand @@ -1214,6 +1221,10 @@ int TDBJDBC::WriteDB(PGLOBAL g) } // endif oom Query->RepLast(')'); + + if (trace > 1) + htrc("Inserting: %s\n", Query->GetStr()); + rc = Jcp->ExecuteUpdate(Query->GetStr()); Query->Truncate(len); // Restore query diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index b073398f8ecf7..a4aa43651f8a5 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -673,7 +673,10 @@ void dict_stats_copy( /*============*/ dict_table_t* dst, /*!< in/out: destination table */ - const dict_table_t* src) /*!< in: source table */ + const dict_table_t* src, /*!< in: source table */ + bool reset_ignored_indexes) /*!< in: if true, set ignored indexes + to have the same statistics as if + the table was empty */ { dst->stats_last_recalc = src->stats_last_recalc; dst->stat_n_rows = src->stat_n_rows; @@ -692,7 +695,16 @@ dict_stats_copy( && (src_idx = dict_table_get_next_index(src_idx)))) { if (dict_stats_should_ignore_index(dst_idx)) { - continue; + if (reset_ignored_indexes) { + /* Reset index statistics for all ignored indexes, + unless they are FT indexes (these have no statistics)*/ + if (dst_idx->type & DICT_FTS) { + continue; + } + dict_stats_empty_index(dst_idx); + } else { + continue; + } } ut_ad(!dict_index_is_univ(dst_idx)); @@ -782,7 +794,7 @@ dict_stats_snapshot_create( t = dict_stats_table_clone_create(table); - dict_stats_copy(t, table); + dict_stats_copy(t, table, false); t->stat_persistent = table->stat_persistent; t->stats_auto_recalc = table->stats_auto_recalc; @@ -3240,13 +3252,10 @@ dict_stats_update( dict_table_stats_lock(table, RW_X_LATCH); - /* Initialize all stats to dummy values before - copying because dict_stats_table_clone_create() does - skip corrupted indexes so our dummy object 't' may - have less indexes than the real object 'table'. */ - dict_stats_empty_table(table); - - dict_stats_copy(table, t); + /* Pass reset_ignored_indexes=true as parameter + to dict_stats_copy. This will cause statictics + for corrupted indexes to be set to empty values */ + dict_stats_copy(table, t, true); dict_stats_assert_initialized(table); diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 99a25fead8442..6059c28eabc60 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key; /** variable to record innodb_fts_internal_tbl_name for information schema table INNODB_FTS_INSERTED etc. */ UNIV_INTERN char* fts_internal_tbl_name = NULL; +UNIV_INTERN char* fts_internal_tbl_name2 = NULL; /** InnoDB default stopword list: There are different versions of stopwords, the stop words listed @@ -6570,6 +6571,36 @@ fts_check_corrupt_index( return(0); } +/* Get parent table name if it's a fts aux table +@param[in] aux_table_name aux table name +@param[in] aux_table_len aux table length +@return parent table name, or NULL */ +char* +fts_get_parent_table_name( + const char* aux_table_name, + ulint aux_table_len) +{ + fts_aux_table_t aux_table; + char* parent_table_name = NULL; + + if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) { + dict_table_t* parent_table; + + parent_table = dict_table_open_on_id( + aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL); + + if (parent_table != NULL) { + parent_table_name = mem_strdupl( + parent_table->name, + strlen(parent_table->name)); + + dict_table_close(parent_table, TRUE, FALSE); + } + } + + return(parent_table_name); +} + /** Check the validity of the parent table. @param[in] aux_table auxiliary table @return true if it is a valid table or false if it is not */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 3b438d140c734..21bb795f5d97b 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16217,7 +16217,12 @@ innodb_internal_table_update( my_free(old); } - fts_internal_tbl_name = *(char**) var_ptr; + fts_internal_tbl_name2 = *(char**) var_ptr; + if (fts_internal_tbl_name2 == NULL) { + fts_internal_tbl_name = const_cast("default"); + } else { + fts_internal_tbl_name = fts_internal_tbl_name2; + } } /****************************************************************//** @@ -18319,7 +18324,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache, "Whether to disable OS system file cache for sort I/O", NULL, NULL, FALSE); -static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name, +static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2, PLUGIN_VAR_NOCMDARG, "FTS internal auxiliary table to be checked", innodb_internal_table_validate, diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 13bc757454298..e164d7e279c90 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -213,7 +213,10 @@ innobase_need_rebuild( const Alter_inplace_info* ha_alter_info, const TABLE* altered_table) { - if (ha_alter_info->handler_flags + Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags = + ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE); + + if (alter_inplace_flags == Alter_inplace_info::CHANGE_CREATE_OPTION && !(ha_alter_info->create_info->used_fields & (HA_CREATE_USED_ROW_FORMAT @@ -3937,7 +3940,7 @@ ha_innobase::prepare_inplace_alter_table( } if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA) - || (ha_alter_info->handler_flags + || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) == Alter_inplace_info::CHANGE_CREATE_OPTION && !innobase_need_rebuild(ha_alter_info, table))) { @@ -4111,7 +4114,7 @@ ha_innobase::inplace_alter_table( DBUG_RETURN(false); } - if (ha_alter_info->handler_flags + if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) == Alter_inplace_info::CHANGE_CREATE_OPTION && !innobase_need_rebuild(ha_alter_info, table)) { goto ok_exit; diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index c5310e1369f04..50340e05860b0 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -3981,6 +3981,8 @@ i_s_fts_config_fill( DBUG_RETURN(0); } + DEBUG_SYNC_C("i_s_fts_config_fille_check"); + fields = table->field; /* Prevent DDL to drop fts aux tables. */ diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index 87b5787d416be..3e2f359bbebef 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -375,6 +375,7 @@ extern bool fts_need_sync; /** Variable specifying the table that has Fulltext index to display its content through information schema table */ extern char* fts_internal_tbl_name; +extern char* fts_internal_tbl_name2; #define fts_que_graph_free(graph) \ do { \ @@ -823,6 +824,15 @@ void fts_drop_orphaned_tables(void); /*==========================*/ +/* Get parent table name if it's a fts aux table +@param[in] aux_table_name aux table name +@param[in] aux_table_len aux table length +@return parent table name, or NULL */ +char* +fts_get_parent_table_name( + const char* aux_table_name, + ulint aux_table_len); + /******************************************************************//** Since we do a horizontal split on the index table, we need to drop all the split tables. diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 7c98e2877d5fc..e6ddf45fabaa9 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -44,7 +44,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 32 +#define INNODB_VERSION_BUGFIX 33 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 59382455cea9b..77cca37ddd130 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -613,7 +613,7 @@ row_log_table_delete( &old_pk_extra_size); ut_ad(old_pk_extra_size < 0x100); - mrec_size = 4 + old_pk_size; + mrec_size = 6 + old_pk_size; /* Log enough prefix of the BLOB unless both the old and new table are in COMPACT or REDUNDANT format, @@ -643,8 +643,8 @@ row_log_table_delete( *b++ = static_cast(old_pk_extra_size); /* Log the size of external prefix we saved */ - mach_write_to_2(b, ext_size); - b += 2; + mach_write_to_4(b, ext_size); + b += 4; rec_convert_dtuple_to_temp( b + old_pk_extra_size, new_index, @@ -2268,14 +2268,14 @@ row_log_table_apply_op( break; case ROW_T_DELETE: - /* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */ - if (mrec + 4 >= mrec_end) { + /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */ + if (mrec + 6 >= mrec_end) { return(NULL); } extra_size = *mrec++; - ext_size = mach_read_from_2(mrec); - mrec += 2; + ext_size = mach_read_from_4(mrec); + mrec += 4; ut_ad(mrec < mrec_end); /* We assume extra_size < 0x100 for the PRIMARY KEY prefix. diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 122c03b7cc9fe..07b841de3007e 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -2715,6 +2715,10 @@ row_drop_tables_for_mysql_in_background(void) return(n_tables + n_tables_dropped); } + DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep", + os_thread_sleep(5000000); + ); + table = dict_table_open_on_name(drop->table_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); @@ -2725,6 +2729,16 @@ row_drop_tables_for_mysql_in_background(void) goto already_dropped; } + if (!table->to_be_dropped) { + /* There is a scenario: the old table is dropped + just after it's added into drop list, and new + table with the same name is created, then we try + to drop the new table in background. */ + dict_table_close(table, FALSE, FALSE); + + goto already_dropped; + } + ut_a(!table->can_be_evicted); dict_table_close(table, FALSE, FALSE); @@ -3992,6 +4006,13 @@ row_drop_table_for_mysql( } } + + DBUG_EXECUTE_IF("row_drop_table_add_to_background", + row_add_table_to_background_drop_list(table->name); + err = DB_SUCCESS; + goto funct_exit; + ); + /* TODO: could we replace the counter n_foreign_key_checks_running with lock checks on the table? Acquire here an exclusive lock on the table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that @@ -4608,6 +4629,19 @@ row_drop_database_for_mysql( row_mysql_lock_data_dictionary(trx); while ((table_name = dict_get_first_table_name_in_db(name))) { + /* Drop parent table if it is a fts aux table, to + avoid accessing dropped fts aux tables in information + scheam when parent table still exists. + Note: Drop parent table will drop fts aux tables. */ + char* parent_table_name; + parent_table_name = fts_get_parent_table_name( + table_name, strlen(table_name)); + + if (parent_table_name != NULL) { + mem_free(table_name); + table_name = parent_table_name; + } + ut_a(memcmp(table_name, name, namelen) == 0); table = dict_table_open_on_name( diff --git a/storage/oqgraph/graphcore.cc b/storage/oqgraph/graphcore.cc index 4346b94805c7e..7c8ca53c0969c 100644 --- a/storage/oqgraph/graphcore.cc +++ b/storage/oqgraph/graphcore.cc @@ -485,7 +485,7 @@ namespace open_query optional oqgraph_share::find_vertex(VertexID id) const { - return ::boost::find_vertex(id, g); + return oqgraph3::find_vertex(id, g); } #if 0 diff --git a/storage/oqgraph/oqgraph_shim.h b/storage/oqgraph/oqgraph_shim.h index af240b88ebdcb..004d7f0f7c509 100644 --- a/storage/oqgraph/oqgraph_shim.h +++ b/storage/oqgraph/oqgraph_shim.h @@ -274,6 +274,33 @@ namespace boost }; #endif + template<> + struct property_map + { + typedef void type; + typedef oqgraph3::edge_weight_property_map const_type; + }; + + template<> + struct property_map + { + typedef void type; + typedef oqgraph3::vertex_index_property_map const_type; + }; + + template<> + struct property_map + { + typedef void type; + typedef oqgraph3::edge_index_property_map const_type; + }; + +} + +namespace oqgraph3 +{ + using namespace boost; + inline graph_traits::vertex_descriptor source( const graph_traits::edge_descriptor& e, @@ -401,27 +428,6 @@ namespace boost return count; } - template<> - struct property_map - { - typedef void type; - typedef oqgraph3::edge_weight_property_map const_type; - }; - - template<> - struct property_map - { - typedef void type; - typedef oqgraph3::vertex_index_property_map const_type; - }; - - template<> - struct property_map - { - typedef void type; - typedef oqgraph3::edge_index_property_map const_type; - }; - inline property_map< oqgraph3::graph, edge_weight_t>::const_type::reference diff --git a/storage/perfschema/ha_perfschema.cc b/storage/perfschema/ha_perfschema.cc index 444de87ba8ba3..2445bd80927ad 100644 --- a/storage/perfschema/ha_perfschema.cc +++ b/storage/perfschema/ha_perfschema.cc @@ -205,7 +205,7 @@ maria_declare_plugin(perfschema) 0x0001, pfs_status_vars, NULL, - "5.6.32", + "5.6.33", MariaDB_PLUGIN_MATURITY_STABLE } maria_declare_plugin_end; diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt index 8a9058bbb3e42..dad90fe96ebf7 100644 --- a/storage/tokudb/CMakeLists.txt +++ b/storage/tokudb/CMakeLists.txt @@ -1,4 +1,4 @@ -SET(TOKUDB_VERSION 5.6.31-77.0) +SET(TOKUDB_VERSION 5.6.33-79.0) # PerconaFT only supports x86-64 and cmake-2.8.9+ IF(CMAKE_VERSION VERSION_LESS "2.8.9") MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB") diff --git a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc index 4b62703480f16..7ede78b3c0db3 100644 --- a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc +++ b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc @@ -367,8 +367,8 @@ static void print_db_env_struct (void) { "int (*checkpointing_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */", "int (*cleaner_set_period) (DB_ENV*, uint32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */", "int (*cleaner_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */", - "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */", - "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */", + "int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */", + "int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */", "int (*evictor_set_enable_partial_eviction) (DB_ENV*, bool) /* Enables or disabled partial eviction of nodes from cachetable. */", "int (*evictor_get_enable_partial_eviction) (DB_ENV*, bool*) /* Retrieve the status of partial eviction of nodes from cachetable. */", "int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */", @@ -422,6 +422,9 @@ static void print_db_env_struct (void) { "int (*set_checkpoint_pool_threads)(DB_ENV *, uint32_t)", "void (*set_check_thp)(DB_ENV *, bool new_val)", "bool (*get_check_thp)(DB_ENV *)", + "bool (*set_dir_per_db)(DB_ENV *, bool new_val)", + "bool (*get_dir_per_db)(DB_ENV *)", + "const char *(*get_data_dir)(DB_ENV *env)", NULL}; sort_and_dump_fields("db_env", true, extra); diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake index 4c5004cd6a538..883f35041e2fd 100644 --- a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake @@ -97,7 +97,7 @@ if (NOT HAVE_BACKTRACE_WITHOUT_EXECINFO) endif () endif () -if(HAVE_CLOCK_REALTIME) +if(HAVE_CLOCK_REALTIME AND (NOT APPLE)) list(APPEND EXTRA_SYSTEM_LIBS rt) else() list(APPEND EXTRA_SYSTEM_LIBS System) @@ -109,6 +109,8 @@ check_function_exists(pthread_rwlockattr_setkind_np HAVE_PTHREAD_RWLOCKATTR_SETK ## check for the right way to yield using pthreads check_function_exists(pthread_yield HAVE_PTHREAD_YIELD) check_function_exists(pthread_yield_np HAVE_PTHREAD_YIELD_NP) +## check if we have pthread_threadid_np() (i.e. osx) +check_function_exists(pthread_threadid_np HAVE_PTHREAD_THREADID_NP) ## check if we have pthread_getthreadid_np() (i.e. freebsd) check_function_exists(pthread_getthreadid_np HAVE_PTHREAD_GETTHREADID_NP) check_function_exists(sched_getcpu HAVE_SCHED_GETCPU) diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake index 6082178da75b6..769bdffa5d99c 100644 --- a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake +++ b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake @@ -98,11 +98,10 @@ set_cflags_if_supported( -Wno-error=address-of-array-temporary -Wno-error=tautological-constant-out-of-range-compare -Wno-error=maybe-uninitialized - -Wno-ignored-attributes -Wno-error=extern-c-compat - -Wno-pointer-bool-conversion -fno-rtti -fno-exceptions + -Wno-error=nonnull-compare ) ## set_cflags_if_supported_named("-Weffc++" -Weffcpp) diff --git a/storage/tokudb/PerconaFT/ft/CMakeLists.txt b/storage/tokudb/PerconaFT/ft/CMakeLists.txt index 11091073ac2a1..6696c26ecc00c 100644 --- a/storage/tokudb/PerconaFT/ft/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/ft/CMakeLists.txt @@ -55,8 +55,8 @@ set(FT_SOURCES msg_buffer node pivotkeys + serialize/rbtree_mhs serialize/block_allocator - serialize/block_allocator_strategy serialize/block_table serialize/compress serialize/ft_node-serialize diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h index dc6aec9226d8f..05fb771de0871 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h @@ -138,6 +138,8 @@ struct cachefile { // nor attempt to open any cachefile with the same fname (dname) // until this cachefile has been fully closed and unlinked. bool unlink_on_close; + // If set then fclose will not be logged in recovery log. + bool skip_log_recover_on_close; int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */ CACHETABLE cachetable; struct fileid fileid; diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc index 5bba977de1a33..6d753805fa9fa 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc @@ -467,6 +467,10 @@ toku_cachefile_fname_in_env (CACHEFILE cf) { return cf->fname_in_env; } +void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env) { + cf->fname_in_env = new_fname_in_env; +} + int toku_cachefile_get_fd (CACHEFILE cf) { return cf->fd; @@ -2903,6 +2907,18 @@ bool toku_cachefile_is_unlink_on_close(CACHEFILE cf) { return cf->unlink_on_close; } +void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf) { + cf->skip_log_recover_on_close = true; +} + +void toku_cachefile_do_log_recover_on_close(CACHEFILE cf) { + cf->skip_log_recover_on_close = false; +} + +bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf) { + return cf->skip_log_recover_on_close; +} + uint64_t toku_cachefile_size(CACHEFILE cf) { int64_t file_size; int fd = toku_cachefile_get_fd(cf); diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h index 148326562ab81..3b3cb0a2d4691 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h @@ -500,12 +500,18 @@ int toku_cachefile_get_fd (CACHEFILE); // Return the filename char * toku_cachefile_fname_in_env (CACHEFILE cf); +void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env); + // Make it so when the cachefile closes, the underlying file is unlinked void toku_cachefile_unlink_on_close(CACHEFILE cf); // is this cachefile marked as unlink on close? bool toku_cachefile_is_unlink_on_close(CACHEFILE cf); +void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf); +void toku_cachefile_do_log_recover_on_close(CACHEFILE cf); +bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf); + // Return the logger associated with the cachefile struct tokulogger *toku_cachefile_logger(CACHEFILE cf); diff --git a/storage/tokudb/PerconaFT/ft/ft-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-flusher.cc index fb456ea6a18e3..e6452f60cfcc6 100644 --- a/storage/tokudb/PerconaFT/ft/ft-flusher.cc +++ b/storage/tokudb/PerconaFT/ft/ft-flusher.cc @@ -496,7 +496,7 @@ handle_split_of_child( // We never set the rightmost blocknum to be the root. // Instead, we wait for the root to split and let promotion initialize the rightmost - // blocknum to be the first non-root leaf node on the right extreme to recieve an insert. + // blocknum to be the first non-root leaf node on the right extreme to receive an insert. BLOCKNUM rightmost_blocknum = toku_unsafe_fetch(&ft->rightmost_blocknum); invariant(ft->h->root_blocknum.b != rightmost_blocknum.b); if (childa->blocknum.b == rightmost_blocknum.b) { @@ -1470,7 +1470,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // It is possible after reading in the entire child, // that we now know that the child is not reactive // if so, we can unpin parent right now - // we wont be splitting/merging child + // we won't be splitting/merging child // and we have already replaced the bnc // for the root with a fresh one enum reactivity child_re = toku_ftnode_get_reactivity(ft, child); diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc index 8f61bc67339fa..30a8710d7aa1e 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.cc +++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc @@ -149,22 +149,23 @@ basement nodes, bulk fetch, and partial fetch: #include "ft/cachetable/checkpoint.h" #include "ft/cursor.h" -#include "ft/ft.h" #include "ft/ft-cachetable-wrappers.h" #include "ft/ft-flusher.h" #include "ft/ft-internal.h" -#include "ft/msg.h" +#include "ft/ft.h" #include "ft/leafentry.h" #include "ft/logger/log-internal.h" +#include "ft/msg.h" #include "ft/node.h" #include "ft/serialize/block_table.h" -#include "ft/serialize/sub_block.h" #include "ft/serialize/ft-serialize.h" #include "ft/serialize/ft_layout_version.h" #include "ft/serialize/ft_node-serialize.h" +#include "ft/serialize/sub_block.h" #include "ft/txn/txn_manager.h" -#include "ft/ule.h" #include "ft/txn/xids.h" +#include "ft/ule.h" +#include "src/ydb-internal.h" #include @@ -179,6 +180,7 @@ basement nodes, bulk fetch, and partial fetch: #include +#include /* Status is intended for display to humans to help understand system behavior. * It does not need to be perfectly thread-safe. */ @@ -598,15 +600,12 @@ void toku_ftnode_checkpoint_complete_callback(void *value_data) { } } -void toku_ftnode_clone_callback( - void* value_data, - void** cloned_value_data, - long* clone_size, - PAIR_ATTR* new_attr, - bool for_checkpoint, - void* write_extraargs - ) -{ +void toku_ftnode_clone_callback(void *value_data, + void **cloned_value_data, + long *clone_size, + PAIR_ATTR *new_attr, + bool for_checkpoint, + void *write_extraargs) { FTNODE node = static_cast(value_data); toku_ftnode_assert_fully_in_memory(node); FT ft = static_cast(write_extraargs); @@ -618,13 +617,16 @@ void toku_ftnode_clone_callback( toku_ftnode_leaf_rebalance(node, ft->h->basementnodesize); } - cloned_node->oldest_referenced_xid_known = node->oldest_referenced_xid_known; - cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk; + cloned_node->oldest_referenced_xid_known = + node->oldest_referenced_xid_known; + cloned_node->max_msn_applied_to_node_on_disk = + node->max_msn_applied_to_node_on_disk; cloned_node->flags = node->flags; cloned_node->blocknum = node->blocknum; cloned_node->layout_version = node->layout_version; cloned_node->layout_version_original = node->layout_version_original; - cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk; + cloned_node->layout_version_read_from_disk = + node->layout_version_read_from_disk; cloned_node->build_id = node->build_id; cloned_node->height = node->height; cloned_node->dirty = node->dirty; @@ -649,38 +651,39 @@ void toku_ftnode_clone_callback( // set new pair attr if necessary if (node->height == 0) { *new_attr = make_ftnode_pair_attr(node); - } - else { + for (int i = 0; i < node->n_children; i++) { + BLB(node, i)->logical_rows_delta = 0; + BLB(cloned_node, i)->logical_rows_delta = 0; + } + } else { new_attr->is_valid = false; } *clone_size = ftnode_memory_size(cloned_node); *cloned_value_data = cloned_node; } -void toku_ftnode_flush_callback( - CACHEFILE UU(cachefile), - int fd, - BLOCKNUM blocknum, - void *ftnode_v, - void** disk_data, - void *extraargs, - PAIR_ATTR size __attribute__((unused)), - PAIR_ATTR* new_size, - bool write_me, - bool keep_me, - bool for_checkpoint, - bool is_clone - ) -{ - FT ft = (FT) extraargs; - FTNODE ftnode = (FTNODE) ftnode_v; - FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data; +void toku_ftnode_flush_callback(CACHEFILE UU(cachefile), + int fd, + BLOCKNUM blocknum, + void *ftnode_v, + void **disk_data, + void *extraargs, + PAIR_ATTR size __attribute__((unused)), + PAIR_ATTR *new_size, + bool write_me, + bool keep_me, + bool for_checkpoint, + bool is_clone) { + FT ft = (FT)extraargs; + FTNODE ftnode = (FTNODE)ftnode_v; + FTNODE_DISK_DATA *ndd = (FTNODE_DISK_DATA *)disk_data; assert(ftnode->blocknum.b == blocknum.b); int height = ftnode->height; if (write_me) { toku_ftnode_assert_fully_in_memory(ftnode); if (height > 0 && !is_clone) { - // cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback() + // cloned nodes already had their stale messages moved, see + // toku_ftnode_clone_callback() toku_move_ftnode_messages_to_stale(ft, ftnode); } else if (height == 0) { toku_ftnode_leaf_run_gc(ft, ftnode); @@ -688,7 +691,8 @@ void toku_ftnode_flush_callback( toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint); } } - int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint); + int r = toku_serialize_ftnode_to( + fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint); assert_zero(r); ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION; } @@ -703,20 +707,22 @@ void toku_ftnode_flush_callback( FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size); } toku_free(*disk_data); - } - else { + } else { if (ftnode->height == 0) { for (int i = 0; i < ftnode->n_children; i++) { - if (BP_STATE(ftnode,i) == PT_AVAIL) { + if (BP_STATE(ftnode, i) == PT_AVAIL) { BASEMENTNODE bn = BLB(ftnode, i); - toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); + toku_ft_decrease_stats(&ft->in_memory_stats, + bn->stat64_delta); + if (!ftnode->dirty) + toku_ft_adjust_logical_row_count( + ft, -bn->logical_rows_delta); } } } } toku_ftnode_free(&ftnode); - } - else { + } else { *new_size = make_ftnode_pair_attr(ftnode); } } @@ -845,10 +851,13 @@ static void compress_internal_node_partition(FTNODE node, int i, enum toku_compr } // callback for partially evicting a node -int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs, - void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra) { - FTNODE node = (FTNODE) ftnode_pv; - FT ft = (FT) write_extraargs; +int toku_ftnode_pe_callback(void *ftnode_pv, + PAIR_ATTR old_attr, + void *write_extraargs, + void (*finalize)(PAIR_ATTR new_attr, void *extra), + void *finalize_extra) { + FTNODE node = (FTNODE)ftnode_pv; + FT ft = (FT)write_extraargs; int num_partial_evictions = 0; // Hold things we intend to destroy here. @@ -866,7 +875,8 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext } // Don't partially evict nodes whose partitions can't be read back // from disk individually - if (node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) { + if (node->layout_version_read_from_disk < + FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) { goto exit; } // @@ -874,77 +884,77 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext // if (node->height > 0) { for (int i = 0; i < node->n_children; i++) { - if (BP_STATE(node,i) == PT_AVAIL) { - if (BP_SHOULD_EVICT(node,i)) { + if (BP_STATE(node, i) == PT_AVAIL) { + if (BP_SHOULD_EVICT(node, i)) { NONLEAF_CHILDINFO bnc = BNC(node, i); if (ft_compress_buffers_before_eviction && - // We may not serialize and compress a partition in memory if its - // in memory layout version is different than what's on disk (and - // therefore requires upgrade). + // We may not serialize and compress a partition in + // memory if its in memory layout version is different + // than what's on disk (and therefore requires upgrade). // - // Auto-upgrade code assumes that if a node's layout version read - // from disk is not current, it MUST require upgrade. Breaking - // this rule would cause upgrade code to upgrade this partition - // again after we serialize it as the current version, which is bad. - node->layout_version == node->layout_version_read_from_disk) { + // Auto-upgrade code assumes that if a node's layout + // version read from disk is not current, it MUST + // require upgrade. + // Breaking this rule would cause upgrade code to + // upgrade this partition again after we serialize it as + // the current version, which is bad. + node->layout_version == + node->layout_version_read_from_disk) { toku_ft_bnc_move_messages_to_stale(ft, bnc); compress_internal_node_partition( node, i, // Always compress with quicklz - TOKU_QUICKLZ_METHOD - ); + TOKU_QUICKLZ_METHOD); } else { // We're not compressing buffers before eviction. Simply - // detach the buffer and set the child's state to on-disk. + // detach the buffer and set the child's state to + // on-disk. set_BNULL(node, i); BP_STATE(node, i) = PT_ON_DISK; } buffers_to_destroy[num_buffers_to_destroy++] = bnc; num_partial_evictions++; + } else { + BP_SWEEP_CLOCK(node, i); } - else { - BP_SWEEP_CLOCK(node,i); - } - } - else { + } else { continue; } } - } - // - // partial eviction strategy for basement nodes: - // if the bn is compressed, evict it - // else: check if it requires eviction, if it does, evict it, if not, sweep the clock count - // - else { + } else { + // + // partial eviction strategy for basement nodes: + // if the bn is compressed, evict it + // else: check if it requires eviction, if it does, evict it, if not, + // sweep the clock count + // for (int i = 0; i < node->n_children; i++) { // Get rid of compressed stuff no matter what. - if (BP_STATE(node,i) == PT_COMPRESSED) { + if (BP_STATE(node, i) == PT_COMPRESSED) { SUB_BLOCK sb = BSB(node, i); pointers_to_free[num_pointers_to_free++] = sb->compressed_ptr; pointers_to_free[num_pointers_to_free++] = sb; set_BNULL(node, i); - BP_STATE(node,i) = PT_ON_DISK; + BP_STATE(node, i) = PT_ON_DISK; num_partial_evictions++; - } - else if (BP_STATE(node,i) == PT_AVAIL) { - if (BP_SHOULD_EVICT(node,i)) { + } else if (BP_STATE(node, i) == PT_AVAIL) { + if (BP_SHOULD_EVICT(node, i)) { BASEMENTNODE bn = BLB(node, i); basements_to_destroy[num_basements_to_destroy++] = bn; - toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); + toku_ft_decrease_stats(&ft->in_memory_stats, + bn->stat64_delta); + toku_ft_adjust_logical_row_count(ft, + -bn->logical_rows_delta); set_BNULL(node, i); BP_STATE(node, i) = PT_ON_DISK; num_partial_evictions++; + } else { + BP_SWEEP_CLOCK(node, i); } - else { - BP_SWEEP_CLOCK(node,i); - } - } - else if (BP_STATE(node,i) == PT_ON_DISK) { + } else if (BP_STATE(node, i) == PT_ON_DISK) { continue; - } - else { + } else { abort(); } } @@ -2378,12 +2388,16 @@ ft_send_update_msg(FT_HANDLE ft_h, const ft_msg &msg, TOKUTXN txn) { toku_ft_root_put_msg(ft_h->ft, msg, &gc_info); } -void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra, - TOKUTXN txn, bool oplsn_valid, LSN oplsn, - bool do_logging) { +void toku_ft_maybe_update(FT_HANDLE ft_h, + const DBT *key, + const DBT *update_function_extra, + TOKUTXN txn, + bool oplsn_valid, + LSN oplsn, + bool do_logging) { TXNID_PAIR xid = toku_txn_get_txnid(txn); if (txn) { - BYTESTRING keybs = { key->size, (char *) key->data }; + BYTESTRING keybs = {key->size, (char *)key->data}; toku_logger_save_rollback_cmdupdate( txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs); toku_txn_maybe_note_ft(txn, ft_h->ft); @@ -2392,22 +2406,33 @@ void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_func TOKULOGGER logger; logger = toku_txn_logger(txn); if (do_logging && logger) { - BYTESTRING keybs = {.len=key->size, .data=(char *) key->data}; - BYTESTRING extrabs = {.len=update_function_extra->size, - .data = (char *) update_function_extra->data}; - toku_log_enq_update(logger, NULL, 0, txn, - toku_cachefile_filenum(ft_h->ft->cf), - xid, keybs, extrabs); + BYTESTRING keybs = {.len = key->size, .data = (char *)key->data}; + BYTESTRING extrabs = {.len = update_function_extra->size, + .data = (char *)update_function_extra->data}; + toku_log_enq_update(logger, + NULL, + 0, + txn, + toku_cachefile_filenum(ft_h->ft->cf), + xid, + keybs, + extrabs); } LSN treelsn; - if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { + if (oplsn_valid && + oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) { // do nothing } else { - XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); - ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids); + XIDS message_xids = + txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids(); + ft_msg msg( + key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids); ft_send_update_msg(ft_h, msg, txn); } + // updates get converted to insert messages, which should do a -1 on the + // logical row count when the messages are permanently applied + toku_ft_adjust_logical_row_count(ft_h->ft, 1); } void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra, @@ -2570,12 +2595,104 @@ static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode static const mode_t file_mode = S_IRUSR+S_IWUSR+S_IRGRP+S_IWGRP+S_IROTH+S_IWOTH; +inline bool toku_file_is_root(const char *path, const char *last_slash) { + return last_slash == path; +} + +static std::unique_ptr toku_file_get_parent_dir( + const char *path) { + std::unique_ptr result(nullptr, &toku_free); + + bool has_trailing_slash = false; + + /* Find the offset of the last slash */ + const char *last_slash = strrchr(path, OS_PATH_SEPARATOR); + + if (!last_slash) { + /* No slash in the path, return NULL */ + return result; + } + + /* Ok, there is a slash. Is there anything after it? */ + if (static_cast(last_slash - path + 1) == strlen(path)) { + has_trailing_slash = true; + } + + /* Reduce repetative slashes. */ + while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) { + last_slash--; + } + + /* Check for the root of a drive. */ + if (toku_file_is_root(path, last_slash)) { + return result; + } + + /* If a trailing slash prevented the first strrchr() from trimming + the last component of the path, trim that component now. */ + if (has_trailing_slash) { + /* Back up to the previous slash. */ + last_slash--; + while (last_slash > path && last_slash[0] != OS_PATH_SEPARATOR) { + last_slash--; + } + + /* Reduce repetative slashes. */ + while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) { + last_slash--; + } + } + + /* Check for the root of a drive. */ + if (toku_file_is_root(path, last_slash)) { + return result; + } + + result.reset(toku_strndup(path, last_slash - path)); + return result; +} + +static bool toku_create_subdirs_if_needed(const char *path) { + static const mode_t dir_mode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | + S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH; + + toku_struct_stat stat; + bool subdir_exists = true; + auto subdir = toku_file_get_parent_dir(path); + + if (!subdir.get()) + return true; + + if (toku_stat(subdir.get(), &stat) == -1) { + if (ENOENT == get_error_errno()) + subdir_exists = false; + else + return false; + } + + if (subdir_exists) { + if (!S_ISDIR(stat.st_mode)) + return false; + return true; + } + + if (!toku_create_subdirs_if_needed(subdir.get())) + return false; + + if (toku_os_mkdir(subdir.get(), dir_mode)) + return false; + + return true; +} + // open a file for use by the ft // Requires: File does not exist. static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) { int r; int fd; int er; + if (!toku_create_subdirs_if_needed(fname)) + return get_error_errno(); fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); assert(fd==-1); if ((er = get_maybe_error_errno()) != ENOENT) { @@ -4404,6 +4521,55 @@ void toku_ft_unlink(FT_HANDLE handle) { toku_cachefile_unlink_on_close(cf); } +int toku_ft_rename_iname(DB_TXN *txn, + const char *data_dir, + const char *old_iname, + const char *new_iname, + CACHETABLE ct) { + int r = 0; + + std::unique_ptr new_iname_full(nullptr, + &toku_free); + std::unique_ptr old_iname_full(nullptr, + &toku_free); + + new_iname_full.reset(toku_construct_full_name(2, data_dir, new_iname)); + old_iname_full.reset(toku_construct_full_name(2, data_dir, old_iname)); + + if (txn) { + BYTESTRING bs_old_name = {static_cast(strlen(old_iname) + 1), + const_cast(old_iname)}; + BYTESTRING bs_new_name = {static_cast(strlen(new_iname) + 1), + const_cast(new_iname)}; + FILENUM filenum = FILENUM_NONE; + { + CACHEFILE cf; + r = toku_cachefile_of_iname_in_env(ct, old_iname, &cf); + if (r != ENOENT) { + char *old_fname_in_cf = toku_cachefile_fname_in_env(cf); + toku_cachefile_set_fname_in_env(cf, toku_xstrdup(new_iname)); + toku_free(old_fname_in_cf); + filenum = toku_cachefile_filenum(cf); + } + } + toku_logger_save_rollback_frename( + db_txn_struct_i(txn)->tokutxn, &bs_old_name, &bs_new_name); + toku_log_frename(db_txn_struct_i(txn)->tokutxn->logger, + (LSN *)0, + 0, + toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn), + bs_old_name, + filenum, + bs_new_name); + } + + r = toku_os_rename(old_iname_full.get(), new_iname_full.get()); + if (r != 0) + return r; + r = toku_fsync_directory(new_iname_full.get()); + return r; +} + int toku_ft_get_fragmentation(FT_HANDLE ft_handle, TOKU_DB_FRAGMENTATION report) { int fd = toku_cachefile_get_fd(ft_handle->ft->cf); toku_ft_lock(ft_handle->ft); diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.h b/storage/tokudb/PerconaFT/ft/ft-ops.h index 313a74628ea17..70cf045d43c1c 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.h +++ b/storage/tokudb/PerconaFT/ft/ft-ops.h @@ -48,6 +48,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ft/msg.h" #include "util/dbt.h" +#define OS_PATH_SEPARATOR '/' + typedef struct ft_handle *FT_HANDLE; int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result)); diff --git a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc index adac96f488213..e31d80772d56e 100644 --- a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc +++ b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc @@ -73,30 +73,20 @@ static bool recount_rows_interrupt(void* extra, uint64_t deleted_rows) { return rre->_cancelled = rre->_progress_callback(rre->_keys, deleted_rows, rre->_progress_extra); } -int toku_ft_recount_rows( - FT_HANDLE ft, - int (*progress_callback)( - uint64_t count, - uint64_t deleted, - void* progress_extra), - void* progress_extra) { - +int toku_ft_recount_rows(FT_HANDLE ft, + int (*progress_callback)(uint64_t count, + uint64_t deleted, + void* progress_extra), + void* progress_extra) { int ret = 0; - recount_rows_extra_t rre = { - progress_callback, - progress_extra, - 0, - false - }; + recount_rows_extra_t rre = {progress_callback, progress_extra, 0, false}; ft_cursor c; ret = toku_ft_cursor_create(ft, &c, nullptr, C_READ_ANY, false, false); - if (ret) return ret; + if (ret) + return ret; - toku_ft_cursor_set_check_interrupt_cb( - &c, - recount_rows_interrupt, - &rre); + toku_ft_cursor_set_check_interrupt_cb(&c, recount_rows_interrupt, &rre); ret = toku_ft_cursor_first(&c, recount_rows_found, &rre); while (FT_LIKELY(ret == 0)) { @@ -108,6 +98,7 @@ int toku_ft_recount_rows( if (rre._cancelled == false) { // update ft count toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys); + ft->ft->h->dirty = 1; ret = 0; } diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc index 93d21233bf73d..7c94b4c59d345 100644 --- a/storage/tokudb/PerconaFT/ft/ft.cc +++ b/storage/tokudb/PerconaFT/ft/ft.cc @@ -253,7 +253,19 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val char* fname_in_env = toku_cachefile_fname_in_env(cachefile); assert(fname_in_env); BYTESTRING bs = {.len=(uint32_t) strlen(fname_in_env), .data=fname_in_env}; - toku_log_fclose(logger, &lsn, ft->h->dirty, bs, toku_cachefile_filenum(cachefile)); // flush the log on close (if new header is being written), otherwise it might not make it out. + if (!toku_cachefile_is_skip_log_recover_on_close(cachefile)) { + toku_log_fclose( + logger, + &lsn, + ft->h->dirty, + bs, + toku_cachefile_filenum(cachefile)); // flush the log on + // close (if new header + // is being written), + // otherwise it might + // not make it out. + toku_cachefile_do_log_recover_on_close(cachefile); + } } } if (ft->h->dirty) { // this is the only place this bit is tested (in currentheader) @@ -903,6 +915,9 @@ void toku_ft_adjust_logical_row_count(FT ft, int64_t delta) { // must be returned in toku_ft_stat64. if (delta != 0 && ft->in_memory_logical_rows != (uint64_t)-1) { toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), delta); + if (ft->in_memory_logical_rows == (uint64_t)-1) { + toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), 1); + } } } diff --git a/storage/tokudb/PerconaFT/ft/ft.h b/storage/tokudb/PerconaFT/ft/ft.h index d600e093bdcf0..7a3c4fa783cb9 100644 --- a/storage/tokudb/PerconaFT/ft/ft.h +++ b/storage/tokudb/PerconaFT/ft/ft.h @@ -53,6 +53,12 @@ typedef struct ft_options *FT_OPTIONS; void toku_ft_unlink(FT_HANDLE handle); void toku_ft_unlink_on_commit(FT_HANDLE handle, TOKUTXN txn); +int toku_ft_rename_iname(DB_TXN *txn, + const char *data_dir, + const char *old_iname, + const char *new_iname, + CACHETABLE ct); + void toku_ft_init_reflock(FT ft); void toku_ft_destroy_reflock(FT ft); void toku_ft_grab_reflock(FT ft); diff --git a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h index dd070373e26d8..1aa2c20383150 100644 --- a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h +++ b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h @@ -301,7 +301,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp, void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error); -// For test purposes only. (In production, the rowset size is determined by negotation with the cachetable for some memory. See #2613.) +// For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.) uint64_t toku_ft_loader_get_rowset_budget_for_testing (void); int toku_ft_loader_finish_extractor(FTLOADER bl); diff --git a/storage/tokudb/PerconaFT/ft/loader/loader.cc b/storage/tokudb/PerconaFT/ft/loader/loader.cc index 20f9363da1efa..528c86a8f79bf 100644 --- a/storage/tokudb/PerconaFT/ft/loader/loader.cc +++ b/storage/tokudb/PerconaFT/ft/loader/loader.cc @@ -91,7 +91,7 @@ toku_ft_loader_set_size_factor(uint32_t factor) { uint64_t toku_ft_loader_get_rowset_budget_for_testing (void) -// For test purposes only. In production, the rowset size is determined by negotation with the cachetable for some memory. (See #2613). +// For test purposes only. In production, the rowset size is determined by negotiation with the cachetable for some memory. (See #2613). { return 16ULL*size_factor*1024ULL; } diff --git a/storage/tokudb/PerconaFT/ft/logger/logformat.cc b/storage/tokudb/PerconaFT/ft/logger/logformat.cc index 6f3baa81c86b2..49b611388038d 100644 --- a/storage/tokudb/PerconaFT/ft/logger/logformat.cc +++ b/storage/tokudb/PerconaFT/ft/logger/logformat.cc @@ -90,6 +90,10 @@ const struct logtype rollbacks[] = { {"fcreate", 'F', FA{{"FILENUM", "filenum", 0}, {"BYTESTRING", "iname", 0}, NULLFIELD}, LOG_BEGIN_ACTION_NA}, + //rename file + {"frename", 'n', FA{{"BYTESTRING", "old_iname", 0}, + {"BYTESTRING", "new_iname", 0}, + NULLFIELD}, LOG_BEGIN_ACTION_NA}, // cmdinsert is used to insert a key-value pair into a DB. For rollback we don't need the data. {"cmdinsert", 'i', FA{ {"FILENUM", "filenum", 0}, @@ -195,6 +199,11 @@ const struct logtype logtypes[] = { {"fdelete", 'U', FA{{"TXNID_PAIR", "xid", 0}, {"FILENUM", "filenum", 0}, NULLFIELD}, SHOULD_LOG_BEGIN}, + {"frename", 'n', FA{{"TXNID_PAIR", "xid", 0}, + {"BYTESTRING", "old_iname", 0}, + {"FILENUM", "old_filenum", 0}, + {"BYTESTRING", "new_iname", 0}, + NULLFIELD}, IGNORE_LOG_BEGIN}, {"enq_insert", 'I', FA{{"FILENUM", "filenum", 0}, {"TXNID_PAIR", "xid", 0}, {"BYTESTRING", "key", 0}, diff --git a/storage/tokudb/PerconaFT/ft/logger/recover.cc b/storage/tokudb/PerconaFT/ft/logger/recover.cc index 38f29773bd630..a9c30c0e37a91 100644 --- a/storage/tokudb/PerconaFT/ft/logger/recover.cc +++ b/storage/tokudb/PerconaFT/ft/logger/recover.cc @@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." +#include #include "ft/cachetable/cachetable.h" #include "ft/cachetable/checkpoint.h" #include "ft/ft.h" @@ -935,6 +936,83 @@ static int toku_recover_backward_fdelete (struct logtype_fdelete *UU(l), RECOVER return 0; } +static int toku_recover_frename(struct logtype_frename *l, RECOVER_ENV renv) { + assert(renv); + assert(renv->env); + + toku_struct_stat stat; + const char *data_dir = renv->env->get_data_dir(renv->env); + bool old_exist = true; + bool new_exist = true; + + assert(data_dir); + + struct file_map_tuple *tuple; + + std::unique_ptr old_iname_full( + toku_construct_full_name(2, data_dir, l->old_iname.data), &toku_free); + std::unique_ptr new_iname_full( + toku_construct_full_name(2, data_dir, l->new_iname.data), &toku_free); + + if (toku_stat(old_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + old_exist = false; + else + return 1; + } + + if (toku_stat(new_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + new_exist = false; + else + return 1; + } + + // Both old and new files can exist if: + // - rename() is not completed + // - fcreate was replayed during recovery + // 'Stalled cachefiles' container cachefile_list::m_stale_fileid contains + // closed but not yet evicted cachefiles and the key of this container is + // fs-dependent file id - (device id, inode number) pair. As it is supposed + // new file have not yet created during recovery process the 'stalled + // cachefile' container can contain only cache file of old file. + // To preserve the old cachefile file's id and keep it in + // 'stalled cachefiles' container the new file is removed + // and the old file is renamed. + if (old_exist && new_exist && + (toku_os_unlink(new_iname_full.get()) == -1 || + toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1)) + return 1; + + if (old_exist && !new_exist && + (toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1)) + return 1; + + if (file_map_find(&renv->fmap, l->old_filenum, &tuple) != DB_NOTFOUND) { + if (tuple->iname) + toku_free(tuple->iname); + tuple->iname = toku_xstrdup(l->new_iname.data); + } + + TOKUTXN txn = NULL; + toku_txnid2txn(renv->logger, l->xid, &txn); + + if (txn) + toku_logger_save_rollback_frename(txn, &l->old_iname, &l->new_iname); + + return 0; +} + +static int toku_recover_backward_frename(struct logtype_frename *UU(l), + RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + static int toku_recover_enq_insert (struct logtype_enq_insert *l, RECOVER_ENV renv) { int r; TOKUTXN txn = NULL; diff --git a/storage/tokudb/PerconaFT/ft/node.cc b/storage/tokudb/PerconaFT/ft/node.cc index 58ba675eb7c24..12e5fda226e95 100644 --- a/storage/tokudb/PerconaFT/ft/node.cc +++ b/storage/tokudb/PerconaFT/ft/node.cc @@ -373,52 +373,48 @@ find_bounds_within_message_tree( } } -/** - * For each message in the ancestor's buffer (determined by childnum) that - * is key-wise between lower_bound_exclusive and upper_bound_inclusive, - * apply the message to the basement node. We treat the bounds as minus - * or plus infinity respectively if they are NULL. Do not mark the node - * as dirty (preserve previous state of 'dirty' bit). - */ +// For each message in the ancestor's buffer (determined by childnum) that +// is key-wise between lower_bound_exclusive and upper_bound_inclusive, +// apply the message to the basement node. We treat the bounds as minus +// or plus infinity respectively if they are NULL. Do not mark the node +// as dirty (preserve previous state of 'dirty' bit). static void bnc_apply_messages_to_basement_node( - FT_HANDLE t, // used for comparison function - BASEMENTNODE bn, // where to apply messages + FT_HANDLE t, // used for comparison function + BASEMENTNODE bn, // where to apply messages FTNODE ancestor, // the ancestor node where we can find messages to apply - int childnum, // which child buffer of ancestor contains messages we want - const pivot_bounds &bounds, // contains pivot key bounds of this basement node - txn_gc_info* gc_info, - bool* msgs_applied) { - + int childnum, // which child buffer of ancestor contains messages we want + const pivot_bounds & + bounds, // contains pivot key bounds of this basement node + txn_gc_info *gc_info, + bool *msgs_applied) { int r; NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum); // Determine the offsets in the message trees between which we need to // apply messages from this buffer - STAT64INFO_S stats_delta = {0,0}; + STAT64INFO_S stats_delta = {0, 0}; uint64_t workdone_this_ancestor = 0; int64_t logical_rows_delta = 0; uint32_t stale_lbi, stale_ube; if (!bn->stale_ancestor_messages_applied) { - find_bounds_within_message_tree( - t->ft->cmp, - bnc->stale_message_tree, - &bnc->msg_buffer, - bounds, - &stale_lbi, - &stale_ube); + find_bounds_within_message_tree(t->ft->cmp, + bnc->stale_message_tree, + &bnc->msg_buffer, + bounds, + &stale_lbi, + &stale_ube); } else { stale_lbi = 0; stale_ube = 0; } uint32_t fresh_lbi, fresh_ube; - find_bounds_within_message_tree( - t->ft->cmp, - bnc->fresh_message_tree, - &bnc->msg_buffer, - bounds, - &fresh_lbi, - &fresh_ube); + find_bounds_within_message_tree(t->ft->cmp, + bnc->fresh_message_tree, + &bnc->msg_buffer, + bounds, + &fresh_lbi, + &fresh_ube); // We now know where all the messages we must apply are, so one of the // following 4 cases will do the application, depending on which of @@ -432,44 +428,53 @@ static void bnc_apply_messages_to_basement_node( // We have messages in multiple trees, so we grab all // the relevant messages' offsets and sort them by MSN, then apply // them in MSN order. - const int buffer_size = ((stale_ube - stale_lbi) + - (fresh_ube - fresh_lbi) + - bnc->broadcast_list.size()); + const int buffer_size = + ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) + + bnc->broadcast_list.size()); toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t)); int32_t *offsets = reinterpret_cast(offsets_buf.get()); - struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 }; + struct store_msg_buffer_offset_extra sfo_extra = {.offsets = offsets, + .i = 0}; // Populate offsets array with offsets to stale messages - r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &sfo_extra); + r = bnc->stale_message_tree + .iterate_on_range( + stale_lbi, stale_ube, &sfo_extra); assert_zero(r); // Then store fresh offsets, and mark them to be moved to stale later. - r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &sfo_extra); + r = bnc->fresh_message_tree + .iterate_and_mark_range( + fresh_lbi, fresh_ube, &sfo_extra); assert_zero(r); // Store offsets of all broadcast messages. - r = bnc->broadcast_list.iterate(&sfo_extra); + r = bnc->broadcast_list.iterate(&sfo_extra); assert_zero(r); invariant(sfo_extra.i == buffer_size); // Sort by MSN. - toku::sort::mergesort_r(offsets, buffer_size, bnc->msg_buffer); + toku::sort:: + mergesort_r(offsets, buffer_size, bnc->msg_buffer); // Apply the messages in MSN order. for (int i = 0; i < buffer_size; ++i) { *msgs_applied = true; - do_bn_apply_msg( - t, - bn, - &bnc->msg_buffer, - offsets[i], - gc_info, - &workdone_this_ancestor, - &stats_delta, - &logical_rows_delta); + do_bn_apply_msg(t, + bn, + &bnc->msg_buffer, + offsets[i], + gc_info, + &workdone_this_ancestor, + &stats_delta, + &logical_rows_delta); } } else if (stale_lbi == stale_ube) { - // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later. + // No stale messages to apply, we just apply fresh messages, and mark + // them to be moved to stale later. struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, @@ -477,16 +482,20 @@ static void bnc_apply_messages_to_basement_node( .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta, - .logical_rows_delta = &logical_rows_delta - }; - if (fresh_ube - fresh_lbi > 0) *msgs_applied = true; - r = bnc->fresh_message_tree.iterate_and_mark_range(fresh_lbi, fresh_ube, &iter_extra); + .logical_rows_delta = &logical_rows_delta}; + if (fresh_ube - fresh_lbi > 0) + *msgs_applied = true; + r = bnc->fresh_message_tree + .iterate_and_mark_range( + fresh_lbi, fresh_ube, &iter_extra); assert_zero(r); } else { invariant(fresh_lbi == fresh_ube); // No fresh messages to apply, we just apply stale messages. - if (stale_ube - stale_lbi > 0) *msgs_applied = true; + if (stale_ube - stale_lbi > 0) + *msgs_applied = true; struct iterate_do_bn_apply_msg_extra iter_extra = { .t = t, .bn = bn, @@ -494,22 +503,26 @@ static void bnc_apply_messages_to_basement_node( .gc_info = gc_info, .workdone = &workdone_this_ancestor, .stats_to_update = &stats_delta, - .logical_rows_delta = &logical_rows_delta - }; + .logical_rows_delta = &logical_rows_delta}; - r = bnc->stale_message_tree.iterate_on_range(stale_lbi, stale_ube, &iter_extra); + r = bnc->stale_message_tree + .iterate_on_range( + stale_lbi, stale_ube, &iter_extra); assert_zero(r); } // // update stats // if (workdone_this_ancestor > 0) { - (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor); + (void)toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), + workdone_this_ancestor); } if (stats_delta.numbytes || stats_delta.numrows) { toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta); } toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta); + bn->logical_rows_delta += logical_rows_delta; } static void diff --git a/storage/tokudb/PerconaFT/ft/node.h b/storage/tokudb/PerconaFT/ft/node.h index ad0298e81c5de..52eefec0936ae 100644 --- a/storage/tokudb/PerconaFT/ft/node.h +++ b/storage/tokudb/PerconaFT/ft/node.h @@ -199,6 +199,7 @@ struct ftnode_leaf_basement_node { MSN max_msn_applied; // max message sequence number applied bool stale_ancestor_messages_applied; STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk + int64_t logical_rows_delta; }; typedef struct ftnode_leaf_basement_node *BASEMENTNODE; diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc index 1355f3739ee1b..19811373d1636 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc @@ -46,415 +46,214 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "portability/toku_stdlib.h" #include "ft/serialize/block_allocator.h" -#include "ft/serialize/block_allocator_strategy.h" +#include "ft/serialize/rbtree_mhs.h" #if TOKU_DEBUG_PARANOID -#define VALIDATE() validate() +#define VALIDATE() Validate() #else #define VALIDATE() #endif -static FILE *ba_trace_file = nullptr; - -void block_allocator::maybe_initialize_trace(void) { - const char *ba_trace_path = getenv("TOKU_BA_TRACE_PATH"); - if (ba_trace_path != nullptr) { - ba_trace_file = toku_os_fopen(ba_trace_path, "w"); - if (ba_trace_file == nullptr) { - fprintf(stderr, "tokuft: error: block allocator trace path found in environment (%s), " - "but it could not be opened for writing (errno %d)\n", - ba_trace_path, get_maybe_error_errno()); - } else { - fprintf(stderr, "tokuft: block allocator tracing enabled, path: %s\n", ba_trace_path); - } - } -} - -void block_allocator::maybe_close_trace() { - if (ba_trace_file != nullptr) { - int r = toku_os_fclose(ba_trace_file); - if (r != 0) { - fprintf(stderr, "tokuft: error: block allocator trace file did not close properly (r %d, errno %d)\n", - r, get_maybe_error_errno()); - } else { - fprintf(stderr, "tokuft: block allocator tracing finished, file closed successfully\n"); - } - } -} - -void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) { - // the alignment must be at least 512 and aligned with 512 to work with direct I/O - assert(alignment >= 512 && (alignment % 512) == 0); +void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning, + uint64_t alignment) { + // the alignment must be at least 512 and aligned with 512 to work with + // direct I/O + invariant(alignment >= 512 && (alignment % 512) == 0); _reserve_at_beginning = reserve_at_beginning; _alignment = alignment; _n_blocks = 0; - _blocks_array_size = 1; - XMALLOC_N(_blocks_array_size, _blocks_array); _n_bytes_in_use = reserve_at_beginning; - _strategy = BA_STRATEGY_FIRST_FIT; - - memset(&_trace_lock, 0, sizeof(toku_mutex_t)); - toku_mutex_init(&_trace_lock, nullptr); + _tree = new MhsRbTree::Tree(alignment); +} +void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) { + CreateInternal(reserve_at_beginning, alignment); + _tree->Insert({reserve_at_beginning, MAX_BYTE}); VALIDATE(); } -void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) { - _create_internal(reserve_at_beginning, alignment); - _trace_create(); +void BlockAllocator::Destroy() { + delete _tree; } -void block_allocator::destroy() { - toku_free(_blocks_array); - _trace_destroy(); - toku_mutex_destroy(&_trace_lock); -} +void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning, + uint64_t alignment, + struct BlockPair *translation_pairs, + uint64_t n_blocks) { + CreateInternal(reserve_at_beginning, alignment); + _n_blocks = n_blocks; -void block_allocator::set_strategy(enum allocation_strategy strategy) { - _strategy = strategy; -} + struct BlockPair *XMALLOC_N(n_blocks, pairs); + memcpy(pairs, translation_pairs, n_blocks * sizeof(struct BlockPair)); + std::sort(pairs, pairs + n_blocks); -void block_allocator::grow_blocks_array_by(uint64_t n_to_add) { - if (_n_blocks + n_to_add > _blocks_array_size) { - uint64_t new_size = _n_blocks + n_to_add; - uint64_t at_least = _blocks_array_size * 2; - if (at_least > new_size) { - new_size = at_least; - } - _blocks_array_size = new_size; - XREALLOC_N(_blocks_array_size, _blocks_array); + if (pairs[0]._offset > reserve_at_beginning) { + _tree->Insert( + {reserve_at_beginning, pairs[0]._offset - reserve_at_beginning}); } -} - -void block_allocator::grow_blocks_array() { - grow_blocks_array_by(1); -} - -void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, - struct blockpair *pairs, uint64_t n_blocks) { - _create_internal(reserve_at_beginning, alignment); - - _n_blocks = n_blocks; - grow_blocks_array_by(_n_blocks); - memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair)); - std::sort(_blocks_array, _blocks_array + _n_blocks); for (uint64_t i = 0; i < _n_blocks; i++) { - // Allocator does not support size 0 blocks. See block_allocator_free_block. - invariant(_blocks_array[i].size > 0); - invariant(_blocks_array[i].offset >= _reserve_at_beginning); - invariant(_blocks_array[i].offset % _alignment == 0); - - _n_bytes_in_use += _blocks_array[i].size; + // Allocator does not support size 0 blocks. See + // block_allocator_free_block. + invariant(pairs[i]._size > 0); + invariant(pairs[i]._offset >= _reserve_at_beginning); + invariant(pairs[i]._offset % _alignment == 0); + + _n_bytes_in_use += pairs[i]._size; + + MhsRbTree::OUUInt64 free_size(MAX_BYTE); + MhsRbTree::OUUInt64 free_offset(pairs[i]._offset + pairs[i]._size); + if (i < n_blocks - 1) { + MhsRbTree::OUUInt64 next_offset(pairs[i + 1]._offset); + invariant(next_offset >= free_offset); + free_size = next_offset - free_offset; + if (free_size == 0) + continue; + } + _tree->Insert({free_offset, free_size}); } - + toku_free(pairs); VALIDATE(); - - _trace_create_from_blockpairs(); } // Effect: align a value by rounding up. -static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { +static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) { return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; } -struct block_allocator::blockpair * -block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) { - switch (_strategy) { - case BA_STRATEGY_FIRST_FIT: - return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment); - case BA_STRATEGY_BEST_FIT: - return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment); - case BA_STRATEGY_HEAT_ZONE: - return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat); - case BA_STRATEGY_PADDED_FIT: - return block_allocator_strategy::padded_fit(_blocks_array, _n_blocks, size, _alignment); - default: - abort(); - } -} - -// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). -void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) { - struct blockpair *bp; - +// Effect: Allocate a block. The resulting block must be aligned on the +// ba->alignment (which to make direct_io happy must be a positive multiple of +// 512). +void BlockAllocator::AllocBlock(uint64_t size, + uint64_t *offset) { // Allocator does not support size 0 blocks. See block_allocator_free_block. invariant(size > 0); - grow_blocks_array(); _n_bytes_in_use += size; + *offset = _tree->Remove(size); - uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment); - - if (_n_blocks == 0) { - // First and only block - assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use - _blocks_array[0].offset = align(_reserve_at_beginning, _alignment); - _blocks_array[0].size = size; - *offset = _blocks_array[0].offset; - goto done; - } else if (end_of_reserve + size <= _blocks_array[0].offset ) { - // Check to see if the space immediately after the reserve is big enough to hold the new block. - bp = &_blocks_array[0]; - memmove(bp + 1, bp, _n_blocks * sizeof(*bp)); - bp[0].offset = end_of_reserve; - bp[0].size = size; - *offset = end_of_reserve; - goto done; - } - - bp = choose_block_to_alloc_after(size, heat); - if (bp != nullptr) { - // our allocation strategy chose the space after `bp' to fit the new block - uint64_t answer_offset = align(bp->offset + bp->size, _alignment); - uint64_t blocknum = bp - _blocks_array; - invariant(&_blocks_array[blocknum] == bp); - invariant(blocknum < _n_blocks); - memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp)); - bp[1].offset = answer_offset; - bp[1].size = size; - *offset = answer_offset; - } else { - // It didn't fit anywhere, so fit it on the end. - assert(_n_blocks < _blocks_array_size); - bp = &_blocks_array[_n_blocks]; - uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment); - bp->offset = answer_offset; - bp->size = size; - *offset = answer_offset; - } - -done: _n_blocks++; VALIDATE(); - - _trace_alloc(size, heat, *offset); -} - -// Find the index in the blocks array that has a particular offset. Requires that the block exist. -// Use binary search so it runs fast. -int64_t block_allocator::find_block(uint64_t offset) { - VALIDATE(); - if (_n_blocks == 1) { - assert(_blocks_array[0].offset == offset); - return 0; - } - - uint64_t lo = 0; - uint64_t hi = _n_blocks; - while (1) { - assert(lo < hi); // otherwise no such block exists. - uint64_t mid = (lo + hi) / 2; - uint64_t thisoff = _blocks_array[mid].offset; - if (thisoff < offset) { - lo = mid + 1; - } else if (thisoff > offset) { - hi = mid; - } else { - return mid; - } - } } -// To support 0-sized blocks, we need to include size as an input to this function. +// To support 0-sized blocks, we need to include size as an input to this +// function. // All 0-sized blocks at the same offset can be considered identical, but // a 0-sized block can share offset with a non-zero sized block. -// The non-zero sized block is not exchangable with a zero sized block (or vice versa), -// so inserting 0-sized blocks can cause corruption here. -void block_allocator::free_block(uint64_t offset) { +// The non-zero sized block is not exchangable with a zero sized block (or vice +// versa), so inserting 0-sized blocks can cause corruption here. +void BlockAllocator::FreeBlock(uint64_t offset, uint64_t size) { VALIDATE(); - int64_t bn = find_block(offset); - assert(bn >= 0); // we require that there is a block with that offset. - _n_bytes_in_use -= _blocks_array[bn].size; - memmove(&_blocks_array[bn], &_blocks_array[bn + 1], - (_n_blocks - bn - 1) * sizeof(struct blockpair)); + _n_bytes_in_use -= size; + _tree->Insert({offset, size}); _n_blocks--; VALIDATE(); - - _trace_free(offset); -} - -uint64_t block_allocator::block_size(uint64_t offset) { - int64_t bn = find_block(offset); - assert(bn >=0); // we require that there is a block with that offset. - return _blocks_array[bn].size; } -uint64_t block_allocator::allocated_limit() const { - if (_n_blocks == 0) { - return _reserve_at_beginning; - } else { - struct blockpair *last = &_blocks_array[_n_blocks - 1]; - return last->offset + last->size; - } +uint64_t BlockAllocator::AllocatedLimit() const { + MhsRbTree::Node *max_node = _tree->MaxNode(); + return rbn_offset(max_node).ToInt(); } -// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. +// Effect: Consider the blocks in sorted order. The reserved block at the +// beginning is number 0. The next one is number 1 and so forth. // Return the offset and size of the block with that number. // Return 0 if there is a block that big, return nonzero if b is too big. -int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size) { - if (b ==0 ) { +int BlockAllocator::NthBlockInLayoutOrder(uint64_t b, + uint64_t *offset, + uint64_t *size) { + MhsRbTree::Node *x, *y; + if (b == 0) { *offset = 0; *size = _reserve_at_beginning; - return 0; + return 0; } else if (b > _n_blocks) { return -1; } else { - *offset =_blocks_array[b - 1].offset; - *size =_blocks_array[b - 1].size; + x = _tree->MinNode(); + for (uint64_t i = 1; i <= b; i++) { + y = x; + x = _tree->Successor(x); + } + *size = (rbn_offset(x) - (rbn_offset(y) + rbn_size(y))).ToInt(); + *offset = (rbn_offset(y) + rbn_size(y)).ToInt(); return 0; } } +struct VisUnusedExtra { + TOKU_DB_FRAGMENTATION _report; + uint64_t _align; +}; + +static void VisUnusedCollector(void *extra, + MhsRbTree::Node *node, + uint64_t UU(depth)) { + struct VisUnusedExtra *v_e = (struct VisUnusedExtra *)extra; + TOKU_DB_FRAGMENTATION report = v_e->_report; + uint64_t alignm = v_e->_align; + + MhsRbTree::OUUInt64 offset = rbn_offset(node); + MhsRbTree::OUUInt64 size = rbn_size(node); + MhsRbTree::OUUInt64 answer_offset(Align(offset.ToInt(), alignm)); + uint64_t free_space = (offset + size - answer_offset).ToInt(); + if (free_space > 0) { + report->unused_bytes += free_space; + report->unused_blocks++; + if (free_space > report->largest_unused_block) { + report->largest_unused_block = free_space; + } + } +} // Requires: report->file_size_bytes is filled in // Requires: report->data_bytes is filled in // Requires: report->checkpoint_bytes_additional is filled in -void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) { - assert(_n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional); +void BlockAllocator::UnusedStatistics(TOKU_DB_FRAGMENTATION report) { + invariant(_n_bytes_in_use == + report->data_bytes + report->checkpoint_bytes_additional); report->unused_bytes = 0; report->unused_blocks = 0; report->largest_unused_block = 0; - if (_n_blocks > 0) { - //Deal with space before block 0 and after reserve: - { - struct blockpair *bp = &_blocks_array[0]; - assert(bp->offset >= align(_reserve_at_beginning, _alignment)); - uint64_t free_space = bp->offset - align(_reserve_at_beginning, _alignment); - if (free_space > 0) { - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - - //Deal with space between blocks: - for (uint64_t blocknum = 0; blocknum +1 < _n_blocks; blocknum ++) { - // Consider the space after blocknum - struct blockpair *bp = &_blocks_array[blocknum]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, _alignment); - uint64_t next_offset = bp[1].offset; - uint64_t free_space = next_offset - end_of_this_block; - if (free_space > 0) { - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - - //Deal with space after last block - { - struct blockpair *bp = &_blocks_array[_n_blocks-1]; - uint64_t this_offset = bp[0].offset; - uint64_t this_size = bp[0].size; - uint64_t end_of_this_block = align(this_offset+this_size, _alignment); - if (end_of_this_block < report->file_size_bytes) { - uint64_t free_space = report->file_size_bytes - end_of_this_block; - assert(free_space > 0); - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } - } else { - // No blocks. Just the reserve. - uint64_t end_of_this_block = align(_reserve_at_beginning, _alignment); - if (end_of_this_block < report->file_size_bytes) { - uint64_t free_space = report->file_size_bytes - end_of_this_block; - assert(free_space > 0); - report->unused_bytes += free_space; - report->unused_blocks++; - if (free_space > report->largest_unused_block) { - report->largest_unused_block = free_space; - } - } - } + struct VisUnusedExtra extra = {report, _alignment}; + _tree->InOrderVisitor(VisUnusedCollector, &extra); } -void block_allocator::get_statistics(TOKU_DB_FRAGMENTATION report) { - report->data_bytes = _n_bytes_in_use; - report->data_blocks = _n_blocks; +void BlockAllocator::Statistics(TOKU_DB_FRAGMENTATION report) { + report->data_bytes = _n_bytes_in_use; + report->data_blocks = _n_blocks; report->file_size_bytes = 0; report->checkpoint_bytes_additional = 0; - get_unused_statistics(report); + UnusedStatistics(report); } -void block_allocator::validate() const { - uint64_t n_bytes_in_use = _reserve_at_beginning; - for (uint64_t i = 0; i < _n_blocks; i++) { - n_bytes_in_use += _blocks_array[i].size; - if (i > 0) { - assert(_blocks_array[i].offset > _blocks_array[i - 1].offset); - assert(_blocks_array[i].offset >= _blocks_array[i - 1].offset + _blocks_array[i - 1].size ); - } - } - assert(n_bytes_in_use == _n_bytes_in_use); -} - -// Tracing - -void block_allocator::_trace_create(void) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_create %p %" PRIu64 " %" PRIu64 "\n", - this, _reserve_at_beginning, _alignment); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } -} - -void block_allocator::_trace_create_from_blockpairs(void) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %" PRIu64 " %" PRIu64 " ", - this, _reserve_at_beginning, _alignment); - for (uint64_t i = 0; i < _n_blocks; i++) { - fprintf(ba_trace_file, "[%" PRIu64 " %" PRIu64 "] ", - _blocks_array[i].offset, _blocks_array[i].size); - } - fprintf(ba_trace_file, "\n"); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } -} - -void block_allocator::_trace_destroy(void) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_destroy %p\n", this); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } -} - -void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_alloc %p %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - this, size, heat, offset); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); +struct ValidateExtra { + uint64_t _bytes; + MhsRbTree::Node *_pre_node; +}; +static void VisUsedBlocksInOrder(void *extra, + MhsRbTree::Node *cur_node, + uint64_t UU(depth)) { + struct ValidateExtra *v_e = (struct ValidateExtra *)extra; + MhsRbTree::Node *pre_node = v_e->_pre_node; + // verify no overlaps + if (pre_node) { + invariant(rbn_size(pre_node) > 0); + invariant(rbn_offset(cur_node) > + rbn_offset(pre_node) + rbn_size(pre_node)); + MhsRbTree::OUUInt64 used_space = + rbn_offset(cur_node) - (rbn_offset(pre_node) + rbn_size(pre_node)); + v_e->_bytes += used_space.ToInt(); + } else { + v_e->_bytes += rbn_offset(cur_node).ToInt(); } + v_e->_pre_node = cur_node; } -void block_allocator::_trace_free(uint64_t offset) { - if (ba_trace_file != nullptr) { - toku_mutex_lock(&_trace_lock); - fprintf(ba_trace_file, "ba_trace_free %p %" PRIu64 "\n", this, offset); - toku_mutex_unlock(&_trace_lock); - - fflush(ba_trace_file); - } +void BlockAllocator::Validate() const { + _tree->ValidateBalance(); + _tree->ValidateMhs(); + struct ValidateExtra extra = {0, nullptr}; + _tree->InOrderVisitor(VisUsedBlocksInOrder, &extra); + invariant(extra._bytes == _n_bytes_in_use); } diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h index 9b2c1553e7f93..648ea9a9ef2c8 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h +++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h @@ -43,6 +43,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "portability/toku_pthread.h" #include "portability/toku_stdint.h" #include "portability/toku_stdlib.h" +#include "ft/serialize/rbtree_mhs.h" // Block allocator. // @@ -51,151 +52,128 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // The allocation of block numbers is handled elsewhere. // // When creating a block allocator we also specify a certain-sized -// block at the beginning that is preallocated (and cannot be allocated or freed) +// block at the beginning that is preallocated (and cannot be allocated or +// freed) // // We can allocate blocks of a particular size at a particular location. -// We can allocate blocks of a particular size at a location chosen by the allocator. // We can free blocks. // We can determine the size of a block. - -class block_allocator { -public: +#define MAX_BYTE 0xffffffffffffffff +class BlockAllocator { + public: static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096; // How much must be reserved at the beginning for the block? - // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root. + // The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 + // pointer for each root. // So 4096 should be enough. static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096; - - static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0, + + static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == + 0, "block allocator header must have proper alignment"); - static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2; + static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = + BLOCK_ALLOCATOR_HEADER_RESERVE * 2; - enum allocation_strategy { - BA_STRATEGY_FIRST_FIT = 1, - BA_STRATEGY_BEST_FIT, - BA_STRATEGY_PADDED_FIT, - BA_STRATEGY_HEAT_ZONE - }; - - struct blockpair { - uint64_t offset; - uint64_t size; - blockpair(uint64_t o, uint64_t s) : - offset(o), size(s) { - } - int operator<(const struct blockpair &rhs) const { - return offset < rhs.offset; - } - int operator<(const uint64_t &o) const { - return offset < o; + struct BlockPair { + uint64_t _offset; + uint64_t _size; + BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {} + int operator<(const struct BlockPair &rhs) const { + return _offset < rhs._offset; } + int operator<(const uint64_t &o) const { return _offset < o; } }; - // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. - // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING + // bytes are not put into a block. + // The default allocation strategy is first fit + // (BA_STRATEGY_FIRST_FIT) // All blocks be start on a multiple of ALIGNMENT. // Aborts if we run out of memory. // Parameters - // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // reserve_at_beginning (IN) Size of reserved block at beginning. + // This size does not have to be aligned. // alignment (IN) Block alignment. - void create(uint64_t reserve_at_beginning, uint64_t alignment); + void Create(uint64_t reserve_at_beginning, uint64_t alignment); - // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. - // The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT) - // The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs' + // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING + // bytes are not put into a block. + // The allocator is initialized to contain `n_blocks' of BlockPairs, + // taken from `pairs' // All blocks be start on a multiple of ALIGNMENT. // Aborts if we run out of memory. // Parameters // pairs, unowned array of pairs to copy // n_blocks, Size of pairs array - // reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned. + // reserve_at_beginning (IN) Size of reserved block at beginning. + // This size does not have to be aligned. // alignment (IN) Block alignment. - void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, - struct blockpair *pairs, uint64_t n_blocks); + void CreateFromBlockPairs(uint64_t reserve_at_beginning, + uint64_t alignment, + struct BlockPair *pairs, + uint64_t n_blocks); // Effect: Destroy this block allocator - void destroy(); - - // Effect: Set the allocation strategy that the allocator should use - // Requires: No other threads are operating on this block allocator - void set_strategy(enum allocation_strategy strategy); + void Destroy(); - // Effect: Allocate a block of the specified size at an address chosen by the allocator. + // Effect: Allocate a block of the specified size at an address chosen by + // the allocator. // Aborts if anything goes wrong. // The block address will be a multiple of the alignment. // Parameters: - // size (IN): The size of the block. (The size does not have to be aligned.) + // size (IN): The size of the block. (The size does not have to be + // aligned.) // offset (OUT): The location of the block. - // heat (IN): A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint) - // Heat values are lexiographically ordered (like integers), but their specific values are arbitrary - void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset); + // block soon (perhaps in the next checkpoint) + // Heat values are lexiographically ordered (like integers), + // but their specific values are arbitrary + void AllocBlock(uint64_t size, uint64_t *offset); // Effect: Free the block at offset. // Requires: There must be a block currently allocated at that offset. // Parameters: // offset (IN): The offset of the block. - void free_block(uint64_t offset); + void FreeBlock(uint64_t offset, uint64_t size); - // Effect: Return the size of the block that starts at offset. - // Requires: There must be a block currently allocated at that offset. - // Parameters: - // offset (IN): The offset of the block. - uint64_t block_size(uint64_t offset); - - // Effect: Check to see if the block allocator is OK. This may take a long time. + // Effect: Check to see if the block allocator is OK. This may take a long + // time. // Usage Hints: Probably only use this for unit tests. // TODO: Private? - void validate() const; + void Validate() const; // Effect: Return the unallocated block address of "infinite" size. - // That is, return the smallest address that is above all the allocated blocks. - uint64_t allocated_limit() const; + // That is, return the smallest address that is above all the allocated + // blocks. + uint64_t AllocatedLimit() const; - // Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth. + // Effect: Consider the blocks in sorted order. The reserved block at the + // beginning is number 0. The next one is number 1 and so forth. // Return the offset and size of the block with that number. // Return 0 if there is a block that big, return nonzero if b is too big. // Rationale: This is probably useful only for tests. - int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size); + int NthBlockInLayoutOrder(uint64_t b, uint64_t *offset, uint64_t *size); // Effect: Fill in report to indicate how the file is used. - // Requires: + // Requires: // report->file_size_bytes is filled in // report->data_bytes is filled in // report->checkpoint_bytes_additional is filled in - void get_unused_statistics(TOKU_DB_FRAGMENTATION report); + void UnusedStatistics(TOKU_DB_FRAGMENTATION report); // Effect: Fill in report->data_bytes with the number of bytes in use - // Fill in report->data_blocks with the number of blockpairs in use + // Fill in report->data_blocks with the number of BlockPairs in use // Fill in unused statistics using this->get_unused_statistics() // Requires: // report->file_size is ignored on return // report->checkpoint_bytes_additional is ignored on return - void get_statistics(TOKU_DB_FRAGMENTATION report); - - // Block allocator tracing. - // - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file - // should be written to. - // - Trace may be replayed by ba_trace_replay tool in tools/ directory - // eg: "cat mytracefile | ba_trace_replay" - static void maybe_initialize_trace(); - static void maybe_close_trace(); - -private: - void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment); - void grow_blocks_array_by(uint64_t n_to_add); - void grow_blocks_array(); - int64_t find_block(uint64_t offset); - struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat); - - // Tracing - toku_mutex_t _trace_lock; - void _trace_create(void); - void _trace_create_from_blockpairs(void); - void _trace_destroy(void); - void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset); - void _trace_free(uint64_t offset); + void Statistics(TOKU_DB_FRAGMENTATION report); + + virtual ~BlockAllocator(){}; + + private: + void CreateInternal(uint64_t reserve_at_beginning, uint64_t alignment); // How much to reserve at the beginning uint64_t _reserve_at_beginning; @@ -203,12 +181,8 @@ class block_allocator { uint64_t _alignment; // How many blocks uint64_t _n_blocks; - // How big is the blocks_array. Must be >= n_blocks. - uint64_t _blocks_array_size; - // These blocks are sorted by address. - struct blockpair *_blocks_array; - // Including the reserve_at_beginning uint64_t _n_bytes_in_use; - // The allocation strategy are we using - enum allocation_strategy _strategy; + + // These blocks are sorted by address. + MhsRbTree::Tree *_tree; }; diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc deleted file mode 100644 index 62bb8fc4a87bc..0000000000000 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc +++ /dev/null @@ -1,224 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/*====== -This file is part of PerconaFT. - - -Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License, version 2, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with PerconaFT. If not, see . - ----------------------------------------- - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License, version 3, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with PerconaFT. If not, see . -======= */ - -#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." - -#include - -#include - -#include "portability/toku_assert.h" - -#include "ft/serialize/block_allocator_strategy.h" - -static uint64_t _align(uint64_t value, uint64_t ba_alignment) { - return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; -} - -static uint64_t _roundup_to_power_of_two(uint64_t value) { - uint64_t r = 4096; - while (r < value) { - r *= 2; - invariant(r > 0); - } - return r; -} - -// First fit block allocation -static struct block_allocator::blockpair * -_first_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t max_padding) { - if (n_blocks == 1) { - // won't enter loop, can't underflow the direction < 0 case - return nullptr; - } - - struct block_allocator::blockpair *bp = &blocks_array[0]; - for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; - n_spaces_to_check--, bp++) { - // Consider the space after bp - uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment; - uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); - if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1] - invariant(bp - blocks_array < (int64_t) n_blocks); - return bp; - } - } - return nullptr; -} - -static struct block_allocator::blockpair * -_first_fit_bw(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) { - if (n_blocks == 1) { - // won't enter loop, can't underflow the direction < 0 case - return nullptr; - } - - struct block_allocator::blockpair *bp = &blocks_array[-1]; - for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0; - n_spaces_to_check--, bp--) { - // Consider the space after bp - uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment; - uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment); - if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) { - invariant(blocks_array - bp < (int64_t) n_blocks); - return bp; - } - } - return nullptr; -} - -struct block_allocator::blockpair * -block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment) { - return _first_fit(blocks_array, n_blocks, size, alignment, 0); -} - -// Best fit block allocation -struct block_allocator::blockpair * -block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment) { - struct block_allocator::blockpair *best_bp = nullptr; - uint64_t best_hole_size = 0; - for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { - // Consider the space after blocknum - struct block_allocator::blockpair *bp = &blocks_array[blocknum]; - uint64_t possible_offset = _align(bp->offset + bp->size, alignment); - uint64_t possible_end_offset = possible_offset + size; - if (possible_end_offset <= bp[1].offset) { - // It fits here. Is it the best fit? - uint64_t hole_size = bp[1].offset - possible_end_offset; - if (best_bp == nullptr || hole_size < best_hole_size) { - best_hole_size = hole_size; - best_bp = bp; - } - } - } - return best_bp; -} - -static uint64_t padded_fit_alignment = 4096; - -// TODO: These compiler specific directives should be abstracted in a portability header -// portability/toku_compiler.h? -__attribute__((__constructor__)) -static void determine_padded_fit_alignment_from_env(void) { - // TODO: Should be in portability as 'toku_os_getenv()?' - const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT"); - if (s != nullptr && strlen(s) > 0) { - const int64_t alignment = strtoll(s, nullptr, 10); - if (alignment <= 0) { - fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), " - "but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n", - s, padded_fit_alignment); - } else { - padded_fit_alignment = _roundup_to_power_of_two(alignment); - fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n", - padded_fit_alignment); - } - } -} - -// First fit into a block that is oversized by up to max_padding. -// The hope is that if we purposefully waste a bit of space at allocation -// time we'll be more likely to reuse this block later. -struct block_allocator::blockpair * -block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment) { - return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment); -} - -static double hot_zone_threshold = 0.85; - -// TODO: These compiler specific directives should be abstracted in a portability header -// portability/toku_compiler.h? -__attribute__((__constructor__)) -static void determine_hot_zone_threshold_from_env(void) { - // TODO: Should be in portability as 'toku_os_getenv()?' - const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD"); - if (s != nullptr && strlen(s) > 0) { - const double hot_zone = strtod(s, nullptr); - if (hot_zone < 1 || hot_zone > 99) { - fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), " - "but it's out of range (should be an integer 1 through 99). defaulting to 85\n", s); - hot_zone_threshold = 85 / 100; - } else { - fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s); - hot_zone_threshold = hot_zone / 100; - } - } -} - -struct block_allocator::blockpair * -block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t heat) { - if (heat > 0) { - struct block_allocator::blockpair *bp, *boundary_bp; - - // Hot allocation. Find the beginning of the hot zone. - boundary_bp = &blocks_array[n_blocks - 1]; - uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment); - uint64_t hot_zone_offset = static_cast(hot_zone_threshold * highest_offset); - - boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset); - uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp; - uint64_t blocks_outside_zone = boundary_bp - blocks_array; - invariant(blocks_in_zone + blocks_outside_zone == n_blocks); - - if (blocks_in_zone > 0) { - // Find the first fit in the hot zone, going forward. - bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0); - if (bp != nullptr) { - return bp; - } - } - if (blocks_outside_zone > 0) { - // Find the first fit in the cold zone, going backwards. - bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]); - if (bp != nullptr) { - return bp; - } - } - } else { - // Cold allocations are simply first-fit from the beginning. - return _first_fit(blocks_array, n_blocks, size, alignment, 0); - } - return nullptr; -} diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc index 7101ba9f58c17..d2532134d96cf 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc @@ -46,31 +46,27 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ft/ft-internal.h" // TODO: reorganize this dependency (FT-303) -#include "ft/ft-ops.h" // for toku_maybe_truncate_file +#include "ft/ft-ops.h" // for toku_maybe_truncate_file #include "ft/serialize/block_table.h" #include "ft/serialize/rbuf.h" #include "ft/serialize/wbuf.h" #include "ft/serialize/block_allocator.h" - #include "util/nb_mutex.h" #include "util/scoped_malloc.h" // indicates the end of a freelist -static const BLOCKNUM freelist_null = { -1 }; +static const BLOCKNUM freelist_null = {-1}; // value of block_translation_pair.size if blocknum is unused -static const DISKOFF size_is_free = (DISKOFF) -1; +static const DISKOFF size_is_free = (DISKOFF)-1; -// value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock -static const DISKOFF diskoff_unused = (DISKOFF) -2; +// value of block_translation_pair.u.diskoff if blocknum is used but does not +// yet have a diskblock +static const DISKOFF diskoff_unused = (DISKOFF)-2; -void block_table::_mutex_lock() { - toku_mutex_lock(&_mutex); -} +void block_table::_mutex_lock() { toku_mutex_lock(&_mutex); } -void block_table::_mutex_unlock() { - toku_mutex_unlock(&_mutex); -} +void block_table::_mutex_unlock() { toku_mutex_unlock(&_mutex); } // TODO: Move lock to FT void toku_ft_lock(FT ft) { @@ -85,13 +81,16 @@ void toku_ft_unlock(FT ft) { bt->_mutex_unlock(); } -// There are two headers: the reserve must fit them both and be suitably aligned. -static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE % - block_allocator::BLOCK_ALLOCATOR_ALIGNMENT == 0, +// There are two headers: the reserve must fit them both and be suitably +// aligned. +static_assert(BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE % + BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT == + 0, "Block allocator's header reserve must be suitibly aligned"); -static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 == - block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - "Block allocator's total header reserve must exactly fit two headers"); +static_assert( + BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 == + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + "Block allocator's total header reserve must exactly fit two headers"); // does NOT initialize the block allocator: the caller is responsible void block_table::_create_internal() { @@ -99,25 +98,30 @@ void block_table::_create_internal() { memset(&_inprogress, 0, sizeof(struct translation)); memset(&_checkpointed, 0, sizeof(struct translation)); memset(&_mutex, 0, sizeof(_mutex)); + _bt_block_allocator = new BlockAllocator(); toku_mutex_init(&_mutex, nullptr); nb_mutex_init(&_safe_file_size_lock); } -// Fill in the checkpointed translation from buffer, and copy checkpointed to current. -// The one read from disk is the last known checkpointed one, so we are keeping it in -// place and then setting current (which is never stored on disk) for current use. -// The translation_buffer has translation only, we create the rest of the block_table. -int block_table::create_from_buffer(int fd, - DISKOFF location_on_disk, //Location of translation_buffer - DISKOFF size_on_disk, - unsigned char *translation_buffer) { +// Fill in the checkpointed translation from buffer, and copy checkpointed to +// current. +// The one read from disk is the last known checkpointed one, so we are keeping +// it in +// place and then setting current (which is never stored on disk) for current +// use. +// The translation_buffer has translation only, we create the rest of the +// block_table. +int block_table::create_from_buffer( + int fd, + DISKOFF location_on_disk, // Location of translation_buffer + DISKOFF size_on_disk, + unsigned char *translation_buffer) { // Does not initialize the block allocator _create_internal(); // Deserialize the translation and copy it to current - int r = _translation_deserialize_from_buffer(&_checkpointed, - location_on_disk, size_on_disk, - translation_buffer); + int r = _translation_deserialize_from_buffer( + &_checkpointed, location_on_disk, size_on_disk, translation_buffer); if (r != 0) { return r; } @@ -130,22 +134,26 @@ int block_table::create_from_buffer(int fd, invariant(file_size >= 0); _safe_file_size = file_size; - // Gather the non-empty translations and use them to create the block allocator + // Gather the non-empty translations and use them to create the block + // allocator toku::scoped_malloc pairs_buf(_checkpointed.smallest_never_used_blocknum.b * - sizeof(struct block_allocator::blockpair)); - struct block_allocator::blockpair *CAST_FROM_VOIDP(pairs, pairs_buf.get()); + sizeof(struct BlockAllocator::BlockPair)); + struct BlockAllocator::BlockPair *CAST_FROM_VOIDP(pairs, pairs_buf.get()); uint64_t n_pairs = 0; for (int64_t i = 0; i < _checkpointed.smallest_never_used_blocknum.b; i++) { struct block_translation_pair pair = _checkpointed.block_translation[i]; if (pair.size > 0) { invariant(pair.u.diskoff != diskoff_unused); - pairs[n_pairs++] = block_allocator::blockpair(pair.u.diskoff, pair.size); + pairs[n_pairs++] = + BlockAllocator::BlockPair(pair.u.diskoff, pair.size); } } - _bt_block_allocator.create_from_blockpairs(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - block_allocator::BLOCK_ALLOCATOR_ALIGNMENT, - pairs, n_pairs); + _bt_block_allocator->CreateFromBlockPairs( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT, + pairs, + n_pairs); return 0; } @@ -155,8 +163,10 @@ void block_table::create() { _create_internal(); _checkpointed.type = TRANSLATION_CHECKPOINTED; - _checkpointed.smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS); - _checkpointed.length_of_array = _checkpointed.smallest_never_used_blocknum.b; + _checkpointed.smallest_never_used_blocknum = + make_blocknum(RESERVED_BLOCKNUMS); + _checkpointed.length_of_array = + _checkpointed.smallest_never_used_blocknum.b; _checkpointed.blocknum_freelist_head = freelist_null; XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation); for (int64_t i = 0; i < _checkpointed.length_of_array; i++) { @@ -164,12 +174,13 @@ void block_table::create() { _checkpointed.block_translation[i].u.diskoff = diskoff_unused; } - // we just created a default checkpointed, now copy it to current. + // we just created a default checkpointed, now copy it to current. _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT); // Create an empty block allocator. - _bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, - block_allocator::BLOCK_ALLOCATOR_ALIGNMENT); + _bt_block_allocator->Create( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, + BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT); } // TODO: Refactor with FT-303 @@ -185,20 +196,24 @@ static void ft_set_dirty(FT ft, bool for_checkpoint) { void block_table::_maybe_truncate_file(int fd, uint64_t size_needed_before) { toku_mutex_assert_locked(&_mutex); - uint64_t new_size_needed = _bt_block_allocator.allocated_limit(); - //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful. - if (new_size_needed < size_needed_before && new_size_needed < _safe_file_size) { + uint64_t new_size_needed = _bt_block_allocator->AllocatedLimit(); + // Save a call to toku_os_get_file_size (kernel call) if unlikely to be + // useful. + if (new_size_needed < size_needed_before && + new_size_needed < _safe_file_size) { nb_mutex_lock(&_safe_file_size_lock, &_mutex); // Must hold _safe_file_size_lock to change _safe_file_size. if (new_size_needed < _safe_file_size) { int64_t safe_file_size_before = _safe_file_size; - // Not safe to use the 'to-be-truncated' portion until truncate is done. + // Not safe to use the 'to-be-truncated' portion until truncate is + // done. _safe_file_size = new_size_needed; _mutex_unlock(); uint64_t size_after; - toku_maybe_truncate_file(fd, new_size_needed, safe_file_size_before, &size_after); + toku_maybe_truncate_file( + fd, new_size_needed, safe_file_size_before, &size_after); _mutex_lock(); _safe_file_size = size_after; @@ -213,26 +228,35 @@ void block_table::maybe_truncate_file_on_open(int fd) { _mutex_unlock(); } -void block_table::_copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype) { - // We intend to malloc a fresh block, so the incoming translation should be empty +void block_table::_copy_translation(struct translation *dst, + struct translation *src, + enum translation_type newtype) { + // We intend to malloc a fresh block, so the incoming translation should be + // empty invariant_null(dst->block_translation); invariant(src->length_of_array >= src->smallest_never_used_blocknum.b); invariant(newtype == TRANSLATION_DEBUG || - (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) || - (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT)); + (src->type == TRANSLATION_CURRENT && + newtype == TRANSLATION_INPROGRESS) || + (src->type == TRANSLATION_CHECKPOINTED && + newtype == TRANSLATION_CURRENT)); dst->type = newtype; dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum; - dst->blocknum_freelist_head = src->blocknum_freelist_head; + dst->blocknum_freelist_head = src->blocknum_freelist_head; - // destination btt is of fixed size. Allocate + memcpy the exact length necessary. + // destination btt is of fixed size. Allocate + memcpy the exact length + // necessary. dst->length_of_array = dst->smallest_never_used_blocknum.b; XMALLOC_N(dst->length_of_array, dst->block_translation); - memcpy(dst->block_translation, src->block_translation, dst->length_of_array * sizeof(*dst->block_translation)); + memcpy(dst->block_translation, + src->block_translation, + dst->length_of_array * sizeof(*dst->block_translation)); // New version of btt is not yet stored on disk. dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0; - dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused; + dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = + diskoff_unused; } int64_t block_table::get_blocks_in_use_unlocked() { @@ -240,8 +264,9 @@ int64_t block_table::get_blocks_in_use_unlocked() { struct translation *t = &_current; int64_t num_blocks = 0; { - //Reserved blocknums do not get upgraded; They are part of the header. - for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { + // Reserved blocknums do not get upgraded; They are part of the header. + for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; + b.b++) { if (t->block_translation[b.b].size != size_is_free) { num_blocks++; } @@ -251,38 +276,43 @@ int64_t block_table::get_blocks_in_use_unlocked() { } void block_table::_maybe_optimize_translation(struct translation *t) { - //Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just - //on a free list. Doing so requires us to regenerate the free list. - //This is O(n) work, so do it only if you're already doing that. + // Reduce 'smallest_never_used_blocknum.b' (completely free blocknums + // instead of just + // on a free list. Doing so requires us to regenerate the free list. + // This is O(n) work, so do it only if you're already doing that. BLOCKNUM b; paranoid_invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); - //Calculate how large the free suffix is. + // Calculate how large the free suffix is. int64_t freed; { - for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; b.b--) { - if (t->block_translation[b.b-1].size != size_is_free) { + for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; + b.b--) { + if (t->block_translation[b.b - 1].size != size_is_free) { break; } } freed = t->smallest_never_used_blocknum.b - b.b; } - if (freed>0) { + if (freed > 0) { t->smallest_never_used_blocknum.b = b.b; - if (t->length_of_array/4 > t->smallest_never_used_blocknum.b) { - //We're using more memory than necessary to represent this now. Reduce. + if (t->length_of_array / 4 > t->smallest_never_used_blocknum.b) { + // We're using more memory than necessary to represent this now. + // Reduce. uint64_t new_length = t->smallest_never_used_blocknum.b * 2; XREALLOC_N(new_length, t->block_translation); t->length_of_array = new_length; - //No need to zero anything out. + // No need to zero anything out. } - //Regenerate free list. + // Regenerate free list. t->blocknum_freelist_head.b = freelist_null.b; - for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) { + for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; + b.b++) { if (t->block_translation[b.b].size == size_is_free) { - t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; - t->blocknum_freelist_head = b; + t->block_translation[b.b].u.next_free_blocknum = + t->blocknum_freelist_head; + t->blocknum_freelist_head = b; } } } @@ -303,14 +333,16 @@ void block_table::note_start_checkpoint_unlocked() { } void block_table::note_skipped_checkpoint() { - //Purpose, alert block translation that the checkpoint was skipped, e.x. for a non-dirty header + // Purpose, alert block translation that the checkpoint was skipped, e.x. + // for a non-dirty header _mutex_lock(); paranoid_invariant_notnull(_inprogress.block_translation); _checkpoint_skipped = true; _mutex_unlock(); } -// Purpose: free any disk space used by previous checkpoint that isn't in use by either +// Purpose: free any disk space used by previous checkpoint that isn't in use by +// either // - current state // - in-progress checkpoint // capture inprogress as new checkpointed. @@ -323,7 +355,7 @@ void block_table::note_skipped_checkpoint() { void block_table::note_end_checkpoint(int fd) { // Free unused blocks _mutex_lock(); - uint64_t allocated_limit_at_start = _bt_block_allocator.allocated_limit(); + uint64_t allocated_limit_at_start = _bt_block_allocator->AllocatedLimit(); paranoid_invariant_notnull(_inprogress.block_translation); if (_checkpoint_skipped) { toku_free(_inprogress.block_translation); @@ -331,17 +363,23 @@ void block_table::note_end_checkpoint(int fd) { goto end; } - //Make certain inprogress was allocated space on disk - assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); - assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0); + // Make certain inprogress was allocated space on disk + invariant( + _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0); + invariant( + _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > + 0); { struct translation *t = &_checkpointed; for (int64_t i = 0; i < t->length_of_array; i++) { struct block_translation_pair *pair = &t->block_translation[i]; - if (pair->size > 0 && !_translation_prevents_freeing(&_inprogress, make_blocknum(i), pair)) { - assert(!_translation_prevents_freeing(&_current, make_blocknum(i), pair)); - _bt_block_allocator.free_block(pair->u.diskoff); + if (pair->size > 0 && + !_translation_prevents_freeing( + &_inprogress, make_blocknum(i), pair)) { + invariant(!_translation_prevents_freeing( + &_current, make_blocknum(i), pair)); + _bt_block_allocator->FreeBlock(pair->u.diskoff, pair->size); } } toku_free(_checkpointed.block_translation); @@ -359,53 +397,65 @@ bool block_table::_is_valid_blocknum(struct translation *t, BLOCKNUM b) { return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b; } -void block_table::_verify_valid_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { +void block_table::_verify_valid_blocknum(struct translation *UU(t), + BLOCKNUM UU(b)) { invariant(_is_valid_blocknum(t, b)); } -bool block_table::_is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) { +bool block_table::_is_valid_freeable_blocknum(struct translation *t, + BLOCKNUM b) { invariant(t->length_of_array >= t->smallest_never_used_blocknum.b); return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b; } // should be freeable -void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) { +void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), + BLOCKNUM UU(b)) { invariant(_is_valid_freeable_blocknum(t, b)); } // Also used only in ft-serialize-test. -void block_table::block_free(uint64_t offset) { +void block_table::block_free(uint64_t offset, uint64_t size) { _mutex_lock(); - _bt_block_allocator.free_block(offset); + _bt_block_allocator->FreeBlock(offset, size); _mutex_unlock(); } int64_t block_table::_calculate_size_on_disk(struct translation *t) { - return 8 + // smallest_never_used_blocknum - 8 + // blocknum_freelist_head - t->smallest_never_used_blocknum.b * 16 + // Array - 4; // 4 for checksum + return 8 + // smallest_never_used_blocknum + 8 + // blocknum_freelist_head + t->smallest_never_used_blocknum.b * 16 + // Array + 4; // 4 for checksum } -// We cannot free the disk space allocated to this blocknum if it is still in use by the given translation table. -bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) { - return t->block_translation && - b.b < t->smallest_never_used_blocknum.b && +// We cannot free the disk space allocated to this blocknum if it is still in +// use by the given translation table. +bool block_table::_translation_prevents_freeing( + struct translation *t, + BLOCKNUM b, + struct block_translation_pair *old_pair) { + return t->block_translation && b.b < t->smallest_never_used_blocknum.b && old_pair->u.diskoff == t->block_translation[b.b].u.diskoff; } -void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint, uint64_t heat) { +void block_table::_realloc_on_disk_internal(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + FT ft, + bool for_checkpoint) { toku_mutex_assert_locked(&_mutex); ft_set_dirty(ft, for_checkpoint); struct translation *t = &_current; struct block_translation_pair old_pair = t->block_translation[b.b]; - //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint - bool cannot_free = (bool) - ((!for_checkpoint && _translation_prevents_freeing(&_inprogress, b, &old_pair)) || - _translation_prevents_freeing(&_checkpointed, b, &old_pair)); - if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) { - _bt_block_allocator.free_block(old_pair.u.diskoff); + // Free the old block if it is not still in use by the checkpoint in + // progress or the previous checkpoint + bool cannot_free = + (!for_checkpoint && + _translation_prevents_freeing(&_inprogress, b, &old_pair)) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair); + if (!cannot_free && old_pair.u.diskoff != diskoff_unused) { + _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size); } uint64_t allocator_offset = diskoff_unused; @@ -413,19 +463,22 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *o if (size > 0) { // Allocate a new block if the size is greater than 0, // if the size is just 0, offset will be set to diskoff_unused - _bt_block_allocator.alloc_block(size, heat, &allocator_offset); + _bt_block_allocator->AllocBlock(size, &allocator_offset); } t->block_translation[b.b].u.diskoff = allocator_offset; *offset = allocator_offset; - //Update inprogress btt if appropriate (if called because Pending bit is set). + // Update inprogress btt if appropriate (if called because Pending bit is + // set). if (for_checkpoint) { paranoid_invariant(b.b < _inprogress.length_of_array); _inprogress.block_translation[b.b] = t->block_translation[b.b]; } } -void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset) { +void block_table::_ensure_safe_write_unlocked(int fd, + DISKOFF block_size, + DISKOFF block_offset) { // Requires: holding _mutex uint64_t size_needed = block_size + block_offset; if (size_needed > _safe_file_size) { @@ -435,7 +488,8 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF _mutex_unlock(); int64_t size_after; - toku_maybe_preallocate_in_file(fd, size_needed, _safe_file_size, &size_after); + toku_maybe_preallocate_in_file( + fd, size_needed, _safe_file_size, &size_after); _mutex_lock(); _safe_file_size = size_after; @@ -444,11 +498,16 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF } } -void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint, uint64_t heat) { +void block_table::realloc_on_disk(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + FT ft, + int fd, + bool for_checkpoint) { _mutex_lock(); struct translation *t = &_current; _verify_valid_freeable_blocknum(t, b); - _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint, heat); + _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint); _ensure_safe_write_unlocked(fd, size, *offset); _mutex_unlock(); @@ -458,70 +517,97 @@ bool block_table::_pair_is_unallocated(struct block_translation_pair *pair) { return pair->size == 0 && pair->u.diskoff == diskoff_unused; } -// Effect: figure out where to put the inprogress btt on disk, allocate space for it there. -// The space must be 512-byte aligned (both the starting address and the size). -// As a result, the allcoated space may be a little bit bigger (up to the next 512-byte boundary) than the actual btt. +// Effect: figure out where to put the inprogress btt on disk, allocate space +// for it there. +// The space must be 512-byte aligned (both the starting address and the +// size). +// As a result, the allcoated space may be a little bit bigger (up to the next +// 512-byte boundary) than the actual btt. void block_table::_alloc_inprogress_translation_on_disk_unlocked() { toku_mutex_assert_locked(&_mutex); struct translation *t = &_inprogress; paranoid_invariant_notnull(t->block_translation); BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - //Each inprogress is allocated only once + // Each inprogress is allocated only once paranoid_invariant(_pair_is_unallocated(&t->block_translation[b.b])); - //Allocate a new block + // Allocate a new block int64_t size = _calculate_size_on_disk(t); uint64_t offset; - _bt_block_allocator.alloc_block(size, 0, &offset); + _bt_block_allocator->AllocBlock(size, &offset); t->block_translation[b.b].u.diskoff = offset; - t->block_translation[b.b].size = size; + t->block_translation[b.b].size = size; } // Effect: Serializes the blocktable to a wbuf (which starts uninitialized) -// A clean shutdown runs checkpoint start so that current and inprogress are copies. -// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needd) -// The address is guaranteed to be 512-byte aligned, but the size is not guaranteed. -// It *is* guaranteed that we can read up to the next 512-byte boundary, however -void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w, - int64_t *address, int64_t *size) { +// A clean shutdown runs checkpoint start so that current and inprogress are +// copies. +// The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the +// total length is a multiple of 512 (so we pad with zeros at the end if +// needd) +// The address is guaranteed to be 512-byte aligned, but the size is not +// guaranteed. +// It *is* guaranteed that we can read up to the next 512-byte boundary, +// however +void block_table::serialize_translation_to_wbuf(int fd, + struct wbuf *w, + int64_t *address, + int64_t *size) { _mutex_lock(); struct translation *t = &_inprogress; BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); - _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block must be 512-byte aligned to make O_DIRECT happy. + _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block + // must be 512-byte + // aligned to make + // O_DIRECT happy. uint64_t size_translation = _calculate_size_on_disk(t); - uint64_t size_aligned = roundup_to_multiple(512, size_translation); - assert((int64_t)size_translation==t->block_translation[b.b].size); + uint64_t size_aligned = roundup_to_multiple(512, size_translation); + invariant((int64_t)size_translation == t->block_translation[b.b].size); { - //Init wbuf + // Init wbuf if (0) - printf("%s:%d writing translation table of size_translation %" PRIu64 " at %" PRId64 "\n", __FILE__, __LINE__, size_translation, t->block_translation[b.b].u.diskoff); + printf( + "%s:%d writing translation table of size_translation %" PRIu64 + " at %" PRId64 "\n", + __FILE__, + __LINE__, + size_translation, + t->block_translation[b.b].u.diskoff); char *XMALLOC_N_ALIGNED(512, size_aligned, buf); - for (uint64_t i=size_translation; ismallest_never_used_blocknum); - wbuf_BLOCKNUM(w, t->blocknum_freelist_head); + wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum); + wbuf_BLOCKNUM(w, t->blocknum_freelist_head); int64_t i; - for (i=0; ismallest_never_used_blocknum.b; i++) { + for (i = 0; i < t->smallest_never_used_blocknum.b; i++) { if (0) - printf("%s:%d %" PRId64 ",%" PRId64 "\n", __FILE__, __LINE__, t->block_translation[i].u.diskoff, t->block_translation[i].size); + printf("%s:%d %" PRId64 ",%" PRId64 "\n", + __FILE__, + __LINE__, + t->block_translation[i].u.diskoff, + t->block_translation[i].size); wbuf_DISKOFF(w, t->block_translation[i].u.diskoff); wbuf_DISKOFF(w, t->block_translation[i].size); } uint32_t checksum = toku_x1764_finish(&w->checksum); wbuf_int(w, checksum); *address = t->block_translation[b.b].u.diskoff; - *size = size_translation; - assert((*address)%512 == 0); + *size = size_translation; + invariant((*address) % 512 == 0); _ensure_safe_write_unlocked(fd, size_aligned, *address); _mutex_unlock(); } -// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) -void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { +// Perhaps rename: purpose is get disk address of a block, given its blocknum +// (blockid?) +void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size) { struct translation *t = &_current; _verify_valid_blocknum(t, b); if (offset) { @@ -532,8 +618,11 @@ void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOF } } -// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?) -void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) { +// Perhaps rename: purpose is get disk address of a block, given its blocknum +// (blockid?) +void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size) { _mutex_lock(); _translate_blocknum_to_offset_size_unlocked(b, offset, size); _mutex_unlock(); @@ -544,13 +633,13 @@ void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, // given that one more never-used blocknum will soon be used. void block_table::_maybe_expand_translation(struct translation *t) { if (t->length_of_array <= t->smallest_never_used_blocknum.b) { - //expansion is necessary + // expansion is necessary uint64_t new_length = t->smallest_never_used_blocknum.b * 2; XREALLOC_N(new_length, t->block_translation); uint64_t i; for (i = t->length_of_array; i < new_length; i++) { t->block_translation[i].u.next_free_blocknum = freelist_null; - t->block_translation[i].size = size_is_free; + t->block_translation[i].size = size_is_free; } t->length_of_array = new_length; } @@ -563,7 +652,8 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) { if (t->blocknum_freelist_head.b == freelist_null.b) { // no previously used blocknums are available // use a never used blocknum - _maybe_expand_translation(t); //Ensure a never used blocknums is available + _maybe_expand_translation( + t); // Ensure a never used blocknums is available result = t->smallest_never_used_blocknum; t->smallest_never_used_blocknum.b++; } else { // reuse a previously used blocknum @@ -571,11 +661,11 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) { BLOCKNUM next = t->block_translation[result.b].u.next_free_blocknum; t->blocknum_freelist_head = next; } - //Verify the blocknum is free + // Verify the blocknum is free paranoid_invariant(t->block_translation[result.b].size == size_is_free); - //blocknum is not free anymore + // blocknum is not free anymore t->block_translation[result.b].u.diskoff = diskoff_unused; - t->block_translation[result.b].size = 0; + t->block_translation[result.b].size = 0; _verify_valid_freeable_blocknum(t, result); *res = result; ft_set_dirty(ft, false); @@ -587,42 +677,46 @@ void block_table::allocate_blocknum(BLOCKNUM *res, FT ft) { _mutex_unlock(); } -void block_table::_free_blocknum_in_translation(struct translation *t, BLOCKNUM b) { +void block_table::_free_blocknum_in_translation(struct translation *t, + BLOCKNUM b) { _verify_valid_freeable_blocknum(t, b); paranoid_invariant(t->block_translation[b.b].size != size_is_free); - t->block_translation[b.b].size = size_is_free; + t->block_translation[b.b].size = size_is_free; t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head; - t->blocknum_freelist_head = b; + t->blocknum_freelist_head = b; } // Effect: Free a blocknum. // If the blocknum holds the only reference to a block on disk, free that block -void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, FT ft, bool for_checkpoint) { +void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, + FT ft, + bool for_checkpoint) { toku_mutex_assert_locked(&_mutex); BLOCKNUM b = *bp; - bp->b = 0; //Remove caller's reference. + bp->b = 0; // Remove caller's reference. struct block_translation_pair old_pair = _current.block_translation[b.b]; _free_blocknum_in_translation(&_current, b); if (for_checkpoint) { - paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); + paranoid_invariant(ft->checkpoint_header->type == + FT_CHECKPOINT_INPROGRESS); _free_blocknum_in_translation(&_inprogress, b); } - //If the size is 0, no disk block has ever been assigned to this blocknum. + // If the size is 0, no disk block has ever been assigned to this blocknum. if (old_pair.size > 0) { - //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint - bool cannot_free = (bool) - (_translation_prevents_freeing(&_inprogress, b, &old_pair) || - _translation_prevents_freeing(&_checkpointed, b, &old_pair)); + // Free the old block if it is not still in use by the checkpoint in + // progress or the previous checkpoint + bool cannot_free = + _translation_prevents_freeing(&_inprogress, b, &old_pair) || + _translation_prevents_freeing(&_checkpointed, b, &old_pair); if (!cannot_free) { - _bt_block_allocator.free_block(old_pair.u.diskoff); + _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size); } - } - else { - paranoid_invariant(old_pair.size==0); + } else { + paranoid_invariant(old_pair.size == 0); paranoid_invariant(old_pair.u.diskoff == diskoff_unused); } ft_set_dirty(ft, for_checkpoint); @@ -644,13 +738,14 @@ void block_table::verify_no_free_blocknums() { void block_table::free_unused_blocknums(BLOCKNUM root) { _mutex_lock(); int64_t smallest = _current.smallest_never_used_blocknum.b; - for (int64_t i=RESERVED_BLOCKNUMS; i < smallest; i++) { + for (int64_t i = RESERVED_BLOCKNUMS; i < smallest; i++) { if (i == root.b) { continue; } BLOCKNUM b = make_blocknum(i); if (_current.block_translation[b.b].size == 0) { - invariant(_current.block_translation[b.b].u.diskoff == diskoff_unused); + invariant(_current.block_translation[b.b].u.diskoff == + diskoff_unused); _free_blocknum_in_translation(&_current, b); } } @@ -675,13 +770,14 @@ bool block_table::_no_data_blocks_except_root(BLOCKNUM root) { goto cleanup; } } - cleanup: +cleanup: _mutex_unlock(); return ok; } // Verify there are no data blocks except root. -// TODO(leif): This actually takes a lock, but I don't want to fix all the callers right now. +// TODO(leif): This actually takes a lock, but I don't want to fix all the +// callers right now. void block_table::verify_no_data_blocks_except_root(BLOCKNUM UU(root)) { paranoid_invariant(_no_data_blocks_except_root(root)); } @@ -705,13 +801,24 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) { if (t->block_translation) { BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION); fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array); - fprintf(f, " smallest_never_used_blocknum[%" PRId64 "]", t->smallest_never_used_blocknum.b); - fprintf(f, " blocknum_free_list_head[%" PRId64 "]", t->blocknum_freelist_head.b); - fprintf(f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size); - fprintf(f, " location_on_disk[%" PRId64 "]\n", t->block_translation[b.b].u.diskoff); + fprintf(f, + " smallest_never_used_blocknum[%" PRId64 "]", + t->smallest_never_used_blocknum.b); + fprintf(f, + " blocknum_free_list_head[%" PRId64 "]", + t->blocknum_freelist_head.b); + fprintf( + f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size); + fprintf(f, + " location_on_disk[%" PRId64 "]\n", + t->block_translation[b.b].u.diskoff); int64_t i; - for (i=0; ilength_of_array; i++) { - fprintf(f, " %" PRId64 ": %" PRId64 " %" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); + for (i = 0; i < t->length_of_array; i++) { + fprintf(f, + " %" PRId64 ": %" PRId64 " %" PRId64 "\n", + i, + t->block_translation[i].u.diskoff, + t->block_translation[i].size); } fprintf(f, "\n"); } else { @@ -724,9 +831,13 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) { void block_table::dump_translation_table_pretty(FILE *f) { _mutex_lock(); struct translation *t = &_checkpointed; - assert(t->block_translation != nullptr); + invariant(t->block_translation != nullptr); for (int64_t i = 0; i < t->length_of_array; ++i) { - fprintf(f, "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size); + fprintf(f, + "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", + i, + t->block_translation[i].u.diskoff, + t->block_translation[i].size); } _mutex_unlock(); } @@ -750,7 +861,10 @@ void block_table::blocknum_dump_translation(BLOCKNUM b) { struct translation *t = &_current; if (b.b < t->length_of_array) { struct block_translation_pair *bx = &t->block_translation[b.b]; - printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", b.b, bx->u.diskoff, bx->size); + printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", + b.b, + bx->u.diskoff, + bx->size); } _mutex_unlock(); } @@ -763,26 +877,31 @@ void block_table::destroy(void) { toku_free(_inprogress.block_translation); toku_free(_checkpointed.block_translation); - _bt_block_allocator.destroy(); + _bt_block_allocator->Destroy(); + delete _bt_block_allocator; toku_mutex_destroy(&_mutex); nb_mutex_destroy(&_safe_file_size_lock); } -int block_table::_translation_deserialize_from_buffer(struct translation *t, - DISKOFF location_on_disk, - uint64_t size_on_disk, - // out: buffer with serialized translation - unsigned char *translation_buffer) { +int block_table::_translation_deserialize_from_buffer( + struct translation *t, + DISKOFF location_on_disk, + uint64_t size_on_disk, + // out: buffer with serialized translation + unsigned char *translation_buffer) { int r = 0; - assert(location_on_disk != 0); + invariant(location_on_disk != 0); t->type = TRANSLATION_CHECKPOINTED; // check the checksum uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4); uint64_t offset = size_on_disk - 4; - uint32_t stored_x1764 = toku_dtoh32(*(int*)(translation_buffer + offset)); + uint32_t stored_x1764 = toku_dtoh32(*(int *)(translation_buffer + offset)); if (x1764 != stored_x1764) { - fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764); + fprintf(stderr, + "Translation table checksum failure: calc=0x%08x read=0x%08x\n", + x1764, + stored_x1764); r = TOKUDB_BAD_CHECKSUM; goto exit; } @@ -790,42 +909,47 @@ int block_table::_translation_deserialize_from_buffer(struct translation *t, struct rbuf rb; rb.buf = translation_buffer; rb.ndone = 0; - rb.size = size_on_disk-4;//4==checksum + rb.size = size_on_disk - 4; // 4==checksum - t->smallest_never_used_blocknum = rbuf_blocknum(&rb); + t->smallest_never_used_blocknum = rbuf_blocknum(&rb); t->length_of_array = t->smallest_never_used_blocknum.b; invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS); - t->blocknum_freelist_head = rbuf_blocknum(&rb); + t->blocknum_freelist_head = rbuf_blocknum(&rb); XMALLOC_N(t->length_of_array, t->block_translation); for (int64_t i = 0; i < t->length_of_array; i++) { t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rb); t->block_translation[i].size = rbuf_DISKOFF(&rb); } - invariant(_calculate_size_on_disk(t) == (int64_t) size_on_disk); - invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t) size_on_disk); - invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk); + invariant(_calculate_size_on_disk(t) == (int64_t)size_on_disk); + invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == + (int64_t)size_on_disk); + invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == + location_on_disk); exit: return r; } int block_table::iterate(enum translation_type type, - BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) { + BLOCKTABLE_CALLBACK f, + void *extra, + bool data_only, + bool used_only) { struct translation *src; - + int r = 0; switch (type) { - case TRANSLATION_CURRENT: - src = &_current; - break; - case TRANSLATION_INPROGRESS: - src = &_inprogress; - break; - case TRANSLATION_CHECKPOINTED: - src = &_checkpointed; - break; - default: - r = EINVAL; + case TRANSLATION_CURRENT: + src = &_current; + break; + case TRANSLATION_INPROGRESS: + src = &_inprogress; + break; + case TRANSLATION_CHECKPOINTED: + src = &_checkpointed; + break; + default: + r = EINVAL; } struct translation fakecurrent; @@ -839,12 +963,15 @@ int block_table::iterate(enum translation_type type, src->block_translation[RESERVED_BLOCKNUM_TRANSLATION]; _mutex_unlock(); int64_t i; - for (i=0; ismallest_never_used_blocknum.b; i++) { + for (i = 0; i < t->smallest_never_used_blocknum.b; i++) { struct block_translation_pair pair = t->block_translation[i]; - if (data_only && i< RESERVED_BLOCKNUMS) continue; - if (used_only && pair.size <= 0) continue; + if (data_only && i < RESERVED_BLOCKNUMS) + continue; + if (used_only && pair.size <= 0) + continue; r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra); - if (r!=0) break; + if (r != 0) + break; } toku_free(t->block_translation); } @@ -856,8 +983,11 @@ typedef struct { int64_t total_space; } frag_extra; -static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) { - frag_extra *info = (frag_extra *) extra; +static int frag_helper(BLOCKNUM UU(b), + int64_t size, + int64_t address, + void *extra) { + frag_extra *info = (frag_extra *)extra; if (size + address > info->total_space) info->total_space = size + address; @@ -865,22 +995,30 @@ static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extr return 0; } -void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep) { - frag_extra info = { 0, 0 }; +void block_table::internal_fragmentation(int64_t *total_sizep, + int64_t *used_sizep) { + frag_extra info = {0, 0}; int r = iterate(TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true); - assert_zero(r); + invariant_zero(r); - if (total_sizep) *total_sizep = info.total_space; - if (used_sizep) *used_sizep = info.used_space; + if (total_sizep) + *total_sizep = info.total_space; + if (used_sizep) + *used_sizep = info.used_space; } -void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) { +void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, + DISKOFF *offset, + FT ft) { toku_mutex_assert_locked(&_mutex); BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); - _realloc_on_disk_internal(b, size, offset, ft, false, 0); + _realloc_on_disk_internal(b, size, offset, ft, false); } -void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) { +void block_table::realloc_descriptor_on_disk(DISKOFF size, + DISKOFF *offset, + FT ft, + int fd) { _mutex_lock(); _realloc_descriptor_on_disk_unlocked(size, offset, ft); _ensure_safe_write_unlocked(fd, size, *offset); @@ -897,11 +1035,12 @@ void block_table::get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size) { void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) { // Requires: blocktable lock is held. // Requires: report->file_size_bytes is already filled in. - + // Count the headers. - report->data_bytes = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + report->data_bytes = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; report->data_blocks = 1; - report->checkpoint_bytes_additional = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + report->checkpoint_bytes_additional = + BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; report->checkpoint_blocks_additional = 1; struct translation *current = &_current; @@ -915,30 +1054,34 @@ void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) { struct translation *checkpointed = &_checkpointed; for (int64_t i = 0; i < checkpointed->length_of_array; i++) { - struct block_translation_pair *pair = &checkpointed->block_translation[i]; - if (pair->size > 0 && !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff)) { - report->checkpoint_bytes_additional += pair->size; - report->checkpoint_blocks_additional++; + struct block_translation_pair *pair = + &checkpointed->block_translation[i]; + if (pair->size > 0 && + !(i < current->length_of_array && + current->block_translation[i].size > 0 && + current->block_translation[i].u.diskoff == pair->u.diskoff)) { + report->checkpoint_bytes_additional += pair->size; + report->checkpoint_blocks_additional++; } } struct translation *inprogress = &_inprogress; for (int64_t i = 0; i < inprogress->length_of_array; i++) { struct block_translation_pair *pair = &inprogress->block_translation[i]; - if (pair->size > 0 && !(i < current->length_of_array && - current->block_translation[i].size > 0 && - current->block_translation[i].u.diskoff == pair->u.diskoff) && - !(i < checkpointed->length_of_array && - checkpointed->block_translation[i].size > 0 && - checkpointed->block_translation[i].u.diskoff == pair->u.diskoff)) { + if (pair->size > 0 && + !(i < current->length_of_array && + current->block_translation[i].size > 0 && + current->block_translation[i].u.diskoff == pair->u.diskoff) && + !(i < checkpointed->length_of_array && + checkpointed->block_translation[i].size > 0 && + checkpointed->block_translation[i].u.diskoff == + pair->u.diskoff)) { report->checkpoint_bytes_additional += pair->size; report->checkpoint_blocks_additional++; } } - _bt_block_allocator.get_unused_statistics(report); + _bt_block_allocator->UnusedStatistics(report); } void block_table::get_info64(struct ftinfo64 *s) { @@ -967,25 +1110,38 @@ void block_table::get_info64(struct ftinfo64 *s) { _mutex_unlock(); } -int block_table::iterate_translation_tables(uint64_t checkpoint_count, - int (*iter)(uint64_t checkpoint_count, - int64_t total_num_rows, - int64_t blocknum, - int64_t diskoff, - int64_t size, - void *extra), - void *iter_extra) { +int block_table::iterate_translation_tables( + uint64_t checkpoint_count, + int (*iter)(uint64_t checkpoint_count, + int64_t total_num_rows, + int64_t blocknum, + int64_t diskoff, + int64_t size, + void *extra), + void *iter_extra) { int error = 0; _mutex_lock(); - int64_t total_num_rows = _current.length_of_array + _checkpointed.length_of_array; + int64_t total_num_rows = + _current.length_of_array + _checkpointed.length_of_array; for (int64_t i = 0; error == 0 && i < _current.length_of_array; ++i) { struct block_translation_pair *block = &_current.block_translation[i]; - error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff, block->size, iter_extra); + error = iter(checkpoint_count, + total_num_rows, + i, + block->u.diskoff, + block->size, + iter_extra); } for (int64_t i = 0; error == 0 && i < _checkpointed.length_of_array; ++i) { - struct block_translation_pair *block = &_checkpointed.block_translation[i]; - error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff, block->size, iter_extra); + struct block_translation_pair *block = + &_checkpointed.block_translation[i]; + error = iter(checkpoint_count - 1, + total_num_rows, + i, + block->u.diskoff, + block->size, + iter_extra); } _mutex_unlock(); diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.h b/storage/tokudb/PerconaFT/ft/serialize/block_table.h index 8d39167454094..dd732d4f3726c 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_table.h +++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.h @@ -62,13 +62,16 @@ enum { RESERVED_BLOCKNUMS }; -typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra); +typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, + int64_t size, + int64_t address, + void *extra); static inline BLOCKNUM make_blocknum(int64_t b) { - BLOCKNUM result = { .b = b }; + BLOCKNUM result = {.b = b}; return result; } -static const BLOCKNUM ROLLBACK_NONE = { .b = 0 }; +static const BLOCKNUM ROLLBACK_NONE = {.b = 0}; /** * There are three copies of the translation table (btt) in the block table: @@ -80,18 +83,20 @@ static const BLOCKNUM ROLLBACK_NONE = { .b = 0 }; * * inprogress Is only filled by copying from current, * and is the only version ever serialized to disk. - * (It is serialized to disk on checkpoint and clean shutdown.) + * (It is serialized to disk on checkpoint and clean + *shutdown.) * At end of checkpoint it replaces 'checkpointed'. * During a checkpoint, any 'pending' dirty writes will update * inprogress. * * current Is initialized by copying from checkpointed, - * is the only version ever modified while the database is in use, + * is the only version ever modified while the database is in + *use, * and is the only version ever copied to inprogress. * It is never stored on disk. */ class block_table { -public: + public: enum translation_type { TRANSLATION_NONE = 0, TRANSLATION_CURRENT, @@ -102,7 +107,10 @@ class block_table { void create(); - int create_from_buffer(int fd, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer); + int create_from_buffer(int fd, + DISKOFF location_on_disk, + DISKOFF size_on_disk, + unsigned char *translation_buffer); void destroy(); @@ -114,11 +122,21 @@ class block_table { // Blocknums void allocate_blocknum(BLOCKNUM *res, struct ft *ft); - void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat); + void realloc_on_disk(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + struct ft *ft, + int fd, + bool for_checkpoint); void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint); - void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + void translate_blocknum_to_offset_size(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size); void free_unused_blocknums(BLOCKNUM root); - void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, int fd); + void realloc_descriptor_on_disk(DISKOFF size, + DISKOFF *offset, + struct ft *ft, + int fd); void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size); // External verfication @@ -127,15 +145,22 @@ class block_table { void verify_no_free_blocknums(); // Serialization - void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, int64_t *size); + void serialize_translation_to_wbuf(int fd, + struct wbuf *w, + int64_t *address, + int64_t *size); // DEBUG ONLY (ftdump included), tests included void blocknum_dump_translation(BLOCKNUM b); void dump_translation_table_pretty(FILE *f); void dump_translation_table(FILE *f); - void block_free(uint64_t offset); + void block_free(uint64_t offset, uint64_t size); - int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only); + int iterate(enum translation_type type, + BLOCKTABLE_CALLBACK f, + void *extra, + bool data_only, + bool used_only); void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep); // Requires: blocktable lock is held. @@ -146,13 +171,16 @@ class block_table { void get_info64(struct ftinfo64 *); - int iterate_translation_tables(uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *); + int iterate_translation_tables( + uint64_t, + int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), + void *); -private: + private: struct block_translation_pair { // If in the freelist, use next_free_blocknum, otherwise diskoff. union { - DISKOFF diskoff; + DISKOFF diskoff; BLOCKNUM next_free_blocknum; } u; @@ -173,7 +201,8 @@ class block_table { struct translation { enum translation_type type; - // Number of elements in array (block_translation). always >= smallest_never_used_blocknum + // Number of elements in array (block_translation). always >= + // smallest_never_used_blocknum int64_t length_of_array; BLOCKNUM smallest_never_used_blocknum; @@ -181,20 +210,28 @@ class block_table { BLOCKNUM blocknum_freelist_head; struct block_translation_pair *block_translation; - // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size - // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff + // size_on_disk is stored in + // block_translation[RESERVED_BLOCKNUM_TRANSLATION].size + // location_on is stored in + // block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff }; void _create_internal(); - int _translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize - DISKOFF location_on_disk, // location of translation_buffer - uint64_t size_on_disk, - unsigned char * translation_buffer); // buffer with serialized translation - - void _copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype); + int _translation_deserialize_from_buffer( + struct translation *t, // destination into which to deserialize + DISKOFF location_on_disk, // location of translation_buffer + uint64_t size_on_disk, + unsigned char * + translation_buffer); // buffer with serialized translation + + void _copy_translation(struct translation *dst, + struct translation *src, + enum translation_type newtype); void _maybe_optimize_translation(struct translation *t); void _maybe_expand_translation(struct translation *t); - bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair); + bool _translation_prevents_freeing(struct translation *t, + BLOCKNUM b, + struct block_translation_pair *old_pair); void _free_blocknum_in_translation(struct translation *t, BLOCKNUM b); int64_t _calculate_size_on_disk(struct translation *t); bool _pair_is_unallocated(struct block_translation_pair *pair); @@ -203,14 +240,26 @@ class block_table { // Blocknum management void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft); - void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint); - void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft); - void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat); - void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); + void _free_blocknum_unlocked(BLOCKNUM *bp, + struct ft *ft, + bool for_checkpoint); + void _realloc_descriptor_on_disk_unlocked(DISKOFF size, + DISKOFF *offset, + struct ft *ft); + void _realloc_on_disk_internal(BLOCKNUM b, + DISKOFF size, + DISKOFF *offset, + struct ft *ft, + bool for_checkpoint); + void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, + DISKOFF *offset, + DISKOFF *size); // File management void _maybe_truncate_file(int fd, uint64_t size_needed_before); - void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset); + void _ensure_safe_write_unlocked(int fd, + DISKOFF block_size, + DISKOFF block_offset); // Verification bool _is_valid_blocknum(struct translation *t, BLOCKNUM b); @@ -220,29 +269,33 @@ class block_table { bool _no_data_blocks_except_root(BLOCKNUM root); bool _blocknum_allocated(BLOCKNUM b); - // Locking + // Locking // // TODO: Move the lock to the FT void _mutex_lock(); void _mutex_unlock(); - // The current translation is the one used by client threads. + // The current translation is the one used by client threads. // It is not represented on disk. struct translation _current; - // The translation used by the checkpoint currently in progress. - // If the checkpoint thread allocates a block, it must also update the current translation. + // The translation used by the checkpoint currently in progress. + // If the checkpoint thread allocates a block, it must also update the + // current translation. struct translation _inprogress; - // The translation for the data that shall remain inviolate on disk until the next checkpoint finishes, + // The translation for the data that shall remain inviolate on disk until + // the next checkpoint finishes, // after which any blocks used only in this translation can be freed. struct translation _checkpointed; - // The in-memory data structure for block allocation. + // The in-memory data structure for block allocation. // There is no on-disk data structure for block allocation. - // Note: This is *allocation* not *translation* - the block allocator is unaware of which - // blocks are used for which translation, but simply allocates and deallocates blocks. - block_allocator _bt_block_allocator; + // Note: This is *allocation* not *translation* - the block allocator is + // unaware of which + // blocks are used for which translation, but simply allocates and + // deallocates blocks. + BlockAllocator *_bt_block_allocator; toku_mutex_t _mutex; struct nb_mutex _safe_file_size_lock; bool _checkpoint_skipped; @@ -257,16 +310,16 @@ class block_table { #include "ft/serialize/wbuf.h" -static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { +static inline void wbuf_BLOCKNUM(struct wbuf *w, BLOCKNUM b) { wbuf_ulonglong(w, b.b); } -static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) { +static inline void wbuf_nocrc_BLOCKNUM(struct wbuf *w, BLOCKNUM b) { wbuf_nocrc_ulonglong(w, b.b); } static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off) { - wbuf_ulonglong(wb, (uint64_t) off); + wbuf_ulonglong(wb, (uint64_t)off); } #include "ft/serialize/rbuf.h" @@ -280,6 +333,8 @@ static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb) { return result; } -static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma), BLOCKNUM *blocknum) { +static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, + memarena *UU(ma), + BLOCKNUM *blocknum) { *blocknum = rbuf_blocknum(rb); } diff --git a/storage/tokudb/PerconaFT/ft/serialize/compress.cc b/storage/tokudb/PerconaFT/ft/serialize/compress.cc index 1719b6b7cb51d..c2f815c6cf22c 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/compress.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/compress.cc @@ -235,7 +235,7 @@ void toku_decompress (Bytef *dest, uLongf destLen, strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; - char windowBits = source[1]; + int8_t windowBits = source[1]; int r = inflateInit2(&strm, windowBits); lazy_assert(r == Z_OK); strm.next_out = dest; diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc index 49d4368a3ab83..8fcb529341205 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc @@ -217,8 +217,8 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) // translation table itself won't fit in main memory. ssize_t readsz = toku_os_pread(fd, tbuf, size_to_read, translation_address_on_disk); - assert(readsz >= translation_size_on_disk); - assert(readsz <= (ssize_t)size_to_read); + invariant(readsz >= translation_size_on_disk); + invariant(readsz <= (ssize_t)size_to_read); } // Create table and read in data. r = ft->blocktable.create_from_buffer(fd, @@ -411,73 +411,90 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) return r; } -static size_t -serialize_ft_min_size (uint32_t version) { +static size_t serialize_ft_min_size(uint32_t version) { size_t size = 0; - switch(version) { - case FT_LAYOUT_VERSION_29: - size += sizeof(uint64_t); // logrows in ft - case FT_LAYOUT_VERSION_28: - size += sizeof(uint32_t); // fanout in ft - case FT_LAYOUT_VERSION_27: - case FT_LAYOUT_VERSION_26: - case FT_LAYOUT_VERSION_25: - case FT_LAYOUT_VERSION_24: - case FT_LAYOUT_VERSION_23: - case FT_LAYOUT_VERSION_22: - case FT_LAYOUT_VERSION_21: - size += sizeof(MSN); // max_msn_in_ft - case FT_LAYOUT_VERSION_20: - case FT_LAYOUT_VERSION_19: - size += 1; // compression method - size += sizeof(MSN); // highest_unused_msn_for_upgrade - case FT_LAYOUT_VERSION_18: - size += sizeof(uint64_t); // time_of_last_optimize_begin - size += sizeof(uint64_t); // time_of_last_optimize_end - size += sizeof(uint32_t); // count_of_optimize_in_progress - size += sizeof(MSN); // msn_at_start_of_last_completed_optimize - size -= 8; // removed num_blocks_to_upgrade_14 - size -= 8; // removed num_blocks_to_upgrade_13 - case FT_LAYOUT_VERSION_17: - size += 16; - invariant(sizeof(STAT64INFO_S) == 16); - case FT_LAYOUT_VERSION_16: - case FT_LAYOUT_VERSION_15: - size += 4; // basement node size - size += 8; // num_blocks_to_upgrade_14 (previously num_blocks_to_upgrade, now one int each for upgrade from 13, 14 - size += 8; // time of last verification - case FT_LAYOUT_VERSION_14: - size += 8; //TXNID that created - case FT_LAYOUT_VERSION_13: - size += ( 4 // build_id - +4 // build_id_original - +8 // time_of_creation - +8 // time_of_last_modification - ); + switch (version) { + case FT_LAYOUT_VERSION_29: + size += sizeof(uint64_t); // logrows in ft + case FT_LAYOUT_VERSION_28: + size += sizeof(uint32_t); // fanout in ft + case FT_LAYOUT_VERSION_27: + case FT_LAYOUT_VERSION_26: + case FT_LAYOUT_VERSION_25: + case FT_LAYOUT_VERSION_24: + case FT_LAYOUT_VERSION_23: + case FT_LAYOUT_VERSION_22: + case FT_LAYOUT_VERSION_21: + size += sizeof(MSN); // max_msn_in_ft + case FT_LAYOUT_VERSION_20: + case FT_LAYOUT_VERSION_19: + size += 1; // compression method + size += sizeof(MSN); // highest_unused_msn_for_upgrade + case FT_LAYOUT_VERSION_18: + size += sizeof(uint64_t); // time_of_last_optimize_begin + size += sizeof(uint64_t); // time_of_last_optimize_end + size += sizeof(uint32_t); // count_of_optimize_in_progress + size += sizeof(MSN); // msn_at_start_of_last_completed_optimize + size -= 8; // removed num_blocks_to_upgrade_14 + size -= 8; // removed num_blocks_to_upgrade_13 + case FT_LAYOUT_VERSION_17: + size += 16; + invariant(sizeof(STAT64INFO_S) == 16); + case FT_LAYOUT_VERSION_16: + case FT_LAYOUT_VERSION_15: + size += 4; // basement node size + size += 8; // num_blocks_to_upgrade_14 (previously + // num_blocks_to_upgrade, now one int each for upgrade + // from 13, 14 + size += 8; // time of last verification + case FT_LAYOUT_VERSION_14: + size += 8; // TXNID that created + case FT_LAYOUT_VERSION_13: + size += (4 // build_id + + + 4 // build_id_original + + + 8 // time_of_creation + + + 8 // time_of_last_modification + ); // fall through - case FT_LAYOUT_VERSION_12: - size += (+8 // "tokudata" - +4 // version - +4 // original_version - +4 // size - +8 // byte order verification - +8 // checkpoint_count - +8 // checkpoint_lsn - +4 // tree's nodesize - +8 // translation_size_on_disk - +8 // translation_address_on_disk - +4 // checksum - +8 // Number of blocks in old version. - +8 // diskoff - +4 // flags - ); - break; - default: - abort(); - } - - lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); + case FT_LAYOUT_VERSION_12: + size += (+8 // "tokudata" + + + 4 // version + + + 4 // original_version + + + 4 // size + + + 8 // byte order verification + + + 8 // checkpoint_count + + + 8 // checkpoint_lsn + + + 4 // tree's nodesize + + + 8 // translation_size_on_disk + + + 8 // translation_address_on_disk + + + 4 // checksum + + + 8 // Number of blocks in old version. + + + 8 // diskoff + + + 4 // flags + ); + break; + default: + abort(); + } + + lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE); return size; } @@ -486,7 +503,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, struct rbuf *rb, uint64_t *checkpoint_count, LSN *checkpoint_lsn, - uint32_t * version_p) + uint32_t *version_p) // Effect: Read and parse the header of a fractalal tree // // Simply reading the raw bytes of the header into an rbuf is insensitive @@ -496,18 +513,18 @@ int deserialize_ft_from_fd_into_rbuf(int fd, // file AND the header is useless { int r = 0; - const int64_t prefix_size = 8 + // magic ("tokudata") - 4 + // version - 4 + // build_id - 4; // size + const int64_t prefix_size = 8 + // magic ("tokudata") + 4 + // version + 4 + // build_id + 4; // size const int64_t read_size = roundup_to_multiple(512, prefix_size); unsigned char *XMALLOC_N_ALIGNED(512, read_size, prefix); rb->buf = NULL; int64_t n = toku_os_pread(fd, prefix, read_size, offset_of_header); if (n != read_size) { - if (n==0) { + if (n == 0) { r = TOKUDB_DICTIONARY_NO_HEADER; - } else if (n<0) { + } else if (n < 0) { r = get_error_errno(); } else { r = EINVAL; @@ -518,95 +535,102 @@ int deserialize_ft_from_fd_into_rbuf(int fd, rbuf_init(rb, prefix, prefix_size); - //Check magic number + // Check magic number const void *magic; rbuf_literal_bytes(rb, &magic, 8); - if (memcmp(magic,"tokudata",8)!=0) { - if ((*(uint64_t*)magic) == 0) { + if (memcmp(magic, "tokudata", 8) != 0) { + if ((*(uint64_t *)magic) == 0) { r = TOKUDB_DICTIONARY_NO_HEADER; } else { - r = EINVAL; //Not a tokudb file! Do not use. + r = EINVAL; // Not a tokudb file! Do not use. } goto exit; } - //Version MUST be in network order regardless of disk order. + // Version MUST be in network order regardless of disk order. uint32_t version; version = rbuf_network_int(rb); *version_p = version; if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) { - r = TOKUDB_DICTIONARY_TOO_OLD; //Cannot use + r = TOKUDB_DICTIONARY_TOO_OLD; // Cannot use goto exit; } else if (version > FT_LAYOUT_VERSION) { - r = TOKUDB_DICTIONARY_TOO_NEW; //Cannot use + r = TOKUDB_DICTIONARY_TOO_NEW; // Cannot use goto exit; } - //build_id MUST be in network order regardless of disk order. + // build_id MUST be in network order regardless of disk order. uint32_t build_id __attribute__((__unused__)); build_id = rbuf_network_int(rb); int64_t min_header_size; min_header_size = serialize_ft_min_size(version); - //Size MUST be in network order regardless of disk order. + // Size MUST be in network order regardless of disk order. uint32_t size; size = rbuf_network_int(rb); - //If too big, it is corrupt. We would probably notice during checksum - //but may have to do a multi-gigabyte malloc+read to find out. - //If its too small reading rbuf would crash, so verify. - if (size > block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) { + // If too big, it is corrupt. We would probably notice during checksum + // but may have to do a multi-gigabyte malloc+read to find out. + // If its too small reading rbuf would crash, so verify. + if (size > BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE || + size < min_header_size) { r = TOKUDB_DICTIONARY_NO_HEADER; goto exit; } - lazy_assert(rb->ndone==prefix_size); + lazy_assert(rb->ndone == prefix_size); rb->size = size; { toku_free(rb->buf); uint32_t size_to_read = roundup_to_multiple(512, size); XMALLOC_N_ALIGNED(512, size_to_read, rb->buf); - assert(offset_of_header%512==0); + invariant(offset_of_header % 512 == 0); n = toku_os_pread(fd, rb->buf, size_to_read, offset_of_header); if (n != size_to_read) { if (n < 0) { r = get_error_errno(); } else { - r = EINVAL; //Header might be useless (wrong size) or could be a disk read error. + r = EINVAL; // Header might be useless (wrong size) or could be + // a disk read error. } goto exit; } } - //It's version 14 or later. Magic looks OK. - //We have an rbuf that represents the header. - //Size is within acceptable bounds. + // It's version 14 or later. Magic looks OK. + // We have an rbuf that represents the header. + // Size is within acceptable bounds. - //Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function changed) + // Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function + // changed) uint32_t calculated_x1764; - calculated_x1764 = toku_x1764_memory(rb->buf, rb->size-4); + calculated_x1764 = toku_x1764_memory(rb->buf, rb->size - 4); uint32_t stored_x1764; - stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4)); + stored_x1764 = toku_dtoh32(*(int *)(rb->buf + rb->size - 4)); if (calculated_x1764 != stored_x1764) { - r = TOKUDB_BAD_CHECKSUM; //Header useless - fprintf(stderr, "Header checksum failure: calc=0x%08x read=0x%08x\n", calculated_x1764, stored_x1764); + r = TOKUDB_BAD_CHECKSUM; // Header useless + fprintf(stderr, + "Header checksum failure: calc=0x%08x read=0x%08x\n", + calculated_x1764, + stored_x1764); goto exit; } - //Verify byte order + // Verify byte order const void *tmp_byte_order_check; lazy_assert((sizeof toku_byte_order_host) == 8); - rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order + rbuf_literal_bytes( + rb, &tmp_byte_order_check, 8); // Must not translate byte order int64_t byte_order_stored; - byte_order_stored = *(int64_t*)tmp_byte_order_check; + byte_order_stored = *(int64_t *)tmp_byte_order_check; if (byte_order_stored != toku_byte_order_host) { - r = TOKUDB_DICTIONARY_NO_HEADER; //Cannot use dictionary + r = TOKUDB_DICTIONARY_NO_HEADER; // Cannot use dictionary goto exit; } - //Load checkpoint count + // Load checkpoint count *checkpoint_count = rbuf_ulonglong(rb); *checkpoint_lsn = rbuf_LSN(rb); - //Restart at beginning during regular deserialization + // Restart at beginning during regular deserialization rb->ndone = 0; exit: @@ -620,11 +644,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd, // Read ft from file into struct. Read both headers and use one. // We want the latest acceptable header whose checkpoint_lsn is no later // than max_acceptable_lsn. -int -toku_deserialize_ft_from(int fd, - LSN max_acceptable_lsn, - FT *ft) -{ +int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft) { struct rbuf rb_0; struct rbuf rb_1; uint64_t checkpoint_count_0 = 0; @@ -638,13 +658,23 @@ toku_deserialize_ft_from(int fd, int r0, r1, r; toku_off_t header_0_off = 0; - r0 = deserialize_ft_from_fd_into_rbuf(fd, header_0_off, &rb_0, &checkpoint_count_0, &checkpoint_lsn_0, &version_0); + r0 = deserialize_ft_from_fd_into_rbuf(fd, + header_0_off, + &rb_0, + &checkpoint_count_0, + &checkpoint_lsn_0, + &version_0); if (r0 == 0 && checkpoint_lsn_0.lsn <= max_acceptable_lsn.lsn) { h0_acceptable = true; } - toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; - r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, &version_1); + toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + r1 = deserialize_ft_from_fd_into_rbuf(fd, + header_1_off, + &rb_1, + &checkpoint_count_1, + &checkpoint_lsn_1, + &version_1); if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) { h1_acceptable = true; } @@ -655,24 +685,29 @@ toku_deserialize_ft_from(int fd, // We were unable to read either header or at least one is too // new. Certain errors are higher priority than others. Order of // these if/else if is important. - if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) { + if (r0 == TOKUDB_DICTIONARY_TOO_NEW || + r1 == TOKUDB_DICTIONARY_TOO_NEW) { r = TOKUDB_DICTIONARY_TOO_NEW; - } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || r1 == TOKUDB_DICTIONARY_TOO_OLD) { + } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || + r1 == TOKUDB_DICTIONARY_TOO_OLD) { r = TOKUDB_DICTIONARY_TOO_OLD; } else if (r0 == TOKUDB_BAD_CHECKSUM && r1 == TOKUDB_BAD_CHECKSUM) { fprintf(stderr, "Both header checksums failed.\n"); r = TOKUDB_BAD_CHECKSUM; - } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || r1 == TOKUDB_DICTIONARY_NO_HEADER) { + } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || + r1 == TOKUDB_DICTIONARY_NO_HEADER) { r = TOKUDB_DICTIONARY_NO_HEADER; } else { - r = r0 ? r0 : r1; //Arbitrarily report the error from the - //first header, unless it's readable + r = r0 ? r0 : r1; // Arbitrarily report the error from the + // first header, unless it's readable } - // it should not be possible for both headers to be later than the max_acceptable_lsn - invariant(!((r0==0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) && - (r1==0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn))); - invariant(r!=0); + // it should not be possible for both headers to be later than the + // max_acceptable_lsn + invariant( + !((r0 == 0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) && + (r1 == 0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn))); + invariant(r != 0); goto exit; } @@ -682,8 +717,7 @@ toku_deserialize_ft_from(int fd, invariant(version_0 >= version_1); rb = &rb_0; version = version_0; - } - else { + } else { invariant(checkpoint_count_1 == checkpoint_count_0 + 1); invariant(version_1 >= version_0); rb = &rb_1; @@ -692,14 +726,18 @@ toku_deserialize_ft_from(int fd, } else if (h0_acceptable) { if (r1 == TOKUDB_BAD_CHECKSUM) { // print something reassuring - fprintf(stderr, "Header 2 checksum failed, but header 1 ok. Proceeding.\n"); + fprintf( + stderr, + "Header 2 checksum failed, but header 1 ok. Proceeding.\n"); } rb = &rb_0; version = version_0; } else if (h1_acceptable) { if (r0 == TOKUDB_BAD_CHECKSUM) { // print something reassuring - fprintf(stderr, "Header 1 checksum failed, but header 2 ok. Proceeding.\n"); + fprintf( + stderr, + "Header 1 checksum failed, but header 2 ok. Proceeding.\n"); } rb = &rb_1; version = version_1; @@ -718,15 +756,13 @@ toku_deserialize_ft_from(int fd, return r; } - -size_t toku_serialize_ft_size (FT_HEADER h) { +size_t toku_serialize_ft_size(FT_HEADER h) { size_t size = serialize_ft_min_size(h->layout_version); - //There is no dynamic data. - lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE); + // There is no dynamic data. + lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE); return size; } - void toku_serialize_ft_to_wbuf ( struct wbuf *wbuf, FT_HEADER h, @@ -771,52 +807,60 @@ void toku_serialize_ft_to_wbuf ( } void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) { - lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS); + lazy_assert(h->type == FT_CHECKPOINT_INPROGRESS); struct wbuf w_translation; int64_t size_translation; int64_t address_translation; // Must serialize translation first, to get address,size for header. - bt->serialize_translation_to_wbuf(fd, &w_translation, - &address_translation, - &size_translation); - assert(size_translation == w_translation.ndone); + bt->serialize_translation_to_wbuf( + fd, &w_translation, &address_translation, &size_translation); + invariant(size_translation == w_translation.ndone); - // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized. - assert(w_translation.size % 512 == 0); + // the number of bytes available in the buffer is 0 mod 512, and those last + // bytes are all initialized. + invariant(w_translation.size % 512 == 0); struct wbuf w_main; - size_t size_main = toku_serialize_ft_size(h); + size_t size_main = toku_serialize_ft_size(h); size_t size_main_aligned = roundup_to_multiple(512, size_main); - assert(size_main_alignedcheckpoint_count & 0x1) ? 0 : block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + main_offset = (h->checkpoint_count & 0x1) + ? 0 + : BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset); toku_free(w_main.buf); toku_free(w_translation.buf); diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc index c4f4886b6a03b..5914f8a1050e9 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc @@ -99,13 +99,11 @@ void toku_ft_serialize_layer_init(void) { num_cores = toku_os_get_number_active_processors(); int r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); - block_allocator::maybe_initialize_trace(); toku_serialize_in_parallel = false; } void toku_ft_serialize_layer_destroy(void) { toku_thread_pool_destroy(&ft_pool); - block_allocator::maybe_close_trace(); } enum { FILE_CHANGE_INCREMENT = (16 << 20) }; @@ -773,19 +771,23 @@ int toku_serialize_ftnode_to_memory(FTNODE node, return 0; } -int -toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint) { - +int toku_serialize_ftnode_to(int fd, + BLOCKNUM blocknum, + FTNODE node, + FTNODE_DISK_DATA *ndd, + bool do_rebalancing, + FT ft, + bool for_checkpoint) { size_t n_to_write; size_t n_uncompressed_bytes; char *compressed_buf = nullptr; - // because toku_serialize_ftnode_to is only called for + // because toku_serialize_ftnode_to is only called for // in toku_ftnode_flush_callback, we pass false // for in_parallel. The reasoning is that when we write - // nodes to disk via toku_ftnode_flush_callback, we + // nodes to disk via toku_ftnode_flush_callback, we // assume that it is being done on a non-critical - // background thread (probably for checkpointing), and therefore + // background thread (probably for checkpointing), and therefore // should not hog CPU, // // Should the above facts change, we may want to revisit @@ -802,32 +804,32 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA toku_unsafe_fetch(&toku_serialize_in_parallel), &n_to_write, &n_uncompressed_bytes, - &compressed_buf - ); + &compressed_buf); if (r != 0) { return r; } - // If the node has never been written, then write the whole buffer, including the zeros - invariant(blocknum.b>=0); + // If the node has never been written, then write the whole buffer, + // including the zeros + invariant(blocknum.b >= 0); DISKOFF offset; // Dirties the ft - ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, - ft, fd, for_checkpoint, - // Allocations for nodes high in the tree are considered 'hot', - // as they are likely to move again in the next checkpoint. - node->height); + ft->blocktable.realloc_on_disk( + blocknum, n_to_write, &offset, ft, fd, for_checkpoint); tokutime_t t0 = toku_time_now(); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); tokutime_t t1 = toku_time_now(); tokutime_t io_time = t1 - t0; - toku_ft_status_update_flush_reason(node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint); + toku_ft_status_update_flush_reason( + node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint); toku_free(compressed_buf); - node->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction. + node->dirty = 0; // See #1957. Must set the node to be clean after + // serializing it so that it doesn't get written again on + // the next checkpoint or eviction. return 0; } @@ -994,6 +996,7 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) { bn->seqinsert = orig_bn->seqinsert; bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied; bn->stat64_delta = orig_bn->stat64_delta; + bn->logical_rows_delta = orig_bn->logical_rows_delta; bn->data_buffer.clone(&orig_bn->data_buffer); return bn; } @@ -1004,6 +1007,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) { bn->seqinsert = 0; bn->stale_ancestor_messages_applied = false; bn->stat64_delta = ZEROSTATS; + bn->logical_rows_delta = 0; bn->data_buffer.init_zero(); return bn; } @@ -1897,7 +1901,7 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum, /* out */ int *layout_version_p); // This function upgrades a version 14 or 13 ftnode to the current -// verison. NOTE: This code assumes the first field of the rbuf has +// version. NOTE: This code assumes the first field of the rbuf has // already been read from the buffer (namely the layout_version of the // ftnode.) static int @@ -2488,9 +2492,12 @@ toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIAL serialized->blocknum = log->blocknum; } -int -toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized, - FT ft, bool for_checkpoint) { +int toku_serialize_rollback_log_to(int fd, + ROLLBACK_LOG_NODE log, + SERIALIZED_ROLLBACK_LOG_NODE serialized_log, + bool is_serialized, + FT ft, + bool for_checkpoint) { size_t n_to_write; char *compressed_buf; struct serialized_rollback_log_node serialized_local; @@ -2511,21 +2518,21 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA serialized_log->n_sub_blocks, serialized_log->sub_block, ft->h->compression_method, - &n_to_write, &compressed_buf); + &n_to_write, + &compressed_buf); // Dirties the ft DISKOFF offset; - ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, - ft, fd, for_checkpoint, - // We consider rollback log flushing the hottest possible allocation, - // since rollback logs are short-lived compared to FT nodes. - INT_MAX); + ft->blocktable.realloc_on_disk( + blocknum, n_to_write, &offset, ft, fd, for_checkpoint); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); toku_free(compressed_buf); if (!is_serialized) { toku_static_serialized_rollback_log_destroy(&serialized_local); - log->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction. + log->dirty = 0; // See #1957. Must set the node to be clean after + // serializing it so that it doesn't get written again + // on the next checkpoint or eviction. } return 0; } @@ -2704,7 +2711,7 @@ decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, s } static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) { - // This function exists solely to accomodate future changes in compression. + // This function exists solely to accommodate future changes in compression. int r = 0; if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) || (FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) || diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc new file mode 100644 index 0000000000000..922850fb3e096 --- /dev/null +++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc @@ -0,0 +1,833 @@ +/*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILIT or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#include "ft/serialize/rbtree_mhs.h" +#include "portability/toku_assert.h" +#include "portability/toku_portability.h" +#include + +namespace MhsRbTree { + + Tree::Tree() : _root(NULL), _align(1) {} + + Tree::Tree(uint64_t align) : _root(NULL), _align(align) {} + + Tree::~Tree() { Destroy(); } + + void Tree::PreOrder(Node *tree) const { + if (tree != NULL) { + fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt()); + PreOrder(tree->_left); + PreOrder(tree->_right); + } + } + + void Tree::PreOrder() { PreOrder(_root); } + + void Tree::InOrder(Node *tree) const { + if (tree != NULL) { + InOrder(tree->_left); + fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt()); + InOrder(tree->_right); + } + } + + // yeah, i only care about in order visitor. -Jun + void Tree::InOrderVisitor(Node *tree, + void (*f)(void *, Node *, uint64_t), + void *extra, + uint64_t depth) { + if (tree != NULL) { + InOrderVisitor(tree->_left, f, extra, depth + 1); + f(extra, tree, depth); + InOrderVisitor(tree->_right, f, extra, depth + 1); + } + } + + void Tree::InOrderVisitor(void (*f)(void *, Node *, uint64_t), + void *extra) { + InOrderVisitor(_root, f, extra, 0); + } + + void Tree::InOrder() { InOrder(_root); } + + void Tree::PostOrder(Node *tree) const { + if (tree != NULL) { + PostOrder(tree->_left); + PostOrder(tree->_right); + fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt()); + } + } + + void Tree::PostOrder() { PostOrder(_root); } + + Node *Tree::SearchByOffset(uint64_t offset) { + Node *x = _root; + while ((x != NULL) && (rbn_offset(x).ToInt() != offset)) { + if (offset < rbn_offset(x).ToInt()) + x = x->_left; + else + x = x->_right; + } + + return x; + } + + // mostly for testing + Node *Tree::SearchFirstFitBySize(uint64_t size) { + if (EffectiveSize(_root) < size && rbn_left_mhs(_root) < size && + rbn_right_mhs(_root) < size) { + return nullptr; + } else { + return SearchFirstFitBySizeHelper(_root, size); + } + } + + Node *Tree::SearchFirstFitBySizeHelper(Node *x, uint64_t size) { + if (EffectiveSize(x) >= size) { + // only possible to go left + if (rbn_left_mhs(x) >= size) + return SearchFirstFitBySizeHelper(x->_left, size); + else + return x; + } + if (rbn_left_mhs(x) >= size) + return SearchFirstFitBySizeHelper(x->_left, size); + + if (rbn_right_mhs(x) >= size) + return SearchFirstFitBySizeHelper(x->_right, size); + + // this is an invalid state + Dump(); + ValidateBalance(); + ValidateMhs(); + invariant(0); + return NULL; + } + + Node *Tree::MinNode(Node *tree) { + if (tree == NULL) + return NULL; + + while (tree->_left != NULL) + tree = tree->_left; + return tree; + } + + Node *Tree::MinNode() { return MinNode(_root); } + + Node *Tree::MaxNode(Node *tree) { + if (tree == NULL) + return NULL; + + while (tree->_right != NULL) + tree = tree->_right; + return tree; + } + + Node *Tree::MaxNode() { return MaxNode(_root); } + + Node *Tree::SuccessorHelper(Node *y, Node *x) { + while ((y != NULL) && (x == y->_right)) { + x = y; + y = y->_parent; + } + return y; + } + Node *Tree::Successor(Node *x) { + if (x->_right != NULL) + return MinNode(x->_right); + + Node *y = x->_parent; + return SuccessorHelper(y, x); + } + + Node *Tree::PredecessorHelper(Node *y, Node *x) { + while ((y != NULL) && (x == y->_left)) { + x = y; + y = y->_parent; + } + + return y; + } + Node *Tree::Predecessor(Node *x) { + if (x->_left != NULL) + return MaxNode(x->_left); + + Node *y = x->_parent; + return SuccessorHelper(y, x); + } + + /* + * px px + * / / + * x y + * / \ --(left rotation)--> / \ # + * lx y x ry + * / \ / \ + * ly ry lx ly + * max_hole_size updates are pretty local + */ + + void Tree::LeftRotate(Node *&root, Node *x) { + Node *y = x->_right; + + x->_right = y->_left; + rbn_right_mhs(x) = rbn_left_mhs(y); + + if (y->_left != NULL) + y->_left->_parent = x; + + y->_parent = x->_parent; + + if (x->_parent == NULL) { + root = y; + } else { + if (x->_parent->_left == x) { + x->_parent->_left = y; + } else { + x->_parent->_right = y; + } + } + y->_left = x; + rbn_left_mhs(y) = mhs_of_subtree(x); + + x->_parent = y; + } + + /* py py + * / / + * y x + * / \ --(right rotate)--> / \ # + * x ry lx y + * / \ / \ # + * lx rx rx ry + * + */ + + void Tree::RightRotate(Node *&root, Node *y) { + Node *x = y->_left; + + y->_left = x->_right; + rbn_left_mhs(y) = rbn_right_mhs(x); + + if (x->_right != NULL) + x->_right->_parent = y; + + x->_parent = y->_parent; + + if (y->_parent == NULL) { + root = x; + } else { + if (y == y->_parent->_right) + y->_parent->_right = x; + else + y->_parent->_left = x; + } + + x->_right = y; + rbn_right_mhs(x) = mhs_of_subtree(y); + y->_parent = x; + } + + // walking from this node up to update the mhs info + // whenver there is change on left/right mhs or size we should recalculate. + // prerequisit: the children of the node are mhs up-to-date. + void Tree::RecalculateMhs(Node *node) { + uint64_t *p_node_mhs = 0; + Node *parent = node->_parent; + + if (!parent) + return; + + uint64_t max_mhs = mhs_of_subtree(node); + if (node == parent->_left) { + p_node_mhs = &rbn_left_mhs(parent); + } else if (node == parent->_right) { + p_node_mhs = &rbn_right_mhs(parent); + } else { + return; + } + if (*p_node_mhs != max_mhs) { + *p_node_mhs = max_mhs; + RecalculateMhs(parent); + } + } + + void Tree::IsNewNodeMergable(Node *pred, + Node *succ, + Node::BlockPair pair, + bool *left_merge, + bool *right_merge) { + if (pred) { + OUUInt64 end_of_pred = rbn_size(pred) + rbn_offset(pred); + if (end_of_pred < pair._offset) + *left_merge = false; + else { + invariant(end_of_pred == pair._offset); + *left_merge = true; + } + } + if (succ) { + OUUInt64 begin_of_succ = rbn_offset(succ); + OUUInt64 end_of_node = pair._offset + pair._size; + if (end_of_node < begin_of_succ) { + *right_merge = false; + } else { + invariant(end_of_node == begin_of_succ); + *right_merge = true; + } + } + } + + void Tree::AbsorbNewNode(Node *pred, + Node *succ, + Node::BlockPair pair, + bool left_merge, + bool right_merge, + bool is_right_child) { + invariant(left_merge || right_merge); + if (left_merge && right_merge) { + // merge to the succ + if (!is_right_child) { + rbn_size(succ) += pair._size; + rbn_offset(succ) = pair._offset; + // merge to the pred + rbn_size(pred) += rbn_size(succ); + // to keep the invariant of the tree -no overlapping holes + rbn_offset(succ) += rbn_size(succ); + rbn_size(succ) = 0; + RecalculateMhs(succ); + RecalculateMhs(pred); + // pred dominates succ. this is going to + // update the pred labels separately. + // remove succ + RawRemove(_root, succ); + } else { + rbn_size(pred) += pair._size; + rbn_offset(succ) = rbn_offset(pred); + rbn_size(succ) += rbn_size(pred); + rbn_offset(pred) += rbn_size(pred); + rbn_size(pred) = 0; + RecalculateMhs(pred); + RecalculateMhs(succ); + // now remove pred + RawRemove(_root, pred); + } + } else if (left_merge) { + rbn_size(pred) += pair._size; + RecalculateMhs(pred); + } else if (right_merge) { + rbn_offset(succ) -= pair._size; + rbn_size(succ) += pair._size; + RecalculateMhs(succ); + } + } + // this is the most tedious part, but not complicated: + // 1.find where to insert the pair + // 2.if the pred and succ can merge with the pair. merge with them. either + // pred + // or succ can be removed. + // 3. if only left-mergable or right-mergeable, just merge + // 4. non-mergable case. insert the node and run the fixup. + + int Tree::Insert(Node *&root, Node::BlockPair pair) { + Node *x = _root; + Node *y = NULL; + bool left_merge = false; + bool right_merge = false; + Node *node = NULL; + + while (x != NULL) { + y = x; + if (pair._offset < rbn_key(x)) + x = x->_left; + else + x = x->_right; + } + + // we found where to insert, lets find out the pred and succ for + // possible + // merges. + // node->parent = y; + Node *pred, *succ; + if (y != NULL) { + if (pair._offset < rbn_key(y)) { + // as the left child + pred = PredecessorHelper(y->_parent, y); + succ = y; + IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge); + if (left_merge || right_merge) { + AbsorbNewNode( + pred, succ, pair, left_merge, right_merge, false); + } else { + // construct the node + Node::Pair mhsp {0, 0}; + node = + new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr); + if (!node) + return -1; + y->_left = node; + node->_parent = y; + RecalculateMhs(node); + } + + } else { + // as the right child + pred = y; + succ = SuccessorHelper(y->_parent, y); + IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge); + if (left_merge || right_merge) { + AbsorbNewNode( + pred, succ, pair, left_merge, right_merge, true); + } else { + // construct the node + Node::Pair mhsp {0, 0}; + node = + new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr); + if (!node) + return -1; + y->_right = node; + node->_parent = y; + RecalculateMhs(node); + } + } + } else { + Node::Pair mhsp {0, 0}; + node = new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr); + if (!node) + return -1; + root = node; + } + if (!left_merge && !right_merge) { + invariant_notnull(node); + node->_color = EColor::RED; + return InsertFixup(root, node); + } + return 0; + } + + int Tree::InsertFixup(Node *&root, Node *node) { + Node *parent, *gparent; + while ((parent = rbn_parent(node)) && rbn_is_red(parent)) { + gparent = rbn_parent(parent); + if (parent == gparent->_left) { + { + Node *uncle = gparent->_right; + if (uncle && rbn_is_red(uncle)) { + rbn_set_black(uncle); + rbn_set_black(parent); + rbn_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->_right == node) { + Node *tmp; + LeftRotate(root, parent); + tmp = parent; + parent = node; + node = tmp; + } + + rbn_set_black(parent); + rbn_set_red(gparent); + RightRotate(root, gparent); + } else { + { + Node *uncle = gparent->_left; + if (uncle && rbn_is_red(uncle)) { + rbn_set_black(uncle); + rbn_set_black(parent); + rbn_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->_left == node) { + Node *tmp; + RightRotate(root, parent); + tmp = parent; + parent = node; + node = tmp; + } + rbn_set_black(parent); + rbn_set_red(gparent); + LeftRotate(root, gparent); + } + } + rbn_set_black(root); + return 0; + } + + int Tree::Insert(Node::BlockPair pair) { return Insert(_root, pair); } + + uint64_t Tree::Remove(size_t size) { + Node *node = SearchFirstFitBySize(size); + return Remove(_root, node, size); + } + + void Tree::RawRemove(Node *&root, Node *node) { + Node *child, *parent; + EColor color; + + if ((node->_left != NULL) && (node->_right != NULL)) { + Node *replace = node; + replace = replace->_right; + while (replace->_left != NULL) + replace = replace->_left; + + if (rbn_parent(node)) { + if (rbn_parent(node)->_left == node) + rbn_parent(node)->_left = replace; + else + rbn_parent(node)->_right = replace; + } else { + root = replace; + } + child = replace->_right; + parent = rbn_parent(replace); + color = rbn_color(replace); + + if (parent == node) { + parent = replace; + } else { + if (child) + rbn_parent(child) = parent; + + parent->_left = child; + rbn_left_mhs(parent) = rbn_right_mhs(replace); + RecalculateMhs(parent); + replace->_right = node->_right; + rbn_set_parent(node->_right, replace); + rbn_right_mhs(replace) = rbn_right_mhs(node); + } + + replace->_parent = node->_parent; + replace->_color = node->_color; + replace->_left = node->_left; + rbn_left_mhs(replace) = rbn_left_mhs(node); + node->_left->_parent = replace; + RecalculateMhs(replace); + if (color == EColor::BLACK) + RawRemoveFixup(root, child, parent); + delete node; + return; + } + + if (node->_left != NULL) + child = node->_left; + else + child = node->_right; + + parent = node->_parent; + color = node->_color; + + if (child) + child->_parent = parent; + + if (parent) { + if (parent->_left == node) { + parent->_left = child; + rbn_left_mhs(parent) = child ? mhs_of_subtree(child) : 0; + } else { + parent->_right = child; + rbn_right_mhs(parent) = child ? mhs_of_subtree(child) : 0; + } + RecalculateMhs(parent); + } else + root = child; + if (color == EColor::BLACK) + RawRemoveFixup(root, child, parent); + delete node; + } + + void Tree::RawRemove(uint64_t offset) { + Node *node = SearchByOffset(offset); + RawRemove(_root, node); + } + static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { + return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; + } + uint64_t Tree::Remove(Node *&root, Node *node, size_t size) { + OUUInt64 n_offset = rbn_offset(node); + OUUInt64 n_size = rbn_size(node); + OUUInt64 answer_offset(align(rbn_offset(node).ToInt(), _align)); + + invariant((answer_offset + size) <= (n_offset + n_size)); + if (answer_offset == n_offset) { + rbn_offset(node) += size; + rbn_size(node) -= size; + RecalculateMhs(node); + if (rbn_size(node) == 0) { + RawRemove(root, node); + } + + } else { + if (answer_offset + size == n_offset + n_size) { + rbn_size(node) -= size; + RecalculateMhs(node); + } else { + // well, cut in the middle... + rbn_size(node) = answer_offset - n_offset; + RecalculateMhs(node); + Insert(_root, + {(answer_offset + size), + (n_offset + n_size) - (answer_offset + size)}); + } + } + return answer_offset.ToInt(); + } + + void Tree::RawRemoveFixup(Node *&root, Node *node, Node *parent) { + Node *other; + while ((!node || rbn_is_black(node)) && node != root) { + if (parent->_left == node) { + other = parent->_right; + if (rbn_is_red(other)) { + // Case 1: the brother of X, w, is read + rbn_set_black(other); + rbn_set_red(parent); + LeftRotate(root, parent); + other = parent->_right; + } + if ((!other->_left || rbn_is_black(other->_left)) && + (!other->_right || rbn_is_black(other->_right))) { + // Case 2: w is black and both of w's children are black + rbn_set_red(other); + node = parent; + parent = rbn_parent(node); + } else { + if (!other->_right || rbn_is_black(other->_right)) { + // Case 3: w is black and left child of w is red but + // right + // child is black + rbn_set_black(other->_left); + rbn_set_red(other); + RightRotate(root, other); + other = parent->_right; + } + // Case 4: w is black and right child of w is red, + // regardless of + // left child's color + rbn_set_color(other, rbn_color(parent)); + rbn_set_black(parent); + rbn_set_black(other->_right); + LeftRotate(root, parent); + node = root; + break; + } + } else { + other = parent->_left; + if (rbn_is_red(other)) { + // Case 1: w is red + rbn_set_black(other); + rbn_set_red(parent); + RightRotate(root, parent); + other = parent->_left; + } + if ((!other->_left || rbn_is_black(other->_left)) && + (!other->_right || rbn_is_black(other->_right))) { + // Case 2: w is black and both children are black + rbn_set_red(other); + node = parent; + parent = rbn_parent(node); + } else { + if (!other->_left || rbn_is_black(other->_left)) { + // Case 3: w is black and left child of w is red whereas + // right child is black + rbn_set_black(other->_right); + rbn_set_red(other); + LeftRotate(root, other); + other = parent->_left; + } + // Case 4:w is black and right child of w is red, regardless + // of + // the left child's color + rbn_set_color(other, rbn_color(parent)); + rbn_set_black(parent); + rbn_set_black(other->_left); + RightRotate(root, parent); + node = root; + break; + } + } + } + if (node) + rbn_set_black(node); + } + + void Tree::Destroy(Node *&tree) { + if (tree == NULL) + return; + + if (tree->_left != NULL) + Destroy(tree->_left); + if (tree->_right != NULL) + Destroy(tree->_right); + + delete tree; + tree = NULL; + } + + void Tree::Destroy() { Destroy(_root); } + + void Tree::Dump(Node *tree, Node::BlockPair pair, EDirection dir) { + if (tree != NULL) { + if (dir == EDirection::NONE) + fprintf(stderr, + "(%" PRIu64 ",%" PRIu64 ", mhs:(%" PRIu64 ",%" PRIu64 + "))(B) is root\n", + rbn_offset(tree).ToInt(), + rbn_size(tree).ToInt(), + rbn_left_mhs(tree), + rbn_right_mhs(tree)); + else + fprintf(stderr, + "(%" PRIu64 ",%" PRIu64 ",mhs:(%" PRIu64 ",%" PRIu64 + "))(%c) is %" PRIu64 "'s %s\n", + rbn_offset(tree).ToInt(), + rbn_size(tree).ToInt(), + rbn_left_mhs(tree), + rbn_right_mhs(tree), + rbn_is_red(tree) ? 'R' : 'B', + pair._offset.ToInt(), + dir == EDirection::RIGHT ? "right child" : "left child"); + + Dump(tree->_left, tree->_hole, EDirection::LEFT); + Dump(tree->_right, tree->_hole, EDirection::RIGHT); + } + } + + uint64_t Tree::EffectiveSize(Node *node) { + OUUInt64 offset = rbn_offset(node); + OUUInt64 size = rbn_size(node); + OUUInt64 end = offset + size; + OUUInt64 aligned_offset(align(offset.ToInt(), _align)); + if (aligned_offset > end) { + return 0; + } + return (end - aligned_offset).ToInt(); + } + + void Tree::Dump() { + if (_root != NULL) + Dump(_root, _root->_hole, (EDirection)0); + } + + static void vis_bal_f(void *extra, Node *node, uint64_t depth) { + uint64_t **p = (uint64_t **)extra; + uint64_t min = *p[0]; + uint64_t max = *p[1]; + if (node->_left) { + Node *left = node->_left; + invariant(node == left->_parent); + } + + if (node->_right) { + Node *right = node->_right; + invariant(node == right->_parent); + } + + if (!node->_left || !node->_right) { + if (min > depth) { + *p[0] = depth; + } else if (max < depth) { + *p[1] = depth; + } + } + } + + void Tree::ValidateBalance() { + uint64_t min_depth = 0xffffffffffffffff; + uint64_t max_depth = 0; + if (!_root) { + return; + } + uint64_t *p[2] = {&min_depth, &max_depth}; + InOrderVisitor(vis_bal_f, (void *)p); + invariant((min_depth + 1) * 2 >= max_depth + 1); + } + + static void vis_cmp_f(void *extra, Node *node, uint64_t UU(depth)) { + Node::BlockPair **p = (Node::BlockPair **)extra; + + invariant_notnull(*p); + invariant((*p)->_offset == node->_hole._offset); + + *p = *p + 1; + } + + // validate the input pairs matches with sorted pairs + void Tree::ValidateInOrder(Node::BlockPair *pairs) { + InOrderVisitor(vis_cmp_f, &pairs); + } + + uint64_t Tree::ValidateMhs(Node *node) { + if (!node) + return 0; + else { + uint64_t mhs_left = ValidateMhs(node->_left); + uint64_t mhs_right = ValidateMhs(node->_right); + if (mhs_left != rbn_left_mhs(node)) { + printf("assert failure: mhs_left = %" PRIu64 "\n", mhs_left); + Dump(node, node->_hole, (EDirection)0); + } + invariant(mhs_left == rbn_left_mhs(node)); + + if (mhs_right != rbn_right_mhs(node)) { + printf("assert failure: mhs_right = %" PRIu64 "\n", mhs_right); + Dump(node, node->_hole, (EDirection)0); + } + invariant(mhs_right == rbn_right_mhs(node)); + return std::max(EffectiveSize(node), std::max(mhs_left, mhs_right)); + } + } + + void Tree::ValidateMhs() { + if (!_root) + return; + uint64_t mhs_left = ValidateMhs(_root->_left); + uint64_t mhs_right = ValidateMhs(_root->_right); + invariant(mhs_left == rbn_left_mhs(_root)); + invariant(mhs_right == rbn_right_mhs(_root)); + } + +} // namespace MhsRbTree diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h new file mode 100644 index 0000000000000..eb8c953b08c13 --- /dev/null +++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h @@ -0,0 +1,355 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#pragma once + +#include + +#include "portability/toku_pthread.h" +#include "portability/toku_stdint.h" +#include "portability/toku_stdlib.h" + +// RBTree(Red-black tree) with max hole sizes for subtrees. + +// This is a tentative data struct to improve the block allocation time +// complexity from the linear time to the log time. Please be noted this DS only +// supports first-fit for now. It is actually easier to do it with +// best-fit.(just +// sort by size). + +// RBTree is a classic data struct with O(log(n)) for insertion, deletion and +// search. Many years have seen its efficiency. + +// a *hole* is the representation of an available BlockPair for allocation. +// defined as (start_address,size) or (offset, size) interchangably. + +// each node has a *label* to indicate a pair of the max hole sizes for its +// subtree. + +// We are implementing a RBTree with max hole sizes for subtree. It is a red +// black tree that is sorted by the start_address but also labeld with the max +// hole sizes of the subtrees. + +// [(6,3)] -> [(offset, size)], the hole +// [{2,5}] -> [{mhs_of_left, mhs_of_right}], the label +/* / \ */ +// [(0, 1)] [(10, 5)] +// [{0, 2}] [{0, 0}] +/* \ */ +// [(3, 2)] +// [{0, 0}] +// request of allocation size=2 goes from root to [(3,2)]. + +// above example shows a simplified RBTree_max_holes. +// it is easier to tell the search time is O(log(n)) as we can make a decision +// on each descent until we get to the target. + +// the only question is if we can keep the maintenance cost low -- and i think +// it is not a problem becoz an insertion/deletion is only going to update the +// max_hole_sizes of the nodes along the path from the root to the node to be +// deleted/inserted. The path can be cached and search is anyway O(log(n)). + +// unlike the typical rbtree, Tree has to handle the inserts and deletes +// with more care: an allocation that triggers the delete might leave some +// unused space which we can simply update the start_addr and size without +// worrying overlapping. An free might not only mean the insertion but also +// *merging* with the adjacent holes. + +namespace MhsRbTree { + +#define offset_t uint64_t + enum class EColor { RED, BLACK }; + enum class EDirection { NONE = 0, LEFT, RIGHT }; + + // I am a bit tired of fixing overflow/underflow, just quickly craft some + // int + // class that has an infinity-like max value and prevents overflow and + // underflow. If you got a file offset larger than MHS_MAX_VAL, it is not + // a problem here. :-/ - JYM + class OUUInt64 { + public: + static const uint64_t MHS_MAX_VAL = 0xffffffffffffffff; + OUUInt64() : _value(0) {} + OUUInt64(uint64_t s) : _value(s) {} + OUUInt64(const OUUInt64& o) : _value(o._value) {} + bool operator<(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value < r.ToInt(); + } + bool operator>(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value > r.ToInt(); + } + bool operator<=(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value <= r.ToInt(); + } + bool operator>=(const OUUInt64 &r) const { + invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL)); + return _value >= r.ToInt(); + } + OUUInt64 operator+(const OUUInt64 &r) const { + if (_value == MHS_MAX_VAL || r.ToInt() == MHS_MAX_VAL) { + OUUInt64 tmp(MHS_MAX_VAL); + return tmp; + } else { + // detecting overflow + invariant((MHS_MAX_VAL - _value) >= r.ToInt()); + uint64_t plus = _value + r.ToInt(); + OUUInt64 tmp(plus); + return tmp; + } + } + OUUInt64 operator-(const OUUInt64 &r) const { + invariant(r.ToInt() != MHS_MAX_VAL); + if (_value == MHS_MAX_VAL) { + return *this; + } else { + invariant(_value >= r.ToInt()); + uint64_t minus = _value - r.ToInt(); + OUUInt64 tmp(minus); + return tmp; + } + } + OUUInt64 operator-=(const OUUInt64 &r) { + if (_value != MHS_MAX_VAL) { + invariant(r.ToInt() != MHS_MAX_VAL); + invariant(_value >= r.ToInt()); + _value -= r.ToInt(); + } + return *this; + } + OUUInt64 operator+=(const OUUInt64 &r) { + if (_value != MHS_MAX_VAL) { + if (r.ToInt() == MHS_MAX_VAL) { + _value = MHS_MAX_VAL; + } else { + invariant((MHS_MAX_VAL - _value) >= r.ToInt()); + this->_value += r.ToInt(); + } + } + return *this; + } + bool operator==(const OUUInt64 &r) const { + return _value == r.ToInt(); + } + bool operator!=(const OUUInt64 &r) const { + return _value != r.ToInt(); + } + OUUInt64 operator=(const OUUInt64 &r) { + _value = r.ToInt(); + return *this; + } + uint64_t ToInt() const { return _value; } + + private: + uint64_t _value; + }; + + class Node { + public: + class BlockPair { + public: + OUUInt64 _offset; + OUUInt64 _size; + + BlockPair() : _offset(0), _size(0) {} + BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {} + BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {} + BlockPair(const BlockPair &o) + : _offset(o._offset), _size(o._size) {} + + int operator<(const BlockPair &rhs) const { + return _offset < rhs._offset; + } + int operator<(const uint64_t &o) const { return _offset < o; } + }; + + struct Pair { + uint64_t _left; + uint64_t _right; + Pair(uint64_t l, uint64_t r) : _left(l), _right(r) {} + }; + + EColor _color; + BlockPair _hole; + Pair _label; + Node *_left; + Node *_right; + Node *_parent; + + Node(EColor c, + Node::BlockPair h, + Pair lb, + Node *l, + Node *r, + Node *p) + : _color(c), + _hole(h), + _label(lb), + _left(l), + _right(r), + _parent(p) {} + }; + + class Tree { + private: + Node *_root; + uint64_t _align; + + public: + Tree(); + Tree(uint64_t); + ~Tree(); + + void PreOrder(); + void InOrder(); + void PostOrder(); + // immutable operations + Node *SearchByOffset(uint64_t addr); + Node *SearchFirstFitBySize(uint64_t size); + + Node *MinNode(); + Node *MaxNode(); + + Node *Successor(Node *); + Node *Predecessor(Node *); + + // mapped from tree_allocator::free_block + int Insert(Node::BlockPair pair); + // mapped from tree_allocator::alloc_block + uint64_t Remove(size_t size); + // mapped from tree_allocator::alloc_block_after + + void RawRemove(uint64_t offset); + void Destroy(); + // print the tree + void Dump(); + // validation + // balance + void ValidateBalance(); + void ValidateInOrder(Node::BlockPair *); + void InOrderVisitor(void (*f)(void *, Node *, uint64_t), void *); + void ValidateMhs(); + + private: + void PreOrder(Node *node) const; + void InOrder(Node *node) const; + void PostOrder(Node *node) const; + Node *SearchByOffset(Node *node, offset_t addr) const; + Node *SearchFirstFitBySize(Node *node, size_t size) const; + + Node *MinNode(Node *node); + Node *MaxNode(Node *node); + + // rotations to fix up. we will have to update the labels too. + void LeftRotate(Node *&root, Node *x); + void RightRotate(Node *&root, Node *y); + + int Insert(Node *&root, Node::BlockPair pair); + int InsertFixup(Node *&root, Node *node); + + void RawRemove(Node *&root, Node *node); + uint64_t Remove(Node *&root, Node *node, size_t size); + void RawRemoveFixup(Node *&root, Node *node, Node *parent); + + void Destroy(Node *&tree); + void Dump(Node *tree, Node::BlockPair pair, EDirection dir); + void RecalculateMhs(Node *node); + void IsNewNodeMergable(Node *, Node *, Node::BlockPair, bool *, bool *); + void AbsorbNewNode(Node *, Node *, Node::BlockPair, bool, bool, bool); + Node *SearchFirstFitBySizeHelper(Node *x, uint64_t size); + + Node *SuccessorHelper(Node *y, Node *x); + + Node *PredecessorHelper(Node *y, Node *x); + + void InOrderVisitor(Node *, + void (*f)(void *, Node *, uint64_t), + void *, + uint64_t); + uint64_t ValidateMhs(Node *); + + uint64_t EffectiveSize(Node *); +// mixed with some macros..... +#define rbn_parent(r) ((r)->_parent) +#define rbn_color(r) ((r)->_color) +#define rbn_is_red(r) ((r)->_color == EColor::RED) +#define rbn_is_black(r) ((r)->_color == EColor::BLACK) +#define rbn_set_black(r) \ + do { \ + (r)->_color = EColor::BLACK; \ + } while (0) +#define rbn_set_red(r) \ + do { \ + (r)->_color = EColor::RED; \ + } while (0) +#define rbn_set_parent(r, p) \ + do { \ + (r)->_parent = (p); \ + } while (0) +#define rbn_set_color(r, c) \ + do { \ + (r)->_color = (c); \ + } while (0) +#define rbn_set_offset(r) \ + do { \ + (r)->_hole._offset = (c); \ + } while (0) +#define rbn_set_size(r, c) \ + do { \ + (r)->_hole._size = (c); \ + } while (0) +#define rbn_set_left_mhs(r, c) \ + do { \ + (r)->_label._left = (c); \ + } while (0) +#define rbn_set_right_mhs(r, c) \ + do { \ + (r)->_label._right = (c); \ + } while (0) +#define rbn_size(r) ((r)->_hole._size) +#define rbn_offset(r) ((r)->_hole._offset) +#define rbn_key(r) ((r)->_hole._offset) +#define rbn_left_mhs(r) ((r)->_label._left) +#define rbn_right_mhs(r) ((r)->_label._right) +#define mhs_of_subtree(y) \ + (std::max(std::max(rbn_left_mhs(y), rbn_right_mhs(y)), EffectiveSize(y))) + }; + +} // namespace MhsRbTree diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc deleted file mode 100644 index 3670ef81cc2f7..0000000000000 --- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc +++ /dev/null @@ -1,126 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/*====== -This file is part of PerconaFT. - - -Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License, version 2, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with PerconaFT. If not, see . - ----------------------------------------- - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License, version 3, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with PerconaFT. If not, see . -======= */ - -#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." - -#include "ft/tests/test.h" - -#include "ft/serialize/block_allocator_strategy.h" - -static const uint64_t alignment = 4096; - -static void test_first_vs_best_fit(void) { - struct block_allocator::blockpair pairs[] = { - block_allocator::blockpair(1 * alignment, 6 * alignment), - // hole between 7x align -> 8x align - block_allocator::blockpair(8 * alignment, 4 * alignment), - // hole between 12x align -> 16x align - block_allocator::blockpair(16 * alignment, 1 * alignment), - block_allocator::blockpair(17 * alignment, 2 * alignment), - // hole between 19 align -> 21x align - block_allocator::blockpair(21 * alignment, 2 * alignment), - }; - const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); - - block_allocator::blockpair *bp; - - // first fit - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment); - assert(bp == &pairs[0]); - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment); - assert(bp == &pairs[0]); - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment); - assert(bp == &pairs[1]); - bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment); - assert(bp == nullptr); - - // best fit - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment); - assert(bp == &pairs[0]); - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, alignment); - assert(bp == &pairs[3]); - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment); - assert(bp == &pairs[1]); - bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment); - assert(bp == nullptr); -} - -static void test_padded_fit(void) { - struct block_allocator::blockpair pairs[] = { - block_allocator::blockpair(1 * alignment, 1 * alignment), - // 4096 byte hole after bp[0] - block_allocator::blockpair(3 * alignment, 1 * alignment), - // 8192 byte hole after bp[1] - block_allocator::blockpair(6 * alignment, 1 * alignment), - // 16384 byte hole after bp[2] - block_allocator::blockpair(11 * alignment, 1 * alignment), - // 32768 byte hole after bp[3] - block_allocator::blockpair(17 * alignment, 1 * alignment), - // 116kb hole after bp[4] - block_allocator::blockpair(113 * alignment, 1 * alignment), - // 256kb hole after bp[5] - block_allocator::blockpair(371 * alignment, 1 * alignment), - }; - const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]); - - block_allocator::blockpair *bp; - - // padding for a 100 byte allocation will be < than standard alignment, - // so it should fit in the first 4096 byte hole. - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment); - assert(bp == &pairs[0]); - - // Even padded, a 12kb alloc will fit in a 16kb hole - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment); - assert(bp == &pairs[2]); - - // would normally fit in the 116kb hole but the padding will bring it over - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment); - assert(bp == &pairs[5]); - - bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment); - assert(bp == &pairs[5]); -} - -int test_main(int argc, const char *argv[]) { - (void) argc; - (void) argv; - - test_first_vs_best_fit(); - test_padded_fit(); - - return 0; -} diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc index d80ee83cbc95f..3eff52b915d66 100644 --- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc @@ -38,253 +38,243 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "test.h" -static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) { - ba->validate(); +static void ba_alloc(BlockAllocator *ba, uint64_t size, uint64_t *answer) { + ba->Validate(); uint64_t actual_answer; - const uint64_t heat = random() % 2; - ba->alloc_block(512 * size, heat, &actual_answer); - ba->validate(); + ba->AllocBlock(512 * size, &actual_answer); + ba->Validate(); - assert(actual_answer%512==0); - *answer = actual_answer/512; + invariant(actual_answer % 512 == 0); + *answer = actual_answer / 512; } -static void ba_free(block_allocator *ba, uint64_t offset) { - ba->validate(); - ba->free_block(offset * 512); - ba->validate(); +static void ba_free(BlockAllocator *ba, uint64_t offset, uint64_t size) { + ba->Validate(); + ba->FreeBlock(offset * 512, 512 * size); + ba->Validate(); } -static void ba_check_l(block_allocator *ba, uint64_t blocknum_in_layout_order, - uint64_t expected_offset, uint64_t expected_size) { +static void ba_check_l(BlockAllocator *ba, + uint64_t blocknum_in_layout_order, + uint64_t expected_offset, + uint64_t expected_size) { uint64_t actual_offset, actual_size; - int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); - assert(r==0); - assert(expected_offset*512 == actual_offset); - assert(expected_size *512 == actual_size); + int r = ba->NthBlockInLayoutOrder( + blocknum_in_layout_order, &actual_offset, &actual_size); + invariant(r == 0); + invariant(expected_offset * 512 == actual_offset); + invariant(expected_size * 512 == actual_size); } -static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order) { +static void ba_check_none(BlockAllocator *ba, + uint64_t blocknum_in_layout_order) { uint64_t actual_offset, actual_size; - int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size); - assert(r==-1); + int r = ba->NthBlockInLayoutOrder( + blocknum_in_layout_order, &actual_offset, &actual_size); + invariant(r == -1); } - // Simple block allocator test -static void test_ba0(block_allocator::allocation_strategy strategy) { - block_allocator allocator; - block_allocator *ba = &allocator; - ba->create(100*512, 1*512); - ba->set_strategy(strategy); - assert(ba->allocated_limit()==100*512); +static void test_ba0() { + BlockAllocator allocator; + BlockAllocator *ba = &allocator; + ba->Create(100 * 512, 1 * 512); + invariant(ba->AllocatedLimit() == 100 * 512); uint64_t b2, b3, b4, b5, b6, b7; - ba_alloc(ba, 100, &b2); - ba_alloc(ba, 100, &b3); - ba_alloc(ba, 100, &b4); - ba_alloc(ba, 100, &b5); - ba_alloc(ba, 100, &b6); - ba_alloc(ba, 100, &b7); - ba_free(ba, b2); - ba_alloc(ba, 100, &b2); - ba_free(ba, b4); - ba_free(ba, b6); + ba_alloc(ba, 100, &b2); + ba_alloc(ba, 100, &b3); + ba_alloc(ba, 100, &b4); + ba_alloc(ba, 100, &b5); + ba_alloc(ba, 100, &b6); + ba_alloc(ba, 100, &b7); + ba_free(ba, b2, 100); + ba_alloc(ba, 100, &b2); + ba_free(ba, b4, 100); + ba_free(ba, b6, 100); uint64_t b8, b9; - ba_alloc(ba, 100, &b4); - ba_free(ba, b2); - ba_alloc(ba, 100, &b6); - ba_alloc(ba, 100, &b8); - ba_alloc(ba, 100, &b9); - ba_free(ba, b6); - ba_free(ba, b7); - ba_free(ba, b8); - ba_alloc(ba, 100, &b6); - ba_alloc(ba, 100, &b7); - ba_free(ba, b4); - ba_alloc(ba, 100, &b4); - - ba->destroy(); + ba_alloc(ba, 100, &b4); + ba_free(ba, b2, 100); + ba_alloc(ba, 100, &b6); + ba_alloc(ba, 100, &b8); + ba_alloc(ba, 100, &b9); + ba_free(ba, b6, 100); + ba_free(ba, b7, 100); + ba_free(ba, b8, 100); + ba_alloc(ba, 100, &b6); + ba_alloc(ba, 100, &b7); + ba_free(ba, b4, 100); + ba_alloc(ba, 100, &b4); + + ba->Destroy(); } // Manually to get coverage of all the code in the block allocator. -static void -test_ba1(block_allocator::allocation_strategy strategy, int n_initial) { - block_allocator allocator; - block_allocator *ba = &allocator; - ba->create(0*512, 1*512); - ba->set_strategy(strategy); - - int n_blocks=0; +static void test_ba1(int n_initial) { + BlockAllocator allocator; + BlockAllocator *ba = &allocator; + ba->Create(0 * 512, 1 * 512); + + int n_blocks = 0; uint64_t blocks[1000]; for (int i = 0; i < 1000; i++) { - if (i < n_initial || random() % 2 == 0) { - if (n_blocks < 1000) { - ba_alloc(ba, 1, &blocks[n_blocks]); - //printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]); - n_blocks++; - } - } else { - if (n_blocks > 0) { - int blocknum = random()%n_blocks; - //printf("F[%d]%ld\n", blocknum, blocks[blocknum]); - ba_free(ba, blocks[blocknum]); - blocks[blocknum]=blocks[n_blocks-1]; - n_blocks--; - } - } + if (i < n_initial || random() % 2 == 0) { + if (n_blocks < 1000) { + ba_alloc(ba, 1, &blocks[n_blocks]); + // printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]); + n_blocks++; + } + } else { + if (n_blocks > 0) { + int blocknum = random() % n_blocks; + // printf("F[%d]=%ld\n", blocknum, blocks[blocknum]); + ba_free(ba, blocks[blocknum], 1); + blocks[blocknum] = blocks[n_blocks - 1]; + n_blocks--; + } + } } - - ba->destroy(); + + ba->Destroy(); } - + // Check to see if it is first fit or best fit. -static void -test_ba2 (void) -{ - block_allocator allocator; - block_allocator *ba = &allocator; +static void test_ba2(void) { + BlockAllocator allocator; + BlockAllocator *ba = &allocator; uint64_t b[6]; enum { BSIZE = 1024 }; - ba->create(100*512, BSIZE*512); - ba->set_strategy(block_allocator::BA_STRATEGY_FIRST_FIT); - assert(ba->allocated_limit()==100*512); - - ba_check_l (ba, 0, 0, 100); - ba_check_none (ba, 1); - - ba_alloc (ba, 100, &b[0]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_none (ba, 2); - - ba_alloc (ba, BSIZE + 100, &b[1]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_none (ba, 3); - - ba_alloc (ba, 100, &b[2]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_none (ba, 4); - - ba_alloc (ba, 100, &b[3]); - ba_alloc (ba, 100, &b[4]); - ba_alloc (ba, 100, &b[5]); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_none (ba, 7); - - ba_free (ba, 4*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 5*BSIZE, 100); - ba_check_l (ba, 4, 6*BSIZE, 100); - ba_check_l (ba, 5, 7*BSIZE, 100); - ba_check_none (ba, 6); + ba->Create(100 * 512, BSIZE * 512); + invariant(ba->AllocatedLimit() == 100 * 512); + + ba_check_l(ba, 0, 0, 100); + ba_check_none(ba, 1); + + ba_alloc(ba, 100, &b[0]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_none(ba, 2); + + ba_alloc(ba, BSIZE + 100, &b[1]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_none(ba, 3); + + ba_alloc(ba, 100, &b[2]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_none(ba, 4); + + ba_alloc(ba, 100, &b[3]); + ba_alloc(ba, 100, &b[4]); + ba_alloc(ba, 100, &b[5]); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_none(ba, 7); + + ba_free(ba, 4 * BSIZE, 100); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 5 * BSIZE, 100); + ba_check_l(ba, 4, 6 * BSIZE, 100); + ba_check_l(ba, 5, 7 * BSIZE, 100); + ba_check_none(ba, 6); uint64_t b2; ba_alloc(ba, 100, &b2); - assert(b2==4*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_none (ba, 7); - - ba_free (ba, BSIZE); - ba_free (ba, 5*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 2, 4*BSIZE, 100); - ba_check_l (ba, 3, 6*BSIZE, 100); - ba_check_l (ba, 4, 7*BSIZE, 100); - ba_check_none (ba, 5); - - // This alloc will allocate the first block after the reserve space in the case of first fit. + invariant(b2 == 4 * BSIZE); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_none(ba, 7); + + ba_free(ba, BSIZE, 100); + ba_free(ba, 5 * BSIZE, 100); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 2, 4 * BSIZE, 100); + ba_check_l(ba, 3, 6 * BSIZE, 100); + ba_check_l(ba, 4, 7 * BSIZE, 100); + ba_check_none(ba, 5); + + // This alloc will allocate the first block after the reserve space in the + // case of first fit. uint64_t b3; ba_alloc(ba, 100, &b3); - assert(b3== BSIZE); // First fit. + invariant(b3 == BSIZE); // First fit. // if (b3==5*BSIZE) then it is next fit. // Now 5*BSIZE is free uint64_t b5; ba_alloc(ba, 100, &b5); - assert(b5==5*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_none (ba, 7); + invariant(b5 == 5 * BSIZE); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_none(ba, 7); // Now all blocks are busy uint64_t b6, b7, b8; ba_alloc(ba, 100, &b6); ba_alloc(ba, 100, &b7); ba_alloc(ba, 100, &b8); - assert(b6==8*BSIZE); - assert(b7==9*BSIZE); - assert(b8==10*BSIZE); - ba_check_l (ba, 0, 0, 100); - ba_check_l (ba, 1, BSIZE, 100); - ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100); - ba_check_l (ba, 3, 4*BSIZE, 100); - ba_check_l (ba, 4, 5*BSIZE, 100); - ba_check_l (ba, 5, 6*BSIZE, 100); - ba_check_l (ba, 6, 7*BSIZE, 100); - ba_check_l (ba, 7, 8*BSIZE, 100); - ba_check_l (ba, 8, 9*BSIZE, 100); - ba_check_l (ba, 9, 10*BSIZE, 100); - ba_check_none (ba, 10); - - ba_free(ba, 9*BSIZE); - ba_free(ba, 7*BSIZE); + invariant(b6 == 8 * BSIZE); + invariant(b7 == 9 * BSIZE); + invariant(b8 == 10 * BSIZE); + ba_check_l(ba, 0, 0, 100); + ba_check_l(ba, 1, BSIZE, 100); + ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100); + ba_check_l(ba, 3, 4 * BSIZE, 100); + ba_check_l(ba, 4, 5 * BSIZE, 100); + ba_check_l(ba, 5, 6 * BSIZE, 100); + ba_check_l(ba, 6, 7 * BSIZE, 100); + ba_check_l(ba, 7, 8 * BSIZE, 100); + ba_check_l(ba, 8, 9 * BSIZE, 100); + ba_check_l(ba, 9, 10 * BSIZE, 100); + ba_check_none(ba, 10); + + ba_free(ba, 9 * BSIZE, 100); + ba_free(ba, 7 * BSIZE, 100); uint64_t b9; ba_alloc(ba, 100, &b9); - assert(b9==7*BSIZE); + invariant(b9 == 7 * BSIZE); - ba_free(ba, 5*BSIZE); - ba_free(ba, 2*BSIZE); + ba_free(ba, 5 * BSIZE, 100); + ba_free(ba, 2 * BSIZE, BSIZE + 100); uint64_t b10, b11; ba_alloc(ba, 100, &b10); - assert(b10==2*BSIZE); + invariant(b10 == 2 * BSIZE); ba_alloc(ba, 100, &b11); - assert(b11==3*BSIZE); + invariant(b11 == 3 * BSIZE); ba_alloc(ba, 100, &b11); - assert(b11==5*BSIZE); + invariant(b11 == 5 * BSIZE); - ba->destroy(); + ba->Destroy(); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { - enum block_allocator::allocation_strategy strategies[] = { - block_allocator::BA_STRATEGY_FIRST_FIT, - block_allocator::BA_STRATEGY_BEST_FIT, - block_allocator::BA_STRATEGY_PADDED_FIT, - block_allocator::BA_STRATEGY_HEAT_ZONE, - }; - for (size_t i = 0; i < sizeof(strategies) / sizeof(strategies[0]); i++) { - test_ba0(strategies[i]); - test_ba1(strategies[i], 0); - test_ba1(strategies[i], 10); - test_ba1(strategies[i], 20); - } +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { + test_ba0(); + test_ba1(0); + test_ba1(10); + test_ba1(20); test_ba2(); return 0; } diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc index a7c48ef709af6..ee68ab3ef0bb9 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc @@ -45,7 +45,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // #5978 is fixed. Here is what we do. We have four pairs with // blocknums and fullhashes of 1,2,3,4. The cachetable has only // two bucket mutexes, so 1 and 3 share a pair mutex, as do 2 and 4. -// We pin all four with expensive write locks. Then, on backgroud threads, +// We pin all four with expensive write locks. Then, on background threads, // we call get_and_pin_nonblocking on 3, where the unlockers unpins 2, and // we call get_and_pin_nonblocking on 4, where the unlockers unpins 1. Run this // enough times, and we should see a deadlock before the fix, and no deadlock diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc index be4bae898bebb..51cf70c3e76a6 100644 --- a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc +++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc @@ -77,7 +77,7 @@ flush ( // // test the following things for simple cloning: -// - verifies that after teh checkpoint ends, the PAIR is properly +// - verifies that after the checkpoint ends, the PAIR is properly // dirty or clean based on the second unpin // static void diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc index cb03a23e0fc65..7abd2267a7ea6 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc @@ -38,69 +38,72 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "test.h" -static int -int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) { - int64_t x = *(int64_t *) a->data; - int64_t y = *(int64_t *) b->data; - - if (xy) return 1; +static int int64_key_cmp(DB *db UU(), const DBT *a, const DBT *b) { + int64_t x = *(int64_t *)a->data; + int64_t y = *(int64_t *)b->data; + + if (x < y) + return -1; + if (x > y) + return 1; return 0; } -static void -test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { +static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; PAIR_ATTR attr; - + // first test that prefetching everything should work - memset(&cursor->range_lock_left_key, 0 , sizeof(DBT)); - memset(&cursor->range_lock_right_key, 0 , sizeof(DBT)); + memset(&cursor->range_lock_left_key, 0, sizeof(DBT)); + memset(&cursor->range_lock_right_key, 0, sizeof(DBT)); cursor->left_is_neg_infty = true; cursor->right_is_pos_infty = true; cursor->disable_prefetching = false; - + ftnode_fetch_extra bfe; // quick test to see that we have the right behavior when we set // disable_prefetching to true cursor->disable_prefetching = true; - bfe.create_for_prefetch( ft_h, cursor); + bfe.create_for_prefetch(ft_h, cursor); FTNODE_DISK_DATA ndd = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); // now enable prefetching again cursor->disable_prefetching = false; - - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_COMPRESSED); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_COMPRESSED); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -108,21 +111,23 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { uint64_t left_key = 150; toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t)); cursor->left_is_neg_infty = false; - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -130,63 +135,69 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { uint64_t right_key = 151; toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t)); cursor->right_is_pos_infty = false; - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_ON_DISK); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); left_key = 100000; right_key = 100000; - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); bfe.destroy(); toku_free(ndd); toku_ftnode_free(&dn); left_key = 100; right_key = 100; - bfe.create_for_prefetch( ft_h, cursor); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_COMPRESSED); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + bfe.create_for_prefetch(ft_h, cursor); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_COMPRESSED); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_ON_DISK); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); bfe.destroy(); toku_ftnode_free(&dn); toku_free(ndd); @@ -194,20 +205,19 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) { toku_free(cursor); } -static void -test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { +static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { int r; FT_CURSOR XMALLOC(cursor); FTNODE dn = NULL; FTNODE_DISK_DATA ndd = NULL; PAIR_ATTR attr; - + // first test that prefetching everything should work - memset(&cursor->range_lock_left_key, 0 , sizeof(DBT)); - memset(&cursor->range_lock_right_key, 0 , sizeof(DBT)); + memset(&cursor->range_lock_left_key, 0, sizeof(DBT)); + memset(&cursor->range_lock_right_key, 0, sizeof(DBT)); cursor->left_is_neg_infty = true; cursor->right_is_pos_infty = true; - + uint64_t left_key = 150; uint64_t right_key = 151; DBT left, right; @@ -216,101 +226,106 @@ test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) { ftnode_fetch_extra bfe; bfe.create_for_subset_read( - ft_h, - NULL, - &left, - &right, - false, - false, - false, - false - ); - + ft_h, NULL, &left, &right, false, false, false, false); + // fake the childnum to read // set disable_prefetching ON bfe.child_to_read = 2; bfe.disable_prefetching = true; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); - // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + // need to call this twice because we had a subset read before, that touched + // the clock + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_ON_DISK); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_ON_DISK); + invariant(BP_STATE(dn, 2) == PT_AVAIL); toku_ftnode_free(&dn); toku_free(ndd); // fake the childnum to read bfe.child_to_read = 2; bfe.disable_prefetching = false; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); - // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_AVAIL); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_COMPRESSED); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + // need to call this twice because we had a subset read before, that touched + // the clock + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_AVAIL); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_COMPRESSED); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_ON_DISK); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_AVAIL); + invariant(BP_STATE(dn, 0) == PT_ON_DISK); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_AVAIL); toku_ftnode_free(&dn); toku_free(ndd); // fake the childnum to read bfe.child_to_read = 0; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); - assert(r==0); - assert(dn->n_children == 3); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); - // need to call this twice because we had a subset read before, that touched the clock - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_ON_DISK); - toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(dn,0) == PT_COMPRESSED); - assert(BP_STATE(dn,1) == PT_COMPRESSED); - assert(BP_STATE(dn,2) == PT_ON_DISK); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe); + invariant(r == 0); + invariant(dn->n_children == 3); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + // need to call this twice because we had a subset read before, that touched + // the clock + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); + toku_ftnode_pe_callback( + dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + invariant(BP_STATE(dn, 0) == PT_COMPRESSED); + invariant(BP_STATE(dn, 1) == PT_COMPRESSED); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr); - assert(BP_STATE(dn,0) == PT_AVAIL); - assert(BP_STATE(dn,1) == PT_AVAIL); - assert(BP_STATE(dn,2) == PT_ON_DISK); + invariant(BP_STATE(dn, 0) == PT_AVAIL); + invariant(BP_STATE(dn, 1) == PT_AVAIL); + invariant(BP_STATE(dn, 2) == PT_ON_DISK); toku_ftnode_free(&dn); toku_free(ndd); toku_free(cursor); } - -static void -test_prefetching(void) { +static void test_prefetching(void) { // struct ft_handle source_ft; struct ftnode sn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -327,7 +342,7 @@ test_prefetching(void) { uint64_t key1 = 100; uint64_t key2 = 200; - + MALLOC_N(sn.n_children, sn.bp); DBT pivotkeys[2]; toku_fill_dbt(&pivotkeys[0], &key1, sizeof(key1)); @@ -336,13 +351,13 @@ test_prefetching(void) { BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; BP_BLOCKNUM(&sn, 2).b = 40; - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; - BP_STATE(&sn,2) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; + BP_STATE(&sn, 2) = PT_AVAIL; set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); set_BNC(&sn, 2, toku_create_empty_nl()); - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; @@ -352,7 +367,7 @@ test_prefetching(void) { CKERR(r); // data in the buffers does not matter in this test - //Cleanup: + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); toku_xids_destroy(&xids_234); @@ -363,41 +378,48 @@ test_prefetching(void) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(int64_key_cmp, nullptr); ft->ft = ft_h; ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); - test_prefetch_read(fd, ft, ft_h); + test_prefetch_read(fd, ft, ft_h); test_subset_read(fd, ft, ft_h); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); @@ -405,11 +427,12 @@ test_prefetching(void) { toku_free(ft); toku_free(ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { test_prefetching(); return 0; diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc index ceef3772e2a8b..26a3dae673cd9 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc @@ -40,38 +40,28 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ft/cursor.h" -enum ftnode_verify_type { - read_all=1, - read_compressed, - read_none -}; +enum ftnode_verify_type { read_all = 1, read_compressed, read_none }; #ifndef MIN #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #endif -static int -string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) -{ +static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) { char *CAST_FROM_VOIDP(s, a->data); char *CAST_FROM_VOIDP(t, b->data); return strcmp(s, t); } -static void -le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char *val, int vallen) -{ +static void le_add_to_bn(bn_data *bn, + uint32_t idx, + const char *key, + int keylen, + const char *val, + int vallen) { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen); void *maybe_free = nullptr; - bn->get_space_for_insert( - idx, - key, - keylen, - size_needed, - &r, - &maybe_free - ); + bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free); if (maybe_free) { toku_free(maybe_free); } @@ -81,70 +71,67 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char memcpy(r->u.clean.val, val, vallen); } - -static void -le_malloc(bn_data* bn, uint32_t idx, const char *key, const char *val) -{ +static void le_malloc(bn_data *bn, + uint32_t idx, + const char *key, + const char *val) { int keylen = strlen(key) + 1; int vallen = strlen(val) + 1; le_add_to_bn(bn, idx, key, keylen, val, vallen); } - -static void -test1(int fd, FT ft_h, FTNODE *dn) { +static void test1(int fd, FT ft_h, FTNODE *dn) { int r; ftnode_fetch_extra bfe_all; bfe_all.create_for_full_read(ft_h); FTNODE_DISK_DATA ndd = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_all); bool is_leaf = ((*dn)->height == 0); - assert(r==0); + invariant(r == 0); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // should sweep and NOT get rid of anything PAIR_ATTR attr; - memset(&attr,0,sizeof(attr)); + memset(&attr, 0, sizeof(attr)); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // should sweep and get compress all toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { if (!is_leaf) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); - } - else { - assert(BP_STATE(*dn,i) == PT_ON_DISK); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); + } else { + invariant(BP_STATE(*dn, i) == PT_ON_DISK); } } PAIR_ATTR size; bool req = toku_ftnode_pf_req_callback(*dn, &bfe_all); - assert(req); + invariant(req); toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // should sweep and get compress all toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { if (!is_leaf) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); - } - else { - assert(BP_STATE(*dn,i) == PT_ON_DISK); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); + } else { + invariant(BP_STATE(*dn, i) == PT_ON_DISK); } - } + } req = toku_ftnode_pf_req_callback(*dn, &bfe_all); - assert(req); + invariant(req); toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } (*dn)->dirty = 1; toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); @@ -152,101 +139,102 @@ test1(int fd, FT ft_h, FTNODE *dn) { toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } toku_free(ndd); toku_ftnode_free(dn); } - -static int search_cmp(const struct ft_search& UU(so), const DBT* UU(key)) { +static int search_cmp(const struct ft_search &UU(so), const DBT *UU(key)) { return 0; } -static void -test2(int fd, FT ft_h, FTNODE *dn) { +static void test2(int fd, FT ft_h, FTNODE *dn) { DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); ft_search search; - + ftnode_fetch_extra bfe_subset; bfe_subset.create_for_subset_read( ft_h, - ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), + ft_search_init( + &search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr), &left, &right, true, true, false, - false - ); + false); FTNODE_DISK_DATA ndd = NULL; - int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset); - assert(r==0); + int r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_subset); + invariant(r == 0); bool is_leaf = ((*dn)->height == 0); - // at this point, although both partitions are available, only the + // at this point, although both partitions are available, only the // second basement node should have had its clock // touched - assert(BP_STATE(*dn, 0) == PT_AVAIL); - assert(BP_STATE(*dn, 1) == PT_AVAIL); - assert(BP_SHOULD_EVICT(*dn, 0)); - assert(!BP_SHOULD_EVICT(*dn, 1)); + invariant(BP_STATE(*dn, 0) == PT_AVAIL); + invariant(BP_STATE(*dn, 1) == PT_AVAIL); + invariant(BP_SHOULD_EVICT(*dn, 0)); + invariant(!BP_SHOULD_EVICT(*dn, 1)); PAIR_ATTR attr; - memset(&attr,0,sizeof(attr)); + memset(&attr, 0, sizeof(attr)); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); - assert(BP_STATE(*dn, 1) == PT_AVAIL); - assert(BP_SHOULD_EVICT(*dn, 1)); + invariant(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); + invariant(BP_STATE(*dn, 1) == PT_AVAIL); + invariant(BP_SHOULD_EVICT(*dn, 1)); toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr); - assert(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); + invariant(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED); bool req = toku_ftnode_pf_req_callback(*dn, &bfe_subset); - assert(req); + invariant(req); toku_ftnode_pf_callback(*dn, ndd, &bfe_subset, fd, &attr); - assert(BP_STATE(*dn, 0) == PT_AVAIL); - assert(BP_STATE(*dn, 1) == PT_AVAIL); - assert(BP_SHOULD_EVICT(*dn, 0)); - assert(!BP_SHOULD_EVICT(*dn, 1)); + invariant(BP_STATE(*dn, 0) == PT_AVAIL); + invariant(BP_STATE(*dn, 1) == PT_AVAIL); + invariant(BP_SHOULD_EVICT(*dn, 0)); + invariant(!BP_SHOULD_EVICT(*dn, 1)); toku_free(ndd); toku_ftnode_free(dn); } -static void -test3_leaf(int fd, FT ft_h, FTNODE *dn) { +static void test3_leaf(int fd, FT ft_h, FTNODE *dn) { DBT left, right; DB dummy_db; memset(&dummy_db, 0, sizeof(dummy_db)); memset(&left, 0, sizeof(left)); memset(&right, 0, sizeof(right)); - + ftnode_fetch_extra bfe_min; bfe_min.create_for_min_read(ft_h); FTNODE_DISK_DATA ndd = NULL; - int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min); - assert(r==0); + int r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_min); + invariant(r == 0); // // make sure we have a leaf // - assert((*dn)->height == 0); + invariant((*dn)->height == 0); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn, i) == PT_ON_DISK); + invariant(BP_STATE(*dn, i) == PT_ON_DISK); } toku_ftnode_free(dn); toku_free(ndd); } -static void -test_serialize_nonleaf(void) { +static void test_serialize_nonleaf(void) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -265,11 +253,11 @@ test_serialize_nonleaf(void) { sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; @@ -281,11 +269,38 @@ test_serialize_nonleaf(void) { toku::comparator cmp; cmp.create(string_key_cmp, nullptr); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); - toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); - - //Cleanup: + toku_bnc_insert_msg(BNC(&sn, 0), + "a", + 2, + "aval", + 5, + FT_NONE, + next_dummymsn(), + xids_0, + true, + cmp); + toku_bnc_insert_msg(BNC(&sn, 0), + "b", + 2, + "bval", + 5, + FT_NONE, + next_dummymsn(), + xids_123, + false, + cmp); + toku_bnc_insert_msg(BNC(&sn, 1), + "x", + 2, + "xval", + 5, + FT_NONE, + next_dummymsn(), + xids_234, + true, + cmp); + + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); toku_xids_destroy(&xids_234); @@ -297,35 +312,41 @@ test_serialize_nonleaf(void) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); test1(fd, ft_h, &dn); test2(fd, ft_h, &dn); @@ -333,22 +354,26 @@ test_serialize_nonleaf(void) { toku_destroy_ftnode_internals(&sn); toku_free(ndd); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); ft_h->cmp.destroy(); toku_free(ft_h); toku_free(ft); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf(void) { +static void test_serialize_leaf(void) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -364,8 +389,8 @@ test_serialize_leaf(void) { MALLOC_N(sn.n_children, sn.bp); DBT pivotkey; sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); set_BLB(&sn, 1, toku_create_empty_bn()); le_malloc(BLB_DATA(&sn, 0), 0, "a", "aval"); @@ -378,51 +403,59 @@ test_serialize_leaf(void) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); test1(fd, ft_h, &dn); - test3_leaf(fd, ft_h,&dn); + test3_leaf(fd, ft_h, &dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); toku_free(ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { initialize_dummymsn(); test_serialize_nonleaf(); test_serialize_leaf(); diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc index 9828f49513c79..d50488ae19708 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc @@ -41,27 +41,21 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include #include "test.h" - - #ifndef MIN #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #endif const double USECS_PER_SEC = 1000000.0; -static void -le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int vallen) -{ +static void le_add_to_bn(bn_data *bn, + uint32_t idx, + char *key, + int keylen, + char *val, + int vallen) { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen); void *maybe_free = nullptr; - bn->get_space_for_insert( - idx, - key, - keylen, - size_needed, - &r, - &maybe_free - ); + bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free); if (maybe_free) { toku_free(maybe_free); } @@ -71,20 +65,24 @@ le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int va memcpy(r->u.clean.val, val, vallen); } -static int -long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) -{ +static int long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) { const long *CAST_FROM_VOIDP(x, a->data); const long *CAST_FROM_VOIDP(y, b->data); return (*x > *y) - (*x < *y); } -static void -test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) { +static void test_serialize_leaf(int valsize, + int nelts, + double entropy, + int ser_runs, + int deser_runs) { // struct ft_handle source_ft; struct ftnode *sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -102,7 +100,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de MALLOC_N(sn->n_children, sn->bp); sn->pivotkeys.create_empty(); for (int i = 0; i < sn->n_children; ++i) { - BP_STATE(sn,i) = PT_AVAIL; + BP_STATE(sn, i) = PT_AVAIL; set_BLB(sn, i, toku_create_empty_bn()); } int nperbn = nelts / sn->n_children; @@ -112,24 +110,19 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de k = ck * nperbn + i; char buf[valsize]; int c; - for (c = 0; c < valsize * entropy; ) { - int *p = (int *) &buf[c]; + for (c = 0; c < valsize * entropy;) { + int *p = (int *)&buf[c]; *p = rand(); c += sizeof(*p); } memset(&buf[c], 0, valsize - c); le_add_to_bn( - BLB_DATA(sn,ck), - i, - (char *)&k, - sizeof k, - buf, - sizeof buf - ); + BLB_DATA(sn, ck), i, (char *)&k, sizeof k, buf, sizeof buf); } if (ck < 7) { DBT pivotkey; - sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck); + sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), + ck); } } @@ -139,31 +132,36 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } struct timeval total_start; @@ -176,8 +174,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de gettimeofday(&t[0], NULL); ndd = NULL; sn->dirty = 1; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), sn, &ndd, true, ft->ft, false); + invariant(r == 0); gettimeofday(&t[1], NULL); total_start.tv_sec += t[0].tv_sec; total_start.tv_usec += t[0].tv_usec; @@ -186,12 +185,14 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de toku_free(ndd); } double dt; - dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); + dt = (total_end.tv_sec - total_start.tv_sec) + + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); dt *= 1000; dt /= ser_runs; - printf("serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs); + printf( + "serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs); - //reset + // reset total_start.tv_sec = total_start.tv_usec = 0; total_end.tv_sec = total_end.tv_usec = 0; @@ -200,8 +201,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de bfe.create_for_full_read(ft_h); gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); - assert(r==0); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe); + invariant(r == 0); gettimeofday(&t[1], NULL); total_start.tv_sec += t[0].tv_sec; @@ -212,35 +214,46 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de toku_ftnode_free(&dn); toku_free(ndd2); } - dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); + dt = (total_end.tv_sec - total_start.tv_sec) + + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC); dt *= 1000; dt /= deser_runs; - printf("deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs); - printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (average of %d runs)\n", - tokutime_to_seconds(bfe.io_time)*1000, - tokutime_to_seconds(bfe.decompress_time)*1000, - tokutime_to_seconds(bfe.deserialize_time)*1000, - deser_runs - ); + printf( + "deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs); + printf( + "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf " + "(average of %d runs)\n", + tokutime_to_seconds(bfe.io_time) * 1000, + tokutime_to_seconds(bfe.decompress_time) * 1000, + tokutime_to_seconds(bfe.deserialize_time) * 1000, + deser_runs); toku_ftnode_free(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); toku_free(ft_h); toku_free(ft); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) { +static void test_serialize_nonleaf(int valsize, + int nelts, + double entropy, + int ser_runs, + int deser_runs) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -257,11 +270,11 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int MALLOC_N(sn.n_children, sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_BLOCKNUM(&sn, i).b = 30 + (i*5); - BP_STATE(&sn,i) = PT_AVAIL; + BP_BLOCKNUM(&sn, i).b = 30 + (i * 5); + BP_STATE(&sn, i) = PT_AVAIL; set_BNC(&sn, i, toku_create_empty_nl()); } - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123); @@ -276,14 +289,23 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int k = ck * nperchild + i; char buf[valsize]; int c; - for (c = 0; c < valsize * entropy; ) { - int *p = (int *) &buf[c]; + for (c = 0; c < valsize * entropy;) { + int *p = (int *)&buf[c]; *p = rand(); c += sizeof(*p); } memset(&buf[c], 0, valsize - c); - toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, cmp); + toku_bnc_insert_msg(bnc, + &k, + sizeof k, + buf, + valsize, + FT_NONE, + next_dummymsn(), + xids_123, + true, + cmp); } if (ck < 7) { DBT pivotkey; @@ -291,7 +313,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int } } - //Cleanup: + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); cmp.destroy(); @@ -302,65 +324,78 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(long_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } struct timeval t[2]; gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd = NULL; - r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false); + invariant(r == 0); gettimeofday(&t[1], NULL); double dt; - dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); + dt = (t[1].tv_sec - t[0].tv_sec) + + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); dt *= 1000; - printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs); + printf( + "serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs); ftnode_fetch_extra bfe; bfe.create_for_full_read(ft_h); gettimeofday(&t[0], NULL); FTNODE_DISK_DATA ndd2 = NULL; - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); - assert(r==0); + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe); + invariant(r == 0); gettimeofday(&t[1], NULL); - dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); + dt = (t[1].tv_sec - t[0].tv_sec) + + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); dt *= 1000; - printf("deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs); - printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (IGNORED RUNS=%d)\n", - tokutime_to_seconds(bfe.io_time)*1000, - tokutime_to_seconds(bfe.decompress_time)*1000, - tokutime_to_seconds(bfe.deserialize_time)*1000, - deser_runs - ); + printf( + "deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs); + printf( + "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf " + "(IGNORED RUNS=%d)\n", + tokutime_to_seconds(bfe.io_time) * 1000, + tokutime_to_seconds(bfe.decompress_time) * 1000, + tokutime_to_seconds(bfe.deserialize_time) * 1000, + deser_runs); toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); ft_h->cmp.destroy(); @@ -369,17 +404,21 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int toku_free(ndd); toku_free(ndd2); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { const int DEFAULT_RUNS = 5; long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS; double entropy = 0.3; if (argc != 3 && argc != 5) { - fprintf(stderr, "Usage: %s [ ]\n", argv[0]); + fprintf(stderr, + "Usage: %s [ " + "]\n", + argv[0]); fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS); return 2; } diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc index 332aaa0c170b1..0cddaf1965141 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc @@ -39,26 +39,20 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "test.h" #include "bndata.h" - - #ifndef MIN #define MIN(x, y) (((x) < (y)) ? (x) : (y)) #endif -static size_t -le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const char *val, int valsize) -{ +static size_t le_add_to_bn(bn_data *bn, + uint32_t idx, + const char *key, + int keysize, + const char *val, + int valsize) { LEAFENTRY r = NULL; uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize); void *maybe_free = nullptr; - bn->get_space_for_insert( - idx, - key, - keysize, - size_needed, - &r, - &maybe_free - ); + bn->get_space_for_insert(idx, key, keysize, size_needed, &r, &maybe_free); if (maybe_free) { toku_free(maybe_free); } @@ -70,16 +64,19 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keysize, const cha } class test_key_le_pair { - public: + public: uint32_t keylen; - char* keyp; + char *keyp; LEAFENTRY le; test_key_le_pair() : keylen(), keyp(), le() {} void init(const char *_keyp, const char *_val) { init(_keyp, strlen(_keyp) + 1, _val, strlen(_val) + 1); } - void init(const char * _keyp, uint32_t _keylen, const char*_val, uint32_t _vallen) { + void init(const char *_keyp, + uint32_t _keylen, + const char *_val, + uint32_t _vallen) { keylen = _keylen; CAST_FROM_VOIDP(le, toku_malloc(LE_CLEAN_MEMSIZE(_vallen))); @@ -95,126 +92,144 @@ class test_key_le_pair { } }; -enum ftnode_verify_type { - read_all=1, - read_compressed, - read_none -}; +enum ftnode_verify_type { read_all = 1, read_compressed, read_none }; -static int -string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) -{ +static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) { char *CAST_FROM_VOIDP(s, a->data); char *CAST_FROM_VOIDP(t, b->data); return strcmp(s, t); } -static void -setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) { +static void setup_dn(enum ftnode_verify_type bft, + int fd, + FT ft_h, + FTNODE *dn, + FTNODE_DISK_DATA *ndd) { int r; if (bft == read_all) { ftnode_fetch_extra bfe; bfe.create_for_full_read(ft_h); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); - assert(r==0); - } - else if (bft == read_compressed || bft == read_none) { + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe); + invariant(r == 0); + } else if (bft == read_compressed || bft == read_none) { ftnode_fetch_extra bfe; bfe.create_for_min_read(ft_h); - r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); - assert(r==0); - // assert all bp's are compressed or on disk. + r = toku_deserialize_ftnode_from( + fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe); + invariant(r == 0); + // invariant all bp's are compressed or on disk. for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED || BP_STATE(*dn, i) == PT_ON_DISK); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED || + BP_STATE(*dn, i) == PT_ON_DISK); } // if read_none, get rid of the compressed bp's if (bft == read_none) { if ((*dn)->height == 0) { - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); - // assert all bp's are on disk + toku_ftnode_pe_callback(*dn, + make_pair_attr(0xffffffff), + ft_h, + def_pe_finalize_impl, + nullptr); + // invariant all bp's are on disk for (int i = 0; i < (*dn)->n_children; i++) { if ((*dn)->height == 0) { - assert(BP_STATE(*dn,i) == PT_ON_DISK); - assert(is_BNULL(*dn, i)); - } - else { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); + invariant(BP_STATE(*dn, i) == PT_ON_DISK); + invariant(is_BNULL(*dn, i)); + } else { + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); } } - } - else { + } else { // first decompress everything, and make sure // that it is available // then run partial eviction to get it compressed PAIR_ATTR attr; bfe.create_for_full_read(ft_h); - assert(toku_ftnode_pf_req_callback(*dn, &bfe)); + invariant(toku_ftnode_pf_req_callback(*dn, &bfe)); r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); - assert(r==0); - // assert all bp's are available + invariant(r == 0); + // invariant all bp's are available for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, + make_pair_attr(0xffffffff), + ft_h, + def_pe_finalize_impl, + nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - // assert all bp's are still available, because we touched the clock - assert(BP_STATE(*dn,i) == PT_AVAIL); - // now assert all should be evicted - assert(BP_SHOULD_EVICT(*dn, i)); + // invariant all bp's are still available, because we touched + // the clock + invariant(BP_STATE(*dn, i) == PT_AVAIL); + // now invariant all should be evicted + invariant(BP_SHOULD_EVICT(*dn, i)); } - toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr); + toku_ftnode_pe_callback(*dn, + make_pair_attr(0xffffffff), + ft_h, + def_pe_finalize_impl, + nullptr); for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_COMPRESSED); + invariant(BP_STATE(*dn, i) == PT_COMPRESSED); } } } // now decompress them bfe.create_for_full_read(ft_h); - assert(toku_ftnode_pf_req_callback(*dn, &bfe)); + invariant(toku_ftnode_pf_req_callback(*dn, &bfe)); PAIR_ATTR attr; r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); - assert(r==0); - // assert all bp's are available + invariant(r == 0); + // invariant all bp's are available for (int i = 0; i < (*dn)->n_children; i++) { - assert(BP_STATE(*dn,i) == PT_AVAIL); + invariant(BP_STATE(*dn, i) == PT_AVAIL); } // continue on with test - } - else { + } else { // if we get here, this is a test bug, NOT a bug in development code - assert(false); + invariant(false); } } -static void write_sn_to_disk(int fd, FT_HANDLE ft, FTNODE sn, FTNODE_DISK_DATA* src_ndd, bool do_clone) { +static void write_sn_to_disk(int fd, + FT_HANDLE ft, + FTNODE sn, + FTNODE_DISK_DATA *src_ndd, + bool do_clone) { int r; if (do_clone) { - void* cloned_node_v = NULL; + void *cloned_node_v = NULL; PAIR_ATTR attr; long clone_size; - toku_ftnode_clone_callback(sn, &cloned_node_v, &clone_size, &attr, false, ft->ft); + toku_ftnode_clone_callback( + sn, &cloned_node_v, &clone_size, &attr, false, ft->ft); FTNODE CAST_FROM_VOIDP(cloned_node, cloned_node_v); - r = toku_serialize_ftnode_to(fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false); - assert(r==0); + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false); + invariant(r == 0); toku_ftnode_free(&cloned_node); - } - else { - r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false); - assert(r==0); + } else { + r = toku_serialize_ftnode_to( + fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false); + invariant(r == 0); } } -static void -test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_check_msn(enum ftnode_verify_type bft, + bool do_clone) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; -#define PRESERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 42 }) -#define POSTSERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 84 }) +#define PRESERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 42}) +#define POSTSERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 84}) sn.max_msn_applied_to_node_on_disk = PRESERIALIZE_MSN_ON_DISK; sn.flags = 0x11223344; @@ -228,14 +243,14 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { MALLOC_N(sn.n_children, sn.bp); DBT pivotkey; sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1); - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; set_BLB(&sn, 0, toku_create_empty_bn()); set_BLB(&sn, 1, toku_create_empty_bn()); le_add_to_bn(BLB_DATA(&sn, 0), 0, "a", 2, "aval", 5); le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5); le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5); - BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN) { MIN_MSN.msn + 73 }); + BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN){MIN_MSN.msn + 73}); BLB_MAX_MSN_APPLIED(&sn, 1) = POSTSERIALIZE_MSN_ON_DISK; FT_HANDLE XMALLOC(ft); @@ -244,30 +259,35 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } - //Want to use block #20 + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -276,16 +296,18 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 0); - assert(dn->n_children>=1); - assert(dn->max_msn_applied_to_node_on_disk.msn == POSTSERIALIZE_MSN_ON_DISK.msn); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 0); + invariant(dn->n_children >= 1); + invariant(dn->max_msn_applied_to_node_on_disk.msn == + POSTSERIALIZE_MSN_ON_DISK.msn); { - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. test_key_le_pair elts[3]; elts[0].init("a", "aval"); @@ -294,34 +316,41 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(BLB_MAX_MSN_APPLIED(dn, bn).msn == POSTSERIALIZE_MSN_ON_DISK.msn); - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(BLB_MAX_MSN_APPLIED(dn, bn).msn == + POSTSERIALIZE_MSN_ON_DISK.msn); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); - assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, elts[last_i].keyp) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(elts[last_i].le)); + invariant(memcmp(curr_le, + elts[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data, + elts[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; } - } - assert(last_i == 3); + invariant(last_i == 3); } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -329,17 +358,21 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, + bool do_clone) { int r; struct ftnode sn, *dn; - const int keylens = 256*1024, vallens = 0; + const int keylens = 256 * 1024, vallens = 0; const uint32_t nrows = 8; - // assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + // invariant(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; @@ -354,21 +387,27 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone MALLOC_N(sn.n_children, sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); } for (uint32_t i = 0; i < nrows; ++i) { // one basement per row char key[keylens], val[vallens]; - key[keylens-1] = '\0'; + key[keylens - 1] = '\0'; char c = 'a' + i; - memset(key, c, keylens-1); - le_add_to_bn(BLB_DATA(&sn, i), 0, (char *) &key, sizeof(key), (char *) &val, sizeof(val)); - if (i < nrows-1) { + memset(key, c, keylens - 1); + le_add_to_bn(BLB_DATA(&sn, i), + 0, + (char *)&key, + sizeof(key), + (char *)&val, + sizeof(val)); + if (i < nrows - 1) { uint32_t keylen; - void* curr_key; + void *curr_key; BLB_DATA(&sn, i)->fetch_key_and_len(0, &keylen, &curr_key); DBT pivotkey; - sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), i); + sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), + i); } } @@ -378,29 +417,34 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -408,55 +452,64 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone); setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - - assert(dn->blocknum.b==20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); + invariant(dn->blocknum.b == 20); + + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); { - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. test_key_le_pair *les = new test_key_le_pair[nrows]; { char key[keylens], val[vallens]; - key[keylens-1] = '\0'; + key[keylens - 1] = '\0'; for (uint32_t i = 0; i < nrows; ++i) { char c = 'a' + i; - memset(key, c, keylens-1); - les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val)); + memset(key, c, keylens - 1); + les[i].init( + (char *)&key, sizeof(key), (char *)&val, sizeof(val)); } } const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } - assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + invariant(BLB_DATA(dn, bn)->num_klpairs() > 0); for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); - assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, les[last_i].keyp) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(les[last_i].le)); + invariant(memcmp(curr_le, + les[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data, + les[last_i].keyp) <= 0); } // TODO for later, get a key comparison here as well last_i++; } } - assert(last_i == nrows); + invariant(last_i == nrows); delete[] les; } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -464,15 +517,19 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, + bool do_clone) { int r; struct ftnode sn, *dn; - const uint32_t nrows = 196*1024; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + const uint32_t nrows = 196 * 1024; + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; @@ -487,14 +544,19 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { XMALLOC_N(sn.n_children, sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; - set_BLB(&sn, i, toku_create_empty_bn()); + BP_STATE(&sn, i) = PT_AVAIL; + set_BLB(&sn, i, toku_create_empty_bn()); } size_t total_size = 0; for (uint32_t i = 0; i < nrows; ++i) { uint32_t key = i; uint32_t val = i; - total_size += le_add_to_bn(BLB_DATA(&sn, 0), i, (char *) &key, sizeof(key), (char *) &val, sizeof(val)); + total_size += le_add_to_bn(BLB_DATA(&sn, 0), + i, + (char *)&key, + sizeof(key), + (char *)&val, + sizeof(val)); } FT_HANDLE XMALLOC(ft); @@ -503,30 +565,35 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -535,56 +602,66 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); { - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. test_key_le_pair *les = new test_key_le_pair[nrows]; { int key = 0, val = 0; for (uint32_t i = 0; i < nrows; ++i, key++, val++) { - les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val)); + les[i].init( + (char *)&key, sizeof(key), (char *)&val, sizeof(val)); } } const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } - assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + invariant(BLB_DATA(dn, bn)->num_klpairs() > 0); for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); - assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn).data); - void* tmp = les[last_i].keyp; + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(les[last_i].le)); + invariant(memcmp(curr_le, + les[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + uint32_t *CAST_FROM_VOIDP(pivot, + dn->pivotkeys.get_pivot(bn).data); + void *tmp = les[last_i].keyp; uint32_t *CAST_FROM_VOIDP(item, tmp); - assert(*pivot >= *item); + invariant(*pivot >= *item); } // TODO for later, get a key comparison here as well last_i++; } // don't check soft_copy_is_up_to_date or seqinsert - assert(BLB_DATA(dn, bn)->get_disk_size() < 128*1024); // BN_MAX_SIZE, apt to change + invariant(BLB_DATA(dn, bn)->get_disk_size() < + 128 * 1024); // BN_MAX_SIZE, apt to change } - assert(last_i == nrows); + invariant(last_i == nrows); delete[] les; } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -592,19 +669,22 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } - -static void -test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, + bool do_clone) { int r; struct ftnode sn, *dn; const uint32_t nrows = 7; const size_t key_size = 8; - const size_t val_size = 512*1024; - // assert(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + const size_t val_size = 512 * 1024; + // invariant(val_size > BN_MAX_SIZE); // BN_MAX_SIZE isn't visible + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); sn.max_msn_applied_to_node_on_disk.msn = 0; sn.flags = 0x11223344; @@ -615,21 +695,21 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) sn.n_children = 1; sn.dirty = 1; sn.oldest_referenced_xid_known = TXNID_NONE; - + MALLOC_N(sn.n_children, sn.bp); sn.pivotkeys.create_empty(); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); } for (uint32_t i = 0; i < nrows; ++i) { char key[key_size], val[val_size]; - key[key_size-1] = '\0'; - val[val_size-1] = '\0'; + key[key_size - 1] = '\0'; + val[val_size - 1] = '\0'; char c = 'a' + i; - memset(key, c, key_size-1); - memset(val, c, val_size-1); - le_add_to_bn(BLB_DATA(&sn, 0), i,key, 8, val, val_size); + memset(key, c, key_size - 1); + memset(val, c, val_size - 1); + le_add_to_bn(BLB_DATA(&sn, 0), i, key, 8, val, val_size); } FT_HANDLE XMALLOC(ft); @@ -638,30 +718,35 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -670,58 +755,66 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); { - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. test_key_le_pair *les = new test_key_le_pair[nrows]; { char key[key_size], val[val_size]; - key[key_size-1] = '\0'; - val[val_size-1] = '\0'; + key[key_size - 1] = '\0'; + val[val_size - 1] = '\0'; for (uint32_t i = 0; i < nrows; ++i) { char c = 'a' + i; - memset(key, c, key_size-1); - memset(val, c, val_size-1); + memset(key, c, key_size - 1); + memset(val, c, val_size - 1); les[i].init(key, key_size, val, val_size); } } const uint32_t npartitions = dn->n_children; - assert(npartitions == nrows); + invariant(npartitions == nrows); uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } - assert(BLB_DATA(dn, bn)->num_klpairs() > 0); + invariant(BLB_DATA(dn, bn)->num_klpairs() > 0); for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le)); - assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(les[last_i].keyp)) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(les[last_i].le)); + invariant(memcmp(curr_le, + les[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data, + (char *)(les[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; } // don't check soft_copy_is_up_to_date or seqinsert } - assert(last_i == 7); + invariant(last_i == 7); delete[] les; } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -729,15 +822,19 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } - -static void -test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_empty_basement_nodes( + enum ftnode_verify_type bft, + bool do_clone) { struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -760,7 +857,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool toku_fill_dbt(&pivotkeys[5], "x", 2); sn.pivotkeys.create_from_dbts(pivotkeys, 6); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); BLB_SEQINSERT(&sn, i) = 0; } @@ -774,30 +871,35 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -805,17 +907,18 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 0); - assert(dn->n_children>0); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 0); + invariant(dn->n_children > 0); { test_key_le_pair elts[3]; - // Man, this is way too ugly. This entire test suite needs to be refactored. + // Man, this is way too ugly. This entire test suite needs to be + // refactored. // Create a dummy mempool and put the leaves there. Ugh. elts[0].init("a", "aval"); elts[1].init("b", "bval"); @@ -823,33 +926,39 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool const uint32_t npartitions = dn->n_children; uint32_t last_i = 0; for (uint32_t bn = 0; bn < npartitions; ++bn) { - assert(dest_ndd[bn].start > 0); - assert(dest_ndd[bn].size > 0); + invariant(dest_ndd[bn].start > 0); + invariant(dest_ndd[bn].size > 0); if (bn > 0) { - assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size); + invariant(dest_ndd[bn].start >= + dest_ndd[bn - 1].start + dest_ndd[bn - 1].size); } for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) { LEAFENTRY curr_le; uint32_t curr_keylen; - void* curr_key; - BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); - assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le)); - assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0); - if (bn < npartitions-1) { - assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(elts[last_i].keyp)) <= 0); + void *curr_key; + BLB_DATA(dn, bn) + ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key); + invariant(leafentry_memsize(curr_le) == + leafentry_memsize(elts[last_i].le)); + invariant(memcmp(curr_le, + elts[last_i].le, + leafentry_memsize(curr_le)) == 0); + if (bn < npartitions - 1) { + invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data, + (char *)(elts[last_i].keyp)) <= 0); } // TODO for later, get a key comparison here as well last_i++; } - } - assert(last_i == 3); + invariant(last_i == 3); } toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -857,14 +966,19 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -static void -test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_leaf_with_multiple_empty_basement_nodes( + enum ftnode_verify_type bft, + bool do_clone) { struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -884,7 +998,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b toku_fill_dbt(&pivotkeys[2], "A", 2); sn.pivotkeys.create_from_dbts(pivotkeys, 3); for (int i = 0; i < sn.n_children; ++i) { - BP_STATE(&sn,i) = PT_AVAIL; + BP_STATE(&sn, i) = PT_AVAIL; set_BLB(&sn, i, toku_create_empty_bn()); } @@ -894,30 +1008,35 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; @@ -926,29 +1045,31 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 0); - assert(dn->n_children == 1); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 0); + invariant(dn->n_children == 1); { const uint32_t npartitions = dn->n_children; for (uint32_t i = 0; i < npartitions; ++i) { - assert(dest_ndd[i].start > 0); - assert(dest_ndd[i].size > 0); + invariant(dest_ndd[i].start > 0); + invariant(dest_ndd[i].size > 0); if (i > 0) { - assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size); + invariant(dest_ndd[i].start >= + dest_ndd[i - 1].start + dest_ndd[i - 1].size); } - assert(BLB_DATA(dn, i)->num_klpairs() == 0); + invariant(BLB_DATA(dn, i)->num_klpairs() == 0); } } - + toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); toku_free(ft_h->h); toku_free(ft_h); @@ -956,16 +1077,18 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } - -static void -test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { +static void test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { // struct ft_handle source_ft; struct ftnode sn, *dn; - int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); + int fd = open(TOKU_TEST_FILENAME, + O_RDWR | O_CREAT | O_BINARY, + S_IRWXU | S_IRWXG | S_IRWXO); + invariant(fd >= 0); int r; @@ -984,11 +1107,11 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1); BP_BLOCKNUM(&sn, 0).b = 30; BP_BLOCKNUM(&sn, 1).b = 35; - BP_STATE(&sn,0) = PT_AVAIL; - BP_STATE(&sn,1) = PT_AVAIL; + BP_STATE(&sn, 0) = PT_AVAIL; + BP_STATE(&sn, 1) = PT_AVAIL; set_BNC(&sn, 0, toku_create_empty_nl()); set_BNC(&sn, 1, toku_create_empty_nl()); - //Create XIDS + // Create XIDS XIDS xids_0 = toku_xids_get_root_xids(); XIDS xids_123; XIDS xids_234; @@ -1000,11 +1123,38 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { toku::comparator cmp; cmp.create(string_key_cmp, nullptr); - toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp); - toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp); - toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp); - - //Cleanup: + toku_bnc_insert_msg(BNC(&sn, 0), + "a", + 2, + "aval", + 5, + FT_NONE, + next_dummymsn(), + xids_0, + true, + cmp); + toku_bnc_insert_msg(BNC(&sn, 0), + "b", + 2, + "bval", + 5, + FT_NONE, + next_dummymsn(), + xids_123, + false, + cmp); + toku_bnc_insert_msg(BNC(&sn, 1), + "x", + 2, + "xval", + 5, + FT_NONE, + next_dummymsn(), + xids_234, + true, + cmp); + + // Cleanup: toku_xids_destroy(&xids_0); toku_xids_destroy(&xids_123); toku_xids_destroy(&xids_234); @@ -1016,31 +1166,36 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { make_blocknum(0), ZERO_LSN, TXNID_NONE, - 4*1024*1024, - 128*1024, + 4 * 1024 * 1024, + 128 * 1024, TOKU_DEFAULT_COMPRESSION_METHOD, 16); ft_h->cmp.create(string_key_cmp, nullptr); ft->ft = ft_h; - + ft_h->blocktable.create(); - { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); } - //Want to use block #20 + { + int r_truncate = ftruncate(fd, 0); + CKERR(r_truncate); + } + // Want to use block #20 BLOCKNUM b = make_blocknum(0); while (b.b < 20) { ft_h->blocktable.allocate_blocknum(&b, ft_h); } - assert(b.b == 20); + invariant(b.b == 20); { DISKOFF offset; DISKOFF size; - ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0); - assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); - assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); - assert(size == 100); + invariant(offset == + (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + invariant(size == 100); } FTNODE_DISK_DATA src_ndd = NULL; FTNODE_DISK_DATA dest_ndd = NULL; @@ -1048,30 +1203,31 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { setup_dn(bft, fd, ft_h, &dn, &dest_ndd); - assert(dn->blocknum.b==20); + invariant(dn->blocknum.b == 20); - assert(dn->layout_version ==FT_LAYOUT_VERSION); - assert(dn->layout_version_original ==FT_LAYOUT_VERSION); - assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION); - assert(dn->height == 1); - assert(dn->n_children==2); - assert(strcmp((char*)dn->pivotkeys.get_pivot(0).data, "hello")==0); - assert(dn->pivotkeys.get_pivot(0).size==6); - assert(BP_BLOCKNUM(dn,0).b==30); - assert(BP_BLOCKNUM(dn,1).b==35); + invariant(dn->layout_version == FT_LAYOUT_VERSION); + invariant(dn->layout_version_original == FT_LAYOUT_VERSION); + invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION); + invariant(dn->height == 1); + invariant(dn->n_children == 2); + invariant(strcmp((char *)dn->pivotkeys.get_pivot(0).data, "hello") == 0); + invariant(dn->pivotkeys.get_pivot(0).size == 6); + invariant(BP_BLOCKNUM(dn, 0).b == 30); + invariant(BP_BLOCKNUM(dn, 1).b == 35); message_buffer *src_msg_buffer1 = &BNC(&sn, 0)->msg_buffer; message_buffer *src_msg_buffer2 = &BNC(&sn, 1)->msg_buffer; message_buffer *dest_msg_buffer1 = &BNC(dn, 0)->msg_buffer; message_buffer *dest_msg_buffer2 = &BNC(dn, 1)->msg_buffer; - assert(src_msg_buffer1->equals(dest_msg_buffer1)); - assert(src_msg_buffer2->equals(dest_msg_buffer2)); + invariant(src_msg_buffer1->equals(dest_msg_buffer1)); + invariant(src_msg_buffer2->equals(dest_msg_buffer2)); toku_ftnode_free(&dn); toku_destroy_ftnode_internals(&sn); - ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); + ft_h->blocktable.block_free( + BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100); ft_h->blocktable.destroy(); ft_h->cmp.destroy(); toku_free(ft_h->h); @@ -1080,11 +1236,12 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { toku_free(src_ndd); toku_free(dest_ndd); - r = close(fd); assert(r != -1); + r = close(fd); + invariant(r != -1); } -int -test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { +int test_main(int argc __attribute__((__unused__)), + const char *argv[] __attribute__((__unused__))) { initialize_dummymsn(); test_serialize_nonleaf(read_none, false); @@ -1103,10 +1260,12 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_ test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, false); test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, false); - test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, false); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, + false); test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, true); test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, true); - test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, true); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, + true); test_serialize_leaf_with_empty_basement_nodes(read_none, false); test_serialize_leaf_with_empty_basement_nodes(read_all, false); diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc index 598a1cc7085c2..706bd94fbc3af 100644 --- a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc @@ -164,17 +164,16 @@ static void test_read_what_was_written (void) { int r; const int NVALS=10000; - if (verbose) printf("test_read_what_was_written(): "); fflush(stdout); + if (verbose) { + printf("test_read_what_was_written(): "); fflush(stdout); + } unlink(fname); - toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); - toku_cachetable_close(&ct); - - + toku_cachetable_close(&ct); /* Now see if we can read an empty tree in. */ toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); @@ -189,8 +188,6 @@ static void test_read_what_was_written (void) { r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0); toku_cachetable_close(&ct); - - /* Now see if we can read it in and get the value. */ toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr); r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0); diff --git a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc index 53973794eae74..aeb5a897c488f 100644 --- a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc +++ b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc @@ -109,7 +109,9 @@ static int run_test(void) r = pqueue_pop(pq, &node); assert(r==0); if (verbose) printf("%d : %d\n", i, *(int*)(node->key->data)); if ( *(int*)(node->key->data) != i ) { - if (verbose) printf("FAIL\n"); return -1; + if (verbose) + printf("FAIL\n"); + return -1; } } pqueue_free(pq); diff --git a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc index a78f787cdf299..f200496486250 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc @@ -793,7 +793,7 @@ static void test_le_garbage_collection_birdie(void) { do_garbage_collect = ule_worth_running_garbage_collection(&ule, 200); invariant(do_garbage_collect); - // It is definately worth doing when the above case is true + // It is definitely worth doing when the above case is true // and there is more than one provisional entry. ule.num_cuxrs = 1; ule.num_puxrs = 2; diff --git a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc index 419af550545c4..71357a1e16ad2 100644 --- a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc +++ b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc @@ -72,7 +72,7 @@ static void dummy_update_status(FTNODE UU(child), int UU(dirtied), void* UU(extr enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 }; -static void test_oldest_referenced_xid_gets_propogated(void) { +static void test_oldest_referenced_xid_gets_propagated(void) { int r; CACHETABLE ct; FT_HANDLE t; @@ -166,7 +166,7 @@ static void test_oldest_referenced_xid_gets_propogated(void) { toku_ft_flush_some_child(t->ft, node, &fa); // pin the child, verify that oldest referenced xid was - // propogated from parent to child during the flush + // propagated from parent to child during the flush toku_pin_ftnode( t->ft, child_nonleaf_blocknum, @@ -185,6 +185,6 @@ static void test_oldest_referenced_xid_gets_propogated(void) { int test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { default_parse_args(argc, argv); - test_oldest_referenced_xid_gets_propogated(); + test_oldest_referenced_xid_gets_propagated(); return 0; } diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc similarity index 55% rename from storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h rename to storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc index 8aded3898c1de..ea4f9374dc336 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h +++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc @@ -36,30 +36,62 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." -#pragma once - -#include - -#include "ft/serialize/block_allocator.h" - -// Block allocation strategy implementations - -class block_allocator_strategy { -public: - static struct block_allocator::blockpair * - first_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment); - - static struct block_allocator::blockpair * - best_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment); - - static struct block_allocator::blockpair * - padded_fit(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment); - - static struct block_allocator::blockpair * - heat_zone(struct block_allocator::blockpair *blocks_array, - uint64_t n_blocks, uint64_t size, uint64_t alignment, - uint64_t heat); -}; +#include "ft/serialize/rbtree_mhs.h" +#include "test.h" +#include +#include +#include +#include + +static void test_insert_remove(void) { + uint64_t i; + MhsRbTree::Tree *tree = new MhsRbTree::Tree(); + verbose = 0; + + tree->Insert({0, 100}); + + for (i = 0; i < 10; i++) { + tree->Remove(3); + tree->Remove(2); + } + tree->ValidateBalance(); + tree->ValidateMhs(); + + for (i = 0; i < 10; i++) { + tree->Insert({5 * i, 3}); + } + tree->ValidateBalance(); + tree->ValidateMhs(); + + uint64_t offset = tree->Remove(2); + invariant(offset == 0); + offset = tree->Remove(10); + invariant(offset == 50); + offset = tree->Remove(3); + invariant(offset == 5); + tree->ValidateBalance(); + tree->ValidateMhs(); + + tree->Insert({48, 2}); + tree->Insert({50, 10}); + + tree->ValidateBalance(); + tree->ValidateMhs(); + + tree->Insert({3, 7}); + offset = tree->Remove(10); + invariant(offset == 2); + tree->ValidateBalance(); + tree->ValidateMhs(); + tree->Dump(); + delete tree; +} + +int test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + + test_insert_remove(); + if (verbose) + printf("test ok\n"); + return 0; +} diff --git a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc new file mode 100644 index 0000000000000..cefe66335a6cb --- /dev/null +++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc @@ -0,0 +1,103 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of PerconaFT. + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with PerconaFT. If not, see . + +---------------------------------------- + + PerconaFT is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License, version 3, + as published by the Free Software Foundation. + + PerconaFT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with PerconaFT. If not, see . +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#include "ft/serialize/rbtree_mhs.h" +#include "test.h" +#include +#include +#include +#include + +#define N 1000000 +std::vector input_vector; +MhsRbTree::Node::BlockPair old_vector[N]; + +static int myrandom(int i) { return std::rand() % i; } + +static void generate_random_input() { + std::srand(unsigned(std::time(0))); + + // set some values: + for (uint64_t i = 0; i < N; ++i) { + MhsRbTree::Node::BlockPair bp = {i+1, 0}; + input_vector.push_back(bp); + old_vector[i] = bp; + } + // using built-in random generator: + std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom); +} + +static void test_insert_remove(void) { + int i; + MhsRbTree::Tree *tree = new MhsRbTree::Tree(); + verbose = 0; + generate_random_input(); + if (verbose) { + printf("\n we are going to insert the following block offsets\n"); + for (i = 0; i < N; i++) + printf("%" PRIu64 "\t", input_vector[i]._offset.ToInt()); + } + for (i = 0; i < N; i++) { + tree->Insert(input_vector[i]); + // tree->ValidateBalance(); + } + tree->ValidateBalance(); + MhsRbTree::Node::BlockPair *p_bps = &old_vector[0]; + tree->ValidateInOrder(p_bps); + printf("min node of the tree:%" PRIu64 "\n", + rbn_offset(tree->MinNode()).ToInt()); + printf("max node of the tree:%" PRIu64 "\n", + rbn_offset(tree->MaxNode()).ToInt()); + + for (i = 0; i < N; i++) { + // tree->ValidateBalance(); + tree->RawRemove(input_vector[i]._offset.ToInt()); + } + + tree->Destroy(); + delete tree; +} + +int test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + + test_insert_remove(); + if (verbose) + printf("test ok\n"); + return 0; +} diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc b/storage/tokudb/PerconaFT/ft/txn/roll.cc index 407116b983c1a..9f3977743a049 100644 --- a/storage/tokudb/PerconaFT/ft/txn/roll.cc +++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc @@ -38,18 +38,18 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. /* rollback and rollforward routines. */ - -#include "ft/ft.h" +#include #include "ft/ft-ops.h" +#include "ft/ft.h" #include "ft/log_header.h" #include "ft/logger/log-internal.h" -#include "ft/txn/xids.h" #include "ft/txn/rollback-apply.h" +#include "ft/txn/xids.h" // functionality provided by roll.c is exposed by an autogenerated // header file, logheader.h // -// this (poorly) explains the absense of "roll.h" +// this (poorly) explains the absence of "roll.h" // these flags control whether or not we send commit messages for // various operations @@ -162,10 +162,122 @@ toku_rollback_fcreate (FILENUM filenum, // directory row lock for its dname) and we would not get this // far if there were other live handles. toku_cachefile_unlink_on_close(cf); + toku_cachefile_skip_log_recover_on_close(cf); done: return 0; } +int toku_commit_frename(BYTESTRING /* old_name */, + BYTESTRING /* new_iname */, + TOKUTXN /* txn */, + LSN UU(oplsn)) { + return 0; +} + +int toku_rollback_frename(BYTESTRING old_iname, + BYTESTRING new_iname, + TOKUTXN txn, + LSN UU(oplsn)) { + assert(txn); + assert(txn->logger); + assert(txn->logger->ct); + + CACHETABLE cachetable = txn->logger->ct; + + toku_struct_stat stat; + bool old_exist = true; + bool new_exist = true; + + std::unique_ptr old_iname_full( + toku_cachetable_get_fname_in_cwd(cachetable, old_iname.data), + &toku_free); + std::unique_ptr new_iname_full( + toku_cachetable_get_fname_in_cwd(cachetable, new_iname.data), + &toku_free); + + if (toku_stat(old_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + old_exist = false; + else + return 1; + } + + if (toku_stat(new_iname_full.get(), &stat) == -1) { + if (ENOENT == errno) + new_exist = false; + else + return 1; + } + + // Both old and new files can exist if: + // - rename() is not completed + // - fcreate was replayed during recovery + // 'Stalled cachefiles' container cachefile_list::m_stale_fileid contains + // closed but not yet evicted cachefiles and the key of this container is + // fs-dependent file id - (device id, inode number) pair. To preserve the + // new cachefile + // file's id and keep it in 'stalled cachefiles' container the old file is + // removed + // and the new file is renamed. + if (old_exist && new_exist && + (toku_os_unlink(old_iname_full.get()) == -1 || + toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1)) + return 1; + + if (!old_exist && new_exist && + (toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 || + toku_fsync_directory(new_iname_full.get()) == -1 || + toku_fsync_directory(old_iname_full.get()) == -1)) + return 1; + + // it's ok if both files do not exist on recovery + if (!old_exist && !new_exist) + assert(txn->for_recovery); + + CACHEFILE cf; + int r = toku_cachefile_of_iname_in_env(cachetable, new_iname.data, &cf); + if (r != ENOENT) { + char *old_fname_in_cf = toku_cachefile_fname_in_env(cf); + toku_cachefile_set_fname_in_env(cf, toku_xstrdup(old_iname.data)); + toku_free(old_fname_in_cf); + // There is at least one case when fclose logging cause error: + // 1) start transaction + // 2) create ft 'a'(write "fcreate" in recovery log) + // 3) rename ft 'a' to 'b'(write "frename" in recovery log) + // 4) abort transaction: + // a) rollback rename ft (renames 'b' to 'a') + // b) rollback create ft (removes 'a'): + // invokes toku_cachefile_unlink_on_close - lazy unlink on file + // close, + // it just sets corresponding flag in cachefile object + // c) write "unlink" for 'a' in recovery log + // (when transaction is aborted all locks are released, + // when file lock is released the file is closed and unlinked if + // corresponding flag is set in cachefile object) + // 5) crash + // + // After this we have the following records in recovery log: + // - create ft 'a', + // - rename 'a' to 'b', + // - unlink 'a' + // + // On recovery: + // - create 'a' + // - rename 'a' to 'b' + // - unlink 'a' - as 'a' file does not exist we have crash on assert + // here + // + // There is no need to write "unlink" in recovery log in (4a) because + // 'a' will be removed + // on transaction rollback on recovery. + toku_cachefile_skip_log_recover_on_close(cf); + } + + return 0; +} + int find_ft_from_filenum (const FT &ft, const FILENUM &filenum); int find_ft_from_filenum (const FT &ft, const FILENUM &filenum) { FILENUM thisfnum = toku_cachefile_filenum(ft->cf); diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc index df830afd0df68..c9464c3ed60a3 100644 --- a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc +++ b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc @@ -169,7 +169,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) { txn->roll_info.spilled_rollback_head = ROLLBACK_NONE; txn->roll_info.spilled_rollback_tail = ROLLBACK_NONE; } - // if we're commiting a child rollback, put its entries into the parent + // if we're committing a child rollback, put its entries into the parent // by pinning both child and parent and then linking the child log entry // list to the end of the parent log entry list. if (txn_has_current_rollback_log(txn)) { diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc index 68c94c2ad1192..08d7c8874e50e 100644 --- a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc +++ b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc @@ -59,21 +59,18 @@ rollback_log_destroy(ROLLBACK_LOG_NODE log) { // flush an ununused log to disk, by allocating a size 0 blocknum in // the blocktable -static void -toku_rollback_flush_unused_log( - ROLLBACK_LOG_NODE log, - BLOCKNUM logname, - int fd, - FT ft, - bool write_me, - bool keep_me, - bool for_checkpoint, - bool is_clone - ) -{ +static void toku_rollback_flush_unused_log(ROLLBACK_LOG_NODE log, + BLOCKNUM logname, + int fd, + FT ft, + bool write_me, + bool keep_me, + bool for_checkpoint, + bool is_clone) { if (write_me) { DISKOFF offset; - ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX); + ft->blocktable.realloc_on_disk( + logname, 0, &offset, ft, fd, for_checkpoint); } if (!keep_me && !is_clone) { toku_free(log); diff --git a/storage/tokudb/PerconaFT/ft/ule.cc b/storage/tokudb/PerconaFT/ft/ule.cc index ac393fbf17991..e3dce6d27dd8d 100644 --- a/storage/tokudb/PerconaFT/ft/ule.cc +++ b/storage/tokudb/PerconaFT/ft/ule.cc @@ -587,8 +587,8 @@ bool toku_le_worth_running_garbage_collection( // by new txns. // 2.) There is only one committed entry, but the outermost // provisional entry is older than the oldest known referenced -// xid, so it must have commited. Therefor we can promote it to -// committed and get rid of the old commited entry. +// xid, so it must have committed. Therefor we can promote it to +// committed and get rid of the old committed entry. if (le->type != LE_MVCC) { return false; } diff --git a/storage/tokudb/PerconaFT/portability/CMakeLists.txt b/storage/tokudb/PerconaFT/portability/CMakeLists.txt index 9f84d9b03df2a..4793db63cc1e8 100644 --- a/storage/tokudb/PerconaFT/portability/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/portability/CMakeLists.txt @@ -14,12 +14,11 @@ set(tokuportability_srcs ) add_library(${LIBTOKUPORTABILITY} SHARED ${tokuportability_srcs}) -target_link_libraries(${LIBTOKUPORTABILITY} LINK_PRIVATE ${LIBJEMALLOC}) target_link_libraries(${LIBTOKUPORTABILITY} LINK_PUBLIC ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) add_library(tokuportability_static_conv STATIC ${tokuportability_srcs}) set_target_properties(tokuportability_static_conv PROPERTIES POSITION_INDEPENDENT_CODE ON) -set(tokuportability_source_libs tokuportability_static_conv ${LIBJEMALLOC} ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) +set(tokuportability_source_libs tokuportability_static_conv ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS}) toku_merge_static_libs(${LIBTOKUPORTABILITY}_static ${LIBTOKUPORTABILITY}_static "${tokuportability_source_libs}") maybe_add_gcov_to_libraries(${LIBTOKUPORTABILITY} tokuportability_static_conv) diff --git a/storage/tokudb/PerconaFT/portability/file.cc b/storage/tokudb/PerconaFT/portability/file.cc index 5332a2dff5553..0e3efc1a12afc 100644 --- a/storage/tokudb/PerconaFT/portability/file.cc +++ b/storage/tokudb/PerconaFT/portability/file.cc @@ -356,6 +356,12 @@ toku_os_close(int fd) { // if EINTR, retry until success return r; } +int toku_os_rename(const char *old_name, const char *new_name) { + return rename(old_name, new_name); +} + +int toku_os_unlink(const char *path) { return unlink(path); } + ssize_t toku_os_read(int fd, void *buf, size_t count) { ssize_t r; diff --git a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc index bc48e93937da4..8e73c56a6c549 100644 --- a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc +++ b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc @@ -90,7 +90,13 @@ static bool check_huge_pages_in_practice(void) const long pagesize = 4096; const long n_pages = TWO_MB/pagesize; +#ifdef __linux__ + // On linux mincore is defined as mincore(void *, size_t, unsigned char *) unsigned char vec[n_pages]; +#else + // On BSD (OS X included) it is defined as mincore(void *, size_t, char *) + char vec[n_pages]; +#endif { int r = mincore(second, TWO_MB, vec); if (r!=0 && errno==ENOMEM) { diff --git a/storage/tokudb/PerconaFT/portability/memory.cc b/storage/tokudb/PerconaFT/portability/memory.cc index 2de12699c61f9..5430ff84b7059 100644 --- a/storage/tokudb/PerconaFT/portability/memory.cc +++ b/storage/tokudb/PerconaFT/portability/memory.cc @@ -313,6 +313,15 @@ toku_strdup(const char *s) { return (char *) toku_memdup(s, strlen(s)+1); } +char *toku_strndup(const char *s, size_t n) { + size_t s_size = strlen(s); + size_t bytes_to_copy = n > s_size ? s_size : n; + ++bytes_to_copy; + char *result = (char *)toku_memdup(s, bytes_to_copy); + result[bytes_to_copy - 1] = 0; + return result; +} + void toku_free(void *p) { if (p) { diff --git a/storage/tokudb/PerconaFT/portability/memory.h b/storage/tokudb/PerconaFT/portability/memory.h index 7780536f279ec..5ae652d39fc52 100644 --- a/storage/tokudb/PerconaFT/portability/memory.h +++ b/storage/tokudb/PerconaFT/portability/memory.h @@ -125,7 +125,9 @@ size_t toku_malloc_usable_size(void *p) __attribute__((__visibility__("default") void *toku_memdup (const void *v, size_t len); /* Toku-version of strdup. Use this so that it calls toku_malloc() */ char *toku_strdup (const char *s) __attribute__((__visibility__("default"))); - +/* Toku-version of strndup. Use this so that it calls toku_malloc() */ +char *toku_strndup(const char *s, size_t n) + __attribute__((__visibility__("default"))); /* Copy memory. Analogous to strdup() Crashes instead of returning NULL */ void *toku_xmemdup (const void *v, size_t len) __attribute__((__visibility__("default"))); /* Toku-version of strdup. Use this so that it calls toku_xmalloc() Crashes instead of returning NULL */ diff --git a/storage/tokudb/PerconaFT/portability/portability.cc b/storage/tokudb/PerconaFT/portability/portability.cc index ba9f8d48ed5dc..19f445a85d7f4 100644 --- a/storage/tokudb/PerconaFT/portability/portability.cc +++ b/storage/tokudb/PerconaFT/portability/portability.cc @@ -63,6 +63,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #if defined(HAVE_SYS_SYSCTL_H) # include #endif +#if defined(HAVE_PTHREAD_H) +# include +#endif #if defined(HAVE_PTHREAD_NP_H) # include #endif @@ -102,7 +105,11 @@ toku_os_getpid(void) { int toku_os_gettid(void) { -#if defined(__NR_gettid) +#if defined(HAVE_PTHREAD_THREADID_NP) + uint64_t result; + pthread_threadid_np(NULL, &result); + return (int) result; // Used for instrumentation so overflow is ok here. +#elif defined(__NR_gettid) return syscall(__NR_gettid); #elif defined(SYS_gettid) return syscall(SYS_gettid); diff --git a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc index 880f9a3a9bbf7..dbbea974a49ff 100644 --- a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc +++ b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc @@ -64,7 +64,7 @@ int main(int argc, char *const argv[]) { if (verbose) printf("maxdata=%" PRIu64 " 0x%" PRIx64 "\n", maxdata, maxdata); // check the data size -#if __x86_64__ +#if defined(__x86_64__) || defined(__aarch64__) assert(maxdata > (1ULL << 32)); #elif __i386__ assert(maxdata < (1ULL << 32)); diff --git a/storage/tokudb/PerconaFT/portability/tests/test-xid.cc b/storage/tokudb/PerconaFT/portability/tests/test-xid.cc index 9ee68906bb37a..71736f898ef87 100644 --- a/storage/tokudb/PerconaFT/portability/tests/test-xid.cc +++ b/storage/tokudb/PerconaFT/portability/tests/test-xid.cc @@ -51,11 +51,18 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #if defined(HAVE_PTHREAD_NP_H) # include #endif +#if defined(HAVE_PTHREAD_H) +# include +#endif // since we implement the same thing here as in toku_os_gettid, this test // is pretty pointless static int gettid(void) { -#if defined(__NR_gettid) +#if defined(HAVE_PTHREAD_THREADID_NP) + uint64_t result; + pthread_threadid_np(NULL, &result); + return (int) result; +#elif defined(__NR_gettid) return syscall(__NR_gettid); #elif defined(SYS_gettid) return syscall(SYS_gettid); diff --git a/storage/tokudb/PerconaFT/portability/toku_config.h.in b/storage/tokudb/PerconaFT/portability/toku_config.h.in index e1412cc9e14d5..18f6779796fe5 100644 --- a/storage/tokudb/PerconaFT/portability/toku_config.h.in +++ b/storage/tokudb/PerconaFT/portability/toku_config.h.in @@ -42,7 +42,6 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #cmakedefine TOKU_DEBUG_PARANOID 1 #cmakedefine USE_VALGRIND 1 - #cmakedefine HAVE_ALLOCA_H 1 #cmakedefine HAVE_ARPA_INET_H 1 #cmakedefine HAVE_BYTESWAP_H 1 @@ -88,6 +87,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #cmakedefine HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP 1 #cmakedefine HAVE_PTHREAD_YIELD 1 #cmakedefine HAVE_PTHREAD_YIELD_NP 1 +#cmakedefine HAVE_PTHREAD_THREADID_NP 1 #cmakedefine HAVE_PTHREAD_GETTHREADID_NP 1 #cmakedefine PTHREAD_YIELD_RETURNS_INT 1 diff --git a/storage/tokudb/PerconaFT/portability/toku_portability.h b/storage/tokudb/PerconaFT/portability/toku_portability.h index 921d3a309f6c0..f127b0fe172dc 100644 --- a/storage/tokudb/PerconaFT/portability/toku_portability.h +++ b/storage/tokudb/PerconaFT/portability/toku_portability.h @@ -246,6 +246,8 @@ int toku_os_open(const char *path, int oflag, int mode); int toku_os_open_direct(const char *path, int oflag, int mode); int toku_os_close(int fd); int toku_os_fclose(FILE * stream); +int toku_os_rename(const char *old_name, const char *new_name); +int toku_os_unlink(const char *path); ssize_t toku_os_read(int fd, void *buf, size_t count); ssize_t toku_os_pread(int fd, void *buf, size_t count, off_t offset); void toku_os_recursive_delete(const char *path); diff --git a/storage/tokudb/PerconaFT/portability/toku_time.h b/storage/tokudb/PerconaFT/portability/toku_time.h index 11a3f3aa2b99c..a1278ef033731 100644 --- a/storage/tokudb/PerconaFT/portability/toku_time.h +++ b/storage/tokudb/PerconaFT/portability/toku_time.h @@ -98,9 +98,17 @@ double tokutime_to_seconds(tokutime_t) __attribute__((__visibility__("default") // Get the value of tokutime for right now. We want this to be fast, so we expose the implementation as RDTSC. static inline tokutime_t toku_time_now(void) { +#if defined(__x86_64__) || defined(__i386__) uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; +#elif defined (__aarch64__) + uint64_t result; + __asm __volatile__ ("mrs %[rt], cntvct_el0" : [rt] "=r" (result)); + return result; +#else +#error No timer implementation for this platform +#endif } static inline uint64_t toku_current_time_microsec(void) { diff --git a/storage/tokudb/PerconaFT/src/indexer-internal.h b/storage/tokudb/PerconaFT/src/indexer-internal.h index 48e62ee49b2d7..fdaa561e3d027 100644 --- a/storage/tokudb/PerconaFT/src/indexer-internal.h +++ b/storage/tokudb/PerconaFT/src/indexer-internal.h @@ -42,7 +42,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include // the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array. -// the array is a resizeable array with max size "max_keys" and current size "current_keys". +// the array is a resizable array with max size "max_keys" and current size "current_keys". // the ordered set is used by the hotindex undo function to collect the commit keys. struct indexer_commit_keys { int max_keys; // max number of keys diff --git a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc index 8d0b080b9fe88..4c7f5336161da 100644 --- a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc +++ b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc @@ -528,7 +528,7 @@ indexer_find_prev_xr(DB_INDEXER *UU(indexer), ULEHANDLE ule, uint64_t xrindex, u } // inject "delete" message into ft with logging in recovery and rollback logs, -// and making assocation between txn and ft +// and making association between txn and ft static int indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids, TOKUTXN txn) { int result = 0; @@ -577,7 +577,7 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi } // inject "insert" message into ft with logging in recovery and rollback logs, -// and making assocation between txn and ft +// and making association between txn and ft static int indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids, TOKUTXN txn) { int result = 0; diff --git a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt index 47f6aa44a75e8..c01a8f0d62870 100644 --- a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt @@ -108,11 +108,11 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) foreach(ov c d r) if (ov STREQUAL c) - set(gset 0) set(hset 0) + set(iset 0) else () - set(gset 0 1 2 3 4 5) - set(hset 0 1) + set(hset 0 1 2 3 4 5) + set(iset 0 1) endif () foreach(av 0 1) @@ -130,25 +130,27 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS) foreach(dv ${dset}) foreach(ev ${eset}) foreach(fv 0 1) - foreach(gv ${gset}) + foreach(gv 0 1) foreach(hv ${hset}) - - if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv))) - set(iset 0 1) - else () - set(iset 0) - endif () - foreach(iv ${iset}) - set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}") - set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}") - set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}.ctest-errors") - add_test(NAME ${testname} - COMMAND run_recovery_fileops_unit.sh $ ${errfile} 137 - -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv} - ) - setup_toku_test_properties(${testname} ${envdir}) - set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}") + + if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv))) + set(jset 0 1) + else () + set(jset 0) + endif () + + foreach(jv ${jset}) + set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}") + set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}") + set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}.ctest-errors") + add_test(NAME ${testname} + COMMAND run_recovery_fileops_unit.sh $ ${errfile} 137 + -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv} -J ${jv} + ) + setup_toku_test_properties(${testname} ${envdir}) + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}") + endforeach(jv) endforeach(iv) endforeach(hv) endforeach(gv) diff --git a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test index 20df13923e6d5..7cce68e6ff8e5 100644 --- a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test +++ b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test @@ -1,3 +1,3 @@ -# commited insert +# committed insert key k1 insert committed 0 v100 diff --git a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc index 3f2f8d7455a98..aaf77c503cc42 100644 --- a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc +++ b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc @@ -51,7 +51,7 @@ int DISALLOW_PUTS=0; int COMPRESS=0; enum {MAGIC=311}; -bool dup_row_at_end = false; // false: duplicate at the begining. true: duplicate at the end. The duplicated row is row 0. +bool dup_row_at_end = false; // false: duplicate at the beginning. true: duplicate at the end. The duplicated row is row 0. int dup_row_id = 0; // 0 means to use row 1 if inserting at the end, row NUM_ROWS if inserting at the beginning. Otherwise insert the row specified here. // diff --git a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc index a4dc0ea9236b4..cc99ab560d85e 100644 --- a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc +++ b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc @@ -36,17 +36,17 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." -#include "test.h" -#include "toku_pthread.h" #include -#include #include - +#include +#include "ft/logger/logger.h" +#include "test.h" +#include "toku_pthread.h" static int do_recover; static int do_crash; static char fileop; -static int choices['I'-'A'+1]; +static int choices['J' - 'A' + 1]; const int num_choices = sizeof(choices)/sizeof(choices[0]); static DB_TXN *txn; const char *oldname = "oldfoo"; @@ -58,11 +58,14 @@ static char *cmd; static void usage(void) { - fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# -F# [-G# -H# -I#]\n" - " fileop = c/r/d (create/rename/delete)\n" - " Where # is a single digit number > 0.\n" - " A-F are required for fileop=create\n" - " A-I are required for fileop=delete, fileop=rename\n", cmd); + fprintf(stderr, + "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# " + "-F# -G# [-H# -I# -J#]\n" + " fileop = c/r/d (create/rename/delete)\n" + " Where # is a single digit number > 0.\n" + " A-G are required for fileop=create\n" + " A-I are required for fileop=delete, fileop=rename\n", + cmd); exit(1); } @@ -129,19 +132,18 @@ get_choice_flush_log_before_crash(void) { return get_bool_choice('F'); } -static int -get_choice_create_type(void) { - return get_x_choice('G', 6); -} +static int get_choice_dir_per_db(void) { return get_bool_choice('G'); } + +static int get_choice_create_type(void) { return get_x_choice('H', 6); } static int get_choice_txn_does_open_close_before_fileop(void) { - return get_bool_choice('H'); + return get_bool_choice('I'); } static int get_choice_lock_table_split_fcreate(void) { - int choice = get_bool_choice('I'); + int choice = get_bool_choice('J'); if (choice) assert(fileop_did_commit()); return choice; @@ -157,62 +159,64 @@ do_args(int argc, char * const argv[]) { } char c; - while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:X:")) != -1) { - switch(c) { - case 'v': - verbose++; - break; - case 'q': - verbose--; - if (verbose<0) verbose=0; - break; - case 'h': - case '?': - usage(); - break; - case 'c': - do_crash = 1; - break; - case 'r': - do_recover = 1; - break; - case 'O': - if (fileop != '\0') + while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:J:X:")) != -1) { + switch (c) { + case 'v': + verbose++; + break; + case 'q': + verbose--; + if (verbose < 0) + verbose = 0; + break; + case 'h': + case '?': usage(); - fileop = optarg[0]; - switch (fileop) { - case 'c': - case 'r': - case 'd': - break; - default: + break; + case 'c': + do_crash = 1; + break; + case 'r': + do_recover = 1; + break; + case 'O': + if (fileop != '\0') usage(); - break; - } - break; - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - case 'G': - case 'H': - case 'I': - if (fileop == '\0') - usage(); - int num; - num = atoi(optarg); - if (num < 0 || num > 9) - usage(); - choices[c - 'A'] = num; - break; - case 'X': - if (strcmp(optarg, "novalgrind") == 0) { - // provide a way for the shell script runner to pass an - // arg that suppresses valgrind on this child process + fileop = optarg[0]; + switch (fileop) { + case 'c': + case 'r': + case 'd': + break; + default: + usage(); + break; + } + break; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + if (fileop == '\0') + usage(); + int num; + num = atoi(optarg); + if (num < 0 || num > 9) + usage(); + choices[c - 'A'] = num; break; - } + case 'X': + if (strcmp(optarg, "novalgrind") == 0) { + // provide a way for the shell script runner to pass an + // arg that suppresses valgrind on this child process + break; + } // otherwise, fall through to an error default: usage(); @@ -222,7 +226,7 @@ do_args(int argc, char * const argv[]) { if (argc!=optind) { usage(); exit(1); } for (i = 0; i < num_choices; i++) { - if (i >= 'G' - 'A' && fileop == 'c') + if (i >= 'H' - 'A' && fileop == 'c') break; if (choices[i] == -1) usage(); @@ -261,6 +265,8 @@ static void env_startup(void) { int envflags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_PRIVATE | recover_flag; r = db_env_create(&env, 0); CKERR(r); + r = env->set_dir_per_db(env, get_choice_dir_per_db()); + CKERR(r); env->set_errfile(env, stderr); r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); @@ -625,8 +631,11 @@ recover_and_verify(void) { else if (did_create_commit_early()) expect_old_name = 1; } - verify_file_exists(oldname, expect_old_name); - verify_file_exists(newname, expect_new_name); + // We can't expect files existence until recovery log was not flushed + if ((get_choice_flush_log_before_crash())) { + verify_file_exists(oldname, expect_old_name); + verify_file_exists(newname, expect_new_name); + } env_shutdown(); } diff --git a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc index a2b48e443cdfd..48843a0bd3257 100644 --- a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc +++ b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc @@ -166,7 +166,7 @@ run_test (void) { DB_BTREE_STAT64 s; r = db->stat64(db, NULL, &s); CKERR(r); - assert(s.bt_nkeys == 0); + assert(s.bt_nkeys == 1); r = db->close(db, 0); CKERR(r); @@ -176,7 +176,7 @@ run_test (void) { r = txn->commit(txn, 0); CKERR(r); r = db->stat64(db, NULL, &s); CKERR(r); - assert(s.bt_nkeys == 0); + assert(s.bt_nkeys == 1); } // verify update callback overwrites the row diff --git a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc index 8e5109cd2a973..f6111d4b67c0b 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc @@ -78,7 +78,7 @@ static void test_insert_many_gc(void) { // from having an MVCC stack of size 'N'. At the time of this // writing, we run full GC on leaf-inject when the leaf is // 32mb or larger. A good invariant is that the max LE size - // never grew larger than 35mb and that the max commited xr stack + // never grew larger than 35mb and that the max committed xr stack // length never exceeded 35 const uint64_t le_max_memsize = get_engine_status_val(env, "LE_MAX_MEMSIZE"); const uint64_t le_max_committed_xr = get_engine_status_val(env, "LE_MAX_COMMITTED_XR"); diff --git a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc index aaafe284906a0..88140dd173184 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc @@ -53,7 +53,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // This test is a micro stress test that does multithreaded updates on a fixed size table. // There is also a thread that scans the table with bulk fetch, ensuring the sum is zero. // -// This test is targetted at stressing the locktree, hence the small table and many update threads. +// This test is targeted at stressing the locktree, hence the small table and many update threads. // static int UU() lock_escalation_op(DB_TXN *UU(txn), ARG arg, void* operation_extra, void *UU(stats_extra)) { diff --git a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc index fec454b80093b..301eed1560e13 100644 --- a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc +++ b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc @@ -123,7 +123,8 @@ test_main(int argc, char *const argv[]) { continue; } } - if (verbose>0) printf("%s", __FILE__); if (verbose>1) printf("\n"); + if (verbose>0) printf("%s", __FILE__); + if (verbose>1) printf("\n"); for (i=1; i<100; i++) test_txn_abort(i); if (verbose>1) printf("%s OK\n", __FILE__); diff --git a/storage/tokudb/PerconaFT/src/ydb-internal.h b/storage/tokudb/PerconaFT/src/ydb-internal.h index 462a2a3d861e3..d40f7795b0b86 100644 --- a/storage/tokudb/PerconaFT/src/ydb-internal.h +++ b/storage/tokudb/PerconaFT/src/ydb-internal.h @@ -114,7 +114,7 @@ struct __toku_db_env_internal { char *real_data_dir; // data dir used when the env is opened (relative to cwd, or absolute with leading /) char *real_log_dir; // log dir used when the env is opened (relative to cwd, or absolute with leading /) - char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absoulte with leading /) + char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absolute with leading /) fs_redzone_state fs_state; uint64_t fs_seq; // how many times has fs_poller run? @@ -132,7 +132,8 @@ struct __toku_db_env_internal { int datadir_lockfd; int logdir_lockfd; int tmpdir_lockfd; - bool check_thp; // if set check if transparent huge pages are disables + bool check_thp; // if set check if transparent huge pages are disabled + bool dir_per_db; uint64_t (*get_loader_memory_size_callback)(void); uint64_t default_lock_timeout_msec; uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec); diff --git a/storage/tokudb/PerconaFT/src/ydb.cc b/storage/tokudb/PerconaFT/src/ydb.cc index aed271bce4069..3341f6d76c624 100644 --- a/storage/tokudb/PerconaFT/src/ydb.cc +++ b/storage/tokudb/PerconaFT/src/ydb.cc @@ -1298,6 +1298,22 @@ env_get_check_thp(DB_ENV * env) { return env->i->check_thp; } +static bool env_set_dir_per_db(DB_ENV *env, bool new_val) { + HANDLE_PANICKED_ENV(env); + bool r = env->i->dir_per_db; + env->i->dir_per_db = new_val; + return r; +} + +static bool env_get_dir_per_db(DB_ENV *env) { + HANDLE_PANICKED_ENV(env); + return env->i->dir_per_db; +} + +static const char *env_get_data_dir(DB_ENV *env) { + return env->i->real_data_dir; +} + static int env_dbremove(DB_ENV * env, DB_TXN *txn, const char *fname, const char *dbname, uint32_t flags); static int @@ -2700,6 +2716,9 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) { USENV(do_backtrace); USENV(set_check_thp); USENV(get_check_thp); + USENV(set_dir_per_db); + USENV(get_dir_per_db); + USENV(get_data_dir); #undef USENV // unlocked methods @@ -3045,7 +3064,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co if (env_is_db_with_dname_open(env, newname)) { return toku_ydb_do_error(env, EINVAL, "Cannot rename dictionary; Dictionary with target name has an open handle.\n"); } - + DBT old_dname_dbt; DBT new_dname_dbt; DBT iname_dbt; @@ -3065,10 +3084,35 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co r = EEXIST; } else if (r == DB_NOTFOUND) { + DBT new_iname_dbt; + // Do not rename ft file if 'dir_per_db' option is not set + auto new_iname = + env->get_dir_per_db(env) + ? generate_iname_for_rename_or_open( + env, txn, newname, false) + : std::unique_ptr( + toku_strdup(iname), &toku_free); + toku_fill_dbt( + &new_iname_dbt, new_iname.get(), strlen(new_iname.get()) + 1); + // remove old (dname,iname) and insert (newname,iname) in directory r = toku_db_del(env->i->directory, txn, &old_dname_dbt, DB_DELETE_ANY, true); if (r != 0) { goto exit; } - r = toku_db_put(env->i->directory, txn, &new_dname_dbt, &iname_dbt, 0, true); + + // Do not rename ft file if 'dir_per_db' option is not set + if (env->get_dir_per_db(env)) + r = toku_ft_rename_iname(txn, + env->get_data_dir(env), + iname, + new_iname.get(), + env->i->cachetable); + + r = toku_db_put(env->i->directory, + txn, + &new_dname_dbt, + &new_iname_dbt, + 0, + true); if (r != 0) { goto exit; } //Now that we have writelocks on both dnames, verify that there are still no handles open. (to prevent race conditions) @@ -3091,7 +3135,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co // otherwise, we're okay in marking this ft as remove on // commit. no new handles can open for this dictionary // because the txn has directory write locks on the dname - if (txn && !can_acquire_table_lock(env, txn, iname)) { + if (txn && !can_acquire_table_lock(env, txn, new_iname.get())) { r = DB_LOCK_NOTGRANTED; } // We don't do anything at the ft or cachetable layer for rename. diff --git a/storage/tokudb/PerconaFT/src/ydb_db.cc b/storage/tokudb/PerconaFT/src/ydb_db.cc index e5bd4e7d089d5..100d1bfa20b14 100644 --- a/storage/tokudb/PerconaFT/src/ydb_db.cc +++ b/storage/tokudb/PerconaFT/src/ydb_db.cc @@ -83,8 +83,7 @@ ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp) { *statp = ydb_db_layer_status; } -static void -create_iname_hint(const char *dname, char *hint) { +void create_iname_hint(const char *dname, char *hint) { //Requires: size of hint array must be > strlen(dname) //Copy alphanumeric characters only. //Replace strings of non-alphanumeric characters with a single underscore. @@ -105,11 +104,43 @@ create_iname_hint(const char *dname, char *hint) { *hint = '\0'; } +void create_iname_hint_for_dbdir(const char *dname, char *hint) { + assert(dname); + if (*dname == '.') + ++dname; + if (*dname == '/') + ++dname; + bool underscored = false; + bool dbdir_is_parsed = false; + // Do not change the first '/' because this is + // delimiter which splits name into database dir + // and table dir. + while (*dname) { + if (isalnum(*dname) || (*dname == '/' && !dbdir_is_parsed)) { + char c = *dname++; + *hint++ = c; + if (c == '/') + dbdir_is_parsed = true; + underscored = false; + } else { + if (!underscored) + *hint++ = '_'; + dname++; + underscored = true; + } + } + *hint = '\0'; +} + // n < 0 means to ignore mark and ignore n // n >= 0 means to include mark ("_B_" or "_P_") with hex value of n in iname // (intended for use by loader, which will create many inames using one txnid). -static char * -create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *mark, int n) { +char *create_iname(DB_ENV *env, + uint64_t id1, + uint64_t id2, + char *hint, + const char *mark, + int n) { int bytes; char inamebase[strlen(hint) + 8 + // hex file format version @@ -138,6 +169,34 @@ create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *ma return rval; } +static uint64_t nontransactional_open_id = 0; + +std::unique_ptr generate_iname_for_rename_or_open( + DB_ENV *env, + DB_TXN *txn, + const char *dname, + bool is_open) { + std::unique_ptr result(nullptr, &toku_free); + char hint[strlen(dname) + 1]; + uint64_t id1 = 0; + uint64_t id2 = 0; + + if (txn) { + id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64; + id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64; + } else if (is_open) + id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1); + + if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname)) + create_iname_hint_for_dbdir(dname, hint); + else + create_iname_hint(dname, hint); + + result.reset(create_iname(env, id1, id2, hint, NULL, -1)); + + return result; +} + static int toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYPE dbtype, uint32_t flags, int mode); // Effect: Do the work required of DB->close(). @@ -227,8 +286,6 @@ db_open_subdb(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTY return r; } -static uint64_t nontransactional_open_id = 0; - // inames are created here. // algorithm: // begin txn @@ -286,27 +343,15 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1); toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC); r = toku_db_get(db->dbenv->i->directory, txn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE); // allocates memory for iname - char *iname = (char *) iname_dbt.data; + std::unique_ptr iname( + static_cast(iname_dbt.data), &toku_free); if (r == DB_NOTFOUND && !is_db_create) { r = ENOENT; } else if (r==0 && is_db_excl) { r = EEXIST; } else if (r == DB_NOTFOUND) { - char hint[strlen(dname) + 1]; - - // create iname and make entry in directory - uint64_t id1 = 0; - uint64_t id2 = 0; - - if (txn) { - id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64; - id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64; - } else { - id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1); - } - create_iname_hint(dname, hint); - iname = create_iname(db->dbenv, id1, id2, hint, NULL, -1); // allocated memory for iname - toku_fill_dbt(&iname_dbt, iname, strlen(iname) + 1); + iname = generate_iname_for_rename_or_open(db->dbenv, txn, dname, true); + toku_fill_dbt(&iname_dbt, iname.get(), strlen(iname.get()) + 1); // // put_flags will be 0 for performance only, avoid unnecessary query // if we are creating a hot index, per #3166, we do not want the write lock in directory grabbed. @@ -318,16 +363,13 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP // we now have an iname if (r == 0) { - r = toku_db_open_iname(db, txn, iname, flags, mode); + r = toku_db_open_iname(db, txn, iname.get(), flags, mode); if (r == 0) { db->i->dname = toku_xstrdup(dname); env_note_db_opened(db->dbenv, db); // tell env that a new db handle is open (using dname) } } - if (iname) { - toku_free(iname); - } return r; } @@ -1181,7 +1223,10 @@ load_inames(DB_ENV * env, DB_TXN * txn, int N, DB * dbs[/*N*/], const char * new toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1); // now create new iname char hint[strlen(dname) + 1]; - create_iname_hint(dname, hint); + if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname)) + create_iname_hint_for_dbdir(dname, hint); + else + create_iname_hint(dname, hint); const char *new_iname = create_iname(env, xid.parent_id64, xid.child_id64, hint, mark, i); // allocates memory for iname_in_env new_inames_in_env[i] = new_iname; toku_fill_dbt(&iname_dbt, new_iname, strlen(new_iname) + 1); // iname_in_env goes in directory diff --git a/storage/tokudb/PerconaFT/src/ydb_db.h b/storage/tokudb/PerconaFT/src/ydb_db.h index 8b92dd1c3cb83..8be28857c142b 100644 --- a/storage/tokudb/PerconaFT/src/ydb_db.h +++ b/storage/tokudb/PerconaFT/src/ydb_db.h @@ -43,6 +43,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ydb-internal.h" #include "ydb_txn.h" +#include + typedef enum { YDB_LAYER_DIRECTORY_WRITE_LOCKS = 0, /* total directory write locks taken */ YDB_LAYER_DIRECTORY_WRITE_LOCKS_FAIL, /* total directory write locks unable to be taken */ @@ -119,3 +121,17 @@ toku_db_destruct_autotxn(DB_TXN *txn, int r, bool changed) { } return r; } + +void create_iname_hint_for_dbdir(const char *dname, char *hint); +void create_iname_hint(const char *dname, char *hint); +char *create_iname(DB_ENV *env, + uint64_t id1, + uint64_t id2, + char *hint, + const char *mark, + int n); +std::unique_ptr generate_iname_for_rename_or_open( + DB_ENV *env, + DB_TXN *txn, + const char *dname, + bool is_open); diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess index da8331460888a..7501b1bee019d 100644 --- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess +++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess @@ -1,10 +1,10 @@ #! /bin/sh # Attempt to guess a canonical system name. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 -# Free Software Foundation, Inc. +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012 Free Software Foundation, Inc. -timestamp='2009-04-27' +timestamp='2016-06-22' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -17,9 +17,7 @@ timestamp='2009-04-27' # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA -# 02110-1301, USA. +# along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a @@ -27,16 +25,16 @@ timestamp='2009-04-27' # the same distribution terms that you use for the rest of that program. -# Originally written by Per Bothner . -# Please send patches to . Submit a context -# diff and a properly formatted ChangeLog entry. +# Originally written by Per Bothner. Please send patches (context +# diff format) to and include a ChangeLog +# entry. # # This script attempts to guess a canonical system name similar to # config.sub. If it succeeds, it prints the system name on stdout, and # exits with 0. Otherwise, it exits with 1. # -# The plan is that this can be called by configure scripts if you -# don't specify an explicit build system type. +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD me=`echo "$0" | sed -e 's,.*/,,'` @@ -56,8 +54,9 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, -2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -144,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently # switched to ELF, *-*-netbsd* would select the old # object file format. This provides both forward @@ -170,7 +169,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep __ELF__ >/dev/null + | grep -q __ELF__ then # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). # Return netbsd for either. FIX? @@ -180,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in fi ;; *) - os=netbsd + os=netbsd ;; esac # The OS release @@ -223,7 +222,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;; *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on @@ -269,7 +268,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit ;; + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + exitcode=$? + trap '' 0 + exit $exitcode ;; Alpha\ *:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # Should we change UNAME_MACHINE based on the output of uname instead @@ -295,7 +297,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo s390-ibm-zvmoe exit ;; *:OS400:*:*) - echo powerpc-ibm-os400 + echo powerpc-ibm-os400 exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} @@ -333,6 +335,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + echo i386-pc-auroraux${UNAME_RELEASE} + exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build SUN_ARCH="i386" @@ -391,23 +396,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint${UNAME_RELEASE} exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} - exit ;; + exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint${UNAME_RELEASE} exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} - exit ;; + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} - exit ;; + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} - exit ;; + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; m68k:machten:*:*) echo m68k-apple-machten${UNAME_RELEASE} exit ;; @@ -477,8 +482,8 @@ EOF echo m88k-motorola-sysv3 exit ;; AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] then if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ @@ -491,7 +496,7 @@ EOF else echo i586-dg-dgux${UNAME_RELEASE} fi - exit ;; + exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) echo m88k-dolphin-sysv3 exit ;; @@ -548,7 +553,7 @@ EOF echo rs6000-ibm-aix3.2 fi exit ;; - *:AIX:*:[456]) + *:AIX:*:[4567]) IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 @@ -591,52 +596,52 @@ EOF 9000/[678][0-9][0-9]) if [ -x /usr/bin/getconf ]; then sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 - esac ;; - esac + esac ;; + esac fi if [ "${HP_ARCH}" = "" ]; then eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + sed 's/^ //' << EOF >$dummy.c - #define _HPUX_SOURCE - #include - #include + #define _HPUX_SOURCE + #include + #include - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } EOF (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa @@ -656,7 +661,7 @@ EOF # => hppa64-hp-hpux11.23 if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | - grep __LP64__ >/dev/null + grep -q __LP64__ then HP_ARCH="hppa2.0w" else @@ -727,22 +732,22 @@ EOF exit ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) echo c1-convex-bsd - exit ;; + exit ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi - exit ;; + exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) echo c34-convex-bsd - exit ;; + exit ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) echo c38-convex-bsd - exit ;; + exit ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) echo c4-convex-bsd - exit ;; + exit ;; CRAY*Y-MP:*:*:*) echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; @@ -766,14 +771,14 @@ EOF exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} @@ -785,13 +790,12 @@ EOF echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} exit ;; *:FreeBSD:*:*) - case ${UNAME_MACHINE} in - pc98) - echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + UNAME_PROCESSOR=`/usr/bin/uname -p` + case ${UNAME_PROCESSOR} in amd64) echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; *) - echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; esac exit ;; i*:CYGWIN*:*) @@ -800,19 +804,22 @@ EOF *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 exit ;; + i*:MSYS*:*) + echo ${UNAME_MACHINE}-pc-msys + exit ;; i*:windows32*:*) - # uname -m includes "-pc" on this system. - echo ${UNAME_MACHINE}-mingw32 + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 exit ;; i*:PW*:*) echo ${UNAME_MACHINE}-pc-pw32 exit ;; - *:Interix*:[3456]*) - case ${UNAME_MACHINE} in + *:Interix*:*) + case ${UNAME_MACHINE} in x86) echo i586-pc-interix${UNAME_RELEASE} exit ;; - EM64T | authenticamd | genuineintel) + authenticamd | genuineintel | EM64T) echo x86_64-unknown-interix${UNAME_RELEASE} exit ;; IA64) @@ -822,6 +829,9 @@ EOF [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) echo i${UNAME_MACHINE}-pc-mks exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; i*:Windows_NT*:* | Pentium*:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we @@ -851,6 +861,27 @@ EOF i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix exit ;; + aarch64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; arm*:Linux:*:*) eval $set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ @@ -858,20 +889,40 @@ EOF then echo ${UNAME_MACHINE}-unknown-linux-gnu else - echo ${UNAME_MACHINE}-unknown-linux-gnueabi + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo ${UNAME_MACHINE}-unknown-linux-gnueabi + else + echo ${UNAME_MACHINE}-unknown-linux-gnueabihf + fi fi exit ;; avr32*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; cris:Linux:*:*) - echo cris-axis-linux-gnu + echo ${UNAME_MACHINE}-axis-linux-gnu exit ;; crisv32:Linux:*:*) - echo crisv32-axis-linux-gnu + echo ${UNAME_MACHINE}-axis-linux-gnu exit ;; frv:Linux:*:*) - echo frv-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + hexagon:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + LIBC=gnu + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" exit ;; ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu @@ -882,78 +933,34 @@ EOF m68*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; - mips:Linux:*:*) + mips:Linux:*:* | mips64:Linux:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #undef CPU - #undef mips - #undef mipsel + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mipsel + CPU=${UNAME_MACHINE}el #else #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips + CPU=${UNAME_MACHINE} #else CPU= #endif #endif EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^CPU/{ - s: ::g - p - }'`" - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } - ;; - mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips64 - #undef mips64el - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mips64el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips64 - #else - CPU= - #endif - #endif -EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^CPU/{ - s: ::g - p - }'`" + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } ;; or32:Linux:*:*) - echo or32-unknown-linux-gnu - exit ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu - exit ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu - exit ;; - alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in - EV5) UNAME_MACHINE=alphaev5 ;; - EV56) UNAME_MACHINE=alphaev56 ;; - PCA56) UNAME_MACHINE=alphapca56 ;; - PCA57) UNAME_MACHINE=alphapca56 ;; - EV6) UNAME_MACHINE=alphaev6 ;; - EV67) UNAME_MACHINE=alphaev67 ;; - EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null - if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; padre:Linux:*:*) echo sparc-unknown-linux-gnu exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in @@ -962,14 +969,17 @@ EOF *) echo hppa-unknown-linux-gnu ;; esac exit ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux exit ;; sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; sh*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu @@ -977,75 +987,18 @@ EOF sparc:Linux:*:* | sparc64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; + tile*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; vax:Linux:*:*) echo ${UNAME_MACHINE}-dec-linux-gnu exit ;; x86_64:Linux:*:*) - echo x86_64-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; xtensa*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; - i*86:Linux:*:*) - # The BFD linker knows what the default object file format is, so - # first see if it will tell us. cd to the root directory to prevent - # problems with other programs or directories called `ld' in the path. - # Set LC_ALL=C to ensure ld outputs messages in English. - ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ - | sed -ne '/supported targets:/!d - s/[ ][ ]*/ /g - s/.*supported targets: *// - s/ .*// - p'` - case "$ld_supported_targets" in - elf32-i386) - TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" - ;; - a.out-i386-linux) - echo "${UNAME_MACHINE}-pc-linux-gnuaout" - exit ;; - "") - # Either a pre-BFD a.out linker (linux-gnuoldld) or - # one that does not give us useful --help. - echo "${UNAME_MACHINE}-pc-linux-gnuoldld" - exit ;; - esac - # Determine whether the default compiler is a.out or elf - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - #ifdef __ELF__ - # ifdef __GLIBC__ - # if __GLIBC__ >= 2 - LIBC=gnu - # else - LIBC=gnulibc1 - # endif - # else - LIBC=gnulibc1 - # endif - #else - #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) - LIBC=gnu - #else - LIBC=gnuaout - #endif - #endif - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' - /^LIBC/{ - s: ::g - p - }'`" - test x"${LIBC}" != x && { - echo "${UNAME_MACHINE}-pc-linux-${LIBC}" - exit - } - test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; } - ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both @@ -1053,11 +1006,11 @@ EOF echo i386-sequent-sysv4 exit ;; i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. + # Use sysv4.2uw... so that sysv4* matches it. echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} exit ;; i*86:OS/2:*:*) @@ -1074,7 +1027,7 @@ EOF i*86:syllable:*:*) echo ${UNAME_MACHINE}-pc-syllable exit ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) echo i386-unknown-lynxos${UNAME_RELEASE} exit ;; i*86:*DOS:*:*) @@ -1089,7 +1042,7 @@ EOF fi exit ;; i*86:*:5:[678]*) - # UnixWare 7.x, OpenUNIX and OpenServer 6. + # UnixWare 7.x, OpenUNIX and OpenServer 6. case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; @@ -1117,13 +1070,13 @@ EOF exit ;; pc:*:*:*) # Left here for compatibility: - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i586. + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub # prints for the "djgpp" host, or else GDB configury will decide that # this is a cross-build. echo i586-pc-msdosdjgpp - exit ;; + exit ;; Intel:Mach:3*:*) echo i386-pc-mach3 exit ;; @@ -1158,8 +1111,8 @@ EOF /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4; exit; } ;; + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; NCR*:*:4.2:* | MPRAS*:*:4.2:*) OS_REL='.3' test -r /etc/.relid \ @@ -1182,7 +1135,7 @@ EOF rs6000:LynxOS:2.*:*) echo rs6000-unknown-lynxos${UNAME_RELEASE} exit ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) echo powerpc-unknown-lynxos${UNAME_RELEASE} exit ;; SM[BE]S:UNIX_SV:*:*) @@ -1202,10 +1155,10 @@ EOF echo ns32k-sni-sysv fi exit ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says - echo i586-unisys-sysv4 - exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm @@ -1231,11 +1184,11 @@ EOF exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} + echo mips-nec-sysv${UNAME_RELEASE} else - echo mips-unknown-sysv${UNAME_RELEASE} + echo mips-unknown-sysv${UNAME_RELEASE} fi - exit ;; + exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. echo powerpc-be-beos exit ;; @@ -1275,6 +1228,16 @@ EOF *:Darwin:*:*) UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown case $UNAME_PROCESSOR in + i386) + eval $set_cc_for_build + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + UNAME_PROCESSOR="x86_64" + fi + fi ;; unknown) UNAME_PROCESSOR=powerpc ;; esac echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} @@ -1290,6 +1253,9 @@ EOF *:QNX:*:4*) echo i386-pc-qnx exit ;; + NEO-?:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk${UNAME_RELEASE} + exit ;; NSE-?:NONSTOP_KERNEL:*:*) echo nse-tandem-nsk${UNAME_RELEASE} exit ;; @@ -1335,13 +1301,13 @@ EOF echo pdp10-unknown-its exit ;; SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} + echo mips-sei-seiux${UNAME_RELEASE} exit ;; *:DragonFly:*:*) echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` exit ;; *:*VMS:*:*) - UNAME_MACHINE=`(uname -p) 2>/dev/null` + UNAME_MACHINE=`(uname -p) 2>/dev/null` case "${UNAME_MACHINE}" in A*) echo alpha-dec-vms ; exit ;; I*) echo ia64-dec-vms ; exit ;; @@ -1359,6 +1325,9 @@ EOF i*86:AROS:*:*) echo ${UNAME_MACHINE}-pc-aros exit ;; + x86_64:VMkernel:*:*) + echo ${UNAME_MACHINE}-unknown-esx + exit ;; esac #echo '(No uname command or uname output not recognized.)' 1>&2 @@ -1381,11 +1350,11 @@ main () #include printf ("m68k-sony-newsos%s\n", #ifdef NEWSOS4 - "4" + "4" #else - "" + "" #endif - ); exit (0); + ); exit (0); #endif #endif diff --git a/storage/tokudb/PerconaFT/tools/CMakeLists.txt b/storage/tokudb/PerconaFT/tools/CMakeLists.txt index af82b4357d29e..f11b9f350d726 100644 --- a/storage/tokudb/PerconaFT/tools/CMakeLists.txt +++ b/storage/tokudb/PerconaFT/tools/CMakeLists.txt @@ -1,6 +1,6 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE DONT_DEPRECATE_ERRNO) -set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify ba_replay) +set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify) foreach(tool ${tools}) add_executable(${tool} ${tool}.cc) add_dependencies(${tool} install_tdb_h) @@ -14,4 +14,3 @@ target_link_libraries(ftverify m) install(TARGETS tokuftdump DESTINATION ${INSTALL_BINDIR} COMPONENT Server) install(TARGETS tokuft_logprint DESTINATION ${INSTALL_BINDIR} COMPONENT Server) - diff --git a/storage/tokudb/PerconaFT/tools/ba_replay.cc b/storage/tokudb/PerconaFT/tools/ba_replay.cc deleted file mode 100644 index cade7e5dfafc6..0000000000000 --- a/storage/tokudb/PerconaFT/tools/ba_replay.cc +++ /dev/null @@ -1,629 +0,0 @@ -/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: -#ident "$Id$" -/*====== -This file is part of PerconaFT. - - -Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License, version 2, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with PerconaFT. If not, see . - ----------------------------------------- - - PerconaFT is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License, version 3, - as published by the Free Software Foundation. - - PerconaFT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with PerconaFT. If not, see . -======= */ - -#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." - -// Replay a block allocator trace against different strategies and compare -// the results - -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "ft/serialize/block_allocator.h" - -using std::map; -using std::set; -using std::string; -using std::vector; - -static int verbose = false; - -static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) { - if (!pred) { - fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line); - abort(); - } -} - -static char *trim_whitespace(char *line) { - // skip leading whitespace - while (isspace(*line)) { - line++; - } - return line; -} - -static int64_t parse_number(char **ptr, int line_num, int base) { - *ptr = trim_whitespace(*ptr); - char *line = *ptr; - - char *new_ptr; - int64_t n = strtoll(line, &new_ptr, base); - ba_replay_assert(n >= 0, "malformed trace (bad numeric token)", line, line_num); - ba_replay_assert(new_ptr > *ptr, "malformed trace (missing numeric token)", line, line_num); - *ptr = new_ptr; - return n; -} - -static uint64_t parse_uint64(char **ptr, int line_num) { - int64_t n = parse_number(ptr, line_num, 10); - // we happen to know that the uint64's we deal with will - // take less than 63 bits (they come from pointers) - return static_cast(n); -} - -static string parse_token(char **ptr, int line_num) { - *ptr = trim_whitespace(*ptr); - char *line = *ptr; - - // parse the first token, which represents the traced function - char token[64]; - int r = sscanf(*ptr, "%64s", token); - ba_replay_assert(r == 1, "malformed trace (missing string token)", line, line_num); - *ptr += strlen(token); - return string(token); -} - -static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) { - *ptr = trim_whitespace(*ptr); - char *line = *ptr; - - uint64_t offset, size; - int bytes_read; - int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read); - ba_replay_assert(r == 2, "malformed trace (bad offset/size pair)", line, line_num); - *ptr += bytes_read; - return block_allocator::blockpair(offset, size); -} - -static char *strip_newline(char *line, bool *found) { - char *ptr = strchr(line, '\n'); - if (ptr != nullptr) { - if (found != nullptr) { - *found = true; - } - *ptr = '\0'; - } - return line; -} - -static char *read_trace_line(FILE *file) { - const int buf_size = 4096; - char buf[buf_size]; - std::stringstream ss; - while (true) { - if (fgets(buf, buf_size, file) == nullptr) { - break; - } - bool has_newline = false; - ss << strip_newline(buf, &has_newline); - if (has_newline) { - // end of the line, we're done out - break; - } - } - std::string s = ss.str(); - return s.size() ? toku_strdup(s.c_str()) : nullptr; -} - -static vector canonicalize_trace_from(FILE *file) { - // new trace, canonicalized from a raw trace - vector canonicalized_trace; - - // raw allocator id -> canonical allocator id - // - // keeps track of allocators that were created as part of the trace, - // and therefore will be part of the canonicalized trace. - uint64_t allocator_id_seq_num = 0; - map allocator_ids; - - // allocated offset -> allocation seq num - // - uint64_t allocation_seq_num = 0; - static const uint64_t ASN_NONE = (uint64_t) -1; - typedef map offset_seq_map; - - // raw allocator id -> offset_seq_map that tracks its allocations - map offset_to_seq_num_maps; - - int line_num = 0; - char *line; - while ((line = read_trace_line(file)) != nullptr) { - line_num++; - char *ptr = line; - - string fn = parse_token(&ptr, line_num); - int64_t allocator_id = parse_number(&ptr, line_num, 16); - - std::stringstream ss; - if (fn.find("ba_trace_create") != string::npos) { - ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num); - ba_replay_assert(fn == "ba_trace_create" || fn == "ba_trace_create_from_blockpairs", - "corrupted trace: bad fn", line, line_num); - - // we only convert the allocator_id to an allocator_id_seq_num - // in the canonical trace and leave the rest of the line as-is. - allocator_ids[allocator_id] = allocator_id_seq_num; - ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl; - allocator_id_seq_num++; - - // First, read passed the reserve / alignment values. - (void) parse_uint64(&ptr, line_num); - (void) parse_uint64(&ptr, line_num); - if (fn == "ba_trace_create_from_blockpairs") { - // For each blockpair created by this traceline, add its offset to the offset seq map - // with asn ASN_NONE so that later canonicalizations of `free' know whether to write - // down the asn or the raw offset. - offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; - while (*trim_whitespace(ptr) != '\0') { - const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); - (*map)[bp.offset] = ASN_NONE; - } - } - } else { - ba_replay_assert(allocator_ids.count(allocator_id) > 0, "corrupted trace: unknown allocator", line, line_num); - uint64_t canonical_allocator_id = allocator_ids[allocator_id]; - - // this is the map that tracks allocations for this allocator - offset_seq_map *map = &offset_to_seq_num_maps[allocator_id]; - - if (fn == "ba_trace_alloc") { - const uint64_t size = parse_uint64(&ptr, line_num); - const uint64_t heat = parse_uint64(&ptr, line_num); - const uint64_t offset = parse_uint64(&ptr, line_num); - ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num); - - // remember that an allocation at `offset' has the current alloc seq num - (*map)[offset] = allocation_seq_num; - - // translate `offset = alloc(size)' to `asn = alloc(size)' - ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl; - allocation_seq_num++; - } else if (fn == "ba_trace_free") { - const uint64_t offset = parse_uint64(&ptr, line_num); - ba_replay_assert(map->count(offset) != 0, "corrupted trace: invalid free", line, line_num); - - // get the alloc seq num for an allcation that occurred at `offset' - const uint64_t asn = (*map)[offset]; - map->erase(offset); - - // if there's an asn, then a corresponding ba_trace_alloc occurred and we should - // write `free(asn)'. otherwise, the blockpair was initialized from create_from_blockpairs - // and we write the original offset. - if (asn != ASN_NONE) { - ss << "ba_trace_free_asn" << ' ' << canonical_allocator_id << ' ' << asn << std::endl; - } else { - ss << "ba_trace_free_offset" << ' ' << canonical_allocator_id << ' ' << offset << std::endl; - } - } else if (fn == "ba_trace_destroy") { - // Remove this allocator from both maps - allocator_ids.erase(allocator_id); - offset_to_seq_num_maps.erase(allocator_id); - - // translate `destroy(ptr_id) to destroy(canonical_id)' - ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl; - } else { - ba_replay_assert(false, "corrupted trace: bad fn", line, line_num); - } - } - canonicalized_trace.push_back(ss.str()); - - toku_free(line); - } - - if (allocator_ids.size() != 0) { - fprintf(stderr, "warning: leaked allocators. this might be ok if the tracing process is still running"); - } - - return canonicalized_trace; -} - -struct streaming_variance_calculator { - int64_t n_samples; - int64_t mean; - int64_t variance; - - // math credit: AoCP, Donald Knuth, '62 - void add_sample(int64_t x) { - n_samples++; - if (n_samples == 1) { - mean = x; - variance = 0; - } else { - int64_t old_mean = mean; - mean = old_mean + ((x - old_mean) / n_samples); - variance = (((n_samples - 1) * variance) + - ((x - old_mean) * (x - mean))) / n_samples; - } - } -}; - -struct canonical_trace_stats { - uint64_t n_lines_replayed; - - uint64_t n_create; - uint64_t n_create_from_blockpairs; - uint64_t n_alloc_hot; - uint64_t n_alloc_cold; - uint64_t n_free; - uint64_t n_destroy; - - struct streaming_variance_calculator alloc_hot_bytes; - struct streaming_variance_calculator alloc_cold_bytes; - - canonical_trace_stats() { - memset(this, 0, sizeof(*this)); - } -}; - -struct fragmentation_report { - TOKU_DB_FRAGMENTATION_S beginning; - TOKU_DB_FRAGMENTATION_S end; - fragmentation_report() { - memset(this, 0, sizeof(*this)); - } - void merge(const struct fragmentation_report &src_report) { - for (int i = 0; i < 2; i++) { - TOKU_DB_FRAGMENTATION_S *dst = i == 0 ? &beginning : &end; - const TOKU_DB_FRAGMENTATION_S *src = i == 0 ? &src_report.beginning : &src_report.end; - dst->file_size_bytes += src->file_size_bytes; - dst->data_bytes += src->data_bytes; - dst->data_blocks += src->data_blocks; - dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional; - dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional; - dst->unused_bytes += src->unused_bytes; - dst->unused_blocks += src->unused_blocks; - dst->largest_unused_block += src->largest_unused_block; - } - } -}; - -static void replay_canonicalized_trace(const vector &canonicalized_trace, - block_allocator::allocation_strategy strategy, - map *reports, - struct canonical_trace_stats *stats) { - // maps an allocator id to its block allocator - map allocator_map; - - // maps allocation seq num to allocated offset - map seq_num_to_offset; - - for (vector::const_iterator it = canonicalized_trace.begin(); - it != canonicalized_trace.end(); it++) { - const int line_num = stats->n_lines_replayed++; - - char *line = toku_strdup(it->c_str()); - line = strip_newline(line, nullptr); - - char *ptr = trim_whitespace(line); - - // canonical allocator id is in base 10, not 16 - string fn = parse_token(&ptr, line_num); - int64_t allocator_id = parse_number(&ptr, line_num, 10); - - if (fn.find("ba_trace_create") != string::npos) { - const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num); - const uint64_t alignment = parse_uint64(&ptr, line_num); - ba_replay_assert(allocator_map.count(allocator_id) == 0, - "corrupted canonical trace: double create", line, line_num); - - block_allocator *ba = new block_allocator(); - if (fn == "ba_trace_create") { - ba->create(reserve_at_beginning, alignment); - stats->n_create++; - } else { - ba_replay_assert(fn == "ba_trace_create_from_blockpairs", - "corrupted canonical trace: bad create fn", line, line_num); - vector pairs; - while (*trim_whitespace(ptr) != '\0') { - const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num); - pairs.push_back(bp); - } - ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size()); - stats->n_create_from_blockpairs++; - } - ba->set_strategy(strategy); - - TOKU_DB_FRAGMENTATION_S report; - ba->get_statistics(&report); - (*reports)[allocator_id].beginning = report; - allocator_map[allocator_id] = ba; - } else { - ba_replay_assert(allocator_map.count(allocator_id) > 0, - "corrupted canonical trace: no such allocator", line, line_num); - - block_allocator *ba = allocator_map[allocator_id]; - if (fn == "ba_trace_alloc") { - // replay an `alloc' whose result will be associated with a certain asn - const uint64_t size = parse_uint64(&ptr, line_num); - const uint64_t heat = parse_uint64(&ptr, line_num); - const uint64_t asn = parse_uint64(&ptr, line_num); - ba_replay_assert(seq_num_to_offset.count(asn) == 0, - "corrupted canonical trace: double alloc (asn in use)", line, line_num); - - uint64_t offset; - ba->alloc_block(size, heat, &offset); - seq_num_to_offset[asn] = offset; - heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++; - heat ? stats->alloc_hot_bytes.add_sample(size) : stats->alloc_cold_bytes.add_sample(size); - } else if (fn == "ba_trace_free_asn") { - // replay a `free' on a block whose offset is the result of an alloc with an asn - const uint64_t asn = parse_uint64(&ptr, line_num); - ba_replay_assert(seq_num_to_offset.count(asn) == 1, - "corrupted canonical trace: double free (asn unused)", line, line_num); - - const uint64_t offset = seq_num_to_offset[asn]; - ba->free_block(offset); - seq_num_to_offset.erase(asn); - stats->n_free++; - } else if (fn == "ba_trace_free_offset") { - // replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs - const uint64_t offset = parse_uint64(&ptr, line_num); - ba->free_block(offset); - stats->n_free++; - } else if (fn == "ba_trace_destroy") { - TOKU_DB_FRAGMENTATION_S report; - ba->get_statistics(&report); - ba->destroy(); - (*reports)[allocator_id].end = report; - allocator_map.erase(allocator_id); - stats->n_destroy++; - } else { - ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num); - } - } - - toku_free(line); - } -} - -static const char *strategy_to_cstring(block_allocator::allocation_strategy strategy) { - switch (strategy) { - case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT: - return "first-fit"; - case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT: - return "best-fit"; - case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE: - return "heat-zone"; - case block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT: - return "padded-fit"; - default: - abort(); - } -} - -static block_allocator::allocation_strategy cstring_to_strategy(const char *str) { - if (strcmp(str, "first-fit") == 0) { - return block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT; - } - if (strcmp(str, "best-fit") == 0) { - return block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT; - } - if (strcmp(str, "heat-zone") == 0) { - return block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE; - } - if (strcmp(str, "padded-fit") != 0) { - fprintf(stderr, "bad strategy string: %s\n", str); - abort(); - } - return block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT; -} - -static void print_result_verbose(uint64_t allocator_id, - block_allocator::allocation_strategy strategy, - const struct fragmentation_report &report) { - if (report.end.data_bytes + report.end.unused_bytes + - report.beginning.data_bytes + report.beginning.unused_bytes - < 32UL * 1024 * 1024) { - printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); - return; - } - - printf(" allocator_id: %20" PRId64 "\n", allocator_id); - printf(" strategy: %20s\n", strategy_to_cstring(strategy)); - - for (int i = 0; i < 2; i++) { - const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end; - printf("%s\n", i == 0 ? "BEFORE" : "AFTER"); - - uint64_t total_bytes = r->data_bytes + r->unused_bytes; - uint64_t total_blocks = r->data_blocks + r->unused_blocks; - - // byte statistics - printf(" total bytes: %20" PRId64 "\n", total_bytes); - printf(" used bytes: %20" PRId64 " (%.3lf)\n", r->data_bytes, - static_cast(r->data_bytes) / total_bytes); - printf(" unused bytes: %20" PRId64 " (%.3lf)\n", r->unused_bytes, - static_cast(r->unused_bytes) / total_bytes); - - // block statistics - printf(" total blocks: %20" PRId64 "\n", total_blocks); - printf(" used blocks: %20" PRId64 " (%.3lf)\n", r->data_blocks, - static_cast(r->data_blocks) / total_blocks); - printf(" unused blocks: %20" PRId64 " (%.3lf)\n", r->unused_blocks, - static_cast(r->unused_blocks) / total_blocks); - - // misc - printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block); - } -} - -static void print_result(uint64_t allocator_id, - block_allocator::allocation_strategy strategy, - const struct fragmentation_report &report) { - const TOKU_DB_FRAGMENTATION_S *beginning = &report.beginning; - const TOKU_DB_FRAGMENTATION_S *end = &report.end; - - uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes; - uint64_t total_end_bytes = end->data_bytes + end->unused_bytes; - if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) { - if (verbose) { - printf("\n"); - printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); - } - return; - } - printf("\n"); - if (verbose) { - print_result_verbose(allocator_id, strategy, report); - } else { - printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n", - strategy_to_cstring(strategy), allocator_id, - static_cast(report.end.data_bytes) / total_end_bytes, - static_cast(report.beginning.data_bytes) / total_beginning_bytes); - } -} - -static int only_aggregate_reports; - -static struct option getopt_options[] = { - { "verbose", no_argument, &verbose, 1 }, - { "only-aggregate-reports", no_argument, &only_aggregate_reports, 1 }, - { "include-strategy", required_argument, nullptr, 'i' }, - { "exclude-strategy", required_argument, nullptr, 'x' }, - { nullptr, 0, nullptr, 0 }, -}; - -int main(int argc, char *argv[]) { - int opt; - set candidate_strategies, excluded_strategies; - while ((opt = getopt_long(argc, argv, "", getopt_options, nullptr)) != -1) { - switch (opt) { - case 0: - break; - case 'i': - candidate_strategies.insert(cstring_to_strategy(optarg)); - break; - case 'x': - excluded_strategies.insert(cstring_to_strategy(optarg)); - break; - case '?': - default: - abort(); - }; - } - // Default to everything if nothing was explicitly included. - if (candidate_strategies.empty()) { - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT); - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT); - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT); - candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE); - } - // ..but remove anything that was explicitly excluded - for (set::const_iterator it = excluded_strategies.begin(); - it != excluded_strategies.end(); it++) { - candidate_strategies.erase(*it); - } - - // Run the real trace - // - // First, read the raw trace from stdin - vector canonicalized_trace = canonicalize_trace_from(stdin); - - if (!only_aggregate_reports) { - printf("\n"); - printf("Individual reports, by allocator:\n"); - } - - struct canonical_trace_stats stats; - map reports_by_strategy; - for (set::const_iterator it = candidate_strategies.begin(); - it != candidate_strategies.end(); it++) { - const block_allocator::allocation_strategy strategy(*it); - - // replay the canonicalized trace against the current strategy. - // - // we provided the allocator map so we can gather statistics later - struct canonical_trace_stats dummy_stats; - map reports; - replay_canonicalized_trace(canonicalized_trace, strategy, &reports, - // Only need to gather canonical trace stats once - it == candidate_strategies.begin() ? &stats : &dummy_stats); - - struct fragmentation_report aggregate_report; - memset(&aggregate_report, 0, sizeof(aggregate_report)); - for (map::iterator rp = reports.begin(); - rp != reports.end(); rp++) { - const struct fragmentation_report &report = rp->second; - aggregate_report.merge(report); - if (!only_aggregate_reports) { - print_result(rp->first, strategy, report); - } - } - reports_by_strategy[strategy] = aggregate_report; - } - - printf("\n"); - printf("Aggregate reports, by strategy:\n"); - - for (map::iterator it = reports_by_strategy.begin(); - it != reports_by_strategy.end(); it++) { - print_result(0, it->first, it->second); - } - - printf("\n"); - printf("Overall trace stats:\n"); - printf("\n"); - printf(" n_lines_played: %15" PRIu64 "\n", stats.n_lines_replayed); - printf(" n_create: %15" PRIu64 "\n", stats.n_create); - printf(" n_create_from_blockpairs: %15" PRIu64 "\n", stats.n_create_from_blockpairs); - printf(" n_alloc_hot: %15" PRIu64 "\n", stats.n_alloc_hot); - printf(" n_alloc_cold: %15" PRIu64 "\n", stats.n_alloc_cold); - printf(" n_free: %15" PRIu64 "\n", stats.n_free); - printf(" n_destroy: %15" PRIu64 "\n", stats.n_destroy); - printf("\n"); - printf(" avg_alloc_hot: %15" PRIu64 "\n", stats.alloc_hot_bytes.mean); - printf(" stddev_alloc_hot: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_hot_bytes.variance)); - printf(" avg_alloc_cold: %15" PRIu64 "\n", stats.alloc_cold_bytes.mean); - printf(" stddev_alloc_cold: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_cold_bytes.variance)); - printf("\n"); - - return 0; -} diff --git a/storage/tokudb/PerconaFT/tools/ftverify.cc b/storage/tokudb/PerconaFT/tools/ftverify.cc index 5920be8dedaa7..2324249ba00f2 100644 --- a/storage/tokudb/PerconaFT/tools/ftverify.cc +++ b/storage/tokudb/PerconaFT/tools/ftverify.cc @@ -148,7 +148,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p) } } { - toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE; + toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; r1 = deserialize_ft_from_fd_into_rbuf( fd, header_1_off, diff --git a/storage/tokudb/PerconaFT/tools/tokuftdump.cc b/storage/tokudb/PerconaFT/tools/tokuftdump.cc index 23ef72218ac31..f6d777b416113 100644 --- a/storage/tokudb/PerconaFT/tools/tokuftdump.cc +++ b/storage/tokudb/PerconaFT/tools/tokuftdump.cc @@ -192,6 +192,7 @@ static void dump_header(FT ft) { dump_descriptor(&ft->descriptor); printf(" estimated numrows=%" PRId64 "\n", ft->in_memory_stats.numrows); printf(" estimated numbytes=%" PRId64 "\n", ft->in_memory_stats.numbytes); + printf(" logical row count=%" PRId64 "\n", ft->in_memory_logical_rows); } static int64_t getRootNode(FT ft) { diff --git a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc index 48ff28e89af15..76b1d9c713e48 100644 --- a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc +++ b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc @@ -110,7 +110,7 @@ test2 (void) { static void test3 (void) -// Compare the simple version to the highly optimized verison. +// Compare the simple version to the highly optimized version. { const int datalen = 1000; char data[datalen]; diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 62a704676618a..0afe9958b856c 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -414,17 +414,17 @@ void TOKUDB_SHARE::update_row_count( pct_of_rows_changed_to_trigger = ((_rows * auto_threshold) / 100); if (_row_delta_activity >= pct_of_rows_changed_to_trigger) { char msg[200]; - snprintf( - msg, - sizeof(msg), - "TokuDB: Auto %s background analysis for %s, delta_activity " - "%llu is greater than %llu percent of %llu rows.", - tokudb::sysvars::analyze_in_background(thd) > 0 ? - "scheduling" : "running", - full_table_name(), - _row_delta_activity, - auto_threshold, - (ulonglong)(_rows)); + snprintf(msg, + sizeof(msg), + "TokuDB: Auto %s analysis for %s, delta_activity %llu is " + "greater than %llu percent of %llu rows.", + tokudb::sysvars::analyze_in_background(thd) > 0 + ? "scheduling background" + : "running foreground", + full_table_name(), + _row_delta_activity, + auto_threshold, + (ulonglong)(_rows)); // analyze_standard will unlock _mutex regardless of success/failure int ret = analyze_standard(thd, NULL); @@ -4129,7 +4129,7 @@ int ha_tokudb::write_row(uchar * record) { goto cleanup; } if (curr_num_DBs == 1) { - error = insert_row_to_main_dictionary(record,&prim_key, &row, txn); + error = insert_row_to_main_dictionary(record, &prim_key, &row, txn); if (error) { goto cleanup; } } else { error = insert_rows_to_dictionaries_mult(&prim_key, &row, txn, thd); @@ -6176,7 +6176,7 @@ int ha_tokudb::info(uint flag) { // we should always have a primary key assert_always(share->file != NULL); - error = estimate_num_rows(share->file,&num_rows, txn); + error = estimate_num_rows(share->file, &num_rows, txn); if (error == 0) { share->set_row_count(num_rows, false); stats.records = num_rows; diff --git a/storage/tokudb/ha_tokudb_admin.cc b/storage/tokudb/ha_tokudb_admin.cc index db3d6c112d499..6d8e7173c8d8b 100644 --- a/storage/tokudb/ha_tokudb_admin.cc +++ b/storage/tokudb/ha_tokudb_admin.cc @@ -7,7 +7,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. @@ -43,13 +43,11 @@ class recount_rows_t : public tokudb::background::job_manager_t::job_t { virtual ~recount_rows_t(); virtual const char* key(); - - virtual void status( - char* database, - char* table, - char* type, - char* params, - char* status); + virtual const char* database(); + virtual const char* table(); + virtual const char* type(); + virtual const char* parameters(); + virtual const char* status(); protected: virtual void on_run(); @@ -64,6 +62,8 @@ class recount_rows_t : public tokudb::background::job_manager_t::job_t { ulonglong _throttle; // for recount rows status reporting + char _parameters[256]; + char _status[1024]; int _result; ulonglong _recount_start; // in microseconds ulonglong _total_elapsed_time; // in microseconds @@ -78,7 +78,6 @@ class recount_rows_t : public tokudb::background::job_manager_t::job_t { uint64_t deleted, void* extra); int analyze_recount_rows_progress(uint64_t count, uint64_t deleted); - void get_analyze_status(char*); }; void* recount_rows_t::operator new(size_t sz) { @@ -114,10 +113,19 @@ recount_rows_t::recount_rows_t( } _throttle = tokudb::sysvars::analyze_throttle(thd); + + snprintf(_parameters, + sizeof(_parameters), + "TOKUDB_ANALYZE_THROTTLE=%llu;", + _throttle); + _status[0] = '\0'; } recount_rows_t::~recount_rows_t() { } void recount_rows_t::on_run() { + const char* orig_proc_info = NULL; + if (_thd) + orig_proc_info = tokudb_thd_get_proc_info(_thd); _recount_start = tokudb::time::microsec(); _total_elapsed_time = 0; @@ -171,6 +179,8 @@ void recount_rows_t::on_run() { _result, _share->row_count()); error: + if(_thd) + tokudb_thd_set_proc_info(_thd, orig_proc_info); return; } void recount_rows_t::on_destroy() { @@ -179,18 +189,21 @@ void recount_rows_t::on_destroy() { const char* recount_rows_t::key() { return _share->full_table_name(); } -void recount_rows_t::status( - char* database, - char* table, - char* type, - char* params, - char* status) { - - strcpy(database, _share->database_name()); - strcpy(table, _share->table_name()); - strcpy(type, "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS"); - sprintf(params, "TOKUDB_ANALYZE_THROTTLE=%llu;", _throttle); - get_analyze_status(status); +const char* recount_rows_t::database() { + return _share->database_name(); +} +const char* recount_rows_t::table() { + return _share->table_name(); +} +const char* recount_rows_t::type() { + static const char* type = "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS"; + return type; +} +const char* recount_rows_t::parameters() { + return _parameters; +} +const char* recount_rows_t::status() { + return _status; } int recount_rows_t::analyze_recount_rows_progress( uint64_t count, @@ -217,12 +230,32 @@ int recount_rows_t::analyze_recount_rows_progress( return ER_ABORTING_CONNECTION; } + // rebuild status + // There is a slight race condition here, + // _status is used here for tokudb_thd_set_proc_info and it is also used + // for the status column in i_s.background_job_status. + // If someone happens to be querying/building the i_s table + // at the exact same time that the status is being rebuilt here, + // the i_s table could get some garbage status. + // This solution is a little heavy handed but it works, it prevents us + // from changing the status while someone might be immediately observing + // us and it prevents someone from observing us while we change the + // status + tokudb::background::_job_manager->lock(); + snprintf(_status, + sizeof(_status), + "recount_rows %s.%s counted %llu rows and %llu deleted " + "in %llu seconds.", + _share->database_name(), + _share->table_name(), + _rows, + _deleted_rows, + _total_elapsed_time / tokudb::time::MICROSECONDS); + tokudb::background::_job_manager->unlock(); + // report - if (_thd) { - char status[256]; - get_analyze_status(status); - thd_proc_info(_thd, status); - } + if (_thd) + tokudb_thd_set_proc_info(_thd, _status); // throttle // given the throttle value, lets calculate the maximum number of rows @@ -238,18 +271,6 @@ int recount_rows_t::analyze_recount_rows_progress( } return 0; } -void recount_rows_t::get_analyze_status(char* msg) { - sprintf( - msg, - "recount_rows %s.%s counted %llu rows and %llu deleted in %llu " - "seconds.", - _share->database_name(), - _share->table_name(), - _rows, - _deleted_rows, - _total_elapsed_time / tokudb::time::MICROSECONDS); -} - class standard_t : public tokudb::background::job_manager_t::job_t { public: @@ -261,13 +282,11 @@ class standard_t : public tokudb::background::job_manager_t::job_t { virtual ~standard_t(); virtual const char* key(void); - - virtual void status( - char* database, - char* table, - char* type, - char* params, - char* status); + virtual const char* database(); + virtual const char* table(); + virtual const char* type(); + virtual const char* parameters(); + virtual const char* status(); protected: virtual void on_run(); @@ -284,6 +303,8 @@ class standard_t : public tokudb::background::job_manager_t::job_t { double _delete_fraction; // for analyze status reporting, may also use other state + char _parameters[256]; + char _status[1024]; int _result; ulonglong _analyze_start; // in microseconds ulonglong _total_elapsed_time; // in microseconds @@ -305,7 +326,6 @@ class standard_t : public tokudb::background::job_manager_t::job_t { uint64_t deleted_rows); bool analyze_standard_cursor_callback(uint64_t deleted_rows); - void get_analyze_status(char*); int analyze_key_progress(); int analyze_key(uint64_t* rec_per_key_part); }; @@ -351,6 +371,16 @@ standard_t::standard_t( _time_limit = tokudb::sysvars::analyze_time(thd) * tokudb::time::MICROSECONDS; _delete_fraction = tokudb::sysvars::analyze_delete_fraction(thd); + + snprintf(_parameters, + sizeof(_parameters), + "TOKUDB_ANALYZE_DELETE_FRACTION=%f; " + "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;", + _delete_fraction, + _time_limit / tokudb::time::MICROSECONDS, + _throttle); + + _status[0] = '\0'; } standard_t::~standard_t() { } @@ -358,6 +388,10 @@ void standard_t::on_run() { DB_BTREE_STAT64 stat64; uint64_t rec_per_key_part[_share->_max_key_parts]; uint64_t total_key_parts = 0; + const char* orig_proc_info = NULL; + if (_thd) + orig_proc_info = tokudb_thd_get_proc_info(_thd); + _analyze_start = tokudb::time::microsec(); _half_time = _time_limit > 0 ? _time_limit/2 : 0; @@ -395,7 +429,7 @@ void standard_t::on_run() { _result = HA_ADMIN_FAILED; } if (_thd && (_result == HA_ADMIN_FAILED || - (double)_deleted_rows > + static_cast(_deleted_rows) > _delete_fraction * (_rows + _deleted_rows))) { char name[256]; int namelen; @@ -460,8 +494,9 @@ void standard_t::on_run() { } error: + if (_thd) + tokudb_thd_set_proc_info(_thd, orig_proc_info); return; - } void standard_t::on_destroy() { _share->lock(); @@ -472,24 +507,21 @@ void standard_t::on_destroy() { const char* standard_t::key() { return _share->full_table_name(); } -void standard_t::status( - char* database, - char* table, - char* type, - char* params, - char* status) { - - strcpy(database, _share->database_name()); - strcpy(table, _share->table_name()); - strcpy(type, "TOKUDB_ANALYZE_MODE_STANDARD"); - sprintf( - params, - "TOKUDB_ANALYZE_DELETE_FRACTION=%f; " - "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;", - _delete_fraction, - _time_limit / tokudb::time::MICROSECONDS, - _throttle); - get_analyze_status(status); +const char* standard_t::database() { + return _share->database_name(); +} +const char* standard_t::table() { + return _share->table_name(); +} +const char* standard_t::type() { + static const char* type = "TOKUDB_ANALYZE_MODE_STANDARD"; + return type; +} +const char* standard_t::parameters() { + return _parameters; +} +const char* standard_t::status() { + return _status; } bool standard_t::analyze_standard_cursor_callback( void* extra, @@ -502,41 +534,6 @@ bool standard_t::analyze_standard_cursor_callback(uint64_t deleted_rows) { _ticks += deleted_rows; return analyze_key_progress() != 0; } -void standard_t::get_analyze_status(char* msg) { - static const char* scan_direction_str[] = { - "not scanning", - "scanning forward", - "scanning backward", - "scan unknown" - }; - - const char* scan_direction = NULL; - switch (_scan_direction) { - case 0: scan_direction = scan_direction_str[0]; break; - case DB_NEXT: scan_direction = scan_direction_str[1]; break; - case DB_PREV: scan_direction = scan_direction_str[2]; break; - default: scan_direction = scan_direction_str[3]; break; - } - - float progress_rows = 0.0; - if (_share->row_count() > 0) - progress_rows = (float) _rows / (float) _share->row_count(); - float progress_time = 0.0; - if (_time_limit > 0) - progress_time = (float) _key_elapsed_time / (float) _time_limit; - sprintf( - msg, - "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% time, " - "%s", - _share->database_name(), - _share->table_name(), - _share->_key_descriptors[_current_key]._name, - _current_key, - _share->_keys, - progress_rows * 100.0, - progress_time * 100.0, - scan_direction); -} int standard_t::analyze_key_progress(void) { if (_ticks > 1000) { _ticks = 0; @@ -546,19 +543,72 @@ int standard_t::analyze_key_progress(void) { if ((_thd && thd_killed(_thd)) || cancelled()) { // client killed return ER_ABORTING_CONNECTION; - } else if(_time_limit > 0 && - (uint64_t)_key_elapsed_time > _time_limit) { + } else if (_time_limit > 0 && + static_cast(_key_elapsed_time) > _time_limit) { // time limit reached return ETIME; } - // report - if (_thd) { - char status[256]; - get_analyze_status(status); - thd_proc_info(_thd, status); + // rebuild status + // There is a slight race condition here, + // _status is used here for tokudb_thd_set_proc_info and it is also used + // for the status column in i_s.background_job_status. + // If someone happens to be querying/building the i_s table + // at the exact same time that the status is being rebuilt here, + // the i_s table could get some garbage status. + // This solution is a little heavy handed but it works, it prevents us + // from changing the status while someone might be immediately observing + // us and it prevents someone from observing us while we change the + // status. + static const char* scan_direction_str[] = {"not scanning", + "scanning forward", + "scanning backward", + "scan unknown"}; + + const char* scan_direction = NULL; + switch (_scan_direction) { + case 0: + scan_direction = scan_direction_str[0]; + break; + case DB_NEXT: + scan_direction = scan_direction_str[1]; + break; + case DB_PREV: + scan_direction = scan_direction_str[2]; + break; + default: + scan_direction = scan_direction_str[3]; + break; } + float progress_rows = 0.0; + if (_share->row_count() > 0) + progress_rows = static_cast(_rows) / + static_cast(_share->row_count()); + float progress_time = 0.0; + if (_time_limit > 0) + progress_time = static_cast(_key_elapsed_time) / + static_cast(_time_limit); + tokudb::background::_job_manager->lock(); + snprintf( + _status, + sizeof(_status), + "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% " + "time, %s", + _share->database_name(), + _share->table_name(), + _share->_key_descriptors[_current_key]._name, + _current_key, + _share->_keys, + progress_rows * 100.0, + progress_time * 100.0, + scan_direction); + tokudb::background::_job_manager->unlock(); + + // report + if (_thd) + tokudb_thd_set_proc_info(_thd, _status); + // throttle // given the throttle value, lets calculate the maximum number of rows // we should have seen so far in a .1 sec resolution @@ -694,6 +744,11 @@ int standard_t::analyze_key(uint64_t* rec_per_key_part) { assert_always(close_error == 0); done: + // in case we timed out (bunch of deleted records) without hitting a + // single row + if (_rows == 0) + _rows = 1; + // return cardinality for (uint64_t i = 0; i < num_key_parts; i++) { rec_per_key_part[i] = _rows / unique_rows[i]; @@ -733,7 +788,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* thd,DB_TXN* txn) { assert_always(thd != NULL); - const char *orig_proc_info = tokudb_thd_get_proc_info(thd); int result = HA_ADMIN_OK; tokudb::analyze::recount_rows_t* job @@ -753,8 +807,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* thd,DB_TXN* txn) { result = HA_ADMIN_FAILED; } - thd_proc_info(thd, orig_proc_info); - TOKUDB_HANDLER_DBUG_RETURN(result); } @@ -778,8 +830,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) { TOKUDB_HANDLER_DBUG_RETURN(result); } - const char *orig_proc_info = tokudb_thd_get_proc_info(thd); - tokudb::analyze::standard_t* job = new tokudb::analyze::standard_t(txn == NULL ? false : true, thd, this, txn); @@ -808,8 +858,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) { lock(); - thd_proc_info(thd, orig_proc_info); - TOKUDB_HANDLER_DBUG_RETURN(result); } diff --git a/storage/tokudb/hatoku_defines.h b/storage/tokudb/hatoku_defines.h index 3ff3e5377781c..a8d4a38e1c36d 100644 --- a/storage/tokudb/hatoku_defines.h +++ b/storage/tokudb/hatoku_defines.h @@ -7,7 +7,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. @@ -233,9 +233,12 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. // mysql 5.6.15 removed the test macro, so we define our own #define tokudb_test(e) ((e) ? 1 : 0) -inline const char* tokudb_thd_get_proc_info(const THD *thd) { +inline const char* tokudb_thd_get_proc_info(const THD* thd) { return thd->proc_info; } +inline void tokudb_thd_set_proc_info(THD* thd, const char* proc_info) { + thd_proc_info(thd, proc_info); +} // uint3korr reads 4 bytes and valgrind reports an error, so we use this function instead inline uint tokudb_uint3korr(const uchar *a) { diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc index e7dfbd810c213..1581a7b76df15 100644 --- a/storage/tokudb/hatoku_hton.cc +++ b/storage/tokudb/hatoku_hton.cc @@ -531,6 +531,7 @@ static int tokudb_init_func(void *p) { db_env->change_fsync_log_period(db_env, tokudb::sysvars::fsync_log_period); db_env->set_lock_timeout_callback(db_env, tokudb_lock_timeout_callback); + db_env->set_dir_per_db(db_env, tokudb::sysvars::dir_per_db); db_env->set_loader_memory_size( db_env, diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result deleted file mode 100644 index c2a598632caad..0000000000000 --- a/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result +++ /dev/null @@ -1,51 +0,0 @@ -include/master-slave.inc -[connection master] -CREATE TABLE t1 (a INT AUTO_INCREMENT KEY) ENGINE=TokuDB; -CREATE TABLE t2 (b INT AUTO_INCREMENT KEY, c INT, FOREIGN KEY(b) REFERENCES t1(a)) ENGINE=TokuDB; -SET FOREIGN_KEY_CHECKS=0; -INSERT INTO t1 VALUES (10); -INSERT INTO t1 VALUES (NULL),(NULL),(NULL); -INSERT INTO t2 VALUES (5,0); -INSERT INTO t2 VALUES (NULL,LAST_INSERT_ID()); -SET FOREIGN_KEY_CHECKS=1; -SELECT * FROM t1 ORDER BY a; -a -10 -11 -12 -13 -SELECT * FROM t2 ORDER BY b; -b c -5 0 -6 11 -SELECT * FROM t1 ORDER BY a; -a -10 -11 -12 -13 -SELECT * FROM t2 ORDER BY b; -b c -5 0 -6 11 -SET TIMESTAMP=1000000000; -CREATE TABLE t3 ( a INT UNIQUE ); -SET FOREIGN_KEY_CHECKS=0; -INSERT INTO t3 VALUES (1),(1); -Got one of the listed errors -SET FOREIGN_KEY_CHECKS=0; -DROP TABLE IF EXISTS t1,t2,t3; -SET FOREIGN_KEY_CHECKS=1; -create table t1 (b int primary key) engine = TokuDB; -create table t2 (a int primary key, b int, foreign key (b) references t1(b)) -engine = TokuDB; -insert into t1 set b=1; -insert into t2 set a=1, b=1; -set foreign_key_checks=0; -delete from t1; -must sync w/o a problem (could not with the buggy code) -select count(*) from t1 /* must be zero */; -count(*) -0 -drop table t2,t1; -include/rpl_end.inc diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test b/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test deleted file mode 100644 index d798cfd4a6232..0000000000000 --- a/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test +++ /dev/null @@ -1,4 +0,0 @@ --- source include/not_ndb_default.inc --- source include/have_tokudb.inc -let $engine_type=TokuDB; --- source extra/rpl_tests/rpl_foreign_key.test diff --git a/storage/tokudb/mysql-test/tokudb/disabled.def b/storage/tokudb/mysql-test/tokudb/disabled.def index c98a8aa622a93..ddefceb432ed7 100644 --- a/storage/tokudb/mysql-test/tokudb/disabled.def +++ b/storage/tokudb/mysql-test/tokudb/disabled.def @@ -28,3 +28,4 @@ type_timestamp_explicit: cluster_key_part: engine options on partitioned tables i_s_tokudb_lock_waits_released: unstable, race conditions i_s_tokudb_locks_released: unstable, race conditions +row_format: n/a diff --git a/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc b/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc new file mode 100644 index 0000000000000..b10ad21dd95ce --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc @@ -0,0 +1 @@ +--replace_regex /[a-z0-9]+_[a-z0-9]+_[a-z0-9]+(_[BP]_[a-z0-9]+){0,1}\./id./ /sqlx_[a-z0-9]+_[a-z0-9]+_/sqlx_nnnn_nnnn_/ /sqlx-[a-z0-9]+_[a-z0-9]+/sqlx-nnnn_nnnn/ /#p#/#P#/ /#sp#/#SP#/ /#tmp#/#TMP#/ diff --git a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result index 5769ee7407134..8b53f89efa337 100644 --- a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result +++ b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result @@ -25,7 +25,7 @@ TokuDB_background_job_status CREATE TEMPORARY TABLE `TokuDB_background_job_statu `scheduler` varchar(32) NOT NULL DEFAULT '', `scheduled_time` datetime NOT NULL DEFAULT '0000-00-00 00:00:00', `started_time` datetime DEFAULT NULL, - `status` varchar(256) DEFAULT NULL + `status` varchar(1024) DEFAULT NULL ) ENGINE=MEMORY DEFAULT CHARSET=utf8 create table t1 (a int not null auto_increment, b int, c int, primary key(a), key kb(b), key kc(c), key kabc(a,b,c), key kab(a,b), key kbc(b,c)); insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3); diff --git a/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result b/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result new file mode 100644 index 0000000000000..a36dbcb28c042 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result @@ -0,0 +1,10 @@ +SELECT @@tokudb_dir_per_db; +@@tokudb_dir_per_db +1 +TOKUDB_DATA_DIR_CHANGED +1 +CREATE DATABASE tokudb_test; +USE tokudb_test; +CREATE TABLE t (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY) ENGINE=tokudb; +DROP TABLE t; +DROP DATABASE tokudb_test; diff --git a/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result b/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result new file mode 100644 index 0000000000000..371f97406c8eb --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result @@ -0,0 +1,180 @@ +######## +# tokudb_dir_per_db = 1 +######## +SET GLOBAL tokudb_dir_per_db= 1; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t1_key_ab_id.tokudb +t1_key_b_id.tokudb +t1_main_id.tokudb +t1_status_id.tokudb +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t2_key_ab_id.tokudb +t2_key_b_id.tokudb +t2_main_id.tokudb +t2_status_id.tokudb +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +######## +# tokudb_dir_per_db = 0 +######## +SET GLOBAL tokudb_dir_per_db= 0; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +_test_t1_key_ab_id.tokudb +_test_t1_key_b_id.tokudb +_test_t1_main_id.tokudb +_test_t1_status_id.tokudb +## Looking for *.tokudb files in data_dir/test +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +_test_t1_key_ab_id.tokudb +_test_t1_key_b_id.tokudb +_test_t1_main_id.tokudb +_test_t1_status_id.tokudb +## Looking for *.tokudb files in data_dir/test +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +######## +# CREATE on tokudb_dir_per_db = 0 and RENAME on tokudb_dir_per_db = 1 and vice versa +######## +######## +# tokudb_dir_per_db = (1 - 1); +######## +SET GLOBAL tokudb_dir_per_db= (1 - 1);; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +_test_t1_key_ab_id.tokudb +_test_t1_key_b_id.tokudb +_test_t1_main_id.tokudb +_test_t1_status_id.tokudb +## Looking for *.tokudb files in data_dir/test +######## +# tokudb_dir_per_db = 1 +######## +SET GLOBAL tokudb_dir_per_db= 1; +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t2_key_ab_id.tokudb +t2_key_b_id.tokudb +t2_main_id.tokudb +t2_status_id.tokudb +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +######## +# tokudb_dir_per_db = (1 - 0); +######## +SET GLOBAL tokudb_dir_per_db= (1 - 0);; +######## +# CREATE +######## +CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; +INSERT INTO t1 SET b = 10; +INSERT INTO t1 SET b = 20; +SELECT b FROM t1 ORDER BY a; +b +10 +20 +CREATE INDEX b ON t1 (b); +CREATE INDEX ab ON t1 (a,b); +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t1_key_ab_id.tokudb +t1_key_b_id.tokudb +t1_main_id.tokudb +t1_status_id.tokudb +######## +# tokudb_dir_per_db = 0 +######## +SET GLOBAL tokudb_dir_per_db= 0; +######## +# RENAME +######## +RENAME TABLE t1 TO t2; +SELECT b FROM t2 ORDER BY a; +b +10 +20 +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +t1_key_ab_id.tokudb +t1_key_b_id.tokudb +t1_main_id.tokudb +t1_status_id.tokudb +######## +# DROP +######## +DROP TABLE t2; +## Looking for *.tokudb files in data_dir +## Looking for *.tokudb files in data_dir/test +SET GLOBAL tokudb_dir_per_db=default; diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result index 6f9592ddc1fc0..ecd4d07720611 100644 --- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result +++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result @@ -2,6 +2,7 @@ set default_storage_engine='tokudb'; set tokudb_prelock_empty=false; drop table if exists t; create table t (id int primary key); +t should be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; @@ -15,17 +16,21 @@ insert into t values (1); set autocommit=0; set tokudb_lock_timeout=600000; insert into t values (1); +should find the presence of a lock on 1st transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main +should find the presence of a lock_wait on the 2nd transaction select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main +should find the presence of two transactions select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; +verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main @@ -33,6 +38,8 @@ select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name ERROR 23000: Duplicate entry '1' for key 'PRIMARY' commit; +verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes +verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; @@ -46,23 +53,28 @@ replace into t values (1); set autocommit=0; set tokudb_lock_timeout=600000; replace into t values (1); +should find the presence of a lock on 1st transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main +should find the presence of a lock_wait on the 2nd transaction select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name REQUEST_TRX_ID BLOCK_TRX_ID ./test/t-main 0001000000 0001000000 LOCK_WAITS_START_TIME test t main +should find the presence of two transactions select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id TRX_ID MYSQL_ID TRX_ID MYSQL_ID commit; +verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction select * from information_schema.tokudb_locks; locks_trx_id locks_mysql_thread_id locks_dname locks_key_left locks_key_right locks_table_schema locks_table_name locks_table_dictionary_name TRX_ID MYSQL_ID ./test/t-main 0001000000 0001000000 test t main select * from information_schema.tokudb_lock_waits; requesting_trx_id blocking_trx_id lock_waits_dname lock_waits_key_left lock_waits_key_right lock_waits_start_time lock_waits_table_schema lock_waits_table_name lock_waits_table_dictionary_name commit; +verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; trx_id trx_mysql_thread_id select * from information_schema.tokudb_locks; diff --git a/storage/tokudb/mysql-test/tokudb/r/row_format.result b/storage/tokudb/mysql-test/tokudb/r/row_format.result new file mode 100644 index 0000000000000..cb66914844562 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/r/row_format.result @@ -0,0 +1,51 @@ +CREATE TABLE tokudb_row_format_test_1 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +CREATE TABLE tokudb_row_format_test_2 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +CREATE TABLE tokudb_row_format_test_3 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +CREATE TABLE tokudb_row_format_test_4 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +CREATE TABLE tokudb_row_format_test_5 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name; +table_name row_format engine +tokudb_row_format_test_1 tokudb_zlib TokuDB +tokudb_row_format_test_2 tokudb_quicklz TokuDB +tokudb_row_format_test_3 tokudb_lzma TokuDB +tokudb_row_format_test_4 tokudb_uncompressed TokuDB +tokudb_row_format_test_5 tokudb_zlib TokuDB +tokudb_row_format_test_6 tokudb_lzma TokuDB +tokudb_row_format_test_7 tokudb_quicklz TokuDB +tokudb_row_format_test_8 tokudb_snappy TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_quicklz TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_lzma TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_uncompressed TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_zlib TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_snappy TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_quicklz TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_lzma TokuDB +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; +table_name row_format engine +tokudb_row_format_test_1 tokudb_zlib TokuDB +DROP TABLE tokudb_row_format_test_1, tokudb_row_format_test_2, tokudb_row_format_test_3, tokudb_row_format_test_4, tokudb_row_format_test_5, tokudb_row_format_test_6, tokudb_row_format_test_7, tokudb_row_format_test_8; diff --git a/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result b/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result index 5c1c53946a464..b287c70469e08 100644 --- a/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result +++ b/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result @@ -1009,6 +1009,7 @@ Table Op Msg_type Msg_text test.t check status OK optimize table t; Table Op Msg_type Msg_text +test.t optimize note Table does not support optimize, doing recreate + analyze instead test.t optimize status OK check table t; Table Op Msg_type Msg_text diff --git a/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt new file mode 100644 index 0000000000000..a9090f4d1157e --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt @@ -0,0 +1 @@ +--loose-tokudb_data_dir="$MYSQL_TMP_DIR" --loose-tokudb-dir-per-db=1 diff --git a/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test new file mode 100644 index 0000000000000..7f415a7251595 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test @@ -0,0 +1,16 @@ +--source include/have_tokudb.inc + +SELECT @@tokudb_dir_per_db; + +--disable_query_log +--eval SELECT STRCMP(@@tokudb_data_dir, '$MYSQL_TMP_DIR') = 0 AS TOKUDB_DATA_DIR_CHANGED +--enable_query_log + +CREATE DATABASE tokudb_test; +USE tokudb_test; +CREATE TABLE t (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY) ENGINE=tokudb; + +--file_exists $MYSQL_TMP_DIR/tokudb_test + +DROP TABLE t; +DROP DATABASE tokudb_test; diff --git a/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test b/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test new file mode 100644 index 0000000000000..b638b706d8719 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test @@ -0,0 +1,76 @@ +source include/have_tokudb.inc; + +--let $DB= test +--let $DATADIR= `select @@datadir` +--let $i= 2 + +while ($i) { + --dec $i + --echo ######## + --echo # tokudb_dir_per_db = $i + --echo ######## + --eval SET GLOBAL tokudb_dir_per_db= $i + --echo ######## + --echo # CREATE + --echo ######## + CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; + INSERT INTO t1 SET b = 10; + INSERT INTO t1 SET b = 20; + SELECT b FROM t1 ORDER BY a; + CREATE INDEX b ON t1 (b); + CREATE INDEX ab ON t1 (a,b); + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # RENAME + --echo ######## + RENAME TABLE t1 TO t2; + SELECT b FROM t2 ORDER BY a; + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # DROP + --echo ######## + DROP TABLE t2; + --source dir_per_db_show_table_files.inc +} + +--echo ######## +--echo # CREATE on tokudb_dir_per_db = 0 and RENAME on tokudb_dir_per_db = 1 and vice versa +--echo ######## + +--let $i= 2 + +while ($i) { + --dec $i + --let $inv_i= (1 - $i); + --echo ######## + --echo # tokudb_dir_per_db = $inv_i + --echo ######## + --eval SET GLOBAL tokudb_dir_per_db= $inv_i + --echo ######## + --echo # CREATE + --echo ######## + CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb; + INSERT INTO t1 SET b = 10; + INSERT INTO t1 SET b = 20; + SELECT b FROM t1 ORDER BY a; + CREATE INDEX b ON t1 (b); + CREATE INDEX ab ON t1 (a,b); + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # tokudb_dir_per_db = $i + --echo ######## + --eval SET GLOBAL tokudb_dir_per_db= $i + --echo ######## + --echo # RENAME + --echo ######## + RENAME TABLE t1 TO t2; + SELECT b FROM t2 ORDER BY a; + --source dir_per_db_show_table_files.inc + --echo ######## + --echo # DROP + --echo ######## + DROP TABLE t2; + --source dir_per_db_show_table_files.inc +} + +SET GLOBAL tokudb_dir_per_db=default; diff --git a/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc b/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc new file mode 100644 index 0000000000000..bdf7d5b235ff9 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc @@ -0,0 +1,9 @@ +--sorted_result + +--echo ## Looking for *.tokudb files in data_dir +--source include/table_files_replace_pattern.inc +--list_files $DATADIR *.tokudb + +--echo ## Looking for *.tokudb files in data_dir/$DB +--source include/table_files_replace_pattern.inc +--list_files $DATADIR/$DB/ *.tokudb diff --git a/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test b/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test index d8ce18b3aa7f4..6534175d61902 100644 --- a/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test +++ b/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test @@ -17,7 +17,7 @@ create table t (id int primary key); # verify that txn_a insert (1) blocks txn_b insert (1) and txn_b gets a duplicate key error -# should be empty +--echo t should be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; @@ -33,7 +33,7 @@ set autocommit=0; set tokudb_lock_timeout=600000; # set lock wait timeout to 10 minutes send insert into t values (1); -# should find the presence of a lock on 1st transaction +--echo should find the presence of a lock on 1st transaction connection default; let $wait_condition= select count(*)=1 from information_schema.processlist where info='insert into t values (1)' and state='update'; source include/wait_condition.inc; @@ -42,17 +42,17 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait r replace_column 1 TRX_ID 2 MYSQL_ID; select * from information_schema.tokudb_locks; -# should find the presence of a lock_wait on the 2nd transaction +--echo should find the presence of a lock_wait on the 2nd transaction replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME; select * from information_schema.tokudb_lock_waits; -# should find the presence of two transactions +--echo should find the presence of two transactions replace_column 1 TRX_ID 2 MYSQL_ID; select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; connection conn_a; commit; -# verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction +--echo verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction let $wait_condition= select count(*)=1 from information_schema.tokudb_locks where locks_dname='./test/t-main'; source include/wait_condition.inc; @@ -69,10 +69,8 @@ connection default; disconnect conn_a; disconnect conn_b; -# verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes - -# verify that the lock on the 2nd transaction has been released -# should be be empty +--echo verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes +--echo verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; @@ -88,7 +86,7 @@ set autocommit=0; set tokudb_lock_timeout=600000; # set lock wait timeout to 10 minutes send replace into t values (1); -# should find the presence of a lock on 1st transaction +--echo should find the presence of a lock on 1st transaction connection default; let $wait_condition= select count(*)=1 from information_schema.processlist where info='replace into t values (1)' and state='update'; source include/wait_condition.inc; @@ -97,17 +95,19 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait r replace_column 1 TRX_ID 2 MYSQL_ID; select * from information_schema.tokudb_locks; -# should find the presence of a lock_wait on the 2nd transaction +--echo should find the presence of a lock_wait on the 2nd transaction replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME; select * from information_schema.tokudb_lock_waits; -# should find the presence of two transactions +--echo should find the presence of two transactions replace_column 1 TRX_ID 2 MYSQL_ID; select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; connection conn_a; commit; -# verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction +--echo verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction +let $wait_condition= select count(*)=1 from information_schema.tokudb_locks where locks_dname='./test/t-main'; +source include/wait_condition.inc; replace_column 1 TRX_ID 2 MYSQL_ID; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; @@ -120,8 +120,7 @@ connection default; disconnect conn_a; disconnect conn_b; -# verify that the lock on the 2nd transaction has been released -# should be be empty +--echo verify that the lock on the 2nd transaction has been released, should be be empty select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx; select * from information_schema.tokudb_locks; select * from information_schema.tokudb_lock_waits; diff --git a/storage/tokudb/mysql-test/tokudb/t/row_format.test b/storage/tokudb/mysql-test/tokudb/t/row_format.test new file mode 100644 index 0000000000000..6533f8c06be95 --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb/t/row_format.test @@ -0,0 +1,41 @@ +# +# Test TokuDB compression option additions to row_format +# +--source include/have_tokudb.inc + +CREATE TABLE tokudb_row_format_test_1 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +CREATE TABLE tokudb_row_format_test_2 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +CREATE TABLE tokudb_row_format_test_3 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +CREATE TABLE tokudb_row_format_test_4 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +CREATE TABLE tokudb_row_format_test_5 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; + +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT; +SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1'; + +DROP TABLE tokudb_row_format_test_1, tokudb_row_format_test_2, tokudb_row_format_test_3, tokudb_row_format_test_4, tokudb_row_format_test_5, tokudb_row_format_test_6, tokudb_row_format_test_7, tokudb_row_format_test_8; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result index 779d458221bd1..30e0bdbebd78b 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result +++ b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result @@ -23,6 +23,7 @@ set DEBUG_SYNC = 'tokudb_after_truncate_all_dictionarys SIGNAL closed WAIT_FOR d TRUNCATE TABLE t1; set global tokudb_debug_pause_background_job_manager = FALSE; set DEBUG_SYNC = 'now SIGNAL done'; +set DEBUG_SYNC = 'RESET'; drop table t1; set session tokudb_auto_analyze = @orig_auto_analyze; set session tokudb_analyze_in_background = @orig_in_background; @@ -32,4 +33,3 @@ set session tokudb_analyze_time = @orig_time; set global tokudb_cardinality_scale_percent = @orig_scale_percent; set session default_storage_engine = @orig_default_storage_engine; set global tokudb_debug_pause_background_job_manager = @orig_pause_background_job_manager; -set DEBUG_SYNC='reset'; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test b/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test index f56f93d1492c8..50434a79a0017 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test @@ -40,6 +40,7 @@ insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3); select database_name, table_name, job_type, job_params, scheduler from information_schema.tokudb_background_job_status; # lets flip to another connection +--source include/count_sessions.inc connect(conn1, localhost, root); # set up the DEBUG_SYNC point @@ -64,6 +65,7 @@ connection conn1; reap; connection default; disconnect conn1; +set DEBUG_SYNC = 'RESET'; drop table t1; set session tokudb_auto_analyze = @orig_auto_analyze; @@ -74,4 +76,4 @@ set session tokudb_analyze_time = @orig_time; set global tokudb_cardinality_scale_percent = @orig_scale_percent; set session default_storage_engine = @orig_default_storage_engine; set global tokudb_debug_pause_background_job_manager = @orig_pause_background_job_manager; -set DEBUG_SYNC='reset'; +--source include/wait_until_count_sessions.inc diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test index 6100d9aeec230..8b6df4966f498 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test @@ -12,33 +12,11 @@ let $MYSQLD_DATADIR= `SELECT @@datadir`; create table foo (a int, b int); create table bar (a int, key(a)); -# Write file to make mysql-test-run.pl expect the "crash", but don't start -# it until it's told to ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -wait -EOF - -# Send shutdown to the connected server and give -# it 10 seconds to die before zapping it -shutdown_server 10; - +--source include/shutdown_mysqld.inc remove_file $MYSQLD_DATADIR/test/foo.frm; copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm; remove_file $MYSQLD_DATADIR/test/bar.frm; - -# Write file to make mysql-test-run.pl start up the server again ---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -restart -EOF - -# Turn on reconnect ---enable_reconnect - -# Call script that will poll the server waiting for it to be back online again ---source include/wait_until_connected_again.inc - -# Turn off reconnect again ---disable_reconnect +--source include/start_mysqld.inc show create table foo; show create table bar; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test index e1acea13ed703..53c1037b051d8 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test @@ -15,33 +15,11 @@ create table bar (a int); alter table foo drop column a; alter table bar add column b int, add column c int; -# Write file to make mysql-test-run.pl expect the "crash", but don't start -# it until it's told to ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -wait -EOF - -# Send shutdown to the connected server and give -# it 10 seconds to die before zapping it -shutdown_server 10; - +--source include/shutdown_mysqld.inc remove_file $MYSQLD_DATADIR/test/foo.frm; copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm; remove_file $MYSQLD_DATADIR/test/bar.frm; - -# Write file to make mysql-test-run.pl start up the server again ---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -restart -EOF - -# Turn on reconnect ---enable_reconnect - -# Call script that will poll the server waiting for it to be back online again ---source include/wait_until_connected_again.inc - -# Turn off reconnect again ---disable_reconnect +--source include/start_mysqld.inc show create table foo; show create table bar; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test index 17a124249da26..0421b8e9d269f 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test @@ -14,33 +14,11 @@ create table bar (a bigint)engine=TokuDB; alter table foo drop index b; alter table bar add index (a); -# Write file to make mysql-test-run.pl expect the "crash", but don't start -# it until it's told to ---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -wait -EOF - -# Send shutdown to the connected server and give -# it 10 seconds to die before zapping it -shutdown_server 10; - +--source include/shutdown_mysqld.inc remove_file $MYSQLD_DATADIR/test/foo.frm; copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm; remove_file $MYSQLD_DATADIR/test/bar.frm; - -# Write file to make mysql-test-run.pl start up the server again ---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect -restart -EOF - -# Turn on reconnect ---enable_reconnect - -# Call script that will poll the server waiting for it to be back online again ---source include/wait_until_connected_again.inc - -# Turn off reconnect again ---disable_reconnect +--source include/start_mysqld.inc show create table foo; show create table bar; diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test index 42dbb30058a74..4c40339be5a2f 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test @@ -7,17 +7,7 @@ set default_storage_engine='tokudb'; # capture the datadir let $MYSQLD_DATADIR= `SELECT @@datadir`; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. ---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # remove all tokudb file in the datadir system mkdir $MYSQLD_DATADIR/save; system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save; @@ -25,13 +15,7 @@ system mkdir $MYSQLD_DATADIR/test; # install 6.6.8 tokudb test files system cp -r std_data/tokudb_drop_part_table_668/data/* $MYSQLD_DATADIR; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc +--source include/start_mysqld.inc create table tc (a int, b int, c int, primary key(a), key(b)) engine=tokudb partition by hash(a) partitions 2; @@ -45,26 +29,9 @@ select dictionary_name from information_schema.tokudb_file_map; # check that the test dir is empty list_files $MYSQLD_DATADIR/test *.frm; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. ---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # restore saved datadir system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test; system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR; system rmdir $MYSQLD_DATADIR/save; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc - +--source include/start_mysqld.inc diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test index 3903c2cef9f28..0340b960fa5ab 100644 --- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test +++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test @@ -6,17 +6,7 @@ set default_storage_engine='tokudb'; # capture the datadir let $MYSQLD_DATADIR= `SELECT @@datadir`; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. ---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # remove all tokudb file in the datadir system mkdir $MYSQLD_DATADIR/save; system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save; @@ -24,13 +14,7 @@ system mkdir $MYSQLD_DATADIR/test; # install 6.6.8 tokudb test files system cp -r std_data/tokudb_drop_simple_table_668/data/* $MYSQLD_DATADIR; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc +--source include/start_mysqld.inc create table tc (id int, x int, primary key(id), key(x)); @@ -46,26 +30,9 @@ select dictionary_name from information_schema.tokudb_file_map; # check that the test dir is empty list_files $MYSQLD_DATADIR/test *.frm; -# shutdown mysqld (code stolen from mysql_plugin.test) -let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect; -# MTR will remove this file later, but this might be too late. ---error 0,1 ---remove_file $expect_file ---write_file $expect_file -wait -EOF ---shutdown_server 10 ---source include/wait_until_disconnected.inc - +--source include/shutdown_mysqld.inc # restore saved datadir system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test; system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR; system rmdir $MYSQLD_DATADIR/save; - -# restart mysqld ---append_file $expect_file -restart -EOF ---enable_reconnect ---source include/wait_until_connected_again.inc - +--source include/start_mysqld.inc diff --git a/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc b/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc new file mode 100644 index 0000000000000..b10ad21dd95ce --- /dev/null +++ b/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc @@ -0,0 +1 @@ +--replace_regex /[a-z0-9]+_[a-z0-9]+_[a-z0-9]+(_[BP]_[a-z0-9]+){0,1}\./id./ /sqlx_[a-z0-9]+_[a-z0-9]+_/sqlx_nnnn_nnnn_/ /sqlx-[a-z0-9]+_[a-z0-9]+/sqlx-nnnn_nnnn/ /#p#/#P#/ /#sp#/#SP#/ /#tmp#/#TMP#/ diff --git a/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test b/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test index be14d8814f04e..f97235a0a2d3a 100644 --- a/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test +++ b/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test @@ -56,7 +56,7 @@ partition by range (a) insert into t1 values (1), (11), (21), (33); SELECT * FROM t1; SHOW CREATE TABLE t1; ---replace_result #p# #P# #sp# #SP# +--source include/table_files_replace_pattern.inc --list_files $MYSQLD_DATADIR/test SET DEBUG_SYNC='before_open_in_get_all_tables SIGNAL parked WAIT_FOR open'; @@ -82,7 +82,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p0 INTO disconnect con1; connection default; --reap ---replace_result #p# #P# #sp# #SP# +--source include/table_files_replace_pattern.inc --list_files $MYSQLD_DATADIR/test SHOW CREATE TABLE t1; SELECT * FROM t1; diff --git a/storage/tokudb/tokudb_background.cc b/storage/tokudb/tokudb_background.cc index d8ef54a59729e..e019e41c78824 100644 --- a/storage/tokudb/tokudb_background.cc +++ b/storage/tokudb/tokudb_background.cc @@ -8,7 +8,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. @@ -68,7 +68,8 @@ void job_manager_t::destroy() { while (_background_jobs.size()) { _mutex.lock(); job_t* job = _background_jobs.front(); - cancel(job); + if (!job->cancelled()) + cancel(job); _background_jobs.pop_front(); delete job; _mutex.unlock(); @@ -148,11 +149,8 @@ bool job_manager_t::cancel_job(const char* key) { it != _background_jobs.end(); it++) { job_t* job = *it; - if (!job->cancelled() && - strcmp(job->key(), key) == 0) { - + if (!job->cancelled() && strcmp(job->key(), key) == 0) { cancel(job); - ret = true; } } @@ -162,8 +160,6 @@ bool job_manager_t::cancel_job(const char* key) { } void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const { - char database[256], table[256], type[256], params[256], status[256]; - _mutex.lock(); for (jobs_t::const_iterator it = _background_jobs.begin(); @@ -171,19 +167,7 @@ void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const { it++) { job_t* job = *it; if (!job->cancelled()) { - database[0] = table[0] = type[0] = params[0] = status[0] = '\0'; - job->status(database, table, type, params, status); - callback( - job->id(), - database, - table, - type, - params, - status, - job->user_scheduled(), - job->scheduled_time(), - job->started_time(), - extra); + callback(job, extra); } } @@ -233,6 +217,7 @@ void job_manager_t::run(job_t* job) { } void job_manager_t::cancel(job_t* job) { assert_debug(_mutex.is_owned_by_me()); + assert_always(!job->cancelled()); job->cancel(); } job_manager_t* _job_manager = NULL; diff --git a/storage/tokudb/tokudb_background.h b/storage/tokudb/tokudb_background.h index 3786701fd0fa0..29991ab325d54 100644 --- a/storage/tokudb/tokudb_background.h +++ b/storage/tokudb/tokudb_background.h @@ -7,7 +7,7 @@ This file is part of TokuDB Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. - TokuDBis is free software: you can redistribute it and/or modify + TokuDB is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2, as published by the Free Software Foundation. @@ -58,13 +58,20 @@ class job_manager_t { // (or jobs) usually used to find jobs to cancel virtual const char* key() = 0; - // method to get info for information schema, 255 chars per buffer - virtual void status( - char* database, - char* table, - char* type, - char* params, - char* status) = 0; + // method to obtain the database name the job is scheduled on + virtual const char* database() = 0; + + // method to obtain the table name the job is scheduled on + virtual const char* table() = 0; + + // method to obtain the type of job + virtual const char* type() = 0; + + // method to obtain a stringized list of job parameters + virtual const char* parameters() = 0; + + // method to obtain a sting identifying the current status of the job + virtual const char* status() = 0; inline bool running() const; @@ -99,17 +106,7 @@ class job_manager_t { }; // pfn for iterate callback - typedef void (*pfn_iterate_t)( - uint64_t, - const char*, - const char*, - const char*, - const char*, - const char*, - bool, - time_t, - time_t, - void*); + typedef void (*pfn_iterate_t)(class job_t*, void*); public: void* operator new(size_t sz); @@ -144,6 +141,11 @@ class job_manager_t { // data passed when the job was scheduled void iterate_jobs(pfn_iterate_t callback, void* extra) const; + // lock the bjm, this prevents anyone from running, cancelling or iterating + // jobs in the bjm. + inline void lock(); + inline void unlock(); + private: static void* thread_func(void* v); @@ -170,6 +172,15 @@ extern job_manager_t* _job_manager; bool initialize(); bool destroy(); +inline void job_manager_t::lock() { + assert_debug(!_mutex.is_owned_by_me()); + _mutex.lock(); +} +inline void job_manager_t::unlock() { + assert_debug(_mutex.is_owned_by_me()); + _mutex.unlock(); +} + inline void job_manager_t::job_t::run() { if (!_cancelled) { _running = true; diff --git a/storage/tokudb/tokudb_information_schema.cc b/storage/tokudb/tokudb_information_schema.cc index e69a7899b4597..b3d77eef2d968 100644 --- a/storage/tokudb/tokudb_information_schema.cc +++ b/storage/tokudb/tokudb_information_schema.cc @@ -1085,7 +1085,7 @@ ST_FIELD_INFO background_job_status_field_info[] = { {"scheduler", 32, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE }, {"scheduled_time", 0, MYSQL_TYPE_DATETIME, 0, 0, NULL, SKIP_OPEN_TABLE }, {"started_time", 0, MYSQL_TYPE_DATETIME, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE }, - {"status", 256, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE }, + {"status", 1024, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE }, {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE} }; @@ -1095,15 +1095,7 @@ struct background_job_status_extra { }; void background_job_status_callback( - uint64_t id, - const char* database_name, - const char* table_name, - const char* type, - const char* params, - const char* status, - bool user_scheduled, - time_t scheduled_time, - time_t started_time, + tokudb::background::job_manager_t::job_t* job, void* extra) { background_job_status_extra* e = @@ -1111,24 +1103,33 @@ void background_job_status_callback( THD* thd = e->thd; TABLE* table = e->table; + const char* tmp = NULL; - table->field[0]->store(id, false); - table->field[1]->store( - database_name, - strlen(database_name), - system_charset_info); - table->field[2]->store(table_name, strlen(table_name), system_charset_info); - table->field[3]->store(type, strlen(type), system_charset_info); - table->field[4]->store(params, strlen(params), system_charset_info); - if (user_scheduled) + table->field[0]->store(job->id(), false); + + tmp = job->database(); + table->field[1]->store(tmp, strlen(tmp), system_charset_info); + + tmp = job->table(); + table->field[2]->store(tmp, strlen(tmp), system_charset_info); + + tmp = job->type(); + table->field[3]->store(tmp, strlen(tmp), system_charset_info); + + tmp = job->parameters(); + table->field[4]->store(tmp, strlen(tmp), system_charset_info); + + if (job->user_scheduled()) table->field[5]->store("USER", strlen("USER"), system_charset_info); else table->field[5]->store("AUTO", strlen("AUTO"), system_charset_info); - field_store_time_t(table->field[6], scheduled_time); - field_store_time_t(table->field[7], started_time); - if (status[0] != '\0') { - table->field[8]->store(status, strlen(status), system_charset_info); + field_store_time_t(table->field[6], job->scheduled_time()); + field_store_time_t(table->field[7], job->started_time()); + + tmp = job->status(); + if (tmp && tmp[0] != '\0') { + table->field[8]->store(tmp, strlen(tmp), system_charset_info); table->field[8]->set_notnull(); } else { table->field[8]->store(NULL, 0, system_charset_info); diff --git a/storage/tokudb/tokudb_sysvars.cc b/storage/tokudb/tokudb_sysvars.cc index 7cea749b4fbed..b758929c10e66 100644 --- a/storage/tokudb/tokudb_sysvars.cc +++ b/storage/tokudb/tokudb_sysvars.cc @@ -66,6 +66,7 @@ uint read_status_frequency = 0; my_bool strip_frm_data = FALSE; char* tmp_dir = NULL; uint write_status_frequency = 0; +my_bool dir_per_db = FALSE; char* version = (char*) TOKUDB_VERSION_STR; // file system reserve as a percentage of total disk space @@ -394,6 +395,18 @@ static MYSQL_SYSVAR_UINT( ~0U, 0); +static void tokudb_dir_per_db_update(THD* thd, + struct st_mysql_sys_var* sys_var, + void* var, const void* save) { + my_bool *value = (my_bool *) var; + *value = *(const my_bool *) save; + db_env->set_dir_per_db(db_env, *value); +} + +static MYSQL_SYSVAR_BOOL(dir_per_db, dir_per_db, + 0, "TokuDB store ft files in db directories", + NULL, tokudb_dir_per_db_update, FALSE); + #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL static MYSQL_SYSVAR_STR( gdb_path, @@ -935,6 +948,7 @@ st_mysql_sys_var* system_variables[] = { MYSQL_SYSVAR(tmp_dir), MYSQL_SYSVAR(version), MYSQL_SYSVAR(write_status_frequency), + MYSQL_SYSVAR(dir_per_db), #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL MYSQL_SYSVAR(gdb_path), diff --git a/storage/tokudb/tokudb_sysvars.h b/storage/tokudb/tokudb_sysvars.h index 3bd96f7c68da9..7701f211729c7 100644 --- a/storage/tokudb/tokudb_sysvars.h +++ b/storage/tokudb/tokudb_sysvars.h @@ -101,6 +101,7 @@ extern uint read_status_frequency; extern my_bool strip_frm_data; extern char* tmp_dir; extern uint write_status_frequency; +extern my_bool dir_per_db; extern char* version; #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL diff --git a/storage/xtradb/btr/btr0btr.cc b/storage/xtradb/btr/btr0btr.cc index da4a6d3cdb1a6..ecea98fccfebb 100644 --- a/storage/xtradb/btr/btr0btr.cc +++ b/storage/xtradb/btr/btr0btr.cc @@ -78,7 +78,7 @@ btr_corruption_report( buf_block_get_zip_size(block), BUF_PAGE_PRINT_NO_CRASH); } - buf_page_print(buf_block_get_frame_fast(block), 0, 0); + buf_page_print(buf_nonnull_block_get_frame(block), 0, 0); } #ifndef UNIV_HOTBACKUP @@ -804,8 +804,10 @@ btr_height_get( /* S latches the page */ root_block = btr_root_block_get(index, RW_S_LATCH, mtr); + ut_ad(root_block); // The index must not be corrupted - height = btr_page_get_level(buf_block_get_frame_fast(root_block), mtr); + height = btr_page_get_level(buf_nonnull_block_get_frame(root_block), + mtr); /* Release the S latch on the root page. */ mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX); @@ -1231,7 +1233,7 @@ btr_get_size( SRV_CORRUPT_TABLE_CHECK(root, { mtr_commit(mtr); - return(0); + return(ULINT_UNDEFINED); }); if (flag == BTR_N_LEAF_PAGES) { @@ -2756,7 +2758,7 @@ btr_attach_half_pages( } /* Get the level of the split pages */ - level = btr_page_get_level(buf_block_get_frame_fast(block), mtr); + level = btr_page_get_level(buf_nonnull_block_get_frame(block), mtr); ut_ad(level == btr_page_get_level(buf_block_get_frame(new_block), mtr)); @@ -4133,8 +4135,10 @@ btr_discard_page( /* Decide the page which will inherit the locks */ - left_page_no = btr_page_get_prev(buf_block_get_frame_fast(block), mtr); - right_page_no = btr_page_get_next(buf_block_get_frame_fast(block), mtr); + left_page_no = btr_page_get_prev(buf_nonnull_block_get_frame(block), + mtr); + right_page_no = btr_page_get_next(buf_nonnull_block_get_frame(block), + mtr); if (left_page_no != FIL_NULL) { merge_block = btr_block_get(space, zip_size, left_page_no, diff --git a/storage/xtradb/btr/btr0cur.cc b/storage/xtradb/btr/btr0cur.cc index 05af024a88226..214d050d56219 100644 --- a/storage/xtradb/btr/btr0cur.cc +++ b/storage/xtradb/btr/btr0cur.cc @@ -1751,7 +1751,7 @@ btr_cur_pessimistic_insert( } if (!page_rec_is_infimum(btr_cur_get_rec(cursor)) || btr_page_get_prev( - buf_block_get_frame( + buf_nonnull_block_get_frame( btr_cur_get_block(cursor)), mtr) == FIL_NULL) { /* split and inserted need to call @@ -2220,7 +2220,7 @@ btr_cur_update_in_place( if (page_zip && !(flags & BTR_KEEP_IBUF_BITMAP) && !dict_index_is_clust(index) - && page_is_leaf(buf_block_get_frame(block))) { + && page_is_leaf(buf_nonnull_block_get_frame(block))) { /* Update the free bits in the insert buffer. */ ibuf_update_free_bits_zip(block, mtr); } diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index 0d5a478ca679c..21b10196a2572 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -4539,7 +4539,9 @@ buf_page_io_complete( recv_recover_page(TRUE, (buf_block_t*) bpage); } - if (uncompressed && !recv_no_ibuf_operations) { + if (uncompressed && !recv_no_ibuf_operations + && fil_page_get_type(frame) == FIL_PAGE_INDEX + && page_is_leaf(frame)) { buf_block_t* block; ibool update_ibuf_bitmap; diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index f4d1c637e3e6f..3c12d6da73f3f 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -521,7 +521,7 @@ buf_dblwr_process() if (buf_page_is_corrupted(true, read_buf, zip_size)) { fprintf(stderr, - "InnoDB: Warning: database page" + "InnoDB: Database page" " corruption or a failed\n" "InnoDB: file read of" " space %lu page %lu.\n" diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 3554405e0b908..601e79d892335 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -309,6 +309,8 @@ buf_flush_init_flush_rbt(void) buf_flush_list_mutex_enter(buf_pool); + ut_ad(buf_pool->flush_rbt == NULL); + /* Create red black tree for speedy insertions in flush list. */ buf_pool->flush_rbt = rbt_create( sizeof(buf_page_t*), buf_flush_block_cmp); @@ -2565,6 +2567,11 @@ page_cleaner_sleep_if_needed( ulint next_loop_time) /*!< in: time when next loop iteration should start */ { + /* No sleep if we are cleaning the buffer pool during the shutdown + with everything else finished */ + if (srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE) + return; + ulint cur_time = ut_time_ms(); if (next_loop_time > cur_time) { diff --git a/storage/xtradb/dict/dict0boot.cc b/storage/xtradb/dict/dict0boot.cc index 94a3af2852b39..c0bb0298bea72 100644 --- a/storage/xtradb/dict/dict0boot.cc +++ b/storage/xtradb/dict/dict0boot.cc @@ -272,6 +272,10 @@ dict_boot(void) ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME == 2); ut_ad(DICT_NUM_COLS__SYS_FOREIGN_COLS == 4); ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 6); + ut_ad(DICT_NUM_COLS__SYS_ZIP_DICT == 3); + ut_ad(DICT_NUM_FIELDS__SYS_ZIP_DICT == 5); + ut_ad(DICT_NUM_COLS__SYS_ZIP_DICT_COLS == 3); + ut_ad(DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS == 5); mtr_start(&mtr); diff --git a/storage/xtradb/dict/dict0crea.cc b/storage/xtradb/dict/dict0crea.cc index b34fb7e262660..db0ca638de4da 100644 --- a/storage/xtradb/dict/dict0crea.cc +++ b/storage/xtradb/dict/dict0crea.cc @@ -38,6 +38,7 @@ Created 1/8/1996 Heikki Tuuri #include "que0que.h" #include "row0ins.h" #include "row0mysql.h" +#include "row0sel.h" #include "pars0pars.h" #include "trx0roll.h" #include "usr0sess.h" @@ -1931,6 +1932,135 @@ dict_create_or_check_sys_tablespace(void) return(err); } +/** Creates the zip_dict system table inside InnoDB +at server bootstrap or server start if it is not found or is +not of the right form. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +dict_create_or_check_sys_zip_dict(void) +{ + trx_t* trx; + my_bool srv_file_per_table_backup; + dberr_t err; + dberr_t sys_zip_dict_err; + dberr_t sys_zip_dict_cols_err; + + ut_a(srv_get_active_thread_type() == SRV_NONE); + + /* Note: The master thread has not been started at this point. */ + + sys_zip_dict_err = dict_check_if_system_table_exists( + "SYS_ZIP_DICT", DICT_NUM_FIELDS__SYS_ZIP_DICT + 1, 2); + sys_zip_dict_cols_err = dict_check_if_system_table_exists( + "SYS_ZIP_DICT_COLS", DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS + 1, + 1); + + if (sys_zip_dict_err == DB_SUCCESS && + sys_zip_dict_cols_err == DB_SUCCESS) + return (DB_SUCCESS); + + trx = trx_allocate_for_mysql(); + + trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); + + trx->op_info = "creating zip_dict and zip_dict_cols sys tables"; + + row_mysql_lock_data_dictionary(trx); + + /* Check which incomplete table definition to drop. */ + + if (sys_zip_dict_err == DB_CORRUPTION) { + ib_logf(IB_LOG_LEVEL_WARN, + "Dropping incompletely created " + "SYS_ZIP_DICT table."); + row_drop_table_for_mysql("SYS_ZIP_DICT", trx, TRUE, TRUE); + } + if (sys_zip_dict_cols_err == DB_CORRUPTION) { + ib_logf(IB_LOG_LEVEL_WARN, + "Dropping incompletely created " + "SYS_ZIP_DICT_COLS table."); + row_drop_table_for_mysql("SYS_ZIP_DICT_COLS", trx, TRUE, TRUE); + } + + ib_logf(IB_LOG_LEVEL_INFO, + "Creating zip_dict and zip_dict_cols system tables."); + + /* We always want SYSTEM tables to be created inside the system + tablespace. */ + srv_file_per_table_backup = srv_file_per_table; + srv_file_per_table = 0; + + err = que_eval_sql( + NULL, + "PROCEDURE CREATE_SYS_ZIP_DICT_PROC () IS\n" + "BEGIN\n" + "CREATE TABLE SYS_ZIP_DICT(\n" + " ID INT UNSIGNED NOT NULL,\n" + " NAME CHAR(" + STRINGIFY_ARG(ZIP_DICT_MAX_NAME_LENGTH) + ") NOT NULL,\n" + " DATA BLOB NOT NULL\n" + ");\n" + "CREATE UNIQUE CLUSTERED INDEX SYS_ZIP_DICT_ID" + " ON SYS_ZIP_DICT (ID);\n" + "CREATE UNIQUE INDEX SYS_ZIP_DICT_NAME" + " ON SYS_ZIP_DICT (NAME);\n" + "CREATE TABLE SYS_ZIP_DICT_COLS(\n" + " TABLE_ID INT UNSIGNED NOT NULL,\n" + " COLUMN_POS INT UNSIGNED NOT NULL,\n" + " DICT_ID INT UNSIGNED NOT NULL\n" + ");\n" + "CREATE UNIQUE CLUSTERED INDEX SYS_ZIP_DICT_COLS_COMPOSITE" + " ON SYS_ZIP_DICT_COLS (TABLE_ID, COLUMN_POS);\n" + "END;\n", + FALSE, trx); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Creation of SYS_ZIP_DICT and SYS_ZIP_DICT_COLS" + "has failed with error %lu. Tablespace is full. " + "Dropping incompletely created tables.", + (ulong) err); + + ut_a(err == DB_OUT_OF_FILE_SPACE + || err == DB_TOO_MANY_CONCURRENT_TRXS); + + row_drop_table_for_mysql("SYS_ZIP_DICT", trx, TRUE, TRUE); + row_drop_table_for_mysql("SYS_ZIP_DICT_COLS", trx, TRUE, TRUE); + + if (err == DB_OUT_OF_FILE_SPACE) { + err = DB_MUST_GET_MORE_FILE_SPACE; + } + } + + trx_commit_for_mysql(trx); + + row_mysql_unlock_data_dictionary(trx); + + trx_free_for_mysql(trx); + + srv_file_per_table = srv_file_per_table_backup; + + if (err == DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_INFO, + "zip_dict and zip_dict_cols system tables created."); + } + + /* Note: The master thread has not been started at this point. */ + /* Confirm and move to the non-LRU part of the table LRU list. */ + + sys_zip_dict_err = dict_check_if_system_table_exists( + "SYS_ZIP_DICT", DICT_NUM_FIELDS__SYS_ZIP_DICT + 1, 2); + ut_a(sys_zip_dict_err == DB_SUCCESS); + sys_zip_dict_cols_err = dict_check_if_system_table_exists( + "SYS_ZIP_DICT_COLS", + DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS + 1, 1); + ut_a(sys_zip_dict_cols_err == DB_SUCCESS); + + return(err); +} + /********************************************************************//** Add a single tablespace definition to the data dictionary tables in the database. @@ -1984,3 +2114,456 @@ dict_create_add_tablespace_to_dictionary( return(error); } + +/** Add a single compression dictionary definition to the SYS_ZIP_DICT +InnoDB system table. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_add_zip_dict( + const char* name, /*!< in: dict name */ + ulint name_len, /*!< in: dict name length */ + const char* data, /*!< in: dict data */ + ulint data_len, /*!< in: dict data length */ + trx_t* trx) /*!< in/out: transaction */ +{ + ut_ad(name); + ut_ad(data); + + pars_info_t* info = pars_info_create(); + + pars_info_add_literal(info, "name", name, name_len, + DATA_VARCHAR, DATA_ENGLISH); + pars_info_add_literal(info, "data", data, data_len, + DATA_BLOB, DATA_BINARY_TYPE | DATA_NOT_NULL); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + " max_id INT;\n" + "DECLARE CURSOR cur IS\n" + " SELECT ID FROM SYS_ZIP_DICT\n" + " ORDER BY ID DESC;\n" + "BEGIN\n" + " max_id := 0;\n" + " OPEN cur;\n" + " FETCH cur INTO max_id;\n" + " IF (cur % NOTFOUND) THEN\n" + " max_id := 0;\n" + " END IF;\n" + " CLOSE cur;\n" + " INSERT INTO SYS_ZIP_DICT VALUES" + " (max_id + 1, :name, :data);\n" + "END;\n", + FALSE, trx); + + return error; +} + +/** Fetch callback, just stores extracted zip_dict id in the external +variable. +@return TRUE if all OK */ +static +ibool +dict_create_extract_int_aux( + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: int32 id */ +{ + sel_node_t* node = static_cast(row); + dfield_t* dfield = que_node_get_val(node->select_list); + dtype_t* type = dfield_get_type(dfield); + ulint len = dfield_get_len(dfield); + + ut_a(dtype_get_mtype(type) == DATA_INT); + ut_a(len == sizeof(ib_uint32_t)); + + memcpy(user_arg, dfield_get_data(dfield), sizeof(ib_uint32_t)); + + return(TRUE); +} + +/** Add a single compression dictionary reference to the SYS_ZIP_DICT_COLS +InnoDB system table. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_add_zip_dict_reference( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint dict_id, /*!< in: dict id */ + trx_t* trx) /*!< in/out: transaction */ +{ + pars_info_t* info = pars_info_create(); + + pars_info_add_int4_literal(info, "table_id", table_id); + pars_info_add_int4_literal(info, "column_pos", column_pos); + pars_info_add_int4_literal(info, "dict_id", dict_id); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "BEGIN\n" + " INSERT INTO SYS_ZIP_DICT_COLS VALUES" + " (:table_id, :column_pos, :dict_id);\n" + "END;\n", + FALSE, trx); + return error; +} + +/** Get a single compression dictionary id for the given +(table id, column pos) pair. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_id_by_reference( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint* dict_id, /*!< out: dict id */ + trx_t* trx) /*!< in/out: transaction */ +{ + ut_ad(dict_id); + + pars_info_t* info = pars_info_create(); + + ib_uint32_t dict_id_buf; + mach_write_to_4(reinterpret_cast(&dict_id_buf ), + ULINT32_UNDEFINED); + + pars_info_add_int4_literal(info, "table_id", table_id); + pars_info_add_int4_literal(info, "column_pos", column_pos); + pars_info_bind_function( + info, "my_func", dict_create_extract_int_aux, &dict_id_buf); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR cur IS\n" + " SELECT DICT_ID FROM SYS_ZIP_DICT_COLS\n" + " WHERE TABLE_ID = :table_id AND\n" + " COLUMN_POS = :column_pos;\n" + "BEGIN\n" + " OPEN cur;\n" + " FETCH cur INTO my_func();\n" + " CLOSE cur;\n" + "END;\n", + FALSE, trx); + if (error == DB_SUCCESS) { + ib_uint32_t local_dict_id = mach_read_from_4( + reinterpret_cast(&dict_id_buf)); + if (local_dict_id == ULINT32_UNDEFINED) + error = DB_RECORD_NOT_FOUND; + else + *dict_id = local_dict_id; + } + return error; +} + +/** Get compression dictionary id for the given name. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_id_by_name( + const char* dict_name, /*!< in: dict name */ + ulint dict_name_len, /*!< in: dict name length */ + ulint* dict_id, /*!< out: dict id */ + trx_t* trx) /*!< in/out: transaction */ +{ + ut_ad(dict_name); + ut_ad(dict_name_len); + ut_ad(dict_id); + + pars_info_t* info = pars_info_create(); + + pars_info_add_literal(info, "dict_name", dict_name, dict_name_len, + DATA_VARCHAR, DATA_ENGLISH); + + ib_uint32_t dict_id_buf; + mach_write_to_4(reinterpret_cast(&dict_id_buf), + ULINT32_UNDEFINED); + pars_info_bind_function( + info, "my_func", dict_create_extract_int_aux, &dict_id_buf); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR cur IS\n" + " SELECT ID FROM SYS_ZIP_DICT\n" + " WHERE NAME = :dict_name;\n" + "BEGIN\n" + " OPEN cur;\n" + " FETCH cur INTO my_func();\n" + " CLOSE cur;\n" + "END;\n", + FALSE, trx); + if (error == DB_SUCCESS) { + ib_uint32_t local_dict_id = mach_read_from_4( + reinterpret_cast(&dict_id_buf)); + if (local_dict_id == ULINT32_UNDEFINED) + error = DB_RECORD_NOT_FOUND; + else + *dict_id = local_dict_id; + } + return error; +} + +/** Auxiliary enum used to indicate zip dict data extraction result code */ +enum zip_dict_info_aux_code { + zip_dict_info_success, /*!< success */ + zip_dict_info_not_found, /*!< zip dict record not found */ + zip_dict_info_oom, /*!< out of memory */ + zip_dict_info_corrupted_name, /*!< corrupted zip dict name */ + zip_dict_info_corrupted_data /*!< corrupted zip dict data */ +}; + +/** Auxiliary struct used to return zip dict info aling with result code */ +struct zip_dict_info_aux { + LEX_STRING name; /*!< zip dict name */ + LEX_STRING data; /*!< zip dict data */ + int code; /*!< result code (0 - success) */ +}; + +/** Fetch callback, just stores extracted zip_dict data in the external +variable. +@return always returns TRUE */ +static +ibool +dict_create_get_zip_dict_info_by_id_aux( + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: pointer to zip_dict_info_aux* */ +{ + sel_node_t* node = static_cast(row); + zip_dict_info_aux* result = + static_cast(user_arg); + + result->code = zip_dict_info_success; + result->name.str = 0; + result->name.length = 0; + result->data.str = 0; + result->data.length = 0; + + /* NAME field */ + que_node_t* exp = node->select_list; + ut_a(exp != 0); + + dfield_t* dfield = que_node_get_val(exp); + dtype_t* type = dfield_get_type(dfield); + ut_a(dtype_get_mtype(type) == DATA_VARCHAR); + + ulint len = dfield_get_len(dfield); + void* data = dfield_get_data(dfield); + + + if (len == UNIV_SQL_NULL) { + result->code = zip_dict_info_corrupted_name; + } + else { + result->name.str = + static_cast(my_malloc(len + 1, MYF(0))); + if (result->name.str == 0) { + result->code = zip_dict_info_oom; + } + else { + memcpy(result->name.str, data, len); + result->name.str[len] = '\0'; + result->name.length = len; + } + } + + /* DATA field */ + exp = que_node_get_next(exp); + ut_a(exp != 0); + + dfield = que_node_get_val(exp); + type = dfield_get_type(dfield); + ut_a(dtype_get_mtype(type) == DATA_BLOB); + + len = dfield_get_len(dfield); + data = dfield_get_data(dfield); + + if (len == UNIV_SQL_NULL) { + result->code = zip_dict_info_corrupted_data; + } + else { + result->data.str = + static_cast(my_malloc( + len == 0 ? 1 : len, MYF(0))); + if (result->data.str == 0) { + result->code = zip_dict_info_oom; + } + else { + memcpy(result->data.str, data, len); + result->data.length = len; + } + } + + ut_ad(que_node_get_next(exp) == 0); + + if (result->code != zip_dict_info_success) { + if (result->name.str == 0) { + mem_free(result->name.str); + result->name.str = 0; + result->name.length = 0; + } + if (result->data.str == 0) { + mem_free(result->data.str); + result->data.str = 0; + result->data.length = 0; + } + } + + return TRUE; +} + +/** Get compression dictionary info (name and data) for the given id. +Allocates memory for name and data on success. +Must be freed with mem_free(). +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_info_by_id( + ulint dict_id, /*!< in: dict id */ + char** name, /*!< out: dict name */ + ulint* name_len, /*!< out: dict name length*/ + char** data, /*!< out: dict data */ + ulint* data_len, /*!< out: dict data length*/ + trx_t* trx) /*!< in/out: transaction */ +{ + ut_ad(name); + ut_ad(data); + + zip_dict_info_aux rec; + rec.code = zip_dict_info_not_found; + pars_info_t* info = pars_info_create(); + + pars_info_add_int4_literal(info, "id", dict_id); + pars_info_bind_function( + info, "my_func", dict_create_get_zip_dict_info_by_id_aux, + &rec); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR cur IS\n" + " SELECT NAME, DATA FROM SYS_ZIP_DICT\n" + " WHERE ID = :id;\n" + "BEGIN\n" + " OPEN cur;\n" + " FETCH cur INTO my_func();\n" + " CLOSE cur;\n" + "END;\n", + FALSE, trx); + if (error == DB_SUCCESS) { + switch (rec.code) { + case zip_dict_info_success: + *name = rec.name.str; + *name_len = rec.name.length; + *data = rec.data.str; + *data_len = rec.data.length; + break; + case zip_dict_info_not_found: + error = DB_RECORD_NOT_FOUND; + break; + case zip_dict_info_oom: + error = DB_OUT_OF_MEMORY; + break; + case zip_dict_info_corrupted_name: + case zip_dict_info_corrupted_data: + error = DB_INVALID_NULL; + break; + default: + ut_error; + } + } + return error; +} + +/** Remove a single compression dictionary from the data dictionary +tables in the database. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_remove_zip_dict( + const char* name, /*!< in: dict name */ + ulint name_len, /*!< in: dict name length */ + trx_t* trx) /*!< in/out: transaction */ +{ + ut_ad(name); + + pars_info_t* info = pars_info_create(); + + ib_uint32_t dict_id_buf; + mach_write_to_4(reinterpret_cast(&dict_id_buf), + ULINT32_UNDEFINED); + ib_uint32_t counter_buf; + mach_write_to_4(reinterpret_cast(&counter_buf), + ULINT32_UNDEFINED); + + pars_info_add_literal(info, "name", name, name_len, + DATA_VARCHAR, DATA_ENGLISH); + pars_info_bind_int4_literal(info, "dict_id", &dict_id_buf); + pars_info_bind_function(info, "find_dict_func", + dict_create_extract_int_aux, &dict_id_buf); + pars_info_bind_function(info, "count_func", + dict_create_extract_int_aux, &counter_buf); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "DECLARE FUNCTION find_dict_func;\n" + "DECLARE FUNCTION count_func;\n" + "DECLARE CURSOR dict_cur IS\n" + " SELECT ID FROM SYS_ZIP_DICT\n" + " WHERE NAME = :name\n" + " FOR UPDATE;\n" + "DECLARE CURSOR ref_cur IS\n" + " SELECT 1 FROM SYS_ZIP_DICT_COLS\n" + " WHERE DICT_ID = :dict_id;\n" + "BEGIN\n" + " OPEN dict_cur;\n" + " FETCH dict_cur INTO find_dict_func();\n" + " IF NOT (SQL % NOTFOUND) THEN\n" + " OPEN ref_cur;\n" + " FETCH ref_cur INTO count_func();\n" + " IF SQL % NOTFOUND THEN\n" + " DELETE FROM SYS_ZIP_DICT WHERE CURRENT OF dict_cur;\n" + " END IF;\n" + " CLOSE ref_cur;\n" + " END IF;\n" + " CLOSE dict_cur;\n" + "END;\n", + FALSE, trx); + if (error == DB_SUCCESS) { + ib_uint32_t local_dict_id = mach_read_from_4( + reinterpret_cast(&dict_id_buf)); + if (local_dict_id == ULINT32_UNDEFINED) { + error = DB_RECORD_NOT_FOUND; + } + else { + ib_uint32_t local_counter = mach_read_from_4( + reinterpret_cast(&counter_buf)); + if (local_counter != ULINT32_UNDEFINED) + error = DB_ROW_IS_REFERENCED; + } + } + return error; +} + +/** Remove all compression dictionary references for the given table ID from +the data dictionary tables in the database. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_remove_zip_dict_references_for_table( + ulint table_id, /*!< in: table id */ + trx_t* trx) /*!< in/out: transaction */ +{ + pars_info_t* info = pars_info_create(); + + pars_info_add_int4_literal(info, "table_id", table_id); + + dberr_t error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "BEGIN\n" + " DELETE FROM SYS_ZIP_DICT_COLS\n" + " WHERE TABLE_ID = :table_id;\n" + "END;\n", + FALSE, trx); + return error; +} diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index 1b4fa916973db..9e2aa8328d1c0 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -7310,3 +7310,161 @@ dict_tf_to_row_format_string( return(0); } #endif /* !UNIV_HOTBACKUP */ + +/** Insert a records into SYS_ZIP_DICT. +@retval DB_SUCCESS if OK +@retval dberr_t if the insert failed */ +UNIV_INTERN +dberr_t +dict_create_zip_dict( + const char* name, /*!< in: zip_dict name */ + ulint name_len, /*!< in: zip_dict name length*/ + const char* data, /*!< in: zip_dict data */ + ulint data_len) /*!< in: zip_dict data length */ +{ + dberr_t err = DB_SUCCESS; + trx_t* trx; + + ut_ad(name); + ut_ad(data); + + rw_lock_x_lock(&dict_operation_lock); + dict_mutex_enter_for_mysql(); + + trx = trx_allocate_for_background(); + trx->op_info = "insert zip_dict"; + trx->dict_operation_lock_mode = RW_X_LATCH; + trx_start_if_not_started(trx); + + err = dict_create_add_zip_dict(name, name_len, data, data_len, trx); + + if (err == DB_SUCCESS) { + trx_commit_for_mysql(trx); + } + else { + trx->op_info = "rollback of internal trx on zip_dict table"; + trx_rollback_to_savepoint(trx, NULL); + ut_a(trx->error_state == DB_SUCCESS); + } + trx->op_info = ""; + trx->dict_operation_lock_mode = 0; + trx_free_for_background(trx); + + dict_mutex_exit_for_mysql(); + rw_lock_x_unlock(&dict_operation_lock); + + return err; +} +/** Get single compression dictionary id for the given +(table id, column pos) pair. +@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found */ +UNIV_INTERN +dberr_t +dict_get_dictionary_id_by_key( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint* dict_id) /*!< out: zip_dict id */ +{ + dberr_t err = DB_SUCCESS; + trx_t* trx; + + rw_lock_s_lock(&dict_operation_lock); + dict_mutex_enter_for_mysql(); + + trx = trx_allocate_for_background(); + trx->op_info = "get zip dict id by composite key"; + trx->dict_operation_lock_mode = RW_S_LATCH; + trx_start_if_not_started(trx); + + err = dict_create_get_zip_dict_id_by_reference(table_id, column_pos, + dict_id, trx); + + trx_commit_for_mysql(trx); + trx->dict_operation_lock_mode = 0; + trx_free_for_background(trx); + + dict_mutex_exit_for_mysql(); + rw_lock_s_unlock(&dict_operation_lock); + + return err; +} +/** Get compression dictionary info (name and data) for the given id. +Allocates memory in name->str and data->str on success. +Must be freed with mem_free(). +@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found */ +UNIV_INTERN +dberr_t +dict_get_dictionary_info_by_id( + ulint dict_id, /*!< in: table name */ + char** name, /*!< out: dictionary name */ + ulint* name_len, /*!< out: dictionary name length*/ + char** data, /*!< out: dictionary data */ + ulint* data_len) /*!< out: dictionary data length*/ +{ + dberr_t err = DB_SUCCESS; + trx_t* trx; + + rw_lock_s_lock(&dict_operation_lock); + dict_mutex_enter_for_mysql(); + + trx = trx_allocate_for_background(); + trx->op_info = "get zip dict name and data by id"; + trx->dict_operation_lock_mode = RW_S_LATCH; + trx_start_if_not_started(trx); + + err = dict_create_get_zip_dict_info_by_id(dict_id, name, name_len, + data, data_len, trx); + + trx_commit_for_mysql(trx); + trx->dict_operation_lock_mode = 0; + trx_free_for_background(trx); + + dict_mutex_exit_for_mysql(); + rw_lock_s_unlock(&dict_operation_lock); + + return err; +} +/** Delete a record in SYS_ZIP_DICT with the given name. +@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found +@retval DB_ROW_IS_REFERENCED if in use */ +UNIV_INTERN +dberr_t +dict_drop_zip_dict( + const char* name, /*!< in: zip_dict name */ + ulint name_len) /*!< in: zip_dict name length*/ +{ + dberr_t err = DB_SUCCESS; + trx_t* trx; + + ut_ad(name); + + rw_lock_x_lock(&dict_operation_lock); + dict_mutex_enter_for_mysql(); + + trx = trx_allocate_for_background(); + trx->op_info = "delete zip_dict"; + trx->dict_operation_lock_mode = RW_X_LATCH; + trx_start_if_not_started(trx); + + err = dict_create_remove_zip_dict(name, name_len, trx); + + if (err == DB_SUCCESS) { + trx_commit_for_mysql(trx); + } + else { + trx->op_info = "rollback of internal trx on zip_dict table"; + trx_rollback_to_savepoint(trx, NULL); + ut_a(trx->error_state == DB_SUCCESS); + } + trx->op_info = ""; + trx->dict_operation_lock_mode = 0; + trx_free_for_background(trx); + + dict_mutex_exit_for_mysql(); + rw_lock_x_unlock(&dict_operation_lock); + + return err; +} diff --git a/storage/xtradb/dict/dict0load.cc b/storage/xtradb/dict/dict0load.cc index 988351dbca55a..ca7de72c9b907 100644 --- a/storage/xtradb/dict/dict0load.cc +++ b/storage/xtradb/dict/dict0load.cc @@ -56,7 +56,9 @@ static const char* SYSTEM_TABLE_NAME[] = { "SYS_FOREIGN", "SYS_FOREIGN_COLS", "SYS_TABLESPACES", - "SYS_DATAFILES" + "SYS_DATAFILES", + "SYS_ZIP_DICT", + "SYS_ZIP_DICT_COLS" }; /* If this flag is TRUE, then we will load the cluster index's (and tables') @@ -728,6 +730,161 @@ dict_process_sys_datafiles( return(NULL); } +/** This function parses a SYS_ZIP_DICT record, extracts necessary +information from the record and returns to caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_zip_dict( + mem_heap_t* heap, /*!< in/out: heap memory */ + ulint zip_size, /*!< in: nonzero=compressed BLOB page size */ + const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */ + ulint* id, /*!< out: dict id */ + const char** name, /*!< out: dict name */ + const char** data, /*!< out: dict data */ + ulint* data_len) /*!< out: dict data length */ +{ + ulint len; + const byte* field; + + /* Initialize the output values */ + *id = ULINT_UNDEFINED; + *name = NULL; + *data = NULL; + *data_len = 0; + + if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) { + return("delete-marked record in SYS_ZIP_DICT"); + } + + if (UNIV_UNLIKELY( + rec_get_n_fields_old(rec)!= DICT_NUM_FIELDS__SYS_ZIP_DICT)) { + return("wrong number of columns in SYS_ZIP_DICT record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT__ID, &len); + if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) { + goto err_len; + } + *id = mach_read_from_4(field); + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_ZIP_DICT__DB_TRX_ID, &len); + if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_ZIP_DICT__DB_ROLL_PTR, &len); + if (UNIV_UNLIKELY(len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT__NAME, &len); + if (UNIV_UNLIKELY(len == 0 || len == UNIV_SQL_NULL)) { + goto err_len; + } + *name = mem_heap_strdupl(heap, (char*) field, len); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT__DATA, &len); + if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) { + goto err_len; + } + + if (rec_get_1byte_offs_flag(rec) == 0 && + rec_2_is_field_extern(rec, DICT_FLD__SYS_ZIP_DICT__DATA)) { + ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); + + if (UNIV_UNLIKELY + (!memcmp(field + len - BTR_EXTERN_FIELD_REF_SIZE, + field_ref_zero, + BTR_EXTERN_FIELD_REF_SIZE))) { + goto err_len; + } + *data = reinterpret_cast( + btr_copy_externally_stored_field(data_len, field, + zip_size, len, heap, 0)); + } + else { + *data_len = len; + *data = static_cast(mem_heap_dup(heap, field, len)); + } + + return(NULL); + +err_len: + return("incorrect column length in SYS_ZIP_DICT"); +} + +/** This function parses a SYS_ZIP_DICT_COLS record, extracts necessary +information from the record and returns to caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_zip_dict_cols( + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */ + ulint* table_id, /*!< out: table id */ + ulint* column_pos, /*!< out: column position */ + ulint* dict_id) /*!< out: dict id */ +{ + ulint len; + const byte* field; + + /* Initialize the output values */ + *table_id = ULINT_UNDEFINED; + *column_pos = ULINT_UNDEFINED; + *dict_id = ULINT_UNDEFINED; + + if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) { + return("delete-marked record in SYS_ZIP_DICT_COLS"); + } + + if (UNIV_UNLIKELY(rec_get_n_fields_old(rec) != + DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS)) { + return("wrong number of columns in SYS_ZIP_DICT_COLS" + " record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT_COLS__TABLE_ID, &len); + if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) { +err_len: + return("incorrect column length in SYS_ZIP_DICT_COLS"); + } + *table_id = mach_read_from_4(field); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT_COLS__COLUMN_POS, &len); + if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) { + goto err_len; + } + *column_pos = mach_read_from_4(field); + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_ZIP_DICT_COLS__DB_TRX_ID, &len); + if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_ZIP_DICT_COLS__DB_ROLL_PTR, &len); + if (UNIV_UNLIKELY(len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_ZIP_DICT_COLS__DICT_ID, &len); + if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) { + goto err_len; + } + *dict_id = mach_read_from_4(field); + + return(NULL); +} /********************************************************************//** Determine the flags of a table as stored in SYS_TABLES.TYPE and N_COLS. @return ULINT_UNDEFINED if error, else a valid dict_table_t::flags. */ diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc index b073398f8ecf7..a4aa43651f8a5 100644 --- a/storage/xtradb/dict/dict0stats.cc +++ b/storage/xtradb/dict/dict0stats.cc @@ -673,7 +673,10 @@ void dict_stats_copy( /*============*/ dict_table_t* dst, /*!< in/out: destination table */ - const dict_table_t* src) /*!< in: source table */ + const dict_table_t* src, /*!< in: source table */ + bool reset_ignored_indexes) /*!< in: if true, set ignored indexes + to have the same statistics as if + the table was empty */ { dst->stats_last_recalc = src->stats_last_recalc; dst->stat_n_rows = src->stat_n_rows; @@ -692,7 +695,16 @@ dict_stats_copy( && (src_idx = dict_table_get_next_index(src_idx)))) { if (dict_stats_should_ignore_index(dst_idx)) { - continue; + if (reset_ignored_indexes) { + /* Reset index statistics for all ignored indexes, + unless they are FT indexes (these have no statistics)*/ + if (dst_idx->type & DICT_FTS) { + continue; + } + dict_stats_empty_index(dst_idx); + } else { + continue; + } } ut_ad(!dict_index_is_univ(dst_idx)); @@ -782,7 +794,7 @@ dict_stats_snapshot_create( t = dict_stats_table_clone_create(table); - dict_stats_copy(t, table); + dict_stats_copy(t, table, false); t->stat_persistent = table->stat_persistent; t->stats_auto_recalc = table->stats_auto_recalc; @@ -3240,13 +3252,10 @@ dict_stats_update( dict_table_stats_lock(table, RW_X_LATCH); - /* Initialize all stats to dummy values before - copying because dict_stats_table_clone_create() does - skip corrupted indexes so our dummy object 't' may - have less indexes than the real object 'table'. */ - dict_stats_empty_table(table); - - dict_stats_copy(table, t); + /* Pass reset_ignored_indexes=true as parameter + to dict_stats_copy. This will cause statictics + for corrupted indexes to be set to empty values */ + dict_stats_copy(table, t, true); dict_stats_assert_initialized(table); diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 21a3cd75a4436..a7b0377d2a469 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -325,6 +325,8 @@ fil_space_get_by_id( ut_ad(space->magic_n == FIL_SPACE_MAGIC_N), space->id == id); + /* The system tablespace must always be found */ + ut_ad(space || id != 0 || srv_is_being_started); return(space); } @@ -1680,6 +1682,9 @@ fil_close_all_files(void) { fil_space_t* space; + // Must check both flags as it's possible for this to be called during + // server startup with srv_track_changed_pages == true but + // srv_redo_log_thread_started == false if (srv_track_changed_pages && srv_redo_log_thread_started) os_event_wait(srv_redo_log_tracked_event); @@ -1719,6 +1724,9 @@ fil_close_log_files( { fil_space_t* space; + // Must check both flags as it's possible for this to be called during + // server startup with srv_track_changed_pages == true but + // srv_redo_log_thread_started == false if (srv_track_changed_pages && srv_redo_log_thread_started) os_event_wait(srv_redo_log_tracked_event); diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc index 80c48b616a91c..4c54afae8cda8 100644 --- a/storage/xtradb/fts/fts0fts.cc +++ b/storage/xtradb/fts/fts0fts.cc @@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key; /** variable to record innodb_fts_internal_tbl_name for information schema table INNODB_FTS_INSERTED etc. */ UNIV_INTERN char* fts_internal_tbl_name = NULL; +UNIV_INTERN char* fts_internal_tbl_name2 = NULL; /** InnoDB default stopword list: There are different versions of stopwords, the stop words listed @@ -265,13 +266,15 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] sync sync state @param[in] unlock_cache whether unlock cache lock when write node @param[in] wait whether wait when a sync is in progress +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS if all OK */ static dberr_t fts_sync( fts_sync_t* sync, bool unlock_cache, - bool wait); + bool wait, + bool has_dict); /****************************************************************//** Release all resources help by the words rb tree e.g., the node ilist. */ @@ -3567,7 +3570,7 @@ fts_add_doc_by_id( DBUG_EXECUTE_IF( "fts_instrument_sync_debug", - fts_sync(cache->sync, true, true); + fts_sync(cache->sync, true, true, false); ); DEBUG_SYNC_C("fts_instrument_sync_request"); @@ -4379,13 +4382,11 @@ fts_sync_index( } /** Check if index cache has been synced completely -@param[in,out] sync sync state @param[in,out] index_cache index cache @return true if index is synced, otherwise false. */ static bool fts_sync_index_check( - fts_sync_t* sync, fts_index_cache_t* index_cache) { const ib_rbt_node_t* rbt_node; @@ -4408,14 +4409,36 @@ fts_sync_index_check( return(true); } -/*********************************************************************//** -Commit the SYNC, change state of processed doc ids etc. +/** Reset synced flag in index cache when rollback +@param[in,out] index_cache index cache */ +static +void +fts_sync_index_reset( + fts_index_cache_t* index_cache) +{ + const ib_rbt_node_t* rbt_node; + + for (rbt_node = rbt_first(index_cache->words); + rbt_node != NULL; + rbt_node = rbt_next(index_cache->words, rbt_node)) { + + fts_tokenizer_word_t* word; + word = rbt_value(fts_tokenizer_word_t, rbt_node); + + fts_node_t* fts_node; + fts_node = static_cast(ib_vector_last(word->nodes)); + + fts_node->synced = false; + } +} + +/** Commit the SYNC, change state of processed doc ids etc. +@param[in,out] sync sync state @return DB_SUCCESS if all OK */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t fts_sync_commit( -/*============*/ - fts_sync_t* sync) /*!< in: sync state */ + fts_sync_t* sync) { dberr_t error; trx_t* trx = sync->trx; @@ -4468,6 +4491,8 @@ fts_sync_commit( (double) n_nodes/ (double) elapsed_time); } + /* Avoid assertion in trx_free(). */ + trx->dict_operation_lock_mode = 0; trx_free_for_background(trx); return(error); @@ -4490,6 +4515,10 @@ fts_sync_rollback( index_cache = static_cast( ib_vector_get(cache->indexes, i)); + /* Reset synced flag so nodes will not be skipped + in the next sync, see fts_sync_write_words(). */ + fts_sync_index_reset(index_cache); + for (j = 0; fts_index_selector[j].value; ++j) { if (index_cache->ins_graph[j] != NULL) { @@ -4515,6 +4544,9 @@ fts_sync_rollback( rw_lock_x_unlock(&cache->lock); fts_sql_rollback(trx); + + /* Avoid assertion in trx_free(). */ + trx->dict_operation_lock_mode = 0; trx_free_for_background(trx); } @@ -4523,13 +4555,15 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] sync sync state @param[in] unlock_cache whether unlock cache lock when write node @param[in] wait whether wait when a sync is in progress +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS if all OK */ static dberr_t fts_sync( fts_sync_t* sync, bool unlock_cache, - bool wait) + bool wait, + bool has_dict) { ulint i; dberr_t error = DB_SUCCESS; @@ -4558,6 +4592,12 @@ fts_sync( DEBUG_SYNC_C("fts_sync_begin"); fts_sync_begin(sync); + /* When sync in background, we hold dict operation lock + to prevent DDL like DROP INDEX, etc. */ + if (has_dict) { + sync->trx->dict_operation_lock_mode = RW_S_LATCH; + } + begin_sync: if (cache->total_size > fts_max_cache_size) { /* Avoid the case: sync never finish when @@ -4598,7 +4638,7 @@ fts_sync( ib_vector_get(cache->indexes, i)); if (index_cache->index->to_be_dropped - || fts_sync_index_check(sync, index_cache)) { + || fts_sync_index_check(index_cache)) { continue; } @@ -4613,6 +4653,7 @@ fts_sync( } rw_lock_x_lock(&cache->lock); + sync->interrupted = false; sync->in_progress = false; os_event_set(sync->event); rw_lock_x_unlock(&cache->lock); @@ -4636,20 +4677,23 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] table fts table @param[in] unlock_cache whether unlock cache when write node @param[in] wait whether wait for existing sync to finish +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS on success, error code on failure. */ UNIV_INTERN dberr_t fts_sync_table( dict_table_t* table, bool unlock_cache, - bool wait) + bool wait, + bool has_dict) { dberr_t err = DB_SUCCESS; ut_ad(table->fts); if (!dict_table_is_discarded(table) && table->fts->cache) { - err = fts_sync(table->fts->cache->sync, unlock_cache, wait); + err = fts_sync(table->fts->cache->sync, + unlock_cache, wait, has_dict); } return(err); @@ -6527,6 +6571,36 @@ fts_check_corrupt_index( return(0); } +/* Get parent table name if it's a fts aux table +@param[in] aux_table_name aux table name +@param[in] aux_table_len aux table length +@return parent table name, or NULL */ +char* +fts_get_parent_table_name( + const char* aux_table_name, + ulint aux_table_len) +{ + fts_aux_table_t aux_table; + char* parent_table_name = NULL; + + if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) { + dict_table_t* parent_table; + + parent_table = dict_table_open_on_id( + aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL); + + if (parent_table != NULL) { + parent_table_name = mem_strdupl( + parent_table->name, + strlen(parent_table->name)); + + dict_table_close(parent_table, TRUE, FALSE); + } + } + + return(parent_table_name); +} + /** Check the validity of the parent table. @param[in] aux_table auxiliary table @return true if it is a valid table or false if it is not */ diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc index d9f2532578e5c..ea937c2075217 100644 --- a/storage/xtradb/fts/fts0opt.cc +++ b/storage/xtradb/fts/fts0opt.cc @@ -2986,7 +2986,7 @@ fts_optimize_sync_table( if (table) { if (dict_table_has_fts_index(table) && table->fts->cache) { - fts_sync_table(table, true, false); + fts_sync_table(table, true, false, true); } dict_table_close(table, FALSE, FALSE); diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index ceb489c8f57d4..66d2ae4f9de15 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -107,6 +107,14 @@ this program; if not, write to the Free Software Foundation, Inc., #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X)) +#ifndef HAVE_PERCONA_COMPRESSED_COLUMNS +#define COLUMN_FORMAT_TYPE_COMPRESSED 0xBADF00D +#define SQLCOM_CREATE_COMPRESSION_DICTIONARY 0xDECAF +#define SQLCOM_DROP_COMPRESSION_DICTIONARY 0xC0FFEE +#define ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST 0xDEADFACE +const static LEX_CSTRING null_lex_cstr={0,0}; +#endif + #ifdef MYSQL_DYNAMIC_PLUGIN #define tc_size 400 #define tdc_size 400 @@ -818,6 +826,19 @@ innobase_is_fake_change( THD* thd) __attribute__((unused)); /*!< in: MySQL thread handle of the user for whom the transaction is being committed */ +/** Get the list of foreign keys referencing a specified table +table. +@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys + +@return error code or zero for success */ +static +int +innobase_get_parent_fk_list( + THD* thd, + const char* path, + List* f_key_list) __attribute__((unused)); /******************************************************************//** Maps a MySQL trx isolation level code to the InnoDB isolation level code @@ -1493,6 +1514,30 @@ normalize_table_name_low( ibool set_lower_case); /* in: TRUE if we want to set name to lower case */ +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS +/** Creates a new compression dictionary. */ +static +handler_create_zip_dict_result +innobase_create_zip_dict( + handlerton* hton, /*!< in: innobase handlerton */ + THD* thd, /*!< in: handle to the MySQL thread */ + const char* name, /*!< in: zip dictionary name */ + ulint* name_len, + /*!< in/out: zip dictionary name length */ + const char* data, /*!< in: zip dictionary data */ + ulint* data_len); + /*!< in/out: zip dictionary data length */ + +/** Drops a existing compression dictionary. */ +static +handler_drop_zip_dict_result +innobase_drop_zip_dict( + handlerton* hton, /*!< in: innobase handlerton */ + THD* thd, /*!< in: handle to the MySQL thread */ + const char* name, /*!< in: zip dictionary name */ + ulint* name_len); + /*!< in/out: zip dictionary name length */ +#endif /*************************************************************//** Checks if buffer pool is big enough to enable backoff algorithm. InnoDB empty free list algorithm backoff requires free pages @@ -3607,6 +3652,10 @@ innobase_init( innobase_hton->wsrep_fake_trx_id=wsrep_fake_trx_id; #endif /* WITH_WSREP */ +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS + innobase_hton->create_zip_dict = innobase_create_zip_dict; + innobase_hton->drop_zip_dict = innobase_drop_zip_dict; +#endif ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); #ifndef DBUG_OFF @@ -4300,6 +4349,90 @@ innobase_purge_changed_page_bitmaps( return (my_bool)log_online_purge_changed_page_bitmaps(lsn); } +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS +/** Creates a new compression dictionary. */ +static +handler_create_zip_dict_result +innobase_create_zip_dict( + handlerton* hton, /*!< in: innobase handlerton */ + THD* thd, /*!< in: handle to the MySQL thread */ + const char* name, /*!< in: zip dictionary name */ + ulint* name_len, + /*!< in/out: zip dictionary name length */ + const char* data, /*!< in: zip dictionary data */ + ulint* data_len) + /*!< in/out: zip dictionary data length */ +{ + handler_create_zip_dict_result result = + HA_CREATE_ZIP_DICT_UNKNOWN_ERROR; + + DBUG_ENTER("innobase_create_zip_dict"); + DBUG_ASSERT(hton == innodb_hton_ptr); + + if (UNIV_UNLIKELY(high_level_read_only)) { + DBUG_RETURN(HA_CREATE_ZIP_DICT_READ_ONLY); + } + + if (UNIV_UNLIKELY(*name_len > ZIP_DICT_MAX_NAME_LENGTH)) { + *name_len = ZIP_DICT_MAX_NAME_LENGTH; + DBUG_RETURN(HA_CREATE_ZIP_DICT_NAME_TOO_LONG); + } + + if (UNIV_UNLIKELY(*data_len > ZIP_DICT_MAX_DATA_LENGTH)) { + *data_len = ZIP_DICT_MAX_DATA_LENGTH; + DBUG_RETURN(HA_CREATE_ZIP_DICT_DATA_TOO_LONG); + } + + switch (dict_create_zip_dict(name, *name_len, data, *data_len)) { + case DB_SUCCESS: + result = HA_CREATE_ZIP_DICT_OK; + break; + case DB_DUPLICATE_KEY: + result = HA_CREATE_ZIP_DICT_ALREADY_EXISTS; + break; + default: + ut_ad(0); + result = HA_CREATE_ZIP_DICT_UNKNOWN_ERROR; + } + DBUG_RETURN(result); +} + +/** Drops a existing compression dictionary. */ +static +handler_drop_zip_dict_result +innobase_drop_zip_dict( + handlerton* hton, /*!< in: innobase handlerton */ + THD* thd, /*!< in: handle to the MySQL thread */ + const char* name, /*!< in: zip dictionary name */ + ulint* name_len) + /*!< in/out: zip dictionary name length */ +{ + handler_drop_zip_dict_result result = HA_DROP_ZIP_DICT_UNKNOWN_ERROR; + + DBUG_ENTER("innobase_drop_zip_dict"); + DBUG_ASSERT(hton == innodb_hton_ptr); + + if (UNIV_UNLIKELY(high_level_read_only)) { + DBUG_RETURN(HA_DROP_ZIP_DICT_READ_ONLY); + } + + switch (dict_drop_zip_dict(name, *name_len)) { + case DB_SUCCESS: + result = HA_DROP_ZIP_DICT_OK; + break; + case DB_RECORD_NOT_FOUND: + result = HA_DROP_ZIP_DICT_DOES_NOT_EXIST; + break; + case DB_ROW_IS_REFERENCED: + result = HA_DROP_ZIP_DICT_IS_REFERENCED; + break; + default: + ut_ad(0); + result = HA_DROP_ZIP_DICT_UNKNOWN_ERROR; + } + DBUG_RETURN(result); +} +#endif /*****************************************************************//** Check whether this is a fake change transaction. @return TRUE if a fake change transaction */ @@ -5933,6 +6066,88 @@ innobase_build_index_translation( DBUG_RETURN(ret); } +/** This function checks if all the compression dictionaries referenced +in table->fields exist in SYS_ZIP_DICT InnoDB system table. +@return true if all referenced dictionaries exist */ +UNIV_INTERN +bool +innobase_check_zip_dicts( + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + ulint* dict_ids, /*!< out: identified zip dict ids + (at least n_fields long) */ + trx_t* trx, /*!< in: transaction */ + const char** err_dict_name) /*!< out: the name of the + zip_dict which does not exist. */ +{ + DBUG_ENTER("innobase_check_zip_dicts"); + + bool res = true; +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS + dberr_t err = DB_SUCCESS; + const size_t n_fields = table->s->fields; + + Field* field_ptr; + for (size_t field_idx = 0; err == DB_SUCCESS && field_idx < n_fields; + ++field_idx) + { + field_ptr = table->field[field_idx]; + if (field_ptr->has_associated_compression_dictionary()) { + err = dict_create_get_zip_dict_id_by_name( + field_ptr->zip_dict_name.str, + field_ptr->zip_dict_name.length, + &dict_ids[field_idx], + trx); + ut_a(err == DB_SUCCESS || err == DB_RECORD_NOT_FOUND); + } + else { + dict_ids[field_idx] = ULINT_UNDEFINED; + } + + } + + if (err != DB_SUCCESS) { + res = false; + *err_dict_name = field_ptr->zip_dict_name.str; + } + +#endif + DBUG_RETURN(res); +} + +/** This function creates compression dictionary references in +SYS_ZIP_DICT_COLS InnoDB system table for table_id based on info +in table->fields and provided zip dict ids. */ +UNIV_INTERN +void +innobase_create_zip_dict_references( + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + table_id_t ib_table_id, /*!< in: table ID in Innodb data + dictionary */ + ulint* zip_dict_ids, /*!< in: zip dict ids + (at least n_fields long) */ + trx_t* trx) /*!< in: transaction */ +{ + DBUG_ENTER("innobase_create_zip_dict_references"); + + dberr_t err = DB_SUCCESS; + const size_t n_fields = table->s->fields; + + for (size_t field_idx = 0; err == DB_SUCCESS && field_idx < n_fields; + ++field_idx) + { + if (zip_dict_ids[field_idx] != ULINT_UNDEFINED) { + err = dict_create_add_zip_dict_reference(ib_table_id, + table->field[field_idx]->field_index, + zip_dict_ids[field_idx], trx); + ut_a(err == DB_SUCCESS); + } + } + + DBUG_VOID_RETURN; +} + /*******************************************************************//** This function uses index translation table to quickly locate the requested index structure. @@ -7164,6 +7379,7 @@ wsrep_store_key_val_for_row( format) */ uint buff_len,/*!< in: buffer length */ const uchar* record, + row_prebuilt_t* prebuilt, /*!< in: InnoDB prebuilt struct */ ibool* key_is_null)/*!< out: full key was null */ { KEY* key_info = table->key_info + keynr; @@ -7320,8 +7536,17 @@ wsrep_store_key_val_for_row( blob_data = row_mysql_read_blob_ref(&blob_len, (byte*) (record - + (ulint)get_field_offset(table, field)), - (ulint) field->pack_length()); + + (ulint) get_field_offset(table, field)), + (ulint) field->pack_length(), +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS + field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED, + reinterpret_cast( + field->zip_dict_data.str), + field->zip_dict_data.length, prebuilt); +#else + 0, 0, 0, prebuilt); +#endif true_len = blob_len; @@ -7616,7 +7841,16 @@ ha_innobase::store_key_val_for_row( blob_data = row_mysql_read_blob_ref(&blob_len, (byte*) (record + (ulint) get_field_offset(table, field)), - (ulint) field->pack_length()); + (ulint) field->pack_length(), +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS + field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED, + reinterpret_cast( + field->zip_dict_data.str), + field->zip_dict_data.length, prebuilt); +#else + 0, 0, 0, prebuilt); +#endif true_len = blob_len; @@ -7872,6 +8106,14 @@ build_template_field( templ->mbminlen = dict_col_get_mbminlen(col); templ->mbmaxlen = dict_col_get_mbmaxlen(col); templ->is_unsigned = col->prtype & DATA_UNSIGNED; +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS + templ->compressed = (field->column_format() + == COLUMN_FORMAT_TYPE_COMPRESSED); + templ->zip_dict_data = field->zip_dict_data; +#else + templ->compressed = 0; + templ->zip_dict_data = null_lex_cstr; +#endif if (!dict_index_is_clust(index) && templ->rec_field_no == ULINT_UNDEFINED) { @@ -8190,6 +8432,7 @@ dberr_t ha_innobase::innobase_lock_autoinc(void) /*====================================*/ { + DBUG_ENTER("ha_innobase::innobase_lock_autoinc"); dberr_t error = DB_SUCCESS; ut_ad(!srv_read_only_mode); @@ -8229,6 +8472,8 @@ ha_innobase::innobase_lock_autoinc(void) /* Fall through to old style locking. */ case AUTOINC_OLD_STYLE_LOCKING: + DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used", + ut_ad(0);); error = row_lock_table_autoinc_for_mysql(prebuilt); if (error == DB_SUCCESS) { @@ -8242,7 +8487,7 @@ ha_innobase::innobase_lock_autoinc(void) ut_error; } - return(error); + DBUG_RETURN(error); } /********************************************************************//** @@ -8776,8 +9021,11 @@ calc_row_difference( switch (col_type) { case DATA_BLOB: - o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len); - n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len); + /* Do not compress blob column while comparing*/ + o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len, + false, 0, 0, prebuilt); + n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len, + false, 0, 0, prebuilt); break; @@ -8847,7 +9095,17 @@ calc_row_difference( TRUE, new_mysql_row_col, col_pack_len, - dict_table_is_comp(prebuilt->table)); + dict_table_is_comp(prebuilt->table), +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS + field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED, + reinterpret_cast( + field->zip_dict_data.str), + field->zip_dict_data.length, +#else + 0, 0, 0, +#endif + prebuilt); dfield_copy(&ufield->new_val, &dfield); } else { dfield_set_null(&ufield->new_val); @@ -9018,7 +9276,8 @@ wsrep_calc_row_hash( switch (col_type) { case DATA_BLOB: - ptr = row_mysql_read_blob_ref(&len, ptr, len); + ptr = row_mysql_read_blob_ref(&len, ptr, len, + false, 0, 0, prebuilt); break; @@ -10831,7 +11090,7 @@ ha_innobase::wsrep_append_keys( len = wsrep_store_key_val_for_row( thd, table, 0, key, WSREP_MAX_SUPPORTED_KEY_LENGTH, - record0, &is_null); + record0, prebuilt, &is_null); if (!is_null) { rcode = wsrep_append_key( @@ -10885,7 +11144,7 @@ ha_innobase::wsrep_append_keys( len = wsrep_store_key_val_for_row( thd, table, i, key0, WSREP_MAX_SUPPORTED_KEY_LENGTH, - record0, &is_null); + record0, prebuilt, &is_null); if (!is_null) { rcode = wsrep_append_key( thd, trx, table_share, table, @@ -10904,7 +11163,7 @@ ha_innobase::wsrep_append_keys( len = wsrep_store_key_val_for_row( thd, table, i, key1, WSREP_MAX_SUPPORTED_KEY_LENGTH, - record1, &is_null); + record1, prebuilt, &is_null); if (!is_null && memcmp(key0, key1, len)) { rcode = wsrep_append_key( thd, trx, table_share, @@ -11079,6 +11338,7 @@ create_table_def( ulint unsigned_type; ulint binary_type; ulint long_true_varchar; + ulint compressed; ulint charset_no; ulint i; ulint doc_id_col = 0; @@ -11228,6 +11488,13 @@ create_table_def( } } + /* Check if the the field has COMPRESSED attribute */ + compressed = 0; + if (field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED) { + compressed = DATA_COMPRESSED; + } + /* First check whether the column to be added has a system reserved name. */ if (dict_col_name_is_reserved(field->field_name)){ @@ -11248,7 +11515,8 @@ create_table_def( dtype_form_prtype( (ulint) field->type() | nulls_allowed | unsigned_type - | binary_type | long_true_varchar, + | binary_type | long_true_varchar + | compressed, charset_no), col_len); } @@ -12078,6 +12346,9 @@ ha_innobase::create( const char* stmt; size_t stmt_len; + mem_heap_t* heap = 0; + ulint* zip_dict_ids = 0; + DBUG_ENTER("ha_innobase::create"); DBUG_ASSERT(thd != NULL); @@ -12168,6 +12439,19 @@ ha_innobase::create( row_mysql_lock_data_dictionary(trx); + heap = mem_heap_create(form->s->fields * sizeof(ulint)); + zip_dict_ids = static_cast( + mem_heap_alloc(heap, form->s->fields * sizeof(ulint))); + + const char* err_zip_dict_name = 0; + if (!innobase_check_zip_dicts(form, zip_dict_ids, + trx, &err_zip_dict_name)) { + error = -1; + my_error(ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST, + MYF(0), err_zip_dict_name); + goto cleanup; + } + error = create_table_def(trx, form, norm_name, temp_path, remote_path, flags, flags2); if (error) { @@ -12275,6 +12559,22 @@ ha_innobase::create( dict_table_get_all_fts_indexes(innobase_table, fts->indexes); } + /* + Adding compression dictionary <-> compressed table column links + to the SYS_ZIP_DICT_COLS table. + */ + ut_a(zip_dict_ids != 0); + { + dict_table_t* local_table = dict_table_open_on_name( + norm_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE); + + ut_a(local_table); + table_id_t table_id = local_table->id; + dict_table_close(local_table, TRUE, FALSE); + innobase_create_zip_dict_references(form, + table_id, zip_dict_ids, trx); + } + stmt = innobase_get_stmt(thd, &stmt_len); if (stmt) { @@ -12391,6 +12691,9 @@ ha_innobase::create( trx_free_for_mysql(trx); + if (heap != 0) + mem_heap_free(heap); + DBUG_RETURN(0); cleanup: @@ -12400,6 +12703,9 @@ ha_innobase::create( trx_free_for_mysql(trx); + if (heap != 0) + mem_heap_free(heap); + DBUG_RETURN(error); } @@ -13486,6 +13792,14 @@ ha_innobase::info_low( if (dict_stats_is_persistent_enabled(ib_table)) { if (is_analyze) { + + /* If this table is already queued for + background analyze, remove it from the + queue as we are about to do the same */ + dict_mutex_enter_for_mysql(); + dict_stats_recalc_pool_del(ib_table); + dict_mutex_exit_for_mysql(); + opt = DICT_STATS_RECALC_PERSISTENT; } else { /* This is e.g. 'SHOW INDEXES', fetch @@ -13915,7 +14229,7 @@ ha_innobase::optimize( if (innodb_optimize_fulltext_only) { if (prebuilt->table->fts && prebuilt->table->fts->cache && !dict_table_is_discarded(prebuilt->table)) { - fts_sync_table(prebuilt->table, false, true); + fts_sync_table(prebuilt->table, false, true, false); fts_optimize_table(prebuilt->table); } return(HA_ADMIN_OK); @@ -14119,7 +14433,14 @@ ha_innobase::check( prebuilt->select_lock_type = LOCK_NONE; - if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) { + bool check_result + = row_check_index_for_mysql(prebuilt, index, &n_rows); + DBUG_EXECUTE_IF( + "dict_set_index_corrupted", + if (!(index->type & DICT_CLUSTERED)) { + check_result = false; + }); + if (!check_result) { innobase_format_name( index_name, sizeof index_name, index->name, TRUE); @@ -14446,6 +14767,75 @@ get_foreign_key_info( return(pf_key_info); } +/** Get the list of foreign keys referencing a specified table +table. +@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys */ +static +void +fill_foreign_key_list(THD* thd, + const dict_table_t* table, + List* f_key_list) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + for (dict_foreign_set::iterator it = table->referenced_set.begin(); + it != table->referenced_set.end(); ++it) { + + dict_foreign_t* foreign = *it; + + FOREIGN_KEY_INFO* pf_key_info + = get_foreign_key_info(thd, foreign); + if (pf_key_info) { + f_key_list->push_back(pf_key_info); + } + } +} + +/** Get the list of foreign keys referencing a specified table +table. +@param thd The thread handle +@param path Path to the table +@param f_key_list[out] The list of foreign keys + +@return error code or zero for success */ +static +int +innobase_get_parent_fk_list( + THD* thd, + const char* path, + List* f_key_list) +{ + ut_a(strlen(path) <= FN_REFLEN); + char norm_name[FN_REFLEN + 1]; + normalize_table_name(norm_name, path); + + trx_t* parent_trx = check_trx_exists(thd); + parent_trx->op_info = "getting list of referencing foreign keys"; + trx_search_latch_release_if_reserved(parent_trx); + + mutex_enter(&dict_sys->mutex); + + dict_table_t* table + = dict_table_open_on_name(norm_name, TRUE, FALSE, + static_cast( + DICT_ERR_IGNORE_INDEX_ROOT + | DICT_ERR_IGNORE_CORRUPT)); + if (!table) { + mutex_exit(&dict_sys->mutex); + return(HA_ERR_NO_SUCH_TABLE); + } + + fill_foreign_key_list(thd, table, f_key_list); + + dict_table_close(table, TRUE, FALSE); + + mutex_exit(&dict_sys->mutex); + parent_trx->op_info = ""; + return(0); +} + /*******************************************************************//** Gets the list of foreign keys in this table. @return always 0, that is, always succeeds */ @@ -14498,9 +14888,6 @@ ha_innobase::get_parent_foreign_key_list( THD* thd, /*!< in: user thread handle */ List* f_key_list) /*!< out: foreign key list */ { - FOREIGN_KEY_INFO* pf_key_info; - dict_foreign_t* foreign; - ut_a(prebuilt != NULL); update_thd(ha_thd()); @@ -14509,20 +14896,7 @@ ha_innobase::get_parent_foreign_key_list( trx_search_latch_release_if_reserved(prebuilt->trx); mutex_enter(&(dict_sys->mutex)); - - for (dict_foreign_set::iterator it - = prebuilt->table->referenced_set.begin(); - it != prebuilt->table->referenced_set.end(); - ++it) { - - foreign = *it; - - pf_key_info = get_foreign_key_info(thd, foreign); - if (pf_key_info) { - f_key_list->push_back(pf_key_info); - } - } - + fill_foreign_key_list(thd, prebuilt->table, f_key_list); mutex_exit(&(dict_sys->mutex)); prebuilt->trx->op_info = ""; @@ -14612,6 +14986,11 @@ ha_innobase::extra( if (prebuilt->blob_heap) { row_mysql_prebuilt_free_blob_heap(prebuilt); } + + if (prebuilt->compress_heap) { + row_mysql_prebuilt_free_compress_heap(prebuilt); + } + break; case HA_EXTRA_RESET_STATE: reset_template(); @@ -14663,6 +15042,10 @@ ha_innobase::reset() row_mysql_prebuilt_free_blob_heap(prebuilt); } + if (prebuilt->compress_heap) { + row_mysql_prebuilt_free_compress_heap(prebuilt); + } + reset_template(); ds_mrr.dsmrr_close(); @@ -14869,7 +15252,11 @@ ha_innobase::external_lock( && lock_type == F_WRLCK) || thd_sql_command(thd) == SQLCOM_CREATE_INDEX || thd_sql_command(thd) == SQLCOM_DROP_INDEX - || thd_sql_command(thd) == SQLCOM_DELETE)) { + || thd_sql_command(thd) == SQLCOM_DELETE + || thd_sql_command(thd) == + SQLCOM_CREATE_COMPRESSION_DICTIONARY + || thd_sql_command(thd) == + SQLCOM_DROP_COMPRESSION_DICTIONARY)) { if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE) { @@ -15637,7 +16024,9 @@ ha_innobase::store_lock( && lock_type <= TL_WRITE)) || sql_command == SQLCOM_CREATE_INDEX || sql_command == SQLCOM_DROP_INDEX - || sql_command == SQLCOM_DELETE)) { + || sql_command == SQLCOM_DELETE + || sql_command == SQLCOM_CREATE_COMPRESSION_DICTIONARY + || sql_command == SQLCOM_DROP_COMPRESSION_DICTIONARY)) { ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); @@ -16594,6 +16983,84 @@ ha_innobase::check_if_incompatible_data( return(COMPATIBLE_DATA_YES); } +/** This function reads zip dict-related info from SYS_ZIP_DICT +and SYS_ZIP_DICT_COLS for all columns marked with +COLUMN_FORMAT_TYPE_COMPRESSED flag and updates +zip_dict_name / zip_dict_data for those which have associated +compression dictionaries. +*/ +UNIV_INTERN +void +ha_innobase::update_field_defs_with_zip_dict_info() +{ + DBUG_ENTER("update_field_defs_with_zip_dict_info"); + ut_ad(!mutex_own(&dict_sys->mutex)); + + char norm_name[FN_REFLEN]; + normalize_table_name(norm_name, table_share->normalized_path.str); + + dict_table_t* ib_table = dict_table_open_on_name( + norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); + + /* if dict_table_open_on_name() returns NULL, then it means that + TABLE_SHARE is populated for a table being created and we can + skip filling zip dict info here */ + if (ib_table == 0) + DBUG_VOID_RETURN; + +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS + table_id_t ib_table_id = ib_table->id; + dict_table_close(ib_table, FALSE, FALSE); + Field* field; + for (uint i = 0; i < table_share->fields; ++i) { + field = table_share->field[i]; + if (field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED) { + bool reference_found = false; + ulint dict_id = 0; + switch (dict_get_dictionary_id_by_key(ib_table_id, i, + &dict_id)) { + case DB_SUCCESS: + reference_found = true; + break; + case DB_RECORD_NOT_FOUND: + reference_found = false; + break; + default: + ut_error; + } + if (reference_found) { + char* local_name = 0; + ulint local_name_len = 0; + char* local_data = 0; + ulint local_data_len = 0; + if (dict_get_dictionary_info_by_id(dict_id, + &local_name, &local_name_len, + &local_data, &local_data_len) != + DB_SUCCESS) { + ut_error; + } + else { + field->zip_dict_name.str = + local_name; + field->zip_dict_name.length = + local_name_len; + field->zip_dict_data.str = + local_data; + field->zip_dict_data.length = + local_data_len; + } + } + else { + field->zip_dict_name = null_lex_cstr; + field->zip_dict_data = null_lex_cstr; + } + } + } +#endif + DBUG_VOID_RETURN; +} + /****************************************************************//** Update the system variable innodb_io_capacity_max using the "saved" value. This function is registered as a callback with MySQL. */ @@ -17155,7 +17622,12 @@ innodb_internal_table_update( my_free(old); } - fts_internal_tbl_name = *(char**) var_ptr; + fts_internal_tbl_name2 = *(char**) var_ptr; + if (fts_internal_tbl_name2 == NULL) { + fts_internal_tbl_name = const_cast("default"); + } else { + fts_internal_tbl_name = fts_internal_tbl_name2; + } } /****************************************************************//** @@ -18287,7 +18759,6 @@ innodb_track_changed_pages_validate( for update function */ struct st_mysql_value* value) /*!< in: incoming bool */ { - static bool enabled_on_startup = false; long long intbuf = 0; if (value->val_int(value, &intbuf)) { @@ -18295,8 +18766,7 @@ innodb_track_changed_pages_validate( return 1; } - if (srv_track_changed_pages || enabled_on_startup) { - enabled_on_startup = true; + if (srv_redo_log_thread_started) { *reinterpret_cast(save) = static_cast(intbuf); return 0; @@ -19834,7 +20304,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache, "Whether to disable OS system file cache for sort I/O", NULL, NULL, FALSE); -static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name, +static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2, PLUGIN_VAR_NOCMDARG, "FTS internal auxiliary table to be checked", innodb_internal_table_validate, @@ -20314,7 +20784,7 @@ static MYSQL_SYSVAR_ENUM(corrupt_table_action, srv_pass_corrupt_table, "Warn corruptions of user tables as 'corrupt table' instead of not crashing itself, " "when used with file_per_table. " "All file io for the datafile after detected as corrupt are disabled, " - "except for the deletion. Possible options are 'assert', 'warn' & 'salvage'", + "except for the deletion.", NULL, NULL, 0, &corrupt_table_action_typelib); static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks, @@ -20329,6 +20799,21 @@ static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace, "Print stacktrace on long semaphore wait (off by default supported only on linux)", NULL, NULL, FALSE); +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS +static MYSQL_SYSVAR_UINT(compressed_columns_zip_level, + srv_compressed_columns_zip_level, + PLUGIN_VAR_RQCMDARG, + "Compression level used for compressed columns. 0 is no compression" + ", 1 is fastest and 9 is best compression. Default is 6.", + NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0); + +static MYSQL_SYSVAR_ULONG(compressed_columns_threshold, + srv_compressed_columns_threshold, + PLUGIN_VAR_RQCMDARG, + "Compress column data if its length exceeds this value. Default is 96", + NULL, NULL, 96, 1, ~0UL, 0); +#endif + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_block_size), MYSQL_SYSVAR(additional_mem_pool_size), @@ -20535,6 +21020,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(tmpdir), MYSQL_SYSVAR(use_stacktrace), MYSQL_SYSVAR(simulate_comp_failures), +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS + MYSQL_SYSVAR(compressed_columns_zip_level), + MYSQL_SYSVAR(compressed_columns_threshold), +#endif NULL }; @@ -20557,6 +21046,10 @@ maria_declare_plugin(xtradb) i_s_xtradb_read_view, i_s_xtradb_internal_hash_tables, i_s_xtradb_rseg, +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS +i_s_xtradb_zip_dict, +i_s_xtradb_zip_dict_cols, +#endif i_s_innodb_trx, i_s_innodb_locks, i_s_innodb_lock_waits, diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 37d787ad14d62..cb7dd6b9cf4c8 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -291,8 +291,17 @@ class ha_innobase: public handler /** @} */ bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); + bool check_if_supported_virtual_columns(void) { return TRUE; } + /** This function reads zip dict-related info from SYS_ZIP_DICT + and SYS_ZIP_DICT_COLS for all columns marked with + COLUMN_FORMAT_TYPE_COMPRESSED flag and updates + zip_dict_name / zip_dict_data for those which have associated + compression dictionaries. + */ + virtual void update_field_defs_with_zip_dict_info(); + private: /** Builds a 'template' to the prebuilt struct. @@ -723,3 +732,31 @@ ib_push_frm_error( TABLE* table, /*!< in: MySQL table */ ulint n_keys, /*!< in: InnoDB #keys */ bool push_warning); /*!< in: print warning ? */ + +/** This function checks if all the compression dictionaries referenced +in table->fields exist in SYS_ZIP_DICT InnoDB system table. +@return true if all referenced dictionaries exist */ +UNIV_INTERN +bool +innobase_check_zip_dicts( + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + ulint* dict_ids, /*!< out: identified zip dict ids + (at least n_fields long) */ + trx_t* trx, /*!< in: transaction */ + const char** err_dict_name); /*!< out: the name of the + zip_dict which does not exist. */ + +/** This function creates compression dictionary references in +SYS_ZIP_DICT_COLS InnoDB system table for table_id based on info +in table->fields and provided zip dict ids. */ +UNIV_INTERN +void +innobase_create_zip_dict_references( + const TABLE* table, /*!< in: table in MySQL data + dictionary */ + table_id_t ib_table_id, /*!< in: table ID in Innodb data + dictionary */ + ulint* zip_dict_ids, /*!< in: zip dict ids + (at least n_fields long) */ + trx_t* trx); /*!< in: transaction */ diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 646ba986dcca8..8299b4dafe1b0 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -21,6 +21,11 @@ this program; if not, write to the Free Software Foundation, Inc., Smart ALTER TABLE *******************************************************/ +#ifndef HAVE_PERCONA_COMPRESSED_COLUMNS +#define COLUMN_FORMAT_TYPE_COMPRESSED 0xBADF00D +#define ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST 0xDEADFACE +#endif + #include #include #include @@ -213,7 +218,10 @@ innobase_need_rebuild( const Alter_inplace_info* ha_alter_info, const TABLE* altered_table) { - if (ha_alter_info->handler_flags + Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags = + ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE); + + if (alter_inplace_flags == Alter_inplace_info::CHANGE_CREATE_OPTION && !(ha_alter_info->create_info->used_fields & (HA_CREATE_USED_ROW_FORMAT @@ -1181,6 +1189,15 @@ innobase_col_to_mysql( field->reset(); if (field->type() == MYSQL_TYPE_VARCHAR) { + if (field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED) { + /* Skip compressed varchar column when + reporting an erroneous row + during index creation or table rebuild. */ + field->set_null(); + break; + } + /* This is a >= 5.0.3 type true VARCHAR. Store the length of the data to the first byte or the first two bytes of dest. */ @@ -2466,7 +2483,8 @@ innobase_build_col_map_add( mem_heap_t* heap, dfield_t* dfield, const Field* field, - ulint comp) + ulint comp, + row_prebuilt_t* prebuilt) { if (field->is_real_null()) { dfield_set_null(dfield); @@ -2478,7 +2496,14 @@ innobase_build_col_map_add( byte* buf = static_cast(mem_heap_alloc(heap, size)); row_mysql_store_col_in_innobase_format( - dfield, buf, TRUE, field->ptr, size, comp); + dfield, buf, TRUE, field->ptr, size, comp, +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS + field->column_format() == COLUMN_FORMAT_TYPE_COMPRESSED, + reinterpret_cast(field->zip_dict_data.str), + field->zip_dict_data.length, prebuilt); +#else + 0,0,0, prebuilt); +#endif } /** Construct the translation table for reordering, dropping or @@ -2503,7 +2528,8 @@ innobase_build_col_map( const dict_table_t* new_table, const dict_table_t* old_table, dtuple_t* add_cols, - mem_heap_t* heap) + mem_heap_t* heap, + row_prebuilt_t* prebuilt) { uint old_i, old_innobase_i; DBUG_ENTER("innobase_build_col_map"); @@ -2554,7 +2580,7 @@ innobase_build_col_map( innobase_build_col_map_add( heap, dtuple_get_nth_field(add_cols, i), altered_table->field[sql_idx], - dict_table_is_comp(new_table)); + dict_table_is_comp(new_table), prebuilt); found_col: i++; sql_idx++; @@ -2718,7 +2744,8 @@ prepare_inplace_alter_table_dict( ulint flags2, ulint fts_doc_id_col, bool add_fts_doc_id, - bool add_fts_doc_id_idx) + bool add_fts_doc_id_idx, + row_prebuilt_t* prebuilt) { bool dict_locked = false; ulint* add_key_nums; /* MySQL key numbers */ @@ -2730,6 +2757,7 @@ prepare_inplace_alter_table_dict( ulint num_fts_index; ha_innobase_inplace_ctx*ctx; uint sql_idx; + ulint* zip_dict_ids = 0; DBUG_ENTER("prepare_inplace_alter_table_dict"); @@ -2866,6 +2894,18 @@ prepare_inplace_alter_table_dict( ulint n_cols; dtuple_t* add_cols; + zip_dict_ids = static_cast( + mem_heap_alloc(ctx->heap, + altered_table->s->fields * sizeof(ulint))); + + const char* err_zip_dict_name = 0; + if (!innobase_check_zip_dicts(altered_table, zip_dict_ids, + ctx->trx, &err_zip_dict_name)) { + my_error(ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST, + MYF(0), err_zip_dict_name); + goto new_clustered_failed; + } + if (innobase_check_foreigns( ha_alter_info, altered_table, old_table, user_table, ctx->drop_fk, ctx->num_to_drop_fk)) { @@ -2972,6 +3012,12 @@ prepare_inplace_alter_table_dict( } } + if (field->column_format() == + COLUMN_FORMAT_TYPE_COMPRESSED) { + field_type |= DATA_COMPRESSED; + } + + if (dict_col_name_is_reserved(field->field_name)) { dict_mem_table_free(ctx->new_table); my_error(ER_WRONG_COLUMN_NAME, MYF(0), @@ -3051,7 +3097,7 @@ prepare_inplace_alter_table_dict( ctx->col_map = innobase_build_col_map( ha_alter_info, altered_table, old_table, ctx->new_table, user_table, - add_cols, ctx->heap); + add_cols, ctx->heap, prebuilt); ctx->add_cols = add_cols; } else { DBUG_ASSERT(!innobase_need_rebuild(ha_alter_info, old_table)); @@ -3229,6 +3275,15 @@ prepare_inplace_alter_table_dict( DBUG_ASSERT(error == DB_SUCCESS); + /* + Adding compression dictionary <-> compressed table column links + to the SYS_ZIP_DICT_COLS table. + */ + if (zip_dict_ids != 0) { + innobase_create_zip_dict_references(altered_table, + ctx->trx->table_id, zip_dict_ids, ctx->trx); + } + /* Commit the data dictionary transaction in order to release the table locks on the system tables. This means that if MySQL crashes while creating a new primary key inside @@ -3947,7 +4002,7 @@ ha_innobase::prepare_inplace_alter_table( } if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA) - || (ha_alter_info->handler_flags + || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) == Alter_inplace_info::CHANGE_CREATE_OPTION && !innobase_need_rebuild(ha_alter_info, table))) { @@ -4081,7 +4136,7 @@ ha_innobase::prepare_inplace_alter_table( table_share->table_name.str, flags, flags2, fts_doc_col_no, add_fts_doc_id, - add_fts_doc_id_idx)); + add_fts_doc_id_idx, prebuilt)); } /** Alter the table structure in-place with operations @@ -4121,7 +4176,7 @@ ha_innobase::inplace_alter_table( DBUG_RETURN(false); } - if (ha_alter_info->handler_flags + if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) == Alter_inplace_info::CHANGE_CREATE_OPTION && !innobase_need_rebuild(ha_alter_info, table)) { goto ok_exit; diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index 02d2a2100a403..59cad1c2e7a4d 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2958,15 +2958,26 @@ i_s_fts_deleted_generic_fill( DBUG_RETURN(0); } - deleted = fts_doc_ids_create(); + /* Prevent DDL to drop fts aux tables. */ + rw_lock_s_lock(&dict_operation_lock); user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { + rw_lock_s_unlock(&dict_operation_lock); + + DBUG_RETURN(0); + } else if (!dict_table_has_fts_index(user_table)) { + dict_table_close(user_table, FALSE, FALSE); + + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } + deleted = fts_doc_ids_create(); + trx = trx_allocate_for_background(); trx->op_info = "Select for FTS DELETE TABLE"; @@ -2994,6 +3005,8 @@ i_s_fts_deleted_generic_fill( dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -3365,6 +3378,12 @@ i_s_fts_index_cache_fill( DBUG_RETURN(0); } + if (user_table->fts == NULL || user_table->fts->cache == NULL) { + dict_table_close(user_table, FALSE, FALSE); + + DBUG_RETURN(0); + } + cache = user_table->fts->cache; ut_a(cache); @@ -3798,10 +3817,15 @@ i_s_fts_index_table_fill( DBUG_RETURN(0); } + /* Prevent DDL to drop fts aux tables. */ + rw_lock_s_lock(&dict_operation_lock); + user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -3814,6 +3838,8 @@ i_s_fts_index_table_fill( dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -3946,16 +3972,25 @@ i_s_fts_config_fill( DBUG_RETURN(0); } + DEBUG_SYNC_C("i_s_fts_config_fille_check"); + fields = table->field; + /* Prevent DDL to drop fts aux tables. */ + rw_lock_s_lock(&dict_operation_lock); + user_table = dict_table_open_on_name( fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); if (!user_table) { + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } else if (!dict_table_has_fts_index(user_table)) { dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } @@ -4011,6 +4046,8 @@ i_s_fts_config_fill( dict_table_close(user_table, FALSE, FALSE); + rw_lock_s_unlock(&dict_operation_lock); + DBUG_RETURN(0); } diff --git a/storage/xtradb/handler/xtradb_i_s.cc b/storage/xtradb/handler/xtradb_i_s.cc index 7078ab752c2e4..eb6637dad0397 100644 --- a/storage/xtradb/handler/xtradb_i_s.cc +++ b/storage/xtradb/handler/xtradb_i_s.cc @@ -32,9 +32,11 @@ this program; if not, write to the Free Software Foundation, Inc., #include #include #include "srv0start.h" /* for srv_was_started */ +#include /* btr_pcur_t */ #include /* btr_search_sys */ #include /* recv_sys */ #include +#include /* for ZIP_DICT_MAX_* constants */ /* for XTRADB_RSEG table */ #include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */ @@ -131,6 +133,30 @@ field_store_string( return(ret); } +static int field_store_blob(Field*, const char*, uint) __attribute__((unused)); +/** Auxiliary function to store (char*, len) value in MYSQL_TYPE_BLOB +field. +@return 0 on success */ +static +int +field_store_blob( + Field* field, /*!< in/out: target field for storage */ + const char* data, /*!< in: pointer to data, or NULL */ + uint data_len) /*!< in: data length */ +{ + int ret; + + if (data != NULL) { + ret = field->store(data, data_len, system_charset_info); + field->set_notnull(); + } else { + ret = 0; /* success */ + field->set_null(); + } + + return(ret); +} + static int i_s_common_deinit( @@ -603,3 +629,331 @@ UNIV_INTERN struct st_mysql_plugin i_s_xtradb_rseg = STRUCT_FLD(version_info, INNODB_VERSION_STR), STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), }; + + +#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS +/************************************************************************/ +enum zip_dict_field_type +{ + zip_dict_field_id, + zip_dict_field_name, + zip_dict_field_zip_dict +}; + +static ST_FIELD_INFO xtradb_sys_zip_dict_fields_info[] = +{ + { STRUCT_FLD(field_name, "id"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + { STRUCT_FLD(field_name, "name"), + STRUCT_FLD(field_length, ZIP_DICT_MAX_NAME_LENGTH), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + { STRUCT_FLD(field_name, "zip_dict"), + STRUCT_FLD(field_length, ZIP_DICT_MAX_DATA_LENGTH), + STRUCT_FLD(field_type, MYSQL_TYPE_BLOB), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, 0), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + END_OF_ST_FIELD_INFO +}; + +/** Function to fill INFORMATION_SCHEMA.XTRADB_ZIP_DICT with information +collected by scanning SYS_ZIP_DICT table. +@return 0 on success */ +static +int +xtradb_i_s_dict_fill_sys_zip_dict( + THD* thd, /*!< in: thread */ + ulint id, /*!< in: dict ID */ + const char* name, /*!< in: dict name */ + const char* data, /*!< in: dict data */ + ulint data_len, /*!< in: dict data length */ + TABLE* table_to_fill) /*!< in/out: fill this table */ +{ + DBUG_ENTER("xtradb_i_s_dict_fill_sys_zip_dict"); + + Field** fields = table_to_fill->field; + + OK(field_store_ulint(fields[zip_dict_field_id], id)); + OK(field_store_string(fields[zip_dict_field_name], name)); + OK(field_store_blob(fields[zip_dict_field_zip_dict], data, + data_len)); + + OK(schema_table_store_record(thd, table_to_fill)); + + DBUG_RETURN(0); +} + +/** Function to populate INFORMATION_SCHEMA.XTRADB_ZIP_DICT table. +Loop through each record in SYS_ZIP_DICT, and extract the column +information and fill the INFORMATION_SCHEMA.XTRADB_ZIP_DICT table. +@return 0 on success */ +static +int +xtradb_i_s_sys_zip_dict_fill_table( + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (not used) */ +{ + btr_pcur_t pcur; + const rec_t* rec; + mem_heap_t* heap; + mtr_t mtr; + + DBUG_ENTER("xtradb_i_s_sys_zip_dict_fill_table"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + + /* deny access to user without SUPER_ACL privilege */ + if (check_global_access(thd, SUPER_ACL)) { + DBUG_RETURN(0); + } + + heap = mem_heap_create(1000); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_ZIP_DICT); + ulint zip_size = dict_table_zip_size(pcur.btr_cur.index->table); + + while (rec) { + const char* err_msg; + ulint id; + const char* name; + const char* data; + ulint data_len; + + /* Extract necessary information from a SYS_ZIP_DICT row */ + err_msg = dict_process_sys_zip_dict( + heap, zip_size, rec, &id, &name, &data, &data_len); + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + + if (!err_msg) { + xtradb_i_s_dict_fill_sys_zip_dict( + thd, id, name, data, data_len, + tables->table); + } else { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + ER_CANT_FIND_SYSTEM_REC, "%s", err_msg); + } + + mem_heap_empty(heap); + + /* Get the next record */ + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + mem_heap_free(heap); + + DBUG_RETURN(0); +} + +static int i_s_xtradb_zip_dict_init(void* p) +{ + DBUG_ENTER("i_s_xtradb_zip_dict_init"); + + ST_SCHEMA_TABLE* schema = static_cast(p); + + schema->fields_info = xtradb_sys_zip_dict_fields_info; + schema->fill_table = xtradb_i_s_sys_zip_dict_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_xtradb_zip_dict = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "XTRADB_ZIP_DICT"), + STRUCT_FLD(author, PLUGIN_AUTHOR), + STRUCT_FLD(descr, "InnoDB compression dictionaries information"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_xtradb_zip_dict_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, INNODB_VERSION_SHORT), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL), + STRUCT_FLD(flags, 0UL), +}; + +enum zip_dict_cols_field_type +{ + zip_dict_cols_field_table_id, + zip_dict_cols_field_column_pos, + zip_dict_cols_field_dict_id +}; + +static ST_FIELD_INFO xtradb_sys_zip_dict_cols_fields_info[] = +{ + { STRUCT_FLD(field_name, "table_id"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + { STRUCT_FLD(field_name, "column_pos"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + { STRUCT_FLD(field_name, "dict_id"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE) }, + + END_OF_ST_FIELD_INFO +}; + +/** Function to fill INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS with information +collected by scanning SYS_ZIP_DICT_COLS table. +@return 0 on success */ +static +int +xtradb_i_s_dict_fill_sys_zip_dict_cols( + THD* thd, /*!< in: thread */ + ulint table_id, /*!< in: table ID */ + ulint column_pos, /*!< in: column position */ + ulint dict_id, /*!< in: dict ID */ + TABLE* table_to_fill) /*!< in/out: fill this table */ +{ + DBUG_ENTER("xtradb_i_s_dict_fill_sys_zip_dict_cols"); + + Field** fields = table_to_fill->field; + + OK(field_store_ulint(fields[zip_dict_cols_field_table_id], + table_id)); + OK(field_store_ulint(fields[zip_dict_cols_field_column_pos], + column_pos)); + OK(field_store_ulint(fields[zip_dict_cols_field_dict_id], + dict_id)); + + OK(schema_table_store_record(thd, table_to_fill)); + + DBUG_RETURN(0); +} + +/** Function to populate INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS table. +Loop through each record in SYS_ZIP_DICT_COLS, and extract the column +information and fill the INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS table. +@return 0 on success */ +static +int +xtradb_i_s_sys_zip_dict_cols_fill_table( + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (not used) */ +{ + btr_pcur_t pcur; + const rec_t* rec; + mem_heap_t* heap; + mtr_t mtr; + + DBUG_ENTER("xtradb_i_s_sys_zip_dict_cols_fill_table"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); + + /* deny access to user without SUPER_ACL privilege */ + if (check_global_access(thd, SUPER_ACL)) { + DBUG_RETURN(0); + } + + heap = mem_heap_create(1000); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_ZIP_DICT_COLS); + + while (rec) { + const char* err_msg; + ulint table_id; + ulint column_pos; + ulint dict_id; + + /* Extract necessary information from a SYS_ZIP_DICT_COLS + row */ + err_msg = dict_process_sys_zip_dict_cols( + heap, rec, &table_id, &column_pos, &dict_id); + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + + if (!err_msg) { + xtradb_i_s_dict_fill_sys_zip_dict_cols( + thd, table_id, column_pos, dict_id, + tables->table); + } else { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + ER_CANT_FIND_SYSTEM_REC, "%s", err_msg); + } + + mem_heap_empty(heap); + + /* Get the next record */ + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + mem_heap_free(heap); + + DBUG_RETURN(0); +} + +static int i_s_xtradb_zip_dict_cols_init(void* p) +{ + DBUG_ENTER("i_s_xtradb_zip_dict_cols_init"); + + ST_SCHEMA_TABLE* schema = static_cast(p); + + schema->fields_info = xtradb_sys_zip_dict_cols_fields_info; + schema->fill_table = xtradb_i_s_sys_zip_dict_cols_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_mysql_plugin i_s_xtradb_zip_dict_cols = +{ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + STRUCT_FLD(info, &i_s_info), + STRUCT_FLD(name, "XTRADB_ZIP_DICT_COLS"), + STRUCT_FLD(author, PLUGIN_AUTHOR), + STRUCT_FLD(descr, "InnoDB compressed columns information"), + STRUCT_FLD(license, PLUGIN_LICENSE_GPL), + STRUCT_FLD(init, i_s_xtradb_zip_dict_cols_init), + STRUCT_FLD(deinit, i_s_common_deinit), + STRUCT_FLD(version, INNODB_VERSION_SHORT), + STRUCT_FLD(status_vars, NULL), + STRUCT_FLD(system_vars, NULL), + STRUCT_FLD(__reserved1, NULL), + STRUCT_FLD(flags, 0UL), +}; +#endif diff --git a/storage/xtradb/handler/xtradb_i_s.h b/storage/xtradb/handler/xtradb_i_s.h index 2f7552c565adc..905d84587affd 100644 --- a/storage/xtradb/handler/xtradb_i_s.h +++ b/storage/xtradb/handler/xtradb_i_s.h @@ -22,5 +22,7 @@ this program; if not, write to the Free Software Foundation, Inc., extern struct st_mysql_plugin i_s_xtradb_read_view; extern struct st_mysql_plugin i_s_xtradb_internal_hash_tables; extern struct st_mysql_plugin i_s_xtradb_rseg; +extern struct st_mysql_plugin i_s_xtradb_zip_dict; +extern struct st_mysql_plugin i_s_xtradb_zip_dict_cols; #endif /* XTRADB_I_S_H */ diff --git a/storage/xtradb/ibuf/ibuf0ibuf.cc b/storage/xtradb/ibuf/ibuf0ibuf.cc index d62b869df4fd9..a9f039d3f0ff6 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.cc +++ b/storage/xtradb/ibuf/ibuf0ibuf.cc @@ -938,7 +938,7 @@ ibuf_set_free_bits_low( ulint space; ulint page_no; - if (!page_is_leaf(buf_block_get_frame(block))) { + if (!page_is_leaf(buf_nonnull_block_get_frame(block))) { return; } @@ -1113,7 +1113,7 @@ ibuf_update_free_bits_zip( page_no = buf_block_get_page_no(block); zip_size = buf_block_get_zip_size(block); - ut_a(page_is_leaf(buf_block_get_frame(block))); + ut_a(page_is_leaf(buf_nonnull_block_get_frame(block))); ut_a(zip_size); bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr); diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index 7e1083a103c55..8110fbc48086c 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -1069,10 +1069,20 @@ buf_block_get_frame( /*================*/ const buf_block_t* block) /*!< in: pointer to the control block */ MY_ATTRIBUTE((pure)); -# define buf_block_get_frame_fast(block) buf_block_get_frame(block) + +/*********************************************************************//** +Gets a pointer to the memory frame of a block, where block is known not to be +NULL. +@return pointer to the frame */ +UNIV_INLINE +buf_frame_t* +buf_nonnull_block_get_frame( + const buf_block_t* block) /*!< in: pointer to the control block */ + MY_ATTRIBUTE((pure)); + #else /* UNIV_DEBUG */ # define buf_block_get_frame(block) (block ? (block)->frame : 0) -# define buf_block_get_frame_fast(block) (block)->frame +# define buf_nonnull_block_get_frame(block) ((block)->frame) #endif /* UNIV_DEBUG */ /*********************************************************************//** Gets the space id of a block. diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index b40285ae3f008..8a21f44a2eec4 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -690,6 +690,19 @@ buf_block_get_frame( { SRV_CORRUPT_TABLE_CHECK(block, return(0);); + return(buf_nonnull_block_get_frame(block)); +} + +/*********************************************************************//** +Gets a pointer to the memory frame of a block, where block is known not to be +NULL. +@return pointer to the frame */ +UNIV_INLINE +buf_frame_t* +buf_nonnull_block_get_frame( +/*========================*/ + const buf_block_t* block) /*!< in: pointer to the control block */ +{ switch (buf_block_get_state(block)) { case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_ZIP_PAGE: @@ -711,6 +724,7 @@ buf_block_get_frame( ok: return((buf_frame_t*) block->frame); } + #endif /* UNIV_DEBUG */ /*********************************************************************//** diff --git a/storage/xtradb/include/data0type.h b/storage/xtradb/include/data0type.h index 111664b0b527f..f269c266efb9b 100644 --- a/storage/xtradb/include/data0type.h +++ b/storage/xtradb/include/data0type.h @@ -170,6 +170,9 @@ be less than 256 */ type when the column is true VARCHAR where MySQL uses 2 bytes to store the data len; for shorter VARCHARs MySQL uses only 1 byte */ +#define DATA_COMPRESSED 16384 /* this is ORed to the precise data + type when the column has COLUMN_FORMAT = + COMPRESSED attribute*/ /*-------------------------------------------*/ /* This many bytes we need to store the type information affecting the @@ -500,6 +503,17 @@ dtype_print( /*========*/ const dtype_t* type); /*!< in: type */ +/** +Calculates the number of extra bytes needed for compression header +depending on precise column type. +@reval 0 if prtype does not include DATA_COMPRESSED flag +@reval ZIP_COLUMN_HEADER_LENGTH if prtype includes DATA_COMPRESSED flag +*/ +UNIV_INLINE +ulint +prtype_get_compression_extra( + ulint prtype); /*!< in: precise type */ + /* Structure for an SQL data type. If you add fields to this structure, be sure to initialize them everywhere. This structure is initialized in the following functions: diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic index d489bef89a8bb..29dc480a19c3e 100644 --- a/storage/xtradb/include/data0type.ic +++ b/storage/xtradb/include/data0type.ic @@ -26,6 +26,7 @@ Created 1/16/1996 Heikki Tuuri #include /* strlen() */ #include "mach0data.h" +#include "rem0types.h" /* ZIP_COLUMN_HEADER_LENGTH */ #ifndef UNIV_HOTBACKUP # include "ha_prototypes.h" @@ -709,3 +710,18 @@ dtype_get_sql_null_size( 0, 0)); #endif /* !UNIV_HOTBACKUP */ } + +/** +Calculates the number of extra bytes needed for compression header +depending on precise column type. +@reval 0 if prtype does not include DATA_COMPRESSED flag +@reval ZIP_COLUMN_HEADER_LENGTH if prtype includes DATA_COMPRESSED flag +*/ +UNIV_INLINE +ulint +prtype_get_compression_extra( + ulint prtype) /*!< in: precise type */ +{ + return (prtype & DATA_COMPRESSED) != 0 ? + ZIP_COLUMN_HEADER_LENGTH : 0; +} diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h index 477e1150f437b..d5bee886cbf43 100644 --- a/storage/xtradb/include/dict0boot.h +++ b/storage/xtradb/include/dict0boot.h @@ -324,6 +324,38 @@ enum dict_fld_sys_datafiles_enum { DICT_FLD__SYS_DATAFILES__PATH = 3, DICT_NUM_FIELDS__SYS_DATAFILES = 4 }; +/* The columns in SYS_DICT */ +enum dict_col_sys_zip_dict_enum { + DICT_COL__SYS_ZIP_DICT__ID = 0, + DICT_COL__SYS_ZIP_DICT__NAME = 1, + DICT_COL__SYS_ZIP_DICT__DATA = 2, + DICT_NUM_COLS__SYS_ZIP_DICT = 3 +}; +/* The field numbers in the SYS_DICT clustered index */ +enum dict_fld_sys_zip_dict_enum { + DICT_FLD__SYS_ZIP_DICT__ID = 0, + DICT_FLD__SYS_ZIP_DICT__DB_TRX_ID = 1, + DICT_FLD__SYS_ZIP_DICT__DB_ROLL_PTR = 2, + DICT_FLD__SYS_ZIP_DICT__NAME = 3, + DICT_FLD__SYS_ZIP_DICT__DATA = 4, + DICT_NUM_FIELDS__SYS_ZIP_DICT = 5 +}; +/* The columns in SYS_DICT_COLS */ +enum dict_col_sys_zip_dict_cols_enum { + DICT_COL__SYS_ZIP_DICT_COLS__TABLE_ID = 0, + DICT_COL__SYS_ZIP_DICT_COLS__COLUMN_POS = 1, + DICT_COL__SYS_ZIP_DICT_COLS__DICT_ID = 2, + DICT_NUM_COLS__SYS_ZIP_DICT_COLS = 3 +}; +/* The field numbers in the SYS_DICT_COLS clustered index */ +enum dict_fld_sys_zip_dict_cols_enum { + DICT_FLD__SYS_ZIP_DICT_COLS__TABLE_ID = 0, + DICT_FLD__SYS_ZIP_DICT_COLS__COLUMN_POS = 1, + DICT_FLD__SYS_ZIP_DICT_COLS__DB_TRX_ID = 2, + DICT_FLD__SYS_ZIP_DICT_COLS__DB_ROLL_PTR = 3, + DICT_FLD__SYS_ZIP_DICT_COLS__DICT_ID = 4, + DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS = 5 +}; /* A number of the columns above occur in multiple tables. These are the length of thos fields. */ diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h index 6fd37b90799ad..686f56ad58c61 100644 --- a/storage/xtradb/include/dict0crea.h +++ b/storage/xtradb/include/dict0crea.h @@ -163,6 +163,19 @@ UNIV_INTERN dberr_t dict_create_or_check_sys_tablespace(void); /*=====================================*/ + +#define ZIP_DICT_MAX_NAME_LENGTH 64 +/* Max window size (2^15) minus 262 */ +#define ZIP_DICT_MAX_DATA_LENGTH 32506 + +/** Creates the zip_dict system table inside InnoDB +at server bootstrap or server start if it is not found or is +not of the right form. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +dict_create_or_check_sys_zip_dict(void); + /********************************************************************//** Add a single tablespace definition to the data dictionary tables in the database. @@ -178,6 +191,84 @@ dict_create_add_tablespace_to_dictionary( trx_t* trx, /*!< in: transaction */ bool commit); /*!< in: if true then commit the transaction */ + +/** Add a single compression dictionary definition to the SYS_ZIP_DICT +InnoDB system table. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_add_zip_dict( + const char* name, /*!< in: dict name */ + ulint name_len, /*!< in: dict name length */ + const char* data, /*!< in: dict data */ + ulint data_len, /*!< in: dict data length */ + trx_t* trx); /*!< in/out: transaction */ + +/** Add a single compression dictionary reference to the SYS_ZIP_DICT_COLS +InnoDB system table. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_add_zip_dict_reference( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint dict_id, /*!< in: dict id */ + trx_t* trx); /*!< in/out: transaction */ + +/** Get a single compression dictionary id for the given +(table id, column pos) pair. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_id_by_reference( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint* dict_id, /*!< out: dict id */ + trx_t* trx); /*!< in/out: transaction */ + +/** Get compression dictionary id for the given name. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_id_by_name( + const char* dict_name, /*!< in: dict name */ + ulint dict_name_len, /*!< in: dict name length */ + ulint* dict_id, /*!< out: dict id */ + trx_t* trx); /*!< in/out: transaction */ + +/** Get compression dictionary info (name and data) for the given id. +Allocates memory for name and data on success. +Must be freed with mem_free(). +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_get_zip_dict_info_by_id( + ulint dict_id, /*!< in: dict id */ + char** name, /*!< out: dict name */ + ulint* name_len, /*!< out: dict name length */ + char** data, /*!< out: dict data */ + ulint* data_len, /*!< out: dict data length */ + trx_t* trx); /*!< in/out: transaction */ + +/** Remove a single compression dictionary from the data dictionary +tables in the database. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_remove_zip_dict( + const char* name, /*!< in: dict name */ + ulint name_len, /*!< in: dict name length */ + trx_t* trx); /*!< in/out: transaction */ + +/** Remove all compression dictionary references for the given table ID from +the data dictionary tables in the database. +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_remove_zip_dict_references_for_table( + ulint table_id, /*!< in: table id */ + trx_t* trx); /*!< in/out: transaction */ + /********************************************************************//** Add a foreign key definition to the data dictionary tables. @return error code or DB_SUCCESS */ diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index d8a6bc927b6bd..1b6110dd0102c 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -1871,6 +1871,52 @@ dict_table_set_corrupt_by_space( ulint space_id, ibool need_mutex); +/** Insert a records into SYS_ZIP_DICT. +@retval DB_SUCCESS if OK +@retval dberr_t if the insert failed */ +UNIV_INTERN +dberr_t +dict_create_zip_dict( + const char* name, /*!< in: zip_dict name */ + ulint name_len, /*!< in: zip_dict name length*/ + const char* data, /*!< in: zip_dict data */ + ulint data_len); /*!< in: zip_dict data length */ + +/** Get single compression dictionary id for the given +(table id, column pos) pair. +@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found */ +UNIV_INTERN +dberr_t +dict_get_dictionary_id_by_key( + ulint table_id, /*!< in: table id */ + ulint column_pos, /*!< in: column position */ + ulint* dict_id); /*!< out: zip_dict id */ + +/** Get compression dictionary info (name and data) for the given id. +Allocates memory in name->str and data->str on success. +Must be freed with mem_free(). +@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found */ +UNIV_INTERN +dberr_t +dict_get_dictionary_info_by_id( + ulint dict_id, /*!< in: table name */ + char** name, /*!< out: dictionary name */ + ulint* name_len, /*!< out: dictionary name length*/ + char** data, /*!< out: dictionary data */ + ulint* data_len); /*!< out: dictionary data length*/ + +/** Delete a record in SYS_ZIP_DICT with the given name. +@retval DB_SUCCESS if OK +@retval DB_RECORD_NOT_FOUND if not found +@retval DB_ROW_IS_REFERENCED if in use */ +UNIV_INTERN +dberr_t +dict_drop_zip_dict( + const char* name, /*!< in: zip_dict name */ + ulint name_len); /*!< in: zip_dict name length*/ + #ifndef UNIV_NONINL #include "dict0dict.ic" #endif diff --git a/storage/xtradb/include/dict0load.h b/storage/xtradb/include/dict0load.h index dcbc3de8e942f..85e3e5656371b 100644 --- a/storage/xtradb/include/dict0load.h +++ b/storage/xtradb/include/dict0load.h @@ -44,6 +44,8 @@ enum dict_system_id_t { SYS_FOREIGN_COLS, SYS_TABLESPACES, SYS_DATAFILES, + SYS_ZIP_DICT, + SYS_ZIP_DICT_COLS, /* This must be last item. Defines the number of system tables. */ SYS_NUM_SYSTEM_TABLES @@ -386,6 +388,33 @@ dict_process_sys_datafiles( const rec_t* rec, /*!< in: current SYS_DATAFILES rec */ ulint* space, /*!< out: pace id */ const char** path); /*!< out: datafile path */ + +/** This function parses a SYS_ZIP_DICT record, extracts necessary +information from the record and returns to caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_zip_dict( + mem_heap_t* heap, /*!< in/out: heap memory */ + ulint zip_size, /*!< in: nonzero=compressed BLOB page size */ + const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */ + ulint* id, /*!< out: dict id */ + const char** name, /*!< out: dict name */ + const char** data, /*!< out: dict data */ + ulint* data_len); /*!< out: dict data length */ + +/** This function parses a SYS_ZIP_DICT_COLS record, extracts necessary +information from the record and returns to caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_zip_dict_cols( + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_ZIP_DICT rec */ + ulint* table_id, /*!< out: table id */ + ulint* column_pos, /*!< out: column position */ + ulint* dict_id); /*!< out: dict id */ + /********************************************************************//** Get the filepath for a spaceid from SYS_DATAFILES. This function provides a temporary heap which is used for the table lookup, but not for the path. diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h index 68d4d333245d9..3e2f359bbebef 100644 --- a/storage/xtradb/include/fts0fts.h +++ b/storage/xtradb/include/fts0fts.h @@ -375,6 +375,7 @@ extern bool fts_need_sync; /** Variable specifying the table that has Fulltext index to display its content through information schema table */ extern char* fts_internal_tbl_name; +extern char* fts_internal_tbl_name2; #define fts_que_graph_free(graph) \ do { \ @@ -823,6 +824,15 @@ void fts_drop_orphaned_tables(void); /*==========================*/ +/* Get parent table name if it's a fts aux table +@param[in] aux_table_name aux table name +@param[in] aux_table_len aux table length +@return parent table name, or NULL */ +char* +fts_get_parent_table_name( + const char* aux_table_name, + ulint aux_table_len); + /******************************************************************//** Since we do a horizontal split on the index table, we need to drop all the split tables. @@ -840,13 +850,15 @@ FTS auxiliary INDEX table and clear the cache at the end. @param[in,out] table fts table @param[in] unlock_cache whether unlock cache when write node @param[in] wait whether wait for existing sync to finish +@param[in] has_dict whether has dict operation lock @return DB_SUCCESS on success, error code on failure. */ UNIV_INTERN dberr_t fts_sync_table( dict_table_t* table, bool unlock_cache, - bool wait); + bool wait, + bool has_dict); /****************************************************************//** Free the query graph but check whether dict_sys->mutex is already diff --git a/storage/xtradb/include/os0thread.h b/storage/xtradb/include/os0thread.h index 815faf97319c0..671b9b7dc3fb8 100644 --- a/storage/xtradb/include/os0thread.h +++ b/storage/xtradb/include/os0thread.h @@ -131,14 +131,27 @@ os_thread_create_func( os_thread_id_t* thread_id); /*!< out: id of the created thread, or NULL */ +/** +Waits until the specified thread completes and joins it. Its return value is +ignored. + +@param thread thread to join */ +UNIV_INTERN +void +os_thread_join( + os_thread_t thread); + /*****************************************************************//** Exits the current thread. */ UNIV_INTERN void os_thread_exit( /*===========*/ - void* exit_value) /*!< in: exit value; in Windows this void* + void* exit_value, /*!< in: exit value; in Windows this void* is cast as a DWORD */ + bool detach = true) /*!< in: if true, the thread will be detached + right before exiting. If false, another thread + is responsible for joining this thread. */ UNIV_COLD MY_ATTRIBUTE((noreturn)); /*****************************************************************//** Returns the thread identifier of current thread. diff --git a/storage/xtradb/include/rem0types.h b/storage/xtradb/include/rem0types.h index f8133f77466d9..5da96066f8851 100644 --- a/storage/xtradb/include/rem0types.h +++ b/storage/xtradb/include/rem0types.h @@ -71,4 +71,7 @@ enum rec_format_enum { }; typedef enum rec_format_enum rec_format_t; +/** Compressed field header size in bytes */ +#define ZIP_COLUMN_HEADER_LENGTH 2 + #endif diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index 8e219d3e856d1..70da84640e533 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -41,6 +41,9 @@ struct SysIndexCallback; extern ibool row_rollback_on_timeout; +extern uint srv_compressed_columns_zip_level; +extern ulong srv_compressed_columns_threshold; + struct row_prebuilt_t; /*******************************************************************//** @@ -51,6 +54,49 @@ row_mysql_prebuilt_free_blob_heap( /*==============================*/ row_prebuilt_t* prebuilt); /*!< in: prebuilt struct of a ha_innobase:: table handle */ + +/** Frees the compress heap in prebuilt when no longer needed. */ +UNIV_INTERN +void +row_mysql_prebuilt_free_compress_heap( + row_prebuilt_t* prebuilt); /*!< in: prebuilt struct of a + ha_innobase:: table handle */ + +/** Uncompress blob/text/varchar column using zlib +@return pointer to the uncompressed data */ +const byte* +row_decompress_column( + const byte* data, /*!< in: data in innodb(compressed) format */ + ulint *len, /*!< in: data length; out: length of + decompressed data*/ + const byte* dict_data, + /*!< in: optional dictionary data used for + decompression */ + ulint dict_data_len, + /*!< in: optional dictionary data length */ + row_prebuilt_t* prebuilt); + /*!< in: use prebuilt->compress_heap only + here*/ + +/** Compress blob/text/varchar column using zlib +@return pointer to the compressed data */ +byte* +row_compress_column( + const byte* data, /*!< in: data in mysql(uncompressed) + format */ + ulint *len, /*!< in: data length; out: length of + compressed data*/ + ulint lenlen, /*!< in: bytes used to store the length of + data */ + const byte* dict_data, + /*!< in: optional dictionary data used for + compression */ + ulint dict_data_len, + /*!< in: optional dictionary data length */ + row_prebuilt_t* prebuilt); + /*!< in: use prebuilt->compress_heap only + here*/ + /*******************************************************************//** Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row format. @@ -89,10 +135,21 @@ row_mysql_store_blob_ref( to 4 bytes */ const void* data, /*!< in: BLOB data; if the value to store is SQL NULL this should be NULL pointer */ - ulint len); /*!< in: BLOB length; if the value to store + ulint len, /*!< in: BLOB length; if the value to store is SQL NULL this should be 0; remember also to set the NULL bit in the MySQL record header! */ + bool need_decompression, + /*!< in: if the data need to be compressed*/ + const byte* dict_data, + /*!< in: optional compression dictionary + data */ + ulint dict_data_len, + /*!< in: optional compression dictionary data + length */ + row_prebuilt_t* prebuilt); + /*compress_heap only + here */ /*******************************************************************//** Reads a reference to a BLOB in the MySQL format. @return pointer to BLOB data */ @@ -103,8 +160,17 @@ row_mysql_read_blob_ref( ulint* len, /*!< out: BLOB length */ const byte* ref, /*!< in: BLOB reference in the MySQL format */ - ulint col_len); /*!< in: BLOB reference length + ulint col_len, /*!< in: BLOB reference length (not BLOB length) */ + bool need_compression, + /*!< in: if the data need to be + compressed*/ + const byte* dict_data, /*!< in: optional compression + dictionary data */ + ulint dict_data_len, /*!< in: optional compression + dictionary data length */ + row_prebuilt_t* prebuilt); /*!< in: use prebuilt->compress_heap + only here */ /**************************************************************//** Pad a column with spaces. */ UNIV_INTERN @@ -152,7 +218,16 @@ row_mysql_store_col_in_innobase_format( necessarily the length of the actual payload data; if the column is a true VARCHAR then this is irrelevant */ - ulint comp); /*!< in: nonzero=compact format */ + ulint comp, /*!< in: nonzero=compact format */ + bool need_compression, + /*!< in: if the data need to be + compressed */ + const byte* dict_data, /*!< in: optional compression + dictionary data */ + ulint dict_data_len, /*!< in: optional compression + dictionary data length */ + row_prebuilt_t* prebuilt); /*!< in: use prebuilt->compress_heap + only here */ /****************************************************************//** Handles user errors and lock waits detected by the database engine. @return true if it was a lock wait and we should continue running the @@ -646,6 +721,8 @@ struct mysql_row_templ_t { ulint is_unsigned; /*!< if a column type is an integer type and this field is != 0, then it is an unsigned integer type */ + bool compressed; /*!< if column format is compressed */ + LEX_CSTRING zip_dict_data; /*!< associated compression dictionary */ }; #define MYSQL_FETCH_CACHE_SIZE 8 @@ -843,6 +920,8 @@ struct row_prebuilt_t { in fetch_cache */ mem_heap_t* blob_heap; /*!< in SELECTS BLOB fields are copied to this heap */ + mem_heap_t* compress_heap; /*!< memory heap used to compress + /decompress blob column*/ mem_heap_t* old_vers_heap; /*!< memory heap where a previous version is built in consistent read */ bool in_fts_query; /*!< Whether we are in a FTS query */ diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index b6507b136bc03..8ea92f693687a 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -181,8 +181,10 @@ extern os_event_t srv_checkpoint_completed_event; log tracking iteration */ extern os_event_t srv_redo_log_tracked_event; -/** srv_redo_log_follow_thread spawn flag */ -extern bool srv_redo_log_thread_started; +/** Whether the redo log tracker thread has been started. Does not take into +account whether the tracking is currently enabled (see srv_track_changed_pages +for that) */ +extern bool srv_redo_log_thread_started; /* If the last data file is auto-extended, we add this many pages to it at a time */ @@ -278,6 +280,10 @@ extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_is_raw_partition; + +/** Whether the redo log tracking is currently enabled. Note that it is +possible for the log tracker thread to be running and the tracking to be +disabled */ extern my_bool srv_track_changed_pages; extern ulonglong srv_max_bitmap_file_size; @@ -507,6 +513,9 @@ extern ibool srv_priority_boost; extern ulint srv_truncated_status_writes; extern ulint srv_available_undo_logs; +extern ulint srv_column_compressed; +extern ulint srv_column_decompressed; + extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; @@ -1105,6 +1114,8 @@ struct export_var_t{ ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id - purged view's min trx_id */ #endif /* UNIV_DEBUG */ + ulint innodb_column_compressed; /*!< srv_column_compressed */ + ulint innodb_column_decompressed; /*!< srv_column_decompressed */ }; /** Thread slot in the thread table. */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 3646978ac152d..fb103e2c9b8a1 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -44,10 +44,10 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 31 +#define INNODB_VERSION_BUGFIX 32 #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 77.0 +#define PERCONA_INNODB_VERSION 79.0 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index 9be93c3f5acbc..15183b9c1cb86 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -1005,6 +1005,7 @@ log_init(void) log_sys->next_checkpoint_no = 0; log_sys->last_checkpoint_lsn = log_sys->lsn; + log_sys->next_checkpoint_lsn = log_sys->lsn; log_sys->n_pending_checkpoint_writes = 0; @@ -1928,6 +1929,7 @@ log_complete_checkpoint(void) log_sys->next_checkpoint_no++; + ut_ad(log_sys->next_checkpoint_lsn >= log_sys->last_checkpoint_lsn); log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn; MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, log_sys->lsn - log_sys->last_checkpoint_lsn); @@ -2015,11 +2017,17 @@ log_group_checkpoint( ulint i; ut_ad(!srv_read_only_mode); + ut_ad(srv_shutdown_state != SRV_SHUTDOWN_LAST_PHASE); ut_ad(mutex_own(&(log_sys->mutex))); ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE); buf = group->checkpoint_buf; +#ifdef UNIV_DEBUG + lsn_t old_next_checkpoint_lsn + = mach_read_from_8(buf + LOG_CHECKPOINT_LSN); + ut_ad(old_next_checkpoint_lsn <= log_sys->next_checkpoint_lsn); +#endif /* UNIV_DEBUG */ mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no); mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn); @@ -2294,6 +2302,7 @@ log_checkpoint( return(FALSE); } + ut_ad(oldest_lsn >= log_sys->next_checkpoint_lsn); log_sys->next_checkpoint_lsn = oldest_lsn; #ifdef UNIV_DEBUG @@ -3612,13 +3621,15 @@ logs_empty_and_mark_files_at_shutdown(void) before proceeding further. */ srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE; count = 0; - while (buf_page_cleaner_is_active) { - ++count; - os_thread_sleep(100000); - if (srv_print_verbose_log && count > 600) { + while (buf_page_cleaner_is_active || buf_lru_manager_is_active) { + if (srv_print_verbose_log && count == 0) { ib_logf(IB_LOG_LEVEL_INFO, "Waiting for page_cleaner to " "finish flushing of buffer pool"); + } + ++count; + os_thread_sleep(100000); + if (count > 600) { count = 0; } } @@ -3694,7 +3705,7 @@ logs_empty_and_mark_files_at_shutdown(void) /* Wake the log tracking thread which will then immediatelly quit because of srv_shutdown_state value */ - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { os_event_reset(srv_redo_log_tracked_event); os_event_set(srv_checkpoint_completed_event); } @@ -3773,7 +3784,7 @@ logs_empty_and_mark_files_at_shutdown(void) srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; /* Signal the log following thread to quit */ - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { os_event_reset(srv_redo_log_tracked_event); os_event_set(srv_checkpoint_completed_event); } @@ -3786,6 +3797,7 @@ logs_empty_and_mark_files_at_shutdown(void) ut_a(freed); ut_a(lsn == log_sys->lsn); + ut_ad(lsn == log_sys->last_checkpoint_lsn); if (lsn < srv_start_lsn) { ib_logf(IB_LOG_LEVEL_ERROR, diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index 6517654853293..209ca74362899 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -433,6 +433,7 @@ log_online_track_missing_on_startup( current server startup */ { ut_ad(last_tracked_lsn != tracking_start_lsn); + ut_ad(srv_track_changed_pages); ib_logf(IB_LOG_LEVEL_WARN, "last tracked LSN in \'%s\' is " LSN_PF ", but the last checkpoint LSN is " LSN_PF ". This might be " @@ -615,6 +616,8 @@ log_online_read_init(void) compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP % 8 == 0); compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP_LEN % 8 == 0); + ut_ad(srv_track_changed_pages); + log_bmp_sys = static_cast (ut_malloc(sizeof(*log_bmp_sys))); log_bmp_sys->read_buf_ptr = static_cast @@ -1089,10 +1092,15 @@ log_online_write_bitmap_page( { ibool success; + ut_ad(srv_track_changed_pages); ut_ad(mutex_own(&log_bmp_sys->mutex)); /* Simulate a write error */ - DBUG_EXECUTE_IF("bitmap_page_write_error", return FALSE;); + DBUG_EXECUTE_IF("bitmap_page_write_error", + ib_logf(IB_LOG_LEVEL_ERROR, + "simulating bitmap write error in " + "log_online_write_bitmap_page"); + return FALSE;); success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file, block, log_bmp_sys->out.offset, @@ -1182,7 +1190,9 @@ log_online_write_bitmap(void) rbt_next(log_bmp_sys->modified_pages, bmp_tree_node); DBUG_EXECUTE_IF("bitmap_page_2_write_error", - DBUG_SET("+d,bitmap_page_write_error");); + ut_ad(bmp_tree_node); /* 2nd page must exist */ + DBUG_SET("+d,bitmap_page_write_error"); + DBUG_SET("-d,bitmap_page_2_write_error");); } rbt_reset(log_bmp_sys->modified_pages); @@ -1203,15 +1213,11 @@ log_online_follow_redo_log(void) log_group_t* group; ibool result; - mutex_enter(&log_bmp_sys->mutex); - - if (!srv_track_changed_pages) { - mutex_exit(&log_bmp_sys->mutex); - return FALSE; - } - + ut_ad(srv_track_changed_pages); ut_ad(!srv_read_only_mode); + mutex_enter(&log_bmp_sys->mutex); + /* Grab the LSN of the last checkpoint, we will parse up to it */ mutex_enter(&(log_sys->mutex)); log_bmp_sys->end_lsn = log_sys->last_checkpoint_lsn; @@ -1554,9 +1560,12 @@ log_online_diagnose_bitmap_eof( /* It's a "Warning" here because it's not a fatal error for the whole server */ ib_logf(IB_LOG_LEVEL_WARN, - "changed page bitmap file \'%s\' does not " - "contain a complete run at the end.", - bitmap_file->name); + "changed page bitmap file \'%s\', size " + UINT64PF " bytes, does not " + "contain a complete run at the next read " + "offset " UINT64PF, + bitmap_file->name, bitmap_file->size, + bitmap_file->offset); return FALSE; } } @@ -1788,20 +1797,20 @@ log_online_purge_changed_page_bitmaps( lsn = LSN_MAX; } - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { /* User requests might happen with both enabled and disabled tracking */ mutex_enter(&log_bmp_sys->mutex); } if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, LSN_MAX)) { - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { mutex_exit(&log_bmp_sys->mutex); } return TRUE; } - if (srv_track_changed_pages && lsn > log_bmp_sys->end_lsn) { + if (srv_redo_log_thread_started && lsn > log_bmp_sys->end_lsn) { /* If we have to delete the current output file, close it first. */ os_file_close(log_bmp_sys->out.file); @@ -1834,7 +1843,7 @@ log_online_purge_changed_page_bitmaps( } } - if (srv_track_changed_pages) { + if (srv_redo_log_thread_started) { if (lsn > log_bmp_sys->end_lsn) { lsn_t new_file_lsn; if (lsn == LSN_MAX) { @@ -1845,9 +1854,7 @@ log_online_purge_changed_page_bitmaps( new_file_lsn = log_bmp_sys->end_lsn; } if (!log_online_rotate_bitmap_file(new_file_lsn)) { - /* If file create failed, signal the log - tracking thread to quit next time it wakes - up. */ + /* If file create failed, stop log tracking */ srv_track_changed_pages = FALSE; } } diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index 2e9e8b6e75ce5..9e42fb5cc1ccd 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -386,12 +386,6 @@ recv_sys_init( } #ifndef UNIV_HOTBACKUP - /* Initialize red-black tree for fast insertions into the - flush_list during recovery process. - As this initialization is done while holding the buffer pool - mutex we perform it before acquiring recv_sys->mutex. */ - buf_flush_init_flush_rbt(); - mutex_enter(&(recv_sys->mutex)); recv_sys->heap = mem_heap_create_typed(256, @@ -481,9 +475,6 @@ recv_sys_debug_free(void) recv_sys->last_block_buf_start = NULL; mutex_exit(&(recv_sys->mutex)); - - /* Free up the flush_rbt. */ - buf_flush_free_flush_rbt(); } # endif /* UNIV_LOG_DEBUG */ @@ -3125,6 +3116,11 @@ recv_recovery_from_checkpoint_start_func( byte* log_hdr_buf_base = reinterpret_cast (alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE)); dberr_t err; + + /* Initialize red-black tree for fast insertions into the + flush_list during recovery process. */ + buf_flush_init_flush_rbt(); + ut_when_dtor tmp(recv_sys->dblwr); log_hdr_buf = static_cast @@ -3544,6 +3540,9 @@ recv_recovery_from_checkpoint_finish(void) #ifndef UNIV_LOG_DEBUG recv_sys_debug_free(); #endif + /* Free up the flush_rbt. */ + buf_flush_free_flush_rbt(); + /* Roll back any recovered data dictionary transactions, so that the data dictionary tables will be free of any locks. The data dictionary latch should guarantee that there is at diff --git a/storage/xtradb/mach/mach0data.cc b/storage/xtradb/mach/mach0data.cc index df68aab8a1874..206434dc5ab37 100644 --- a/storage/xtradb/mach/mach0data.cc +++ b/storage/xtradb/mach/mach0data.cc @@ -56,7 +56,18 @@ mach_parse_compressed( *val = flag; return(ptr + 1); - } else if (flag < 0xC0UL) { + } + + /* Workaround GCC bug + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77673: + the compiler moves mach_read_from_4 right to the beginning of the + function, causing and out-of-bounds read if we are reading a short + integer close to the end of buffer. */ +#if defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__clang__) + asm volatile("": : :"memory"); +#endif + + if (flag < 0xC0UL) { if (end_ptr < ptr + 2) { return(NULL); } diff --git a/storage/xtradb/os/os0thread.cc b/storage/xtradb/os/os0thread.cc index aabdd06d76b2f..af826027efc36 100644 --- a/storage/xtradb/os/os0thread.cc +++ b/storage/xtradb/os/os0thread.cc @@ -210,14 +210,42 @@ os_thread_create_func( #endif } +/** +Waits until the specified thread completes and joins it. Its return value is +ignored. + +@param thread thread to join */ +UNIV_INTERN +void +os_thread_join( + os_thread_t thread) +{ + /*This function is currently only used to workaround glibc bug + described in http://bugs.mysql.com/bug.php?id=82886 + + On Windows, no workarounds are necessary, all threads + are "detached" upon thread exit (handle is closed), so we do + nothing. + */ +#ifndef _WIN32 + int ret MY_ATTRIBUTE((unused)) = pthread_join(thread, NULL); + + /* Waiting on already-quit threads is allowed */ + ut_ad(ret == 0 || ret == ESRCH); +#endif +} + /*****************************************************************//** Exits the current thread. */ UNIV_INTERN void os_thread_exit( /*===========*/ - void* exit_value) /*!< in: exit value; in Windows this void* + void* exit_value, /*!< in: exit value; in Windows this void* is cast as a DWORD */ + bool detach) /*!< in: if true, the thread will be detached + right before exiting. If false, another thread + is responsible for joining this thread. */ { #ifdef UNIV_DEBUG_THREAD_CREATION fprintf(stderr, "Thread exits, id %lu\n", @@ -233,7 +261,8 @@ os_thread_exit( #ifdef __WIN__ ExitThread((DWORD) exit_value); #else - pthread_detach(pthread_self()); + if (detach) + pthread_detach(pthread_self()); pthread_exit(exit_value); #endif } diff --git a/storage/xtradb/rem/rem0rec.cc b/storage/xtradb/rem/rem0rec.cc index 75e0bba343dc1..f8eb807654766 100644 --- a/storage/xtradb/rem/rem0rec.cc +++ b/storage/xtradb/rem/rem0rec.cc @@ -323,7 +323,8 @@ rec_init_offsets_comp_ordinary( stored in one byte for 0..127. The length will be encoded in two bytes when it is 128 or more, or when the field is stored externally. */ - if (UNIV_UNLIKELY(col->len > 255) + if (UNIV_UNLIKELY(col->len > 255 - + prtype_get_compression_extra(col->prtype)) || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { if (len & 0x80) { @@ -844,8 +845,12 @@ rec_get_converted_size_comp_prefix_low( continue; } - ut_ad(len <= col->len || col->mtype == DATA_BLOB - || (col->len == 0 && col->mtype == DATA_VARCHAR)); + ut_ad(len <= col->len || col->mtype == DATA_BLOB || + ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY + || col->mtype == DATA_VARMYSQL) + && (col->len == 0 + || len <= col->len + + prtype_get_compression_extra(col->prtype)))); fixed_len = field->fixed_len; if (temp && fixed_len @@ -877,7 +882,9 @@ rec_get_converted_size_comp_prefix_low( ut_ad(col->len >= 256 || col->mtype == DATA_BLOB); extra_size += 2; } else if (len < 128 - || (col->len < 256 && col->mtype != DATA_BLOB)) { + || (col->len < 256 - + prtype_get_compression_extra(col->prtype) + && col->mtype != DATA_BLOB)) { extra_size++; } else { /* For variable-length columns, we look up the @@ -1272,12 +1279,16 @@ rec_convert_dtuple_to_rec_comp( *lens-- = (byte) (len >> 8) | 0xc0; *lens-- = (byte) len; } else { - ut_ad(len <= dtype_get_len(type) + ut_ad(len <= dtype_get_len(type) + + prtype_get_compression_extra( + dtype_get_prtype(type)) || dtype_get_mtype(type) == DATA_BLOB || !strcmp(index->name, FTS_INDEX_TABLE_IND_NAME)); if (len < 128 - || (dtype_get_len(type) < 256 + || (dtype_get_len(type) < 256 - + prtype_get_compression_extra( + dtype_get_prtype(type)) && dtype_get_mtype(type) != DATA_BLOB)) { *lens-- = (byte) len; diff --git a/storage/xtradb/row/row0ftsort.cc b/storage/xtradb/row/row0ftsort.cc index 51cc53ff260f8..cb47d60562355 100644 --- a/storage/xtradb/row/row0ftsort.cc +++ b/storage/xtradb/row/row0ftsort.cc @@ -961,7 +961,7 @@ fts_parallel_merge( CloseHandle(psort_info->thread_hdl); #endif /*__WIN__ */ - os_thread_exit(NULL); + os_thread_exit(NULL, false); OS_THREAD_DUMMY_RETURN; } diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index b84e3113ea8d0..3c5d5773aee85 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -613,7 +613,7 @@ row_log_table_delete( &old_pk_extra_size); ut_ad(old_pk_extra_size < 0x100); - mrec_size = 4 + old_pk_size; + mrec_size = 6 + old_pk_size; /* Log enough prefix of the BLOB unless both the old and new table are in COMPACT or REDUNDANT format, @@ -643,8 +643,8 @@ row_log_table_delete( *b++ = static_cast(old_pk_extra_size); /* Log the size of external prefix we saved */ - mach_write_to_2(b, ext_size); - b += 2; + mach_write_to_4(b, ext_size); + b += 4; rec_convert_dtuple_to_temp( b + old_pk_extra_size, new_index, @@ -2268,14 +2268,14 @@ row_log_table_apply_op( break; case ROW_T_DELETE: - /* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */ - if (mrec + 4 >= mrec_end) { + /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */ + if (mrec + 6 >= mrec_end) { return(NULL); } extra_size = *mrec++; - ext_size = mach_read_from_2(mrec); - mrec += 2; + ext_size = mach_read_from_4(mrec); + mrec += 4; ut_ad(mrec < mrec_end); /* We assume extra_size < 0x100 for the PRIMARY KEY prefix. diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index 83f6ccb90c308..e397053949e1a 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -525,7 +525,12 @@ row_merge_buf_add( dfield_set_len(field, len); } - ut_ad(len <= col->len || col->mtype == DATA_BLOB); + ut_ad(len <= col->len || col->mtype == DATA_BLOB || + ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY + || col->mtype == DATA_VARMYSQL) + && (col->len == 0 + || len <= col->len + + prtype_get_compression_extra(col->prtype)))); fixed_len = ifield->fixed_len; if (fixed_len && !dict_table_is_comp(index->table) @@ -554,7 +559,9 @@ row_merge_buf_add( } else if (dfield_is_ext(field)) { extra_size += 2; } else if (len < 128 - || (col->len < 256 && col->mtype != DATA_BLOB)) { + || (col->len < 256 - + prtype_get_compression_extra(col->prtype) + && col->mtype != DATA_BLOB)) { extra_size++; } else { /* For variable-length columns, we look up the @@ -1995,7 +2002,7 @@ row_merge_read_clustered_index( /* Sync fts cache for other fts indexes to keep all fts indexes consistent in sync_doc_id. */ err = fts_sync_table(const_cast(new_table), - false, true); + false, true, false); if (err == DB_SUCCESS) { fts_update_next_doc_id( @@ -3823,6 +3830,13 @@ row_merge_build_indexes( " exited when creating FTS" " index '%s'", indexes[i]->name); + } else { + for (j = 0; j < FTS_NUM_AUX_INDEX; + j++) { + + os_thread_join(merge_info[j] + .thread_hdl); + } } } else { /* This cannot report duplicates; an diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index 733c7fef3744f..46bf523750c6d 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -63,11 +63,54 @@ Created 9/17/2000 Heikki Tuuri #include "row0import.h" #include "m_string.h" #include "my_sys.h" +#include "zlib.h" #include /** Provide optional 4.x backwards compatibility for 5.0 and above */ UNIV_INTERN ibool row_rollback_on_timeout = FALSE; +/** +Z_NO_COMPRESSION = 0 +Z_BEST_SPEED = 1 +Z_BEST_COMPRESSION = 9 +Z_DEFAULT_COMPRESSION = -1 +Compression level to be used by zlib for compressed-blob columns. +Settable by user. +*/ +UNIV_INTERN uint srv_compressed_columns_zip_level = DEFAULT_COMPRESSION_LEVEL; +/** +(Z_FILTERED | Z_HUFFMAN_ONLY | Z_RLE | Z_FIXED | Z_DEFAULT_STRATEGY) + +The strategy parameter is used to tune the compression algorithm. Use the +value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a +filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only +(no string match), or Z_RLE to limit match distances to one +(run-length encoding). Filtered data consists mostly of small values with a +somewhat random distribution. In this case, the compression algorithm is +tuned to compress them better. +The effect of Z_FILTERED is to force more Huffman coding and less string +matching; it is somewhat intermediate between Z_DEFAULT_STRATEGY and +Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as Z_HUFFMAN_ONLY, +but give better compression for PNG image data. The strategy parameter only +affects the compression ratio but not the correctness of the compressed +output even if it is not set appropriately. Z_FIXED prevents the use of +dynamic Huffman codes, allowing for a simpler decoder for special +applications. +*/ +const uint srv_compressed_columns_zlib_strategy = Z_DEFAULT_STRATEGY; +/** Compress the column if the data length exceeds this value. */ +UNIV_INTERN ulong srv_compressed_columns_threshold = 96; +/** +Determine if zlib needs to compute adler32 value for the compressed data. +This variables is similar to page_zip_zlib_wrap, but only used by +compressed blob columns. +*/ +const bool srv_compressed_columns_zlib_wrap = true; +/** +Determine if zlib will use custom memory allocation functions based on +InnoDB memory heap routines (mem_heap_t*). +*/ +const bool srv_compressed_columns_zlib_use_heap = false; /** Chain node of the list of tables to drop in the background. */ struct row_mysql_drop_t{ char* table_name; /*!< table name */ @@ -171,6 +214,17 @@ row_mysql_prebuilt_free_blob_heap( prebuilt->blob_heap = NULL; } +/** Frees the compress heap in prebuilt when no longer needed. */ +UNIV_INTERN +void +row_mysql_prebuilt_free_compress_heap( + row_prebuilt_t* prebuilt) /*!< in: prebuilt struct of a + ha_innobase:: table handle */ +{ + mem_heap_free(prebuilt->compress_heap); + prebuilt->compress_heap = NULL; +} + /*******************************************************************//** Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row format. @@ -227,6 +281,425 @@ row_mysql_read_true_varchar( return(field + 1); } +/** + Compressed BLOB header format: + --------------------------------------------------------------- + | reserved | wrap | algorithm | len-len | compressed | unused | + | [1] | [1] | [5] | [3] | [1] | [5] | + --------------------------------------------------------------- + | 0 0 | 1 1 | 2 6 | 7 9 | 10 10 | 11 15 | + --------------------------------------------------------------- + * 'reserved' bit is planned to be used in future versions of the BLOB + header. In this version it must always be + 'default_zip_column_reserved_value' (0). + * 'wrap' identifies if compression algorithm calculated a checksum + (adler32 in case of zlib) and appended it to the compressed data. + * 'algorithm' identifies which algoritm was used to compress this BLOB. + Currently, the only value 'default_zip_column_algorithm_value' (0) is + supported. + * 'len-len' field identifies the length of the column length data portion + followed by this header (see below). + * If 'compressed' bit is set to 1, then this header is immediately followed + by 1..8 bytes (depending on the value of 'len-len' bitfield) which + determine original (uncompressed) block size. These 'len-len' bytes are + followed by compressed representation of the original data. + * If 'compressed' bit is set to 0, every other bitfield ('wrap', + 'algorithm' and 'le-len') must be ignored. In this case the header is + immediately followed by uncompressed (original) data. +*/ + +/** + Currently the only supported value for the 'reserved' field is + false (0). +*/ +static const bool default_zip_column_reserved_value = false; + +/** + Currently the only supported value for the 'algorithm' field is 0, which + means 'zlib'. +*/ +static const uint default_zip_column_algorithm_value = 0; + +static const size_t zip_column_prefix_max_length = + ZIP_COLUMN_HEADER_LENGTH + 8; +static const size_t zip_column_header_length = ZIP_COLUMN_HEADER_LENGTH; + +/* 'reserved', bit 0 */ +static const uint zip_column_reserved = 0; +/* 0000 0000 0000 0001 */ +static const uint zip_column_reserved_mask = 0x0001; + +/* 'wrap', bit 1 */ +static const uint zip_column_wrap = 1; +/* 0000 0000 0000 0010 */ +static const uint zip_column_wrap_mask = 0x0002; + +/* 'algorithm', bit 2,3,4,5,6 */ +static const uint zip_column_algorithm = 2; +/* 0000 0000 0111 1100 */ +static const uint zip_column_algorithm_mask = 0x007C; + +/* 'len-len', bit 7,8,9 */ +static const uint zip_column_data_length = 7; +/* 0000 0011 1000 0000 */ +static const uint zip_column_data_length_mask = 0x0380; + +/* 'compressed', bit 10 */ +static const uint zip_column_compressed = 10; +/* 0000 0100 0000 0000 */ +static const uint zip_column_compressed_mask = 0x0400; + +/** Updates compressed block header with the given components */ +static void +column_set_compress_header( + byte* data, + bool compressed, + ulint lenlen, + uint alg, + bool wrap, + bool reserved) +{ + ulint header = 0; + header |= (compressed << zip_column_compressed); + header |= (lenlen << zip_column_data_length); + header |= (alg << zip_column_algorithm); + header |= (wrap << zip_column_wrap); + header |= (reserved << zip_column_reserved); + mach_write_to_2(data, header); +} + +/** Parse compressed block header into components */ +static void +column_get_compress_header( + const byte* data, + bool* compressed, + ulint* lenlen, + uint* alg, + bool* wrap, + bool* reserved +) +{ + ulint header = mach_read_from_2(data); + *compressed = ((header & zip_column_compressed_mask) >> + zip_column_compressed); + *lenlen = ((header & zip_column_data_length_mask) >> + zip_column_data_length); + *alg = ((header & zip_column_algorithm_mask) >> + zip_column_algorithm); + *wrap = ((header & zip_column_wrap_mask) >> + zip_column_wrap); + *reserved = ((header & zip_column_reserved_mask) >> + zip_column_reserved); +} + +/** Allocate memory for zlib. */ +static +void* +column_zip_zalloc( + void* opaque, /*!< in/out: memory heap */ + uInt items, /*!< in: number of items to allocate */ + uInt size) /*!< in: size of an item in bytes */ +{ + return(mem_heap_zalloc(static_cast(opaque), + items * size)); +} + +/** Deallocate memory for zlib. */ +static +void +column_zip_free( + void* opaque MY_ATTRIBUTE((unused)), /*!< in: memory heap */ + void* address MY_ATTRIBUTE((unused))) /*!< in: object to free */ +{ +} + +/** Configure the zlib allocator to use the given memory heap. */ +UNIV_INTERN +void +column_zip_set_alloc( + void* stream, /*!< in/out: zlib stream */ + mem_heap_t* heap) /*!< in: memory heap to use */ +{ + z_stream* strm = static_cast(stream); + + if (srv_compressed_columns_zlib_use_heap) { + strm->zalloc = column_zip_zalloc; + strm->zfree = column_zip_free; + strm->opaque = heap; + } else { + strm->zalloc = (alloc_func)0; + strm->zfree = (free_func)0; + strm->opaque = (voidpf)0; + } +} + +/** Compress blob/text/varchar column using zlib +@return pointer to the compressed data */ +byte* +row_compress_column( + const byte* data, /*!< in: data in mysql(uncompressed) + format */ + ulint *len, /*!< in: data length; out: length of + compressed data*/ + ulint lenlen, /*!< in: bytes used to store the length of + data */ + const byte* dict_data, + /*!< in: optional dictionary data used for + compression */ + ulint dict_data_len, + /*!< in: optional dictionary data length */ + row_prebuilt_t* prebuilt) + /*!< in: use prebuilt->compress_heap only + here*/ +{ + int err = 0; + ulint comp_len = *len; + ulint buf_len = *len + zip_column_prefix_max_length; + byte* buf; + byte* ptr; + z_stream c_stream; + bool wrap = srv_compressed_columns_zlib_wrap; + + int window_bits = wrap ? MAX_WBITS : -MAX_WBITS; + + if (!prebuilt->compress_heap) { + prebuilt->compress_heap = + mem_heap_create(max(UNIV_PAGE_SIZE, buf_len)); + } + + buf = static_cast(mem_heap_zalloc( + prebuilt->compress_heap,buf_len)); + + if (*len < srv_compressed_columns_threshold || + srv_compressed_columns_zip_level == Z_NO_COMPRESSION) + goto do_not_compress; + + ptr = buf + zip_column_header_length + lenlen; + + /*init deflate object*/ + c_stream.next_in = const_cast(data); + c_stream.avail_in = *len; + c_stream.next_out = ptr; + c_stream.avail_out = comp_len; + + column_zip_set_alloc(&c_stream, prebuilt->compress_heap); + + err = deflateInit2(&c_stream, srv_compressed_columns_zip_level, + Z_DEFLATED, window_bits, MAX_MEM_LEVEL, + srv_compressed_columns_zlib_strategy); + ut_a(err == Z_OK); + + if (dict_data != 0 && dict_data_len != 0) { + err = deflateSetDictionary(&c_stream, dict_data, + dict_data_len); + ut_a(err == Z_OK); + } + + err = deflate(&c_stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&c_stream); + if (err == Z_OK) + err = Z_BUF_ERROR; + } else { + comp_len = c_stream.total_out; + err = deflateEnd(&c_stream); + } + + switch (err) { + case Z_OK: + break; + case Z_BUF_ERROR: + /* data after compress is larger than uncompressed data*/ + break; + default: + ib_logf(IB_LOG_LEVEL_ERROR, + "failed to compress the column, error: %d\n", err); + } + + /* make sure the compressed data size is smaller than + uncompressed data */ + if (err == Z_OK && + *len > (comp_len + zip_column_header_length + lenlen)) { + column_set_compress_header(buf, true, lenlen - 1, + default_zip_column_algorithm_value, wrap, + default_zip_column_reserved_value); + ptr = buf + zip_column_header_length; + /*store the uncompressed data length*/ + switch (lenlen) { + case 1: + mach_write_to_1(ptr, *len); + break; + case 2: + mach_write_to_2(ptr, *len); + break; + case 3: + mach_write_to_3(ptr, *len); + break; + case 4: + mach_write_to_4(ptr, *len); + break; + default: + ut_error; + } + + *len = comp_len + zip_column_header_length + lenlen; + return buf; + } + +do_not_compress: + ptr = buf; + column_set_compress_header(ptr, false, 0, + default_zip_column_algorithm_value, false, + default_zip_column_reserved_value); + ptr += zip_column_header_length; + memcpy(ptr, data, *len); + *len += zip_column_header_length; + return buf; +} + +/** Uncompress blob/text/varchar column using zlib +@return pointer to the uncompressed data */ +const byte* +row_decompress_column( + const byte* data, /*!< in: data in innodb(compressed) format */ + ulint *len, /*!< in: data length; out: length of + decompressed data*/ + const byte* dict_data, + /*!< in: optional dictionary data used for + decompression */ + ulint dict_data_len, + /*!< in: optional dictionary data length */ + row_prebuilt_t* prebuilt) + /*!< in: use prebuilt->compress_heap only + here*/ +{ + ulint buf_len = 0; + byte* buf; + int err = 0; + int window_bits = 0; + z_stream d_stream; + bool is_compressed = false; + bool wrap = false; + bool reserved = false; + ulint lenlen = 0; + uint alg = 0; + + ut_ad(*len != ULINT_UNDEFINED); + ut_ad(*len >= zip_column_header_length); + + column_get_compress_header(data, &is_compressed, &lenlen, &alg, + &wrap, &reserved); + + if (reserved != default_zip_column_reserved_value) { + ib_logf(IB_LOG_LEVEL_FATAL, + "unsupported compressed BLOB header format\n"); + } + + if (alg != default_zip_column_algorithm_value) { + ib_logf(IB_LOG_LEVEL_FATAL, + "unsupported 'algorithm' value in the" + " compressed BLOB header\n"); + } + + ut_a(lenlen < 4); + + data += zip_column_header_length; + if (!is_compressed) { /* column not compressed */ + *len -= zip_column_header_length; + return data; + } + + lenlen++; + + ulint comp_len = *len - zip_column_header_length - lenlen; + + ulint uncomp_len = 0; + switch (lenlen) { + case 1: + uncomp_len = mach_read_from_1(data); + break; + case 2: + uncomp_len = mach_read_from_2(data); + break; + case 3: + uncomp_len = mach_read_from_3(data); + break; + case 4: + uncomp_len = mach_read_from_4(data); + break; + default: + ut_error; + } + + data += lenlen; + + /* data is compressed, decompress it*/ + if (!prebuilt->compress_heap) { + prebuilt->compress_heap = + mem_heap_create(max(UNIV_PAGE_SIZE, uncomp_len)); + } + + buf_len = uncomp_len; + buf = static_cast(mem_heap_zalloc( + prebuilt->compress_heap, buf_len)); + + /* init d_stream */ + d_stream.next_in = const_cast(data); + d_stream.avail_in = comp_len; + d_stream.next_out = buf; + d_stream.avail_out = buf_len; + + column_zip_set_alloc(&d_stream, prebuilt->compress_heap); + + window_bits = wrap ? MAX_WBITS : -MAX_WBITS; + err = inflateInit2(&d_stream, window_bits); + ut_a(err == Z_OK); + + err = inflate(&d_stream, Z_FINISH); + if (err == Z_NEED_DICT) { + ut_a(dict_data != 0 && dict_data_len != 0); + err = inflateSetDictionary(&d_stream, dict_data, + dict_data_len); + ut_a(err == Z_OK); + err = inflate(&d_stream, Z_FINISH); + } + + if (err != Z_STREAM_END) { + inflateEnd(&d_stream); + if (err == Z_BUF_ERROR && d_stream.avail_in == 0) + err = Z_DATA_ERROR; + } else { + buf_len = d_stream.total_out; + err = inflateEnd(&d_stream); + } + + switch (err) { + case Z_OK: + break; + case Z_BUF_ERROR: + ib_logf(IB_LOG_LEVEL_FATAL, + "zlib buf error, this shouldn't happen\n"); + break; + default: + ib_logf(IB_LOG_LEVEL_FATAL, + "failed to decompress column, error: %d\n", err); + } + + if (err == Z_OK) { + if (buf_len != uncomp_len) { + ib_logf(IB_LOG_LEVEL_FATAL, + "failed to decompress blob column, may" + " be corrupted\n"); + } + *len = buf_len; + return buf; + } + + *len -= (zip_column_header_length + lenlen); + return data; +} + + /*******************************************************************//** Stores a reference to a BLOB in the MySQL format. */ UNIV_INTERN @@ -240,10 +713,21 @@ row_mysql_store_blob_ref( to 4 bytes */ const void* data, /*!< in: BLOB data; if the value to store is SQL NULL this should be NULL pointer */ - ulint len) /*!< in: BLOB length; if the value to store + ulint len, /*!< in: BLOB length; if the value to store is SQL NULL this should be 0; remember also to set the NULL bit in the MySQL record header! */ + bool need_decompression, + /*!< in: if the data need to be compressed*/ + const byte* dict_data, + /*!< in: optional compression dictionary + data */ + ulint dict_data_len, + /*!< in: optional compression dictionary data + length */ + row_prebuilt_t* prebuilt) + /*compress_heap only + here */ { /* MySQL might assume the field is set to zero except the length and the pointer fields */ @@ -255,13 +739,28 @@ row_mysql_store_blob_ref( In 32-bit architectures we only use the first 4 bytes of the pointer slot. */ - ut_a(col_len - 8 > 1 || len < 256); - ut_a(col_len - 8 > 2 || len < 256 * 256); - ut_a(col_len - 8 > 3 || len < 256 * 256 * 256); + ut_a(col_len - 8 > 1 || + len < 256 + + (need_decompression ? ZIP_COLUMN_HEADER_LENGTH : 0)); + ut_a(col_len - 8 > 2 || + len < 256 * 256 + + (need_decompression ? ZIP_COLUMN_HEADER_LENGTH : 0)); + ut_a(col_len - 8 > 3 || + len < 256 * 256 * 256 + + (need_decompression ? ZIP_COLUMN_HEADER_LENGTH : 0)); - mach_write_to_n_little_endian(dest, col_len - 8, len); + const byte *ptr = NULL; - memcpy(dest + col_len - 8, &data, sizeof data); + if (need_decompression) + ptr = row_decompress_column((const byte*)data, &len, + dict_data, dict_data_len, prebuilt); + + if (ptr) + memcpy(dest + col_len - 8, &ptr, sizeof ptr); + else + memcpy(dest + col_len - 8, &data, sizeof data); + + mach_write_to_n_little_endian(dest, col_len - 8, len); } /*******************************************************************//** @@ -274,15 +773,32 @@ row_mysql_read_blob_ref( ulint* len, /*!< out: BLOB length */ const byte* ref, /*!< in: BLOB reference in the MySQL format */ - ulint col_len) /*!< in: BLOB reference length + ulint col_len, /*!< in: BLOB reference length (not BLOB length) */ + bool need_compression, + /*!< in: if the data need to be + compressed*/ + const byte* dict_data, /*!< in: optional compression + dictionary data */ + ulint dict_data_len, /*!< in: optional compression + dictionary data length */ + row_prebuilt_t* prebuilt) /*!< in: use prebuilt->compress_heap + only here */ { - byte* data; + byte* data = NULL; + byte* ptr = NULL; *len = mach_read_from_n_little_endian(ref, col_len - 8); memcpy(&data, ref + col_len - 8, sizeof data); + if (need_compression) { + ptr = row_compress_column(data, len, col_len - 8, dict_data, + dict_data_len, prebuilt); + if (ptr) + data = ptr; + } + return(data); } @@ -365,7 +881,16 @@ row_mysql_store_col_in_innobase_format( necessarily the length of the actual payload data; if the column is a true VARCHAR then this is irrelevant */ - ulint comp) /*!< in: nonzero=compact format */ + ulint comp, /*!< in: nonzero=compact format */ + bool need_compression, + /*!< in: if the data need to be + compressed*/ + const byte* dict_data, /*!< in: optional compression + dictionary data */ + ulint dict_data_len, /*!< in: optional compression + dictionary data length */ + row_prebuilt_t* prebuilt) /*!< in: use prebuilt->compress_heap + only here */ { const byte* ptr = mysql_data; const dtype_t* dtype; @@ -418,8 +943,14 @@ row_mysql_store_col_in_innobase_format( lenlen = 2; } - ptr = row_mysql_read_true_varchar(&col_len, mysql_data, - lenlen); + const byte* tmp_ptr = row_mysql_read_true_varchar( + &col_len, mysql_data, lenlen); + if (need_compression) + ptr = row_compress_column(tmp_ptr, &col_len, + lenlen, dict_data, dict_data_len, + prebuilt); + else + ptr = tmp_ptr; } else { /* Remove trailing spaces from old style VARCHAR columns. */ @@ -501,7 +1032,9 @@ row_mysql_store_col_in_innobase_format( } } else if (type == DATA_BLOB && row_format_col) { - ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len); + ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len, + need_compression, dict_data, dict_data_len, + prebuilt); } dfield_set_data(dfield, ptr, col_len); @@ -559,7 +1092,11 @@ row_mysql_convert_row_to_innobase( TRUE, /* MySQL row format data */ mysql_rec + templ->mysql_col_offset, templ->mysql_col_len, - dict_table_is_comp(prebuilt->table)); + dict_table_is_comp(prebuilt->table), + templ->compressed, + reinterpret_cast( + templ->zip_dict_data.str), + templ->zip_dict_data.length, prebuilt); next_column: ; } @@ -905,6 +1442,10 @@ row_prebuilt_free( mem_heap_free(prebuilt->blob_heap); } + if (prebuilt->compress_heap) { + mem_heap_free(prebuilt->compress_heap); + } + if (prebuilt->old_vers_heap) { mem_heap_free(prebuilt->old_vers_heap); } @@ -1334,6 +1875,9 @@ row_insert_for_mysql( return(DB_READ_ONLY); } + if (UNIV_LIKELY_NULL(prebuilt->compress_heap)) + mem_heap_empty(prebuilt->compress_heap); + trx->op_info = "inserting"; row_mysql_delay_if_needed(); @@ -2729,6 +3273,10 @@ row_drop_tables_for_mysql_in_background(void) return(n_tables + n_tables_dropped); } + DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep", + os_thread_sleep(5000000); + ); + table = dict_table_open_on_name(drop->table_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); @@ -2739,6 +3287,16 @@ row_drop_tables_for_mysql_in_background(void) goto already_dropped; } + if (!table->to_be_dropped) { + /* There is a scenario: the old table is dropped + just after it's added into drop list, and new + table with the same name is created, then we try + to drop the new table in background. */ + dict_table_close(table, FALSE, FALSE); + + goto already_dropped; + } + ut_a(!table->can_be_evicted); dict_table_close(table, FALSE, FALSE); @@ -2869,6 +3427,12 @@ row_mysql_table_id_reassign( pars_info_add_ull_literal(info, "old_id", table->id); pars_info_add_ull_literal(info, "new_id", *new_id); + /* As micro-SQL does not support int4 == int8 comparisons, + old and new IDs are added again under different names as + int4 values*/ + pars_info_add_int4_literal(info, "old_id_narrow", table->id); + pars_info_add_int4_literal(info, "new_id_narrow", *new_id); + err = que_eval_sql( info, "PROCEDURE RENUMBER_TABLE_PROC () IS\n" @@ -2879,6 +3443,8 @@ row_mysql_table_id_reassign( " WHERE TABLE_ID = :old_id;\n" "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n" " WHERE TABLE_ID = :old_id;\n" + "UPDATE SYS_ZIP_DICT_COLS SET TABLE_ID = :new_id_narrow\n" + " WHERE TABLE_ID = :old_id_narrow;\n" "END;\n", FALSE, trx); return(err); @@ -3645,6 +4211,12 @@ row_truncate_table_for_mysql( pars_info_add_ull_literal(info, "old_id", table->id); pars_info_add_ull_literal(info, "new_id", new_id); + /* As micro-SQL does not support int4 == int8 comparisons, + old and new IDs are added again under different names as + int4 values*/ + pars_info_add_int4_literal(info, "old_id_narrow", table->id); + pars_info_add_int4_literal(info, "new_id_narrow", new_id); + err = que_eval_sql(info, "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n" "BEGIN\n" @@ -3656,6 +4228,9 @@ row_truncate_table_for_mysql( "UPDATE SYS_INDEXES" " SET TABLE_ID = :new_id, SPACE = :new_space\n" " WHERE TABLE_ID = :old_id;\n" + "UPDATE SYS_ZIP_DICT_COLS\n" + " SET TABLE_ID = :new_id_narrow\n" + " WHERE TABLE_ID = :old_id_narrow;\n" "END;\n" , FALSE, trx); @@ -4006,6 +4581,13 @@ row_drop_table_for_mysql( } } + + DBUG_EXECUTE_IF("row_drop_table_add_to_background", + row_add_table_to_background_drop_list(table->name); + err = DB_SUCCESS; + goto funct_exit; + ); + /* TODO: could we replace the counter n_foreign_key_checks_running with lock checks on the table? Acquire here an exclusive lock on the table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that @@ -4276,6 +4858,19 @@ row_drop_table_for_mysql( filepath = fil_make_ibd_name(tablename, false); } + /* Remove all compression dictionary references for the + table */ + err = dict_create_remove_zip_dict_references_for_table( + table->id, trx); + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_ERROR, "Error: (%s) not " + "able to remove compression dictionary " + "references for table %s", ut_strerr(err), + tablename); + + goto funct_exit; + } + if (dict_table_has_fts_index(table) || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { ut_ad(table->n_ref_count == 0); @@ -4623,6 +5218,19 @@ row_drop_database_for_mysql( row_mysql_lock_data_dictionary(trx); while ((table_name = dict_get_first_table_name_in_db(name))) { + /* Drop parent table if it is a fts aux table, to + avoid accessing dropped fts aux tables in information + scheam when parent table still exists. + Note: Drop parent table will drop fts aux tables. */ + char* parent_table_name; + parent_table_name = fts_get_parent_table_name( + table_name, strlen(table_name)); + + if (parent_table_name != NULL) { + mem_free(table_name); + table_name = parent_table_name; + } + ut_a(memcmp(table_name, name, namelen) == 0); table = dict_table_open_on_name( diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc index a42923de8caec..ad1e9e2bf9d83 100644 --- a/storage/xtradb/row/row0sel.cc +++ b/storage/xtradb/row/row0sel.cc @@ -2456,9 +2456,11 @@ row_sel_convert_mysql_key_to_innobase( if (UNIV_LIKELY(!is_null)) { buf = row_mysql_store_col_in_innobase_format( dfield, buf, - FALSE, /* MySQL key value format col */ + /* MySQL key value format col */ + FALSE, key_ptr + data_offset, data_len, - dict_table_is_comp(index->table)); + dict_table_is_comp(index->table), + false, 0, 0 ,0); ut_a(buf <= original_buf + buf_len); } @@ -2551,12 +2553,16 @@ row_sel_store_row_id_to_prebuilt( #ifdef UNIV_DEBUG /** Convert a non-SQL-NULL field from Innobase format to MySQL format. */ -# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \ - row_sel_field_store_in_mysql_format_func(dest,templ,idx,field,src,len) +# define row_sel_field_store_in_mysql_format( \ + dest,templ,idx,field,src,len,prebuilt) \ + row_sel_field_store_in_mysql_format_func \ + (dest,templ,idx,field,src,len, prebuilt) #else /* UNIV_DEBUG */ /** Convert a non-SQL-NULL field from Innobase format to MySQL format. */ -# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \ - row_sel_field_store_in_mysql_format_func(dest,templ,src,len) +# define row_sel_field_store_in_mysql_format( \ + dest,templ,idx,field,src,len,prebuilt) \ + row_sel_field_store_in_mysql_format_func \ + (dest,templ,src,len, prebuilt) #endif /* UNIV_DEBUG */ /**************************************************************//** @@ -2586,7 +2592,10 @@ row_sel_field_store_in_mysql_format_func( templ->icp_rec_field_no */ #endif /* UNIV_DEBUG */ const byte* data, /*!< in: data to store */ - ulint len) /*!< in: length of the data */ + ulint len, /*!< in: length of the data */ + row_prebuilt_t* prebuilt) + /*!< in: use prebuilt->compress_heap + only here */ { byte* ptr; #ifdef UNIV_DEBUG @@ -2630,6 +2639,15 @@ row_sel_field_store_in_mysql_format_func( field_end = dest + templ->mysql_col_len; if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { + /* If this is a compressed column, + decompress it first */ + if (templ->compressed) + data = row_decompress_column(data, &len, + reinterpret_cast( + templ->zip_dict_data.str), + templ->zip_dict_data.length, + prebuilt); + /* This is a >= 5.0.3 type true VARCHAR. Store the length of the data to the first byte or the first two bytes of dest. */ @@ -2680,7 +2698,11 @@ row_sel_field_store_in_mysql_format_func( already copied to the buffer in row_sel_store_mysql_rec */ row_mysql_store_blob_ref(dest, templ->mysql_col_len, data, - len); + len, templ->compressed, + reinterpret_cast( + templ->zip_dict_data.str), + templ->zip_dict_data.length, + prebuilt); break; case DATA_MYSQL: @@ -2833,7 +2855,7 @@ row_sel_store_mysql_field_func( row_sel_field_store_in_mysql_format( mysql_rec + templ->mysql_col_offset, - templ, index, field_no, data, len); + templ, index, field_no, data, len, prebuilt); if (heap != prebuilt->blob_heap) { mem_heap_free(heap); @@ -2883,7 +2905,7 @@ row_sel_store_mysql_field_func( row_sel_field_store_in_mysql_format( mysql_rec + templ->mysql_col_offset, - templ, index, field_no, data, len); + templ, index, field_no, data, len, prebuilt); } ut_ad(len != UNIV_SQL_NULL); @@ -2931,6 +2953,9 @@ row_sel_store_mysql_rec( prebuilt->blob_heap = NULL; } + if (UNIV_LIKELY_NULL(prebuilt->compress_heap)) + mem_heap_empty(prebuilt->compress_heap); + for (i = 0; i < prebuilt->n_template; i++) { const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; const ulint field_no diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc index a0dd32c203f4d..4a709160ea6b0 100644 --- a/storage/xtradb/srv/srv0mon.cc +++ b/storage/xtradb/srv/srv0mon.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. This program is free software; you can redistribute it and/or modify it under @@ -1367,7 +1367,10 @@ srv_mon_set_module_control( module */ set_current_module = FALSE; } else if (module_id == MONITOR_ALL_COUNTER) { - continue; + if (!(innodb_counter_info[ix].monitor_type + & MONITOR_GROUP_MODULE)) { + continue; + } } else { /* Hitting the next module, stop */ break; diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 7886d705ae4ec..c04b446f4800f 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -195,6 +195,9 @@ UNIV_INTERN char** srv_data_file_names = NULL; /* size in database pages */ UNIV_INTERN ulint* srv_data_file_sizes = NULL; +/** Whether the redo log tracking is currently enabled. Note that it is +possible for the log tracker thread to be running and the tracking to be +disabled */ UNIV_INTERN my_bool srv_track_changed_pages = FALSE; UNIV_INTERN ulonglong srv_max_bitmap_file_size = 100 * 1024 * 1024; @@ -793,6 +796,9 @@ UNIV_INTERN os_event_t srv_checkpoint_completed_event; UNIV_INTERN os_event_t srv_redo_log_tracked_event; +/** Whether the redo log tracker thread has been started. Does not take into +account whether the tracking is currently enabled (see srv_track_changed_pages +for that) */ UNIV_INTERN bool srv_redo_log_thread_started = false; /*********************************************************************//** @@ -2431,13 +2437,8 @@ DECLARE_THREAD(srv_redo_log_follow_thread)( os_event_wait(srv_checkpoint_completed_event); os_event_reset(srv_checkpoint_completed_event); -#ifdef UNIV_DEBUG - if (!srv_track_changed_pages) { - continue; - } -#endif - - if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) { + if (srv_track_changed_pages + && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) { if (!log_online_follow_redo_log()) { /* TODO: sync with I_S log tracking status? */ ib_logf(IB_LOG_LEVEL_ERROR, diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index dbddb4a7a1c56..82beb83192fbb 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -2780,6 +2780,12 @@ innobase_start_or_create_for_mysql(void) } } + /* Create the SYS_ZIP_DICT system table */ + err = dict_create_or_check_sys_zip_dict(); + if (err != DB_SUCCESS) { + return(err); + } + srv_is_being_started = FALSE; ut_a(trx_purge_state() == PURGE_STATE_INIT); diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index bd1e74becaa10..318c248f74220 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1,5 +1,5 @@ /* Copyright (c) 2003, 2013, Oracle and/or its affiliates - Copyright (c) 2009, 2014, SkySQL Ab. + Copyright (c) 2009, 2016, MariaDB This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index bcb07daae032e..b4fc6297afd50 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2009, 2013, Monty Program Ab + Copyright (c) 2009, 2016, MariaDB This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public diff --git a/support-files/mysql.server.sh b/support-files/mysql.server.sh index b562354a586ce..8d2e78edbd895 100644 --- a/support-files/mysql.server.sh +++ b/support-files/mysql.server.sh @@ -319,7 +319,7 @@ case "$mode" in then # Give extra arguments to mysqld with the my.cnf file. This script # may be overwritten at next upgrade. - $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" "$@" >/dev/null 2>&1 & + $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" "$@" & wait_for_ready; return_value=$? # Make lock for RedHat / SuSE diff --git a/tests/async_queries.c b/tests/async_queries.c index 76e884e6a6910..a8889fc8d5ab9 100644 --- a/tests/async_queries.c +++ b/tests/async_queries.c @@ -425,7 +425,7 @@ main(int argc, char *argv[]) event_dispatch(); - free(sds); + my_free(sds); mysql_library_end(); diff --git a/win/packaging/CMakeLists.txt b/win/packaging/CMakeLists.txt index 0535a486d57ba..1682bae6986cb 100644 --- a/win/packaging/CMakeLists.txt +++ b/win/packaging/CMakeLists.txt @@ -24,10 +24,13 @@ ENDIF() SET(MANUFACTURER "MariaDB Corporation Ab") -FIND_PATH(WIX_DIR heat.exe - "$ENV{ProgramFiles}/WiX Toolset v3.9/bin" - "$ENV{ProgramFiles}/WiX Toolset v3.10/bin" -) +SET(WIX_BIN_PATHS) +FOREACH(WIX_VER 3.9 3.10 3.11) + LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles}/WiX Toolset v${WIX_VER}/bin") + LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles} (x86)/WiX Toolset v${WIX_VER}/bin") +ENDFOREACH() + +FIND_PATH(WIX_DIR heat.exe ${WIX_BIN_PATHS}) SET(CPACK_WIX_PACKAGE_BASE_NAME "MariaDB") IF(CMAKE_SIZEOF_VOID_P EQUAL 4) SET(CPACK_WIX_UPGRADE_CODE "49EB7A6A-1CEF-4A1E-9E89-B9A4993963E3") diff --git a/win/packaging/create_msi.cmake.in b/win/packaging/create_msi.cmake.in index c2ab648a6dbf2..1f847a396950d 100644 --- a/win/packaging/create_msi.cmake.in +++ b/win/packaging/create_msi.cmake.in @@ -434,6 +434,7 @@ EXECUTE_PROCESS( IF(SIGNCODE) EXECUTE_PROCESS( COMMAND ${SIGNTOOL_EXECUTABLE} sign ${SIGNTOOL_PARAMETERS} + /d ${CPACK_PACKAGE_FILE_NAME}.msi ${CPACK_PACKAGE_FILE_NAME}.msi ) ENDIF()