diff --git a/CREDITS b/CREDITS
index f0e6de7f08fae..35ab4d48a8f79 100644
--- a/CREDITS
+++ b/CREDITS
@@ -10,6 +10,7 @@ Visma                           http://visma.com (2015 - 2016)
 Acronis                         http://acronis.com (2016)
 Nexedi				https://www.nexedi.com (2016)
 Automattic              	https://automattic.com  (2014 - 2016)
+Tencent Game DBA		http://tencentdba.com/about (2016)
 Verkkokauppa.com		https://www.verkkokauppa.com (2015 - 2016)
 Virtuozzo                       https://virtuozzo.com (2016)
 
diff --git a/VERSION b/VERSION
index e748b1bda54d4..a82a4e4d77d54 100644
--- a/VERSION
+++ b/VERSION
@@ -1,3 +1,3 @@
 MYSQL_VERSION_MAJOR=10
 MYSQL_VERSION_MINOR=0
-MYSQL_VERSION_PATCH=27
+MYSQL_VERSION_PATCH=28
diff --git a/client/mysql.cc b/client/mysql.cc
index 89f9a75ec11f8..4b20e4d98cb40 100644
--- a/client/mysql.cc
+++ b/client/mysql.cc
@@ -245,7 +245,8 @@ static void end_pager();
 static void init_tee(const char *);
 static void end_tee();
 static const char* construct_prompt();
-static char *get_arg(char *line, my_bool get_next_arg);
+enum get_arg_mode { CHECK, GET, GET_NEXT};
+static char *get_arg(char *line, get_arg_mode mode);
 static void init_username();
 static void add_int_to_prompt(int toadd);
 static int get_result_width(MYSQL_RES *res);
@@ -2257,7 +2258,7 @@ static COMMANDS *find_command(char *name)
       if (!my_strnncoll(&my_charset_latin1, (uchar*) name, len,
                         (uchar*) commands[i].name, len) &&
           (commands[i].name[len] == '\0') &&
-          (!end || commands[i].takes_params))
+          (!end || (commands[i].takes_params && get_arg(name, CHECK))))
       {
         index= i;
         break;
@@ -3177,7 +3178,7 @@ com_charset(String *buffer __attribute__((unused)), char *line)
   char buff[256], *param;
   CHARSET_INFO * new_cs;
   strmake_buf(buff, line);
-  param= get_arg(buff, 0);
+  param= get_arg(buff, GET);
   if (!param || !*param)
   {
     return put_info("Usage: \\C charset_name | charset charset_name", 
@@ -4263,12 +4264,12 @@ com_connect(String *buffer, char *line)
 #ifdef EXTRA_DEBUG
     tmp[1]= 0;
 #endif
-    tmp= get_arg(buff, 0);
+    tmp= get_arg(buff, GET);
     if (tmp && *tmp)
     {
       my_free(current_db);
       current_db= my_strdup(tmp, MYF(MY_WME));
-      tmp= get_arg(buff, 1);
+      tmp= get_arg(buff, GET_NEXT);
       if (tmp)
       {
 	my_free(current_host);
@@ -4371,7 +4372,7 @@ com_delimiter(String *buffer __attribute__((unused)), char *line)
   char buff[256], *tmp;
 
   strmake_buf(buff, line);
-  tmp= get_arg(buff, 0);
+  tmp= get_arg(buff, GET);
 
   if (!tmp || !*tmp)
   {
@@ -4402,7 +4403,7 @@ com_use(String *buffer __attribute__((unused)), char *line)
 
   bzero(buff, sizeof(buff));
   strmake_buf(buff, line);
-  tmp= get_arg(buff, 0);
+  tmp= get_arg(buff, GET);
   if (!tmp || !*tmp)
   {
     put_info("USE must be followed by a database name", INFO_ERROR);
@@ -4487,23 +4488,22 @@ com_nowarnings(String *buffer __attribute__((unused)),
 }
 
 /*
-  Gets argument from a command on the command line. If get_next_arg is
-  not defined, skips the command and returns the first argument. The
-  line is modified by adding zero to the end of the argument. If
-  get_next_arg is defined, then the function searches for end of string
-  first, after found, returns the next argument and adds zero to the
-  end. If you ever wish to use this feature, remember to initialize all
-  items in the array to zero first.
+  Gets argument from a command on the command line. If mode is not GET_NEXT,
+  skips the command and returns the first argument. Unless mode is CHECK, the
+  line is modified by adding zero to the end of the argument. If mode is
+  GET_NEXT, the function first searches for the end of the string, then
+  returns the next argument and adds zero to its end. To use GET_NEXT,
+  remember to initialize all items in the array to zero first.
 */
 
-char *get_arg(char *line, my_bool get_next_arg)
+static char *get_arg(char *line, get_arg_mode mode)
 {
   char *ptr, *start;
-  my_bool quoted= 0, valid_arg= 0;
+  bool short_cmd= false;
   char qtype= 0;
 
   ptr= line;
-  if (get_next_arg)
+  if (mode == GET_NEXT)
   {
     for (; *ptr; ptr++) ;
     if (*(ptr + 1))
@@ -4514,7 +4514,7 @@ char *get_arg(char *line, my_bool get_next_arg)
     /* skip leading white spaces */
     while (my_isspace(charset_info, *ptr))
       ptr++;
-    if (*ptr == '\\') // short command was used
+    if ((short_cmd= *ptr == '\\')) // short command was used
       ptr+= 2;
     else
       while (*ptr &&!my_isspace(charset_info, *ptr)) // skip command
@@ -4527,24 +4527,28 @@ char *get_arg(char *line, my_bool get_next_arg)
   if (*ptr == '\'' || *ptr == '\"' || *ptr == '`')
   {
     qtype= *ptr;
-    quoted= 1;
     ptr++;
   }
   for (start=ptr ; *ptr; ptr++)
   {
-    if (*ptr == '\\' && ptr[1]) // escaped character
+    if ((*ptr == '\\' && ptr[1]) ||  // escaped character
+        (!short_cmd && qtype && *ptr == qtype && ptr[1] == qtype)) // quote
     {
-      // Remove the backslash
-      strmov_overlapp(ptr, ptr+1);
+      // Remove (or skip) the backslash (or a second quote)
+      if (mode != CHECK)
+        strmov_overlapp(ptr, ptr+1);
+      else
+        ptr++;
     }
-    else if ((!quoted && *ptr == ' ') || (quoted && *ptr == qtype))
+    else if (*ptr == (qtype ? qtype : ' '))
     {
-      *ptr= 0;
+      qtype= 0;
+      if (mode != CHECK)
+        *ptr= 0;
       break;
     }
   }
-  valid_arg= ptr != start;
-  return valid_arg ? start : NullS;
+  return ptr != start && !qtype ? start : NullS;
 }
 
 
diff --git a/client/mysqldump.c b/client/mysqldump.c
index 153761ed5102c..64ed21ac7fca8 100644
--- a/client/mysqldump.c
+++ b/client/mysqldump.c
@@ -575,9 +575,7 @@ static int dump_all_tablespaces();
 static int dump_tablespaces_for_tables(char *db, char **table_names, int tables);
 static int dump_tablespaces_for_databases(char** databases);
 static int dump_tablespaces(char* ts_where);
-static void print_comment(FILE *sql_file, my_bool is_error, const char *format,
-                          ...);
-
+static void print_comment(FILE *, my_bool, const char *, ...);
 
 /*
   Print the supplied message if in verbose mode
@@ -655,6 +653,30 @@ static void short_usage(FILE *f)
 }
 
 
+/** Returns ident made safe for a "-- " comment: every newline is followed
+  by "-- " and over-long input is cut and ends in "...". Two static buffers
+  alternate so one print_comment() can take two results; not reentrant.
+*/
+static const char *fix_for_comment(const char *ident)
+{
+  static char bufs[2][1024];
+  static unsigned next= 0;
+  char c, *buf= bufs[next++ & 1], *s= buf;
+
+  while ((c= *s++= *ident++))
+  {
+    if (c == '\n')                      /* keep the following text commented */
+      s= strmov(s, "-- ");
+    if (s >= buf + sizeof(bufs[0]) - 10)
+    {
+      strmov(s, "...");                 /* 10 spare bytes cover "-- " + "..." */
+      break;
+    }
+  }
+  return buf;
+}
+
+
 static void write_header(FILE *sql_file, char *db_name)
 {
   if (opt_xml)
@@ -677,8 +699,8 @@ static void write_header(FILE *sql_file, char *db_name)
                   DUMP_VERSION, MYSQL_SERVER_VERSION, SYSTEM_TYPE,
                   MACHINE_TYPE);
     print_comment(sql_file, 0, "-- Host: %s    Database: %s\n",
-                  current_host ? current_host : "localhost",
-                  db_name ? db_name : "");
+                  fix_for_comment(current_host ? current_host : "localhost"),
+                  fix_for_comment(db_name ? db_name : ""));
     print_comment(sql_file, 0,
                   "-- ------------------------------------------------------\n"
                  );
@@ -2224,7 +2246,8 @@ static uint dump_events_for_db(char *db)
 
   /* nice comments */
   print_comment(sql_file, 0,
-                "\n--\n-- Dumping events for database '%s'\n--\n", db);
+                "\n--\n-- Dumping events for database '%s'\n--\n",
+                fix_for_comment(db));
 
   /*
     not using "mysql_query_with_error_report" because we may have not
@@ -2436,7 +2459,8 @@ static uint dump_routines_for_db(char *db)
 
   /* nice comments */
   print_comment(sql_file, 0,
-                "\n--\n-- Dumping routines for database '%s'\n--\n", db);
+                "\n--\n-- Dumping routines for database '%s'\n--\n",
+                fix_for_comment(db));
 
   /*
     not using "mysql_query_with_error_report" because we may have not
@@ -2731,11 +2755,11 @@ static uint get_table_structure(char *table, char *db, char *table_type,
       if (strcmp (table_type, "VIEW") == 0)         /* view */
         print_comment(sql_file, 0,
                       "\n--\n-- Temporary table structure for view %s\n--\n\n",
-                      result_table);
+                      fix_for_comment(result_table));
       else
         print_comment(sql_file, 0,
                       "\n--\n-- Table structure for table %s\n--\n\n",
-                      result_table);
+                      fix_for_comment(result_table));
 
       if (opt_drop)
       {
@@ -2977,7 +3001,7 @@ static uint get_table_structure(char *table, char *db, char *table_type,
 
       print_comment(sql_file, 0,
                     "\n--\n-- Table structure for table %s\n--\n\n",
-                    result_table);
+                    fix_for_comment(result_table));
       if (opt_drop)
         fprintf(sql_file, "DROP TABLE IF EXISTS %s;\n", result_table);
       if (!opt_xml)
@@ -3684,21 +3708,21 @@ static void dump_table(char *table, char *db)
   {
     print_comment(md_result_file, 0,
                   "\n--\n-- Dumping data for table %s\n--\n",
-                  result_table);
+                  fix_for_comment(result_table));
     
     dynstr_append_checked(&query_string, "SELECT /*!40001 SQL_NO_CACHE */ * FROM ");
     dynstr_append_checked(&query_string, result_table);
 
     if (where)
     {
-      print_comment(md_result_file, 0, "-- WHERE:  %s\n", where);
+      print_comment(md_result_file, 0, "-- WHERE:  %s\n", fix_for_comment(where));
 
       dynstr_append_checked(&query_string, " WHERE ");
       dynstr_append_checked(&query_string, where);
     }
     if (order_by)
     {
-      print_comment(md_result_file, 0, "-- ORDER BY:  %s\n", order_by);
+      print_comment(md_result_file, 0, "-- ORDER BY:  %s\n", fix_for_comment(order_by));
 
       dynstr_append_checked(&query_string, " ORDER BY ");
       dynstr_append_checked(&query_string, order_by);
@@ -4208,7 +4232,7 @@ static int dump_tablespaces(char* ts_where)
     if (first)
     {
       print_comment(md_result_file, 0, "\n--\n-- Logfile group: %s\n--\n",
-                    row[0]);
+                    fix_for_comment(row[0]));
 
       fprintf(md_result_file, "\nCREATE");
     }
@@ -4277,7 +4301,8 @@ static int dump_tablespaces(char* ts_where)
       first= 1;
     if (first)
     {
-      print_comment(md_result_file, 0, "\n--\n-- Tablespace: %s\n--\n", row[0]);
+      print_comment(md_result_file, 0, "\n--\n-- Tablespace: %s\n--\n",
+                    fix_for_comment(row[0]));
       fprintf(md_result_file, "\nCREATE");
     }
     else
@@ -4481,7 +4506,8 @@ static int init_dumping(char *database, int init_func(char*))
       char *qdatabase= quote_name(database,quoted_database_buf,opt_quoted);
 
       print_comment(md_result_file, 0,
-                    "\n--\n-- Current Database: %s\n--\n", qdatabase);
+                    "\n--\n-- Current Database: %s\n--\n",
+                    fix_for_comment(qdatabase));
 
       /* Call the view or table specific function */
       init_func(qdatabase);
@@ -5672,7 +5698,7 @@ static my_bool get_view_structure(char *table, char* db)
 
   print_comment(sql_file, 0,
                 "\n--\n-- Final view structure for view %s\n--\n\n",
-                result_table);
+                fix_for_comment(result_table));
 
   /* Table might not exist if this view was dumped with --tab. */
   fprintf(sql_file, "/*!50001 DROP TABLE IF EXISTS %s*/;\n", opt_quoted_table);
diff --git a/client/mysqltest.cc b/client/mysqltest.cc
index 66bcb6462e7ec..dede6527d11ac 100644
--- a/client/mysqltest.cc
+++ b/client/mysqltest.cc
@@ -3373,10 +3373,6 @@ void do_exec(struct st_command *command)
 #endif
 #endif
 
-  /* exec command is interpreted externally and will not take newlines */
-  while(replace(&ds_cmd, "\n", 1, " ", 1) == 0)
-    ;
-  
   DBUG_PRINT("info", ("Executing '%s' as '%s'",
                       command->first_argument, ds_cmd.str));
 
diff --git a/cmake/cpack_rpm.cmake b/cmake/cpack_rpm.cmake
index 174548502d819..00f21c1cd8bef 100644
--- a/cmake/cpack_rpm.cmake
+++ b/cmake/cpack_rpm.cmake
@@ -221,6 +221,9 @@ SETA(CPACK_RPM_test_PACKAGE_PROVIDES
   "perl(mtr_io.pl)"
   "perl(mtr_match)"
   "perl(mtr_misc.pl)"
+  "perl(mtr_gcov.pl)"
+  "perl(mtr_gprof.pl)"
+  "perl(mtr_process.pl)"
   "perl(mtr_report)"
   "perl(mtr_results)"
   "perl(mtr_unique)")
diff --git a/cmake/package_name.cmake b/cmake/package_name.cmake
index 87db39d68d458..30f5199441fcb 100644
--- a/cmake/package_name.cmake
+++ b/cmake/package_name.cmake
@@ -30,6 +30,10 @@ IF(NOT VERSION)
       SET(64BIT 1)
     ENDIF()
 
+    IF(NOT 64BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^mips64")
+      SET(DEFAULT_MACHINE "mips")
+    ENDIF()
+
     IF(CMAKE_SYSTEM_NAME MATCHES "Windows")
       SET(NEED_DASH_BETWEEN_PLATFORM_AND_MACHINE 0)
       SET(DEFAULT_PLATFORM "win")
diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc
index 6018a4884ea5b..c09458630c834 100644
--- a/extra/innochecksum.cc
+++ b/extra/innochecksum.cc
@@ -243,10 +243,9 @@ int main(int argc, char **argv)
   time_t lastt;                  /* last time */
   ulint oldcsum, oldcsumfield, csum, csumfield, crc32, logseq, logseqfield;
                                  /* ulints for checksum storage */
-  struct stat st;                /* for stat, if you couldn't guess */
   unsigned long long int size;   /* size of file (has to be 64 bits) */
   ulint pages;                   /* number of pages in file */
-  off_t offset= 0;
+  long long offset= 0;
   int fd;
 
   printf("InnoDB offline file checksum utility.\n");
@@ -269,6 +268,47 @@ int main(int argc, char **argv)
     goto error;
   }
 
+#ifdef _WIN32
+  /* Switch off OS file buffering for the file. */
+
+  HANDLE h = CreateFile(filename, GENERIC_READ,
+   FILE_SHARE_READ|FILE_SHARE_WRITE, 0,
+   OPEN_EXISTING, FILE_FLAG_NO_BUFFERING, 0);
+
+  if (!h)
+  {
+    fprintf(stderr, "Error; cant open file\n");
+    goto error;
+  }
+
+  if (!GetFileSizeEx(h, (LARGE_INTEGER *)&size))
+  {
+    fprintf(stderr, "Error; GetFileSize() failed\n");
+    goto error;
+  }
+
+  fd = _open_osfhandle ((intptr_t) h, _O_RDONLY);
+  if (fd < 0)
+  {
+    fprintf(stderr, "Error; _open_osfhandle() failed\n");
+    goto error;
+  }
+
+  f = _fdopen(fd, "rb");
+  if (!f)
+  {
+    fprintf(stderr, "Error; fdopen() failed\n");
+    goto error;
+  }
+
+  /*
+    Disable stdio buffering (FILE_FLAG_NO_BUFFERING requires properly aligned
+    IO buffers, which stdio does not guarantee).
+  */
+  setvbuf(f, NULL, _IONBF, 0);
+
+#else
+  struct stat st;
   /* stat the file to get size and page count */
   if (stat(filename, &st))
   {
@@ -279,6 +319,8 @@ int main(int argc, char **argv)
 
   /* Open the file for reading */
   f= fopen(filename, "rb");
+#endif
+
   if (f == NULL)
   {
     fprintf(stderr, "Error; %s cannot be opened", filename);
@@ -323,7 +365,7 @@ int main(int argc, char **argv)
   }
   else if (verbose)
   {
-    printf("file %s = %llu bytes (%lu pages)...\n", filename, size, pages);
+    printf("file %s = %llu bytes (%lu pages)...\n", filename, size, (ulong)pages);
     if (do_one_page)
       printf("InnoChecksum; checking page %lu\n", do_page);
     else
@@ -348,9 +390,12 @@ int main(int argc, char **argv)
       goto error;
     }
 
-    offset= (off_t)start_page * (off_t)physical_page_size;
-
+    offset= (longlong)start_page * (longlong)physical_page_size;
+#ifdef _WIN32
+    if (_lseeki64(fd, offset, SEEK_SET) != offset)
+#else
     if (lseek(fd, offset, SEEK_SET) != offset)
+#endif
     {
       perror("Error; Unable to seek to necessary offset");
       goto error;
diff --git a/extra/yassl/README b/extra/yassl/README
index b5eb88824fb0d..a3d4f60f56128 100644
--- a/extra/yassl/README
+++ b/extra/yassl/README
@@ -12,6 +12,24 @@ before calling SSL_new();
 
 *** end Note ***
 
+yaSSL Release notes, version 2.4.2 (9/22/2016)
+    This release of yaSSL fixes a medium security vulnerability. A fix for
+    potential AES side channel leaks is included that a local user monitoring
+    the same CPU core cache could exploit.  VM users, hyper-threading users,
+    and users where potential attackers have access to the CPU cache will need
+    to update if they utilize AES.
+
+    DSA padding fixes for unusual sizes are included as well.  Users with DSA
+    certificates should update.
+
+yaSSL Release notes, version 2.4.0 (5/20/2016)
+    This release of yaSSL fixes the OpenSSL compatibility function
+    SSL_CTX_load_verify_locations() when using the path directory to allow
+    unlimited path sizes.  Minor Windows build fixes are included.
+    No high level security fixes in this version but we always recommend
+    updating.
+
+
 yaSSL Release notes, version 2.3.9b (2/03/2016)
     This release of yaSSL fixes the OpenSSL compatibility function
     X509_NAME_get_index_by_NID() to use the actual index of the common name
diff --git a/extra/yassl/certs/dsa-cert.pem b/extra/yassl/certs/dsa-cert.pem
index 10d533edc88b0..10794cbee7313 100644
--- a/extra/yassl/certs/dsa-cert.pem
+++ b/extra/yassl/certs/dsa-cert.pem
@@ -1,22 +1,22 @@
 -----BEGIN CERTIFICATE-----
-MIIDqzCCA2ugAwIBAgIJAMGqrgDU6DyhMAkGByqGSM44BAMwgY4xCzAJBgNVBAYT
+MIIDrzCCA2+gAwIBAgIJAK1zRM7YFcNjMAkGByqGSM44BAMwgZAxCzAJBgNVBAYT
 AlVTMQ8wDQYDVQQIDAZPcmVnb24xETAPBgNVBAcMCFBvcnRsYW5kMRAwDgYDVQQK
-DAd3b2xmU1NMMRAwDgYDVQQLDAd0ZXN0aW5nMRYwFAYDVQQDDA13d3cueWFzc2wu
-Y29tMR8wHQYJKoZIhvcNAQkBFhBpbmZvQHdvbGZzc2wuY29tMB4XDTEzMDQyMjIw
-MDk0NFoXDTE2MDExNzIwMDk0NFowgY4xCzAJBgNVBAYTAlVTMQ8wDQYDVQQIDAZP
-cmVnb24xETAPBgNVBAcMCFBvcnRsYW5kMRAwDgYDVQQKDAd3b2xmU1NMMRAwDgYD
-VQQLDAd0ZXN0aW5nMRYwFAYDVQQDDA13d3cueWFzc2wuY29tMR8wHQYJKoZIhvcN
-AQkBFhBpbmZvQHdvbGZzc2wuY29tMIIBuDCCASwGByqGSM44BAEwggEfAoGBAL1R
-7koy4IrH6sbh6nDEUUPPKgfhxxLCWCVexF2+qzANEr+hC9M002haJXFOfeS9DyoO
-WFbL0qMZOuqv+22CaHnoUWl7q3PjJOAI3JH0P54ZyUPuU1909RzgTdIDp5+ikbr7
-KYjnltL73FQVMbjTZQKthIpPn3MjYcF+4jp2W2zFAhUAkcntYND6MGf+eYzIJDN2
-L7SonHUCgYEAklpxErfqznIZjVvqqHFaq+mgAL5J8QrKVmdhYZh/Y8z4jCjoCA8o
-TDoFKxf7s2ZzgaPKvglaEKiYqLqic9qY78DYJswzQMLFvjsF4sFZ+pYCBdWPQI4N
-PgxCiznK6Ce+JH9ikSBvMvG+tevjr2UpawDIHX3+AWYaZBZwKADAaboDgYUAAoGB
-AJ3LY89yHyvQ/TsQ6zlYbovjbk/ogndsMqPdNUvL4RuPTgJP/caaDDa0XJ7ak6A7
-TJ+QheLNwOXoZPYJC4EGFSDAXpYniGhbWIrVTCGe6lmZDfnx40WXS0kk3m/DHaC0
-3ElLAiybxVGxyqoUfbT3Zv1JwftWMuiqHH5uADhdXuXVo1AwTjAdBgNVHQ4EFgQU
-IJjk416o4v8qpH9LBtXlR9v8gccwHwYDVR0jBBgwFoAUIJjk416o4v8qpH9LBtXl
-R9v8gccwDAYDVR0TBAUwAwEB/zAJBgcqhkjOOAQDAy8AMCwCFCjGKIdOSV12LcTu
-k08owGM6YkO1AhQe+K173VuaO/OsDNsxZlKpyH8+1g==
+DAd3b2xmU1NMMRAwDgYDVQQLDAd0ZXN0aW5nMRgwFgYDVQQDDA93d3cud29sZnNz
+bC5jb20xHzAdBgkqhkiG9w0BCQEWEGluZm9Ad29sZnNzbC5jb20wHhcNMTYwOTIy
+MjEyMzA0WhcNMjIwMzE1MjEyMzA0WjCBkDELMAkGA1UEBhMCVVMxDzANBgNVBAgM
+Bk9yZWdvbjERMA8GA1UEBwwIUG9ydGxhbmQxEDAOBgNVBAoMB3dvbGZTU0wxEDAO
+BgNVBAsMB3Rlc3RpbmcxGDAWBgNVBAMMD3d3dy53b2xmc3NsLmNvbTEfMB0GCSqG
+SIb3DQEJARYQaW5mb0B3b2xmc3NsLmNvbTCCAbgwggEsBgcqhkjOOAQBMIIBHwKB
+gQC9Ue5KMuCKx+rG4epwxFFDzyoH4ccSwlglXsRdvqswDRK/oQvTNNNoWiVxTn3k
+vQ8qDlhWy9KjGTrqr/ttgmh56FFpe6tz4yTgCNyR9D+eGclD7lNfdPUc4E3SA6ef
+opG6+ymI55bS+9xUFTG402UCrYSKT59zI2HBfuI6dltsxQIVAJHJ7WDQ+jBn/nmM
+yCQzdi+0qJx1AoGBAJJacRK36s5yGY1b6qhxWqvpoAC+SfEKylZnYWGYf2PM+Iwo
+6AgPKEw6BSsX+7Nmc4Gjyr4JWhComKi6onPamO/A2CbMM0DCxb47BeLBWfqWAgXV
+j0CODT4MQos5yugnviR/YpEgbzLxvrXr469lKWsAyB19/gFmGmQWcCgAwGm6A4GF
+AAKBgQCdy2PPch8r0P07EOs5WG6L425P6IJ3bDKj3TVLy+Ebj04CT/3Gmgw2tFye
+2pOgO0yfkIXizcDl6GT2CQuBBhUgwF6WJ4hoW1iK1UwhnupZmQ358eNFl0tJJN5v
+wx2gtNxJSwIsm8VRscqqFH2092b9ScH7VjLoqhx+bgA4XV7l1aNQME4wHQYDVR0O
+BBYEFCCY5ONeqOL/KqR/SwbV5Ufb/IHHMB8GA1UdIwQYMBaAFCCY5ONeqOL/KqR/
+SwbV5Ufb/IHHMAwGA1UdEwQFMAMBAf8wCQYHKoZIzjgEAwMvADAsAhQRYSCVN/Ge
+agV3mffU3qNZ92fI0QIUPH7Jp+iASI7U1ocaYDc10qXGaGY=
 -----END CERTIFICATE-----
diff --git a/extra/yassl/include/openssl/ssl.h b/extra/yassl/include/openssl/ssl.h
index c95eb1ed88705..9ec99b46c1f00 100644
--- a/extra/yassl/include/openssl/ssl.h
+++ b/extra/yassl/include/openssl/ssl.h
@@ -34,7 +34,7 @@
 #include "rsa.h"
 
 
-#define YASSL_VERSION "2.3.9b"
+#define YASSL_VERSION "2.4.2"
 
 
 #if defined(__cplusplus)
diff --git a/extra/yassl/src/ssl.cpp b/extra/yassl/src/ssl.cpp
index 57542f174c942..7069140dcda02 100644
--- a/extra/yassl/src/ssl.cpp
+++ b/extra/yassl/src/ssl.cpp
@@ -162,7 +162,7 @@ int read_file(SSL_CTX* ctx, const char* file, int format, CertType type)
             TaoCrypt::DSA_PrivateKey dsaKey;
             dsaKey.Initialize(dsaSource);
 
-            if (rsaSource.GetError().What()) {
+            if (dsaSource.GetError().What()) {
                 // neither worked
                 ret = SSL_FAILURE;
             }
@@ -785,40 +785,67 @@ int SSL_CTX_load_verify_locations(SSL_CTX* ctx, const char* file,
         WIN32_FIND_DATA FindFileData;
         HANDLE hFind;
 
-        char name[MAX_PATH + 1];  // directory specification
-        strncpy(name, path, MAX_PATH - 3);
-        strncat(name, "\\*", 3);
+        const int DELIMITER_SZ      = 2;
+        const int DELIMITER_STAR_SZ = 3;
+        int pathSz = (int)strlen(path);
+        int nameSz = pathSz + DELIMITER_STAR_SZ + 1; // plus 1 for terminator
+        char* name = NEW_YS char[nameSz];  // directory specification
+        memset(name, 0, nameSz);
+        strncpy(name, path, nameSz - DELIMITER_STAR_SZ - 1);
+        strncat(name, "\\*", DELIMITER_STAR_SZ);
 
         hFind = FindFirstFile(name, &FindFileData);
-        if (hFind == INVALID_HANDLE_VALUE) return SSL_BAD_PATH;
+        if (hFind == INVALID_HANDLE_VALUE) {
+            ysArrayDelete(name);
+            return SSL_BAD_PATH;
+        }
 
         do {
-            if (FindFileData.dwFileAttributes != FILE_ATTRIBUTE_DIRECTORY) {
-                strncpy(name, path, MAX_PATH - 2 - HALF_PATH);
-                strncat(name, "\\", 2);
-                strncat(name, FindFileData.cFileName, HALF_PATH);
+            if (!(FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+                int curSz = (int)strlen(FindFileData.cFileName);
+                if (pathSz + curSz + DELIMITER_SZ + 1 > nameSz) {
+                    ysArrayDelete(name);
+                    // plus 1 for terminator
+                    nameSz = pathSz + curSz + DELIMITER_SZ + 1;
+                    name = NEW_YS char[nameSz];
+                }
+                memset(name, 0, nameSz);
+                strncpy(name, path, nameSz - curSz - DELIMITER_SZ - 1);
+                strncat(name, "\\", DELIMITER_SZ);
+                strncat(name, FindFileData.cFileName,
+                                            nameSz - pathSz - DELIMITER_SZ - 1);
                 ret = read_file(ctx, name, SSL_FILETYPE_PEM, CA);
             }
         } while (ret == SSL_SUCCESS && FindNextFile(hFind, &FindFileData));
 
+        ysArrayDelete(name);
         FindClose(hFind);
 
 #else   // _WIN32
-
-        const int MAX_PATH = 260;
-
         DIR* dir = opendir(path);
         if (!dir) return SSL_BAD_PATH;
 
         struct dirent* entry;
         struct stat    buf;
-        char           name[MAX_PATH + 1];
+        const int DELIMITER_SZ = 1;
+        int pathSz = (int)strlen(path);
+        int nameSz = pathSz + DELIMITER_SZ + 1; //plus 1 for null terminator
+        char* name = NEW_YS char[nameSz];  // directory specification
 
         while (ret == SSL_SUCCESS && (entry = readdir(dir))) {
-            strncpy(name, path, MAX_PATH - 1 - HALF_PATH);
-            strncat(name, "/", 1);
-            strncat(name, entry->d_name, HALF_PATH);
+            int curSz = (int)strlen(entry->d_name);
+            if (pathSz + curSz + DELIMITER_SZ + 1 > nameSz) {
+                ysArrayDelete(name);
+                nameSz = pathSz + DELIMITER_SZ + curSz + 1;
+                name = NEW_YS char[nameSz];
+            }
+            memset(name, 0, nameSz);
+            strncpy(name, path, nameSz - curSz - 1);
+            strncat(name, "/",  DELIMITER_SZ);
+            strncat(name, entry->d_name, nameSz - pathSz - DELIMITER_SZ - 1);
+
             if (stat(name, &buf) < 0) {
+                ysArrayDelete(name);
                 closedir(dir);
                 return SSL_BAD_STAT;
             }
@@ -827,6 +854,7 @@ int SSL_CTX_load_verify_locations(SSL_CTX* ctx, const char* file,
                 ret = read_file(ctx, name, SSL_FILETYPE_PEM, CA);
         }
 
+        ysArrayDelete(name);
         closedir(dir);
 
 #endif
diff --git a/extra/yassl/taocrypt/include/aes.hpp b/extra/yassl/taocrypt/include/aes.hpp
index 017630331560b..bccf6e73fc720 100644
--- a/extra/yassl/taocrypt/include/aes.hpp
+++ b/extra/yassl/taocrypt/include/aes.hpp
@@ -60,6 +60,7 @@ class AES : public Mode_BASE {
 
     static const word32 Te[5][256];
     static const word32 Td[5][256];
+    static const byte   CTd4[256];
 
     static const word32* Te0;
     static const word32* Te1;
@@ -80,11 +81,68 @@ class AES : public Mode_BASE {
 
     void ProcessAndXorBlock(const byte*, const byte*, byte*) const;
 
+    word32 PreFetchTe() const;
+    word32 PreFetchTd() const;
+    word32 PreFetchCTd4() const;
+
     AES(const AES&);            // hide copy
     AES& operator=(const AES&); // and assign
 };
 
 
+#if defined(__x86_64__) || defined(_M_X64) || \
+           (defined(__ILP32__) && (__ILP32__ >= 1))
+    #define TC_CACHE_LINE_SZ 64
+#else
+    /* default cache line size */
+    #define TC_CACHE_LINE_SZ 32
+#endif
+
+inline word32 AES::PreFetchTe() const
+{
+    /* Reads one entry per TC_CACHE_LINE_SZ bytes from Te[0..3] (256 four-byte
+       entries each). The accumulator starts at 0 and is only AND-ed, so the
+       result is 0; presumably the loads are the point (cache warm-up). */
+    const int stride = TC_CACHE_LINE_SZ / 4;
+    word32 acc = 0;
+
+    for (int tbl = 0; tbl < 4; ++tbl)
+        for (int idx = 0; idx < 256; idx += stride)
+            acc &= Te[tbl][idx];
+
+    return acc;
+}
+
+
+inline word32 AES::PreFetchTd() const
+{
+    /* Reads one entry per TC_CACHE_LINE_SZ bytes from Td[0..3] (256 four-byte
+       entries each). The accumulator starts at 0 and is only AND-ed, so the
+       result is 0; presumably the loads are the point (cache warm-up). */
+    const int stride = TC_CACHE_LINE_SZ / 4;
+    word32 acc = 0;
+
+    for (int tbl = 0; tbl < 4; ++tbl)
+        for (int idx = 0; idx < 256; idx += stride)
+            acc &= Td[tbl][idx];
+
+    return acc;
+}
+
+
+inline word32 AES::PreFetchCTd4() const
+{
+    /* Reads one byte of CTd4 per TC_CACHE_LINE_SZ bytes. The accumulator
+       starts at 0 and is only AND-ed, so the result is always 0. */
+    word32 acc = 0;
+
+    for (int pos = 0; pos < 256; pos += TC_CACHE_LINE_SZ)
+        acc &= CTd4[pos];
+
+    return acc;
+}
+
+
 typedef BlockCipher<ENCRYPTION, AES, ECB> AES_ECB_Encryption;
 typedef BlockCipher<DECRYPTION, AES, ECB> AES_ECB_Decryption;
 
diff --git a/extra/yassl/taocrypt/include/integer.hpp b/extra/yassl/taocrypt/include/integer.hpp
index 75a3ee3d3df80..05fe189fd585f 100644
--- a/extra/yassl/taocrypt/include/integer.hpp
+++ b/extra/yassl/taocrypt/include/integer.hpp
@@ -119,6 +119,9 @@ namespace TaoCrypt {
 
 
 
+#ifdef _WIN32
+    #undef max // avoid name clash
+#endif
 // general MAX
 template<typename T> inline
 const T& max(const T& a, const T& b)
diff --git a/extra/yassl/taocrypt/src/aes.cpp b/extra/yassl/taocrypt/src/aes.cpp
index e47765b87d0ef..2321c72554cdf 100644
--- a/extra/yassl/taocrypt/src/aes.cpp
+++ b/extra/yassl/taocrypt/src/aes.cpp
@@ -109,10 +109,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
         {
             temp  = rk[3];
             rk[4] = rk[0] ^
-                (Te4[GETBYTE(temp, 2)] & 0xff000000) ^
-                (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^
-                (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^
-                (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^
+                (Te2[GETBYTE(temp, 2)] & 0xff000000) ^
+                (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^
+                (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^
+                (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^
                 rcon_[i];
             rk[5] = rk[1] ^ rk[4];
             rk[6] = rk[2] ^ rk[5];
@@ -128,10 +128,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
         {
             temp = rk[ 5];
             rk[ 6] = rk[ 0] ^
-                (Te4[GETBYTE(temp, 2)] & 0xff000000) ^
-                (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^
-                (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^
-                (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^
+                (Te2[GETBYTE(temp, 2)] & 0xff000000) ^
+                (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^
+                (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^
+                (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^
                 rcon_[i];
             rk[ 7] = rk[ 1] ^ rk[ 6];
             rk[ 8] = rk[ 2] ^ rk[ 7];
@@ -149,10 +149,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
         {
             temp = rk[ 7];
             rk[ 8] = rk[ 0] ^
-                (Te4[GETBYTE(temp, 2)] & 0xff000000) ^
-                (Te4[GETBYTE(temp, 1)] & 0x00ff0000) ^
-                (Te4[GETBYTE(temp, 0)] & 0x0000ff00) ^
-                (Te4[GETBYTE(temp, 3)] & 0x000000ff) ^
+                (Te2[GETBYTE(temp, 2)] & 0xff000000) ^
+                (Te3[GETBYTE(temp, 1)] & 0x00ff0000) ^
+                (Te0[GETBYTE(temp, 0)] & 0x0000ff00) ^
+                (Te1[GETBYTE(temp, 3)] & 0x000000ff) ^
                 rcon_[i];
             rk[ 9] = rk[ 1] ^ rk[ 8];
             rk[10] = rk[ 2] ^ rk[ 9];
@@ -161,10 +161,10 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
                 break;
             temp = rk[11];
             rk[12] = rk[ 4] ^
-                (Te4[GETBYTE(temp, 3)] & 0xff000000) ^
-                (Te4[GETBYTE(temp, 2)] & 0x00ff0000) ^
-                (Te4[GETBYTE(temp, 1)] & 0x0000ff00) ^
-                (Te4[GETBYTE(temp, 0)] & 0x000000ff);
+                (Te2[GETBYTE(temp, 3)] & 0xff000000) ^
+                (Te3[GETBYTE(temp, 2)] & 0x00ff0000) ^
+                (Te0[GETBYTE(temp, 1)] & 0x0000ff00) ^
+                (Te1[GETBYTE(temp, 0)] & 0x000000ff);
             rk[13] = rk[ 5] ^ rk[12];
             rk[14] = rk[ 6] ^ rk[13];
             rk[15] = rk[ 7] ^ rk[14];
@@ -191,25 +191,25 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
         for (i = 1; i < rounds_; i++) {
             rk += 4;
             rk[0] =
-                Td0[Te4[GETBYTE(rk[0], 3)] & 0xff] ^
-                Td1[Te4[GETBYTE(rk[0], 2)] & 0xff] ^
-                Td2[Te4[GETBYTE(rk[0], 1)] & 0xff] ^
-                Td3[Te4[GETBYTE(rk[0], 0)] & 0xff];
+                Td0[Te1[GETBYTE(rk[0], 3)] & 0xff] ^
+                Td1[Te1[GETBYTE(rk[0], 2)] & 0xff] ^
+                Td2[Te1[GETBYTE(rk[0], 1)] & 0xff] ^
+                Td3[Te1[GETBYTE(rk[0], 0)] & 0xff];
             rk[1] =
-                Td0[Te4[GETBYTE(rk[1], 3)] & 0xff] ^
-                Td1[Te4[GETBYTE(rk[1], 2)] & 0xff] ^
-                Td2[Te4[GETBYTE(rk[1], 1)] & 0xff] ^
-                Td3[Te4[GETBYTE(rk[1], 0)] & 0xff];
+                Td0[Te1[GETBYTE(rk[1], 3)] & 0xff] ^
+                Td1[Te1[GETBYTE(rk[1], 2)] & 0xff] ^
+                Td2[Te1[GETBYTE(rk[1], 1)] & 0xff] ^
+                Td3[Te1[GETBYTE(rk[1], 0)] & 0xff];
             rk[2] =
-                Td0[Te4[GETBYTE(rk[2], 3)] & 0xff] ^
-                Td1[Te4[GETBYTE(rk[2], 2)] & 0xff] ^
-                Td2[Te4[GETBYTE(rk[2], 1)] & 0xff] ^
-                Td3[Te4[GETBYTE(rk[2], 0)] & 0xff];
+                Td0[Te1[GETBYTE(rk[2], 3)] & 0xff] ^
+                Td1[Te1[GETBYTE(rk[2], 2)] & 0xff] ^
+                Td2[Te1[GETBYTE(rk[2], 1)] & 0xff] ^
+                Td3[Te1[GETBYTE(rk[2], 0)] & 0xff];
             rk[3] =
-                Td0[Te4[GETBYTE(rk[3], 3)] & 0xff] ^
-                Td1[Te4[GETBYTE(rk[3], 2)] & 0xff] ^
-                Td2[Te4[GETBYTE(rk[3], 1)] & 0xff] ^
-                Td3[Te4[GETBYTE(rk[3], 0)] & 0xff];
+                Td0[Te1[GETBYTE(rk[3], 3)] & 0xff] ^
+                Td1[Te1[GETBYTE(rk[3], 2)] & 0xff] ^
+                Td2[Te1[GETBYTE(rk[3], 1)] & 0xff] ^
+                Td3[Te1[GETBYTE(rk[3], 0)] & 0xff];
         }
     }
 }
@@ -244,6 +244,7 @@ void AES::encrypt(const byte* inBlock, const byte* xorBlock,
     s2 ^= rk[2];
     s3 ^= rk[3];
    
+    s0 |= PreFetchTe();
     /*
      * Nr - 1 full rounds:
      */
@@ -312,28 +313,28 @@ void AES::encrypt(const byte* inBlock, const byte* xorBlock,
      */
 
     s0 =
-        (Te4[GETBYTE(t0, 3)] & 0xff000000) ^
-        (Te4[GETBYTE(t1, 2)] & 0x00ff0000) ^
-        (Te4[GETBYTE(t2, 1)] & 0x0000ff00) ^
-        (Te4[GETBYTE(t3, 0)] & 0x000000ff) ^
+        (Te2[GETBYTE(t0, 3)] & 0xff000000) ^
+        (Te3[GETBYTE(t1, 2)] & 0x00ff0000) ^
+        (Te0[GETBYTE(t2, 1)] & 0x0000ff00) ^
+        (Te1[GETBYTE(t3, 0)] & 0x000000ff) ^
         rk[0];
     s1 =
-        (Te4[GETBYTE(t1, 3)] & 0xff000000) ^
-        (Te4[GETBYTE(t2, 2)] & 0x00ff0000) ^
-        (Te4[GETBYTE(t3, 1)] & 0x0000ff00) ^
-        (Te4[GETBYTE(t0, 0)] & 0x000000ff) ^
+        (Te2[GETBYTE(t1, 3)] & 0xff000000) ^
+        (Te3[GETBYTE(t2, 2)] & 0x00ff0000) ^
+        (Te0[GETBYTE(t3, 1)] & 0x0000ff00) ^
+        (Te1[GETBYTE(t0, 0)] & 0x000000ff) ^
         rk[1];
     s2 =
-        (Te4[GETBYTE(t2, 3)] & 0xff000000) ^
-        (Te4[GETBYTE(t3, 2)] & 0x00ff0000) ^
-        (Te4[GETBYTE(t0, 1)] & 0x0000ff00) ^
-        (Te4[GETBYTE(t1, 0)] & 0x000000ff) ^
+        (Te2[GETBYTE(t2, 3)] & 0xff000000) ^
+        (Te3[GETBYTE(t3, 2)] & 0x00ff0000) ^
+        (Te0[GETBYTE(t0, 1)] & 0x0000ff00) ^
+        (Te1[GETBYTE(t1, 0)] & 0x000000ff) ^
         rk[2];
     s3 =
-        (Te4[GETBYTE(t3, 3)] & 0xff000000) ^
-        (Te4[GETBYTE(t0, 2)] & 0x00ff0000) ^
-        (Te4[GETBYTE(t1, 1)] & 0x0000ff00) ^
-        (Te4[GETBYTE(t2, 0)] & 0x000000ff) ^
+        (Te2[GETBYTE(t3, 3)] & 0xff000000) ^
+        (Te3[GETBYTE(t0, 2)] & 0x00ff0000) ^
+        (Te0[GETBYTE(t1, 1)] & 0x0000ff00) ^
+        (Te1[GETBYTE(t2, 0)] & 0x000000ff) ^
         rk[3];
 
 
@@ -358,6 +359,8 @@ void AES::decrypt(const byte* inBlock, const byte* xorBlock,
     s2 ^= rk[2];
     s3 ^= rk[3];
 
+    s0 |= PreFetchTd();
+
     /*
      * Nr - 1 full rounds:
      */
@@ -423,29 +426,32 @@ void AES::decrypt(const byte* inBlock, const byte* xorBlock,
      * apply last round and
      * map cipher state to byte array block:
      */
+
+    t0 |= PreFetchCTd4();
+
     s0 =
-        (Td4[GETBYTE(t0, 3)] & 0xff000000) ^
-        (Td4[GETBYTE(t3, 2)] & 0x00ff0000) ^
-        (Td4[GETBYTE(t2, 1)] & 0x0000ff00) ^
-        (Td4[GETBYTE(t1, 0)] & 0x000000ff) ^
+        ((word32)CTd4[GETBYTE(t0, 3)] << 24) ^
+        ((word32)CTd4[GETBYTE(t3, 2)] << 16) ^
+        ((word32)CTd4[GETBYTE(t2, 1)] <<  8) ^
+        ((word32)CTd4[GETBYTE(t1, 0)]) ^
         rk[0];
     s1 =
-        (Td4[GETBYTE(t1, 3)] & 0xff000000) ^
-        (Td4[GETBYTE(t0, 2)] & 0x00ff0000) ^
-        (Td4[GETBYTE(t3, 1)] & 0x0000ff00) ^
-        (Td4[GETBYTE(t2, 0)] & 0x000000ff) ^
+        ((word32)CTd4[GETBYTE(t1, 3)]  << 24) ^
+        ((word32)CTd4[GETBYTE(t0, 2)]  << 16) ^
+        ((word32)CTd4[GETBYTE(t3, 1)]  <<  8) ^
+        ((word32)CTd4[GETBYTE(t2, 0)]) ^
         rk[1];
     s2 =
-        (Td4[GETBYTE(t2, 3)] & 0xff000000) ^
-        (Td4[GETBYTE(t1, 2)] & 0x00ff0000) ^
-        (Td4[GETBYTE(t0, 1)] & 0x0000ff00) ^
-        (Td4[GETBYTE(t3, 0)] & 0x000000ff) ^
+        ((word32)CTd4[GETBYTE(t2, 3)] << 24  ) ^
+        ((word32)CTd4[GETBYTE(t1, 2)] << 16 ) ^
+        ((word32)CTd4[GETBYTE(t0, 1)] <<  8 ) ^
+        ((word32)CTd4[GETBYTE(t3, 0)]) ^
         rk[2];
     s3 =
-        (Td4[GETBYTE(t3, 3)] & 0xff000000) ^
-        (Td4[GETBYTE(t2, 2)] & 0x00ff0000) ^
-        (Td4[GETBYTE(t1, 1)] & 0x0000ff00) ^
-        (Td4[GETBYTE(t0, 0)] & 0x000000ff) ^
+        ((word32)CTd4[GETBYTE(t3, 3)] << 24) ^
+        ((word32)CTd4[GETBYTE(t2, 2)] << 16) ^
+        ((word32)CTd4[GETBYTE(t1, 1)] <<  8) ^
+        ((word32)CTd4[GETBYTE(t0, 0)]) ^
         rk[3];
 
     gpBlock::Put(xorBlock, outBlock)(s0)(s1)(s2)(s3);
@@ -1826,18 +1832,52 @@ const word32 AES::Td[5][256] = {
 }
 };
 
+const byte AES::CTd4[256] =
+{
+    0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
+    0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
+    0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
+    0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+    0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
+    0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
+    0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
+    0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
+    0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
+    0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
+    0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
+    0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
+    0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+    0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
+    0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
+    0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
+    0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
+    0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
+    0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
+    0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
+    0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
+    0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
+    0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
+    0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
+    0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
+    0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
+    0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
+    0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
+    0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+    0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
+    0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
+    0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
+};
+
 
 const word32* AES::Te0 = AES::Te[0];
 const word32* AES::Te1 = AES::Te[1];
 const word32* AES::Te2 = AES::Te[2];
 const word32* AES::Te3 = AES::Te[3];
-const word32* AES::Te4 = AES::Te[4];
 
 const word32* AES::Td0 = AES::Td[0];
 const word32* AES::Td1 = AES::Td[1];
 const word32* AES::Td2 = AES::Td[2];
 const word32* AES::Td3 = AES::Td[3];
-const word32* AES::Td4 = AES::Td[4];
 
 
 
diff --git a/extra/yassl/taocrypt/src/asn.cpp b/extra/yassl/taocrypt/src/asn.cpp
index 0474e7c21d596..80bcd612d27fc 100644
--- a/extra/yassl/taocrypt/src/asn.cpp
+++ b/extra/yassl/taocrypt/src/asn.cpp
@@ -1219,17 +1219,17 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz)
     }
     word32 rLen = GetLength(source);
     if (rLen != 20) {
-        if (rLen == 21) {       // zero at front, eat
+        while (rLen > 20 && source.remaining() > 0) {  // zeros at front, eat
             source.next();
             --rLen;
         }
-        else if (rLen == 19) {  // add zero to front so 20 bytes
+        if (rLen < 20) { // add zeros to front so 20 bytes
+            word32 tmpLen = rLen;
+            while (tmpLen < 20) {
             decoded[0] = 0;
             decoded++;
+                tmpLen++;
         }
-        else {
-            source.SetError(DSA_SZ_E);
-            return 0;
         }
     }
     memcpy(decoded, source.get_buffer() + source.get_index(), rLen);
@@ -1242,17 +1242,17 @@ word32 DecodeDSA_Signature(byte* decoded, const byte* encoded, word32 sz)
     }
     word32 sLen = GetLength(source);
     if (sLen != 20) {
-        if (sLen == 21) {
-            source.next();          // zero at front, eat
+        while (sLen > 20 && source.remaining() > 0) {
+            source.next();          // zeros at front, eat
             --sLen;
         }
-        else if (sLen == 19) {
-            decoded[rLen] = 0;      // add zero to front so 20 bytes
+        if (sLen < 20) { // add zeros to front so 20 bytes
+            word32 tmpLen = sLen;
+            while (tmpLen < 20) {
+                decoded[rLen] = 0;
             decoded++;
+                tmpLen++;
         }
-        else {
-            source.SetError(DSA_SZ_E);
-            return 0;
         }
     }
     memcpy(decoded + rLen, source.get_buffer() + source.get_index(), sLen);
diff --git a/extra/yassl/taocrypt/src/dsa.cpp b/extra/yassl/taocrypt/src/dsa.cpp
index 72221441b2bdd..fda01881df5b2 100644
--- a/extra/yassl/taocrypt/src/dsa.cpp
+++ b/extra/yassl/taocrypt/src/dsa.cpp
@@ -172,6 +172,7 @@ word32 DSA_Signer::Sign(const byte* sha_digest, byte* sig,
     const Integer& q = key_.GetSubGroupOrder();
     const Integer& g = key_.GetSubGroupGenerator();
     const Integer& x = key_.GetPrivatePart();
+    byte* tmpPtr = sig;  // initial signature output
 
     Integer k(rng, 1, q - 1);
 
@@ -187,22 +188,23 @@ word32 DSA_Signer::Sign(const byte* sha_digest, byte* sig,
       return (word32) -1;
 
     int rSz = r_.ByteCount();
+    int tmpSz = rSz;
 
-    if (rSz == 19) {
-        sig[0] = 0;
-        sig++;
+    while (tmpSz++ < SHA::DIGEST_SIZE) {
+        *sig++ = 0;
     }
     
     r_.Encode(sig,  rSz);
 
+    sig = tmpPtr + SHA::DIGEST_SIZE;  // advance sig output to s
     int sSz = s_.ByteCount();
+    tmpSz = sSz;
 
-    if (sSz == 19) {
-        sig[rSz] = 0;
-        sig++;
+    while (tmpSz++ < SHA::DIGEST_SIZE) {
+        *sig++ = 0;
     }
 
-    s_.Encode(sig + rSz, sSz);
+    s_.Encode(sig, sSz);
 
     return 40;
 }
diff --git a/extra/yassl/taocrypt/src/integer.cpp b/extra/yassl/taocrypt/src/integer.cpp
index fb8d9276bd9f8..dd8425396eda2 100644
--- a/extra/yassl/taocrypt/src/integer.cpp
+++ b/extra/yassl/taocrypt/src/integer.cpp
@@ -193,8 +193,9 @@ DWord() {}
                 "a" (a), "rm" (b) : "cc");
 
         #elif defined(__mips64)
-            __asm__("dmultu %2,%3" : "=d" (r.halfs_.high), "=l" (r.halfs_.low)
-                : "r" (a), "r" (b));
+            unsigned __int128 t = (unsigned __int128) a * b;
+            r.halfs_.high = t >> 64;
+            r.halfs_.low = (word) t;
 
         #elif defined(_M_IX86)
             // for testing
diff --git a/extra/yassl/taocrypt/test/test.cpp b/extra/yassl/taocrypt/test/test.cpp
index c23d981924d8a..b07a9eb9f29d5 100644
--- a/extra/yassl/taocrypt/test/test.cpp
+++ b/extra/yassl/taocrypt/test/test.cpp
@@ -1281,6 +1281,9 @@ int dsa_test()
     if (!verifier.Verify(digest, decoded))
         return -90;
 
+    if (!verifier.Verify(digest, signature))
+        return -91;
+
     return 0;
 }
 
diff --git a/extra/yassl/testsuite/test.hpp b/extra/yassl/testsuite/test.hpp
index 5374edd0e2ad2..a65a212cf995d 100644
--- a/extra/yassl/testsuite/test.hpp
+++ b/extra/yassl/testsuite/test.hpp
@@ -22,7 +22,6 @@
 #define yaSSL_TEST_HPP
 
 #include "runtime.hpp"
-#include "openssl/ssl.h"   /* openssl compatibility test */
 #include "error.hpp"
 #include <stdio.h>
 #include <stdlib.h>
@@ -56,6 +55,7 @@
 #endif
     #define SOCKET_T int
 #endif /* _WIN32 */
+#include "openssl/ssl.h"   /* openssl compatibility test */
 
 
 #ifdef _MSC_VER
diff --git a/include/byte_order_generic_x86.h b/include/byte_order_generic_x86.h
index 0a71a17829b70..a97dd0f43a37f 100644
--- a/include/byte_order_generic_x86.h
+++ b/include/byte_order_generic_x86.h
@@ -27,19 +27,9 @@
 				  ((uint32) (uchar) (A)[0])))
 #define sint4korr(A)	(*((const long *) (A)))
 #define uint2korr(A)	(*((const uint16 *) (A)))
-
-/*
-  Attention: Please, note, uint3korr reads 4 bytes (not 3)!
-  It means, that you have to provide enough allocated space.
-*/
-#if defined(HAVE_valgrind) && !defined(_WIN32)
 #define uint3korr(A)	(uint32) (((uint32) ((uchar) (A)[0])) +\
 				  (((uint32) ((uchar) (A)[1])) << 8) +\
 				  (((uint32) ((uchar) (A)[2])) << 16))
-#else
-#define uint3korr(A)	(long) (*((const unsigned int *) (A)) & 0xFFFFFF)
-#endif
-
 #define uint4korr(A)	(*((const uint32 *) (A)))
 #define uint5korr(A)	((ulonglong)(((uint32) ((uchar) (A)[0])) +\
 				    (((uint32) ((uchar) (A)[1])) << 8) +\
diff --git a/include/byte_order_generic_x86_64.h b/include/byte_order_generic_x86_64.h
index b6b0c5d8ea582..8c7493965a996 100644
--- a/include/byte_order_generic_x86_64.h
+++ b/include/byte_order_generic_x86_64.h
@@ -27,17 +27,9 @@
 				  ((uint32) (uchar) (A)[0])))
 #define sint4korr(A)	(int32)  (*((int32 *) (A)))
 #define uint2korr(A)	(uint16) (*((uint16 *) (A)))
-/*
-  Attention: Please, note, uint3korr reads 4 bytes (not 3)!
-  It means, that you have to provide enough allocated space.
-*/
-#if defined(HAVE_valgrind) && !defined(_WIN32)
 #define uint3korr(A)	(uint32) (((uint32) ((uchar) (A)[0])) +\
 				  (((uint32) ((uchar) (A)[1])) << 8) +\
 				  (((uint32) ((uchar) (A)[2])) << 16))
-#else
-#define uint3korr(A)	(uint32) (*((unsigned int *) (A)) & 0xFFFFFF)
-#endif
 #define uint4korr(A)	(uint32) (*((uint32 *) (A)))
 #define uint5korr(A)	((ulonglong)(((uint32) ((uchar) (A)[0])) +\
 				    (((uint32) ((uchar) (A)[1])) << 8) +\
diff --git a/include/my_global.h b/include/my_global.h
index 191e08d9218ca..6222467901e67 100644
--- a/include/my_global.h
+++ b/include/my_global.h
@@ -888,8 +888,7 @@ typedef long long	my_ptrdiff_t;
   and related routines are refactored.
 */
 
-#define my_offsetof(TYPE, MEMBER) \
-        ((size_t)((char *)&(((TYPE *)0x10)->MEMBER) - (char*)0x10))
+#define my_offsetof(TYPE, MEMBER) PTR_BYTE_DIFF(&((TYPE *)0x10)->MEMBER, 0x10)
 
 #define NullS		(char *) 0
 
diff --git a/include/my_sys.h b/include/my_sys.h
index a0b7f4cc554e0..f6bf57e50a4e8 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -1,5 +1,5 @@
 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
-   Copyright (c) 2010, 2013, Monty Program Ab.
+   Copyright (c) 2010, 2016, Monty Program Ab.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -271,7 +271,7 @@ extern my_bool my_use_symdir;
 extern ulong	my_default_record_cache_size;
 extern my_bool  my_disable_locking, my_disable_async_io,
                 my_disable_flush_key_blocks, my_disable_symlinks;
-extern my_bool my_disable_sync;
+extern my_bool my_disable_sync, my_disable_copystat_in_redel;
 extern char	wild_many,wild_one,wild_prefix;
 extern const char *charsets_dir;
 extern my_bool timed_mutexes;
diff --git a/libmysql/libmysql.c b/libmysql/libmysql.c
index 446f1da0b0c7d..3a08ea26b1d57 100644
--- a/libmysql/libmysql.c
+++ b/libmysql/libmysql.c
@@ -450,8 +450,9 @@ void read_user_name(char *name)
 
 void read_user_name(char *name)
 {
-  char *str=getenv("USER");		/* ODBC will send user variable */
-  strmake(name,str ? str : "ODBC", USERNAME_LENGTH);
+  DWORD len= USERNAME_LENGTH;
+  if (!GetUserName(name, &len))
+    strmov(name,"UNKNOWN_USER");
 }
 
 #endif
diff --git a/mysql-test/extra/binlog_tests/database.test b/mysql-test/extra/binlog_tests/database.test
index 82e8b3963570d..17f8e069fa3ce 100644
--- a/mysql-test/extra/binlog_tests/database.test
+++ b/mysql-test/extra/binlog_tests/database.test
@@ -52,7 +52,7 @@ eval SELECT 'hello' INTO OUTFILE 'fake_file.$prefix';
 
 # Use '/' instead of '\' in the error message. On windows platform, dir is
 # formed with '\'.
---replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /File exists/Directory not empty/
+--replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /93/39/ /17/39/ /247/39/ /File exists/Directory not empty/
 --error 1010
 DROP DATABASE testing_1;
 let $wait_binlog_event= DROP TABLE IF EXIST;
diff --git a/mysql-test/include/index_merge2.inc b/mysql-test/include/index_merge2.inc
index c50a45a9923d9..03afa49d323f0 100644
--- a/mysql-test/include/index_merge2.inc
+++ b/mysql-test/include/index_merge2.inc
@@ -341,6 +341,7 @@ while ($1)
 alter table t1 add index i2(key2);
 alter table t1 add index i3(key3);
 update t1 set key2=key1,key3=key1;
+analyze table t1;
 
 # to test the bug, the following must use "sort_union":
 --replace_column 9 REF
diff --git a/mysql-test/include/search_pattern_in_file.inc b/mysql-test/include/search_pattern_in_file.inc
index 0d09cdcd36efb..84237026ed06e 100644
--- a/mysql-test/include/search_pattern_in_file.inc
+++ b/mysql-test/include/search_pattern_in_file.inc
@@ -60,12 +60,12 @@
 
 perl;
     use strict;
-    my $search_file=           $ENV{'SEARCH_FILE'}           or die "SEARCH_FILE not set";
-    my $search_pattern=        $ENV{'SEARCH_PATTERN'}        or die "SEARCH_PATTERN not set";
-    my $search_range=          $ENV{'SEARCH_RANGE'};
+    my $search_file=    $ENV{'SEARCH_FILE'}    or die "SEARCH_FILE not set";
+    my $search_pattern= $ENV{'SEARCH_PATTERN'} or die "SEARCH_PATTERN not set";
+    my $search_range=   $ENV{'SEARCH_RANGE'};
     my $file_content;
     $search_range= 50000 unless $search_range =~ /-?[0-9]+/;
-    open(FILE, "$search_file") or die("Unable to open '$search_file': $!\n");
+    open(FILE, '<', $search_file) or die("Unable to open '$search_file': $!\n");
     if ($search_range >= 0) {
        read(FILE, $file_content, $search_range, 0);
     } else {
@@ -75,7 +75,10 @@ perl;
        read(FILE, $file_content, -$search_range, 0);
     }
     close(FILE);
-    if ( not $file_content =~ m{$search_pattern} ) {
-       die("# ERROR: The file '$search_file' does not contain the expected pattern  $search_pattern\n->$file_content<-\n");
+    $search_file =~ s{^.*?([^/\\]+)$}{$1};
+    if ($file_content =~ m{$search_pattern}) {
+      print "FOUND /$search_pattern/ in $search_file\n"
+    } else {
+      print "NOT FOUND /$search_pattern/ in $search_file\n"
     }
 EOF
diff --git a/mysql-test/lib/My/CoreDump.pm b/mysql-test/lib/My/CoreDump.pm
index 0e90967ef9545..f9f7b3d8d4b23 100644
--- a/mysql-test/lib/My/CoreDump.pm
+++ b/mysql-test/lib/My/CoreDump.pm
@@ -261,11 +261,7 @@ sub show {
   # On Windows, rely on cdb to be there...
   if (IS_WINDOWS)
   {
-    # Starting cdb is unsafe when used with --parallel > 1 option 
-    if ( $parallel < 2 )
-    {
-      _cdb($core_name);
-    }
+    _cdb($core_name);
     return;
   }
   
diff --git a/mysql-test/lib/My/Platform.pm b/mysql-test/lib/My/Platform.pm
index 1776f1008daa6..110cf8a20e0ed 100644
--- a/mysql-test/lib/My/Platform.pm
+++ b/mysql-test/lib/My/Platform.pm
@@ -24,7 +24,7 @@ use File::Path;
 use base qw(Exporter);
 our @EXPORT= qw(IS_CYGWIN IS_WINDOWS IS_WIN32PERL
 		native_path posix_path mixed_path
-                check_socket_path_length process_alive);
+                check_socket_path_length process_alive open_for_append);
 
 BEGIN {
   if ($^O eq "cygwin") {
@@ -161,4 +161,51 @@ sub process_alive {
 }
 
 
+
+use Symbol qw( gensym );
+
+use if $^O eq 'MSWin32', 'Win32API::File', qw( CloseHandle CreateFile GetOsFHandle OsFHandleOpen  OPEN_ALWAYS FILE_APPEND_DATA 
+  FILE_SHARE_READ FILE_SHARE_WRITE FILE_SHARE_DELETE );
+use if $^O eq 'MSWin32', 'Win32::API';
+
+use constant WIN32API_FILE_NULL => [];
+
+# Open a file for append.
+# On Windows we use CreateFile with FILE_APPEND_DATA
+# to ensure that writes are atomic, not interleaved
+# with writes by other processes.
+sub open_for_append
+{
+  my ($file) = @_;
+  my $fh = gensym();
+
+  if (IS_WIN32PERL)
+  {
+    my $handle;
+    if (!($handle = CreateFile(
+        $file,
+        FILE_APPEND_DATA(),
+        FILE_SHARE_READ()|FILE_SHARE_WRITE()|FILE_SHARE_DELETE(),
+        WIN32API_FILE_NULL,
+        OPEN_ALWAYS(),# Create if doesn't exist.
+        0,
+        WIN32API_FILE_NULL,
+      )))
+    {
+      return undef;
+    }
+
+    if (!OsFHandleOpen($fh, $handle, 'wat'))
+    {
+      CloseHandle($handle);
+      return undef;
+    }
+    return $fh;
+  }
+  
+  open($fh,">>",$file) or return undef;
+  return $fh;
+}
+
+
 1;
diff --git a/mysql-test/lib/mtr_cases.pm b/mysql-test/lib/mtr_cases.pm
index 124aff92895ed..5ec7553674c00 100644
--- a/mysql-test/lib/mtr_cases.pm
+++ b/mysql-test/lib/mtr_cases.pm
@@ -60,8 +60,6 @@ use My::Test;
 use My::Find;
 use My::Suite;
 
-require "mtr_misc.pl";
-
 # locate plugin suites, depending on whether it's a build tree or installed
 my @plugin_suitedirs;
 my $plugin_suitedir_regex;
@@ -1122,7 +1120,7 @@ sub get_tags_from_file($$) {
   $file_to_tags{$file}= $tags;
   $file_to_master_opts{$file}= $master_opts;
   $file_to_slave_opts{$file}= $slave_opts;
-  $file_combinations{$file}= [ uniq(@combinations) ];
+  $file_combinations{$file}= [ ::uniq(@combinations) ];
   $file_in_overlay{$file} = 1 if $in_overlay;
   return @{$tags};
 }
diff --git a/mysql-test/lib/mtr_io.pl b/mysql-test/lib/mtr_io.pl
index 8c2803f042794..0de4d9612acee 100644
--- a/mysql-test/lib/mtr_io.pl
+++ b/mysql-test/lib/mtr_io.pl
@@ -21,6 +21,7 @@
 
 use strict;
 use Carp;
+use My::Platform;
 
 sub mtr_fromfile ($);
 sub mtr_tofile ($@);
@@ -45,10 +46,10 @@ ($)
 
 sub mtr_tofile ($@) {
   my $file=  shift;
-
-  open(FILE,">>",$file) or mtr_error("can't open file \"$file\": $!");
-  print FILE join("", @_);
-  close FILE;
+  my $fh= open_for_append $file;
+  mtr_error("can't open file \"$file\": $!") unless defined($fh);
+  print $fh join("", @_);
+  close $fh;
 }
 
 
diff --git a/mysql-test/lib/mtr_report.pm b/mysql-test/lib/mtr_report.pm
index 9ab82c454ed49..97ace54f0fbcc 100644
--- a/mysql-test/lib/mtr_report.pm
+++ b/mysql-test/lib/mtr_report.pm
@@ -34,7 +34,6 @@ use mtr_match;
 use My::Platform;
 use POSIX qw[ _exit ];
 use IO::Handle qw[ flush ];
-require "mtr_io.pl";
 use mtr_results;
 
 my $tot_real_time= 0;
@@ -92,7 +91,7 @@ sub mtr_report_test_passed ($) {
   my $timer_str=  "";
   if ( $timer and -f "$::opt_vardir/log/timer" )
   {
-    $timer_str= mtr_fromfile("$::opt_vardir/log/timer");
+    $timer_str= ::mtr_fromfile("$::opt_vardir/log/timer");
     $tinfo->{timer}= $timer_str;
     resfile_test_info('duration', $timer_str) if $::opt_resfile;
   }
diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl
index 752b62fb915bc..b6263415d9d5f 100755
--- a/mysql-test/mysql-test-run.pl
+++ b/mysql-test/mysql-test-run.pl
@@ -102,11 +102,11 @@ BEGIN
 use IO::Socket::INET;
 use IO::Select;
 
-require "lib/mtr_process.pl";
-require "lib/mtr_io.pl";
-require "lib/mtr_gcov.pl";
-require "lib/mtr_gprof.pl";
-require "lib/mtr_misc.pl";
+require "mtr_process.pl";
+require "mtr_io.pl";
+require "mtr_gcov.pl";
+require "mtr_gprof.pl";
+require "mtr_misc.pl";
 
 $SIG{INT}= sub { mtr_error("Got ^C signal"); };
 $SIG{HUP}= sub { mtr_error("Hangup detected on controlling terminal"); };
diff --git a/mysql-test/r/alter_table.result b/mysql-test/r/alter_table.result
index e572fdb197cc8..2e371ac6ae6e3 100644
--- a/mysql-test/r/alter_table.result
+++ b/mysql-test/r/alter_table.result
@@ -2021,3 +2021,58 @@ ALTER TABLE t1 ADD PRIMARY KEY IF NOT EXISTS event_id (event_id,market_id);
 Warnings:
 Note	1061	Multiple primary key defined
 DROP TABLE t1;
+#
+# MDEV-11126 Crash while altering persistent virtual column
+#
+CREATE TABLE `tab1` (
+`id` bigint(20) NOT NULL AUTO_INCREMENT,
+`field2` set('option1','option2','option3','option4') NOT NULL,
+`field3` set('option1','option2','option3','option4','option5') NOT NULL,
+`field4` set('option1','option2','option3','option4') NOT NULL,
+`field5` varchar(32) NOT NULL,
+`field6` varchar(32) NOT NULL,
+`field7` varchar(32) NOT NULL,
+`field8` varchar(32) NOT NULL,
+`field9` int(11) NOT NULL DEFAULT '1',
+`field10` varchar(16) NOT NULL,
+`field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1',
+`v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT,
+PRIMARY KEY (`id`)
+) DEFAULT CHARSET=latin1;
+ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128);
+SHOW CREATE TABLE `tab1`;
+Table	Create Table
+tab1	CREATE TABLE `tab1` (
+  `id` bigint(20) NOT NULL AUTO_INCREMENT,
+  `field2` set('option1','option2','option3','option4') NOT NULL,
+  `field3` set('option1','option2','option3','option4','option5') NOT NULL,
+  `field4` set('option1','option2','option3','option4') NOT NULL,
+  `field5` varchar(32) NOT NULL,
+  `field6` varchar(32) NOT NULL,
+  `field7` varchar(32) NOT NULL,
+  `field8` varchar(32) NOT NULL,
+  `field9` int(11) NOT NULL DEFAULT '1',
+  `field10` varchar(16) NOT NULL,
+  `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1',
+  `v_col` varchar(128) DEFAULT NULL,
+  PRIMARY KEY (`id`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT;
+SHOW CREATE TABLE `tab1`;
+Table	Create Table
+tab1	CREATE TABLE `tab1` (
+  `id` bigint(20) NOT NULL AUTO_INCREMENT,
+  `field2` set('option1','option2','option3','option4') NOT NULL,
+  `field3` set('option1','option2','option3','option4','option5') NOT NULL,
+  `field4` set('option1','option2','option3','option4') NOT NULL,
+  `field5` varchar(32) NOT NULL,
+  `field6` varchar(32) NOT NULL,
+  `field7` varchar(32) NOT NULL,
+  `field8` varchar(32) NOT NULL,
+  `field9` int(11) NOT NULL DEFAULT '1',
+  `field10` varchar(16) NOT NULL,
+  `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1',
+  `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT,
+  PRIMARY KEY (`id`)
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+DROP TABLE `tab1`;
diff --git a/mysql-test/r/contributors.result b/mysql-test/r/contributors.result
index 918ceaa496fe1..f3f5e227d3a98 100644
--- a/mysql-test/r/contributors.result
+++ b/mysql-test/r/contributors.result
@@ -9,6 +9,7 @@ Acronis	http://www.acronis.com	Silver Sponsor of the MariaDB Foundation
 Auttomattic	https://automattic.com	Bronze Sponsor of the MariaDB Foundation
 Verkkokauppa.com	https://virtuozzo.com	Bronze Sponsor of the MariaDB Foundation
 Virtuozzo	https://virtuozzo.com/	Bronze Sponsor of the MariaDB Foundation
+Tencent Game DBA	http://tencentdba.com/about/	Bronze Sponsor of the MariaDB Foundation
 Google	USA	Sponsoring encryption, parallel replication and GTID
 Facebook	USA	Sponsoring non-blocking API, LIMIT ROWS EXAMINED etc
 Ronald Bradford	Brisbane, Australia	EFF contribution for UC2006 Auction
diff --git a/mysql-test/r/create_or_replace.result b/mysql-test/r/create_or_replace.result
index 3a894e9fcb140..a43dc2eaca447 100644
--- a/mysql-test/r/create_or_replace.result
+++ b/mysql-test/r/create_or_replace.result
@@ -442,3 +442,14 @@ KILL QUERY con_id;
 ERROR 70100: Query execution was interrupted
 drop table t1;
 DROP TABLE t2;
+#
+# MDEV-10824 - Crash in CREATE OR REPLACE TABLE t1 AS SELECT spfunc()
+#
+CREATE TABLE t1(a INT);
+CREATE FUNCTION f1() RETURNS VARCHAR(16383) RETURN 'test';
+CREATE OR REPLACE TABLE t1 AS SELECT f1();
+LOCK TABLE t1 WRITE;
+CREATE OR REPLACE TABLE t1 AS SELECT f1();
+UNLOCK TABLES;
+DROP FUNCTION f1;
+DROP TABLE t1;
diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result
index 0ec89a50c0f1a..3269e6c900eba 100644
--- a/mysql-test/r/ctype_utf32.result
+++ b/mysql-test/r/ctype_utf32.result
@@ -1658,6 +1658,9 @@ CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061))
 SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));
 CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061))
 1
+select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
+c
+FFFF0000
 #
 # End of 5.5 tests
 #
diff --git a/mysql-test/r/drop.result b/mysql-test/r/drop.result
index c23ffbe327b63..c25ae9e305517 100644
--- a/mysql-test/r/drop.result
+++ b/mysql-test/r/drop.result
@@ -209,3 +209,9 @@ INSERT INTO table1 VALUES (1);
 ERROR 42S02: Unknown table 't.notable'
 DROP TABLE table1,table2;
 # End BUG#34750
+#
+# MDEV-11105 Table named 'db' has weird side effect.
+#
+CREATE DATABASE mysqltest;
+CREATE TABLE mysqltest.db(id INT);
+DROP DATABASE mysqltest;
diff --git a/mysql-test/r/group_min_max_innodb.result b/mysql-test/r/group_min_max_innodb.result
index 77c74fbc041f9..2803107b97ed6 100644
--- a/mysql-test/r/group_min_max_innodb.result
+++ b/mysql-test/r/group_min_max_innodb.result
@@ -286,3 +286,19 @@ F	28	28
 F	29	29
 F	30	30
 DROP TABLE t0,t1,t2;
+#
+# MDEV-MariaDB daemon leaks memory with specific query
+#
+CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL,
+`language_id` int(11) unsigned NOT NULL DEFAULT '1'
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0',
+`serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000));
+SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0;
+translation_resources	serialized_c
+NULL	cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
+NULL	bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+NULL	aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+NULL	aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+drop table t1,t2;
diff --git a/mysql-test/r/index_merge_innodb.result b/mysql-test/r/index_merge_innodb.result
index 5202c79f3c749..5bf56e213abbb 100644
--- a/mysql-test/r/index_merge_innodb.result
+++ b/mysql-test/r/index_merge_innodb.result
@@ -311,6 +311,9 @@ set @d=@d*2;
 alter table t1 add index i2(key2);
 alter table t1 add index i3(key3);
 update t1 set key2=key1,key3=key1;
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
 explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	index_merge	i2,i3	i3,i2	4,4	NULL	REF	Using sort_union(i3,i2); Using where
diff --git a/mysql-test/r/index_merge_myisam.result b/mysql-test/r/index_merge_myisam.result
index fcd5eebefa482..c63ed13266255 100644
--- a/mysql-test/r/index_merge_myisam.result
+++ b/mysql-test/r/index_merge_myisam.result
@@ -1146,6 +1146,9 @@ set @d=@d*2;
 alter table t1 add index i2(key2);
 alter table t1 add index i3(key3);
 update t1 set key2=key1,key3=key1;
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
 explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	index_merge	i2,i3	i3,i2	4,4	NULL	REF	Using sort_union(i3,i2); Using where
diff --git a/mysql-test/r/information_schema.result b/mysql-test/r/information_schema.result
index 9d61850fae674..852b520d1fc4e 100644
--- a/mysql-test/r/information_schema.result
+++ b/mysql-test/r/information_schema.result
@@ -1004,19 +1004,19 @@ show grants;
 Grants for user3@localhost
 GRANT USAGE ON *.* TO 'user3'@'localhost'
 GRANT SELECT ON `mysqltest`.* TO 'user3'@'localhost'
-select * from information_schema.column_privileges where grantee like '%user%'
+select * from information_schema.column_privileges where grantee like '\'user%'
 order by grantee;
 GRANTEE	TABLE_CATALOG	TABLE_SCHEMA	TABLE_NAME	COLUMN_NAME	PRIVILEGE_TYPE	IS_GRANTABLE
 'user1'@'localhost'	def	mysqltest	t1	f1	SELECT	NO
-select * from information_schema.table_privileges where grantee like '%user%'
+select * from information_schema.table_privileges where grantee like '\'user%'
 order by grantee;
 GRANTEE	TABLE_CATALOG	TABLE_SCHEMA	TABLE_NAME	PRIVILEGE_TYPE	IS_GRANTABLE
 'user2'@'localhost'	def	mysqltest	t2	SELECT	NO
-select * from information_schema.schema_privileges where grantee like '%user%'
+select * from information_schema.schema_privileges where grantee like '\'user%'
 order by grantee;
 GRANTEE	TABLE_CATALOG	TABLE_SCHEMA	PRIVILEGE_TYPE	IS_GRANTABLE
 'user3'@'localhost'	def	mysqltest	SELECT	NO
-select * from information_schema.user_privileges where grantee like '%user%'
+select * from information_schema.user_privileges where grantee like '\'user%'
 order by grantee;
 GRANTEE	TABLE_CATALOG	PRIVILEGE_TYPE	IS_GRANTABLE
 'user1'@'localhost'	def	USAGE	NO
diff --git a/mysql-test/r/lowercase_fs_on.result b/mysql-test/r/lowercase_fs_on.result
index a090f46cfbf8b..b844b3f77dde4 100644
--- a/mysql-test/r/lowercase_fs_on.result
+++ b/mysql-test/r/lowercase_fs_on.result
@@ -1,3 +1,4 @@
 #
 # Bug#20198490 : LOWER_CASE_TABLE_NAMES=0 ON WINDOWS LEADS TO PROBLEMS
 #
+FOUND /\[ERROR\] The server option \'lower_case_table_names\' is configured to use case sensitive table names/ in my_restart.err
diff --git a/mysql-test/r/merge.result b/mysql-test/r/merge.result
index 89aaf48219e48..31edbc0fdcee7 100644
--- a/mysql-test/r/merge.result
+++ b/mysql-test/r/merge.result
@@ -3832,6 +3832,23 @@ test.m1	repair	error	Corrupt
 # Clean-up.
 drop tables m1, t1, t4;
 drop view t3;
+#
+# MDEV-10424 - Assertion `ticket == __null' failed in
+#              MDL_request::set_type
+#
+CREATE TABLE t1 (f1 INT) ENGINE=MyISAM;
+CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1);
+PREPARE stmt FROM "ANALYZE TABLE tmerge, t1";
+EXECUTE stmt;
+Table	Op	Msg_type	Msg_text
+test.tmerge	analyze	note	The storage engine for the table doesn't support analyze
+test.t1	analyze	status	Table is already up to date
+EXECUTE stmt;
+Table	Op	Msg_type	Msg_text
+test.tmerge	analyze	note	The storage engine for the table doesn't support analyze
+test.t1	analyze	status	Table is already up to date
+DEALLOCATE PREPARE stmt;
+DROP TABLE t1, tmerge;
 End of 5.5 tests
 #
 # Additional coverage for refactoring which is made as part
diff --git a/mysql-test/r/mysql.result b/mysql-test/r/mysql.result
index cb705d285fe3d..dd0129df0d9fe 100644
--- a/mysql-test/r/mysql.result
+++ b/mysql-test/r/mysql.result
@@ -512,6 +512,14 @@ DROP DATABASE connected_db;
 create database `aa``bb````cc`;
 DATABASE()
 aa`bb``cc
+DATABASE()
+test
+DATABASE()
+aa`bb``cc
+DATABASE()
+test
+DATABASE()
+aa`bb``cc
 drop database `aa``bb````cc`;
 a
 >>\ndelimiter\n<<
diff --git a/mysql-test/r/mysql_not_windows.result b/mysql-test/r/mysql_not_windows.result
index d5670a1a9ca38..1df62d9a12dcf 100644
--- a/mysql-test/r/mysql_not_windows.result
+++ b/mysql-test/r/mysql_not_windows.result
@@ -3,3 +3,9 @@ a
 1
 
 End of tests
+1
+1
+2
+2
+X
+3
diff --git a/mysql-test/r/mysqldump-nl.result b/mysql-test/r/mysqldump-nl.result
new file mode 100644
index 0000000000000..6de439bdf3c61
--- /dev/null
+++ b/mysql-test/r/mysqldump-nl.result
@@ -0,0 +1,126 @@
+create database `mysqltest1
+1tsetlqsym`;
+use `mysqltest1
+1tsetlqsym`;
+create table `t1
+1t` (`foobar
+raboof` int);
+create view `v1
+1v` as select * from `t1
+1t`;
+create procedure sp() select * from `v1
+1v`;
+flush tables;
+use test;
+
+--
+-- Current Database: `mysqltest1
+-- 1tsetlqsym`
+--
+
+/*!40000 DROP DATABASE IF EXISTS `mysqltest1
+1tsetlqsym`*/;
+
+CREATE DATABASE /*!32312 IF NOT EXISTS*/ `mysqltest1
+1tsetlqsym` /*!40100 DEFAULT CHARACTER SET latin1 */;
+
+USE `mysqltest1
+1tsetlqsym`;
+
+--
+-- Table structure for table `t1
+-- 1t`
+--
+
+/*!40101 SET @saved_cs_client     = @@character_set_client */;
+/*!40101 SET character_set_client = utf8 */;
+CREATE TABLE `t1
+1t` (
+  `foobar
+raboof` int(11) DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1;
+/*!40101 SET character_set_client = @saved_cs_client */;
+
+--
+-- Dumping data for table `t1
+-- 1t`
+--
+
+--
+-- Temporary table structure for view `v1
+-- 1v`
+--
+
+SET @saved_cs_client     = @@character_set_client;
+SET character_set_client = utf8;
+/*!50001 CREATE TABLE `v1
+1v` (
+  `foobar
+raboof` tinyint NOT NULL
+) ENGINE=MyISAM */;
+SET character_set_client = @saved_cs_client;
+
+--
+-- Dumping routines for database 'mysqltest1
+-- 1tsetlqsym'
+--
+/*!50003 SET @saved_cs_client      = @@character_set_client */ ;
+/*!50003 SET @saved_cs_results     = @@character_set_results */ ;
+/*!50003 SET @saved_col_connection = @@collation_connection */ ;
+/*!50003 SET character_set_client  = latin1 */ ;
+/*!50003 SET character_set_results = latin1 */ ;
+/*!50003 SET collation_connection  = latin1_swedish_ci */ ;
+/*!50003 SET @saved_sql_mode       = @@sql_mode */ ;
+/*!50003 SET sql_mode              = '' */ ;
+DELIMITER ;;
+CREATE DEFINER=`root`@`localhost` PROCEDURE `sp`()
+select * from `v1
+1v` ;;
+DELIMITER ;
+/*!50003 SET sql_mode              = @saved_sql_mode */ ;
+/*!50003 SET character_set_client  = @saved_cs_client */ ;
+/*!50003 SET character_set_results = @saved_cs_results */ ;
+/*!50003 SET collation_connection  = @saved_col_connection */ ;
+
+--
+-- Current Database: `mysqltest1
+-- 1tsetlqsym`
+--
+
+USE `mysqltest1
+1tsetlqsym`;
+
+--
+-- Final view structure for view `v1
+-- 1v`
+--
+
+/*!50001 DROP TABLE IF EXISTS `v1
+1v`*/;
+/*!50001 SET @saved_cs_client          = @@character_set_client */;
+/*!50001 SET @saved_cs_results         = @@character_set_results */;
+/*!50001 SET @saved_col_connection     = @@collation_connection */;
+/*!50001 SET character_set_client      = latin1 */;
+/*!50001 SET character_set_results     = latin1 */;
+/*!50001 SET collation_connection      = latin1_swedish_ci */;
+/*!50001 CREATE ALGORITHM=UNDEFINED */
+/*!50013 DEFINER=`root`@`localhost` SQL SECURITY DEFINER */
+/*!50001 VIEW `v1
+1v` AS select `t1
+1t`.`foobar
+raboof` AS `foobar
+raboof` from `t1
+1t` */;
+/*!50001 SET character_set_client      = @saved_cs_client */;
+/*!50001 SET character_set_results     = @saved_cs_results */;
+/*!50001 SET collation_connection      = @saved_col_connection */;
+show tables from `mysqltest1
+1tsetlqsym`;
+Tables_in_mysqltest1
+1tsetlqsym
+t1
+1t
+v1
+1v
+drop database `mysqltest1
+1tsetlqsym`;
diff --git a/mysql-test/r/mysqldump.result b/mysql-test/r/mysqldump.result
index b6de51c8b03f2..cb3c28f42cdf2 100644
--- a/mysql-test/r/mysqldump.result
+++ b/mysql-test/r/mysqldump.result
@@ -5236,9 +5236,6 @@ SET @@global.log_output="TABLE";
 SET @@global.general_log='OFF';
 SET @@global.slow_query_log='OFF';
 DROP DATABASE mysql;
-Warnings:
-Error	1146	Table 'mysql.proc' doesn't exist
-Error	1146	Table 'mysql.event' doesn't exist
 SHOW CREATE TABLE mysql.general_log;
 Table	Create Table
 general_log	CREATE TABLE `general_log` (
diff --git a/mysql-test/r/mysqltest.result b/mysql-test/r/mysqltest.result
index e258b1d156fd1..fa054d457f966 100644
--- a/mysql-test/r/mysqltest.result
+++ b/mysql-test/r/mysqltest.result
@@ -269,12 +269,6 @@ source database
 echo message echo message
 
 mysqltest: At line 1: Missing argument in exec
-1
-1
-2
-2
-X
-3
 MySQL
 "MySQL"
 MySQL: The
diff --git a/mysql-test/r/named_pipe.result b/mysql-test/r/named_pipe.result
index ddd48f0ba9166..43fb44beece1d 100644
--- a/mysql-test/r/named_pipe.result
+++ b/mysql-test/r/named_pipe.result
@@ -2154,3 +2154,4 @@ Privat (Private Nutzung)	Mobilfunk
 Warnings:
 Warning	1052	Column 'kundentyp' in group statement is ambiguous
 drop table t1;
+FOUND /\[ERROR\] Create named pipe failed/ in second-mysqld.err
diff --git a/mysql-test/r/ps.result b/mysql-test/r/ps.result
index 517e2d23915fd..eb5c8ca9377ac 100644
--- a/mysql-test/r/ps.result
+++ b/mysql-test/r/ps.result
@@ -4076,4 +4076,35 @@ id	value
 deallocate prepare stmt;
 SET SESSION sql_mode = @save_sql_mode;
 DROP TABLE t1,t2;
-# End of 10.0 tests
+#
+# MDEV-8833: Crash of server on prepared statement with
+# conversion to semi-join
+#
+CREATE TABLE t1 (column1 INT);
+INSERT INTO t1 VALUES (3),(9);
+CREATE TABLE t2 (column2 INT);
+INSERT INTO t2 VALUES (1),(4);
+CREATE TABLE t3 (column3 INT);
+INSERT INTO t3 VALUES (6),(8);
+CREATE TABLE t4 (column4 INT);
+INSERT INTO t4 VALUES (2),(5);
+PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1 
+FROM t1 AS table1
+WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 ) 
+) AS sq
+FROM t3 AS table3, t4 AS table4";
+EXECUTE stmt;
+sq
+NULL
+NULL
+NULL
+NULL
+EXECUTE stmt;
+sq
+NULL
+NULL
+NULL
+NULL
+deallocate prepare stmt;
+drop table t1,t2,t3,t4;
+# End of 5.5 tests
diff --git a/mysql-test/r/selectivity.result b/mysql-test/r/selectivity.result
index 620bdc6bd50c2..8fb5cd17c518a 100644
--- a/mysql-test/r/selectivity.result
+++ b/mysql-test/r/selectivity.result
@@ -1446,3 +1446,74 @@ a	b	i
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
 DROP TABLE t1,t2;
 set use_stat_tables=@save_use_stat_tables;
+#
+# Bug mdev-11096: range condition over column without statistical data
+#
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1 persistent for columns () indexes ();
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+explain extended 
+select * from t1 where col1 > 'b' and col1 < 'e';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	8	100.00	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e'))
+select * from t1 where col1 > 'b' and col1 < 'e';
+col1
+c
+d
+drop table t1;
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+set use_stat_tables=@save_use_stat_tables;
+#
+# Bug mdev-9628: unindexed blob column without min-max statistics 
+#                with optimizer_use_condition_selectivity=3
+#
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+create table t2(col1 text);
+insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t2;
+Table	Op	Msg_type	Msg_text
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	OK
+select * from t1 where col1 > 'b' and col1 < 'd';
+col1
+c
+explain extended 
+select * from t1 where col1 > 'b' and col1 < 'd';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	8	28.57	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'd'))
+select * from t2 where col1 > 'b' and col1 < 'd';
+col1
+c
+explain extended 
+select * from t2 where col1 > 'b' and col1 < 'd';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	8	100.00	Using where
+Warnings:
+Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where ((`test`.`t2`.`col1` > 'b') and (`test`.`t2`.`col1` < 'd'))
+select * from t2 where col1 < 'b' and col1 > 'd';
+col1
+explain extended 
+select * from t2 where col1 < 'b' and col1 > 'd';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Impossible WHERE noticed after reading const tables
+Warnings:
+Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where 0
+drop table t1,t2;
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+set use_stat_tables=@save_use_stat_tables;
diff --git a/mysql-test/r/selectivity_innodb.result b/mysql-test/r/selectivity_innodb.result
index 0acbb465ba852..3d15131dbb55e 100644
--- a/mysql-test/r/selectivity_innodb.result
+++ b/mysql-test/r/selectivity_innodb.result
@@ -802,9 +802,9 @@ insert into t2 values (2),(3);
 explain extended 
 select * from t1 where a in ( select b from t2 ) AND ( a > 3 );
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	1	0.00	Using where
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	1	100.00	Using where
 1	PRIMARY	<subquery2>	eq_ref	distinct_key	distinct_key	4	func	1	100.00	
-2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	2	0.00	
+2	MATERIALIZED	t2	ALL	NULL	NULL	NULL	NULL	2	100.00	
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` > 3))
 select * from t1 where a in ( select b from t2 ) AND ( a > 3 );
@@ -1450,6 +1450,77 @@ a	b	i
 set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
 DROP TABLE t1,t2;
 set use_stat_tables=@save_use_stat_tables;
+#
+# Bug mdev-11096: range condition over column without statistical data
+#
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1 persistent for columns () indexes ();
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+explain extended 
+select * from t1 where col1 > 'b' and col1 < 'e';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	8	100.00	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'e'))
+select * from t1 where col1 > 'b' and col1 < 'e';
+col1
+c
+d
+drop table t1;
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+set use_stat_tables=@save_use_stat_tables;
+#
+# Bug mdev-9628: unindexed blob column without min-max statistics 
+#                with optimizer_use_condition_selectivity=3
+#
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+create table t2(col1 text);
+insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t2;
+Table	Op	Msg_type	Msg_text
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	OK
+select * from t1 where col1 > 'b' and col1 < 'd';
+col1
+c
+explain extended 
+select * from t1 where col1 > 'b' and col1 < 'd';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	8	28.57	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`col1` AS `col1` from `test`.`t1` where ((`test`.`t1`.`col1` > 'b') and (`test`.`t1`.`col1` < 'd'))
+select * from t2 where col1 > 'b' and col1 < 'd';
+col1
+c
+explain extended 
+select * from t2 where col1 > 'b' and col1 < 'd';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	8	100.00	Using where
+Warnings:
+Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where ((`test`.`t2`.`col1` > 'b') and (`test`.`t2`.`col1` < 'd'))
+select * from t2 where col1 < 'b' and col1 > 'd';
+col1
+explain extended 
+select * from t2 where col1 < 'b' and col1 > 'd';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Impossible WHERE noticed after reading const tables
+Warnings:
+Note	1003	select `test`.`t2`.`col1` AS `col1` from `test`.`t2` where 0
+drop table t1,t2;
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+set use_stat_tables=@save_use_stat_tables;
 set optimizer_switch=@save_optimizer_switch_for_selectivity_test;
 set @tmp_ust= @@use_stat_tables;
 set @tmp_oucs= @@optimizer_use_condition_selectivity;
@@ -1536,6 +1607,44 @@ where t1.child_user_id=t3.id and t1.child_group_id is null and t2.lower_group_na
 parent_id	child_group_id	child_user_id	id	lower_group_name	directory_id	id
 drop table t1,t2,t3;
 #
+# MDEV-9187: duplicate of bug mdev-9628
+#
+set use_stat_tables = preferably;
+set optimizer_use_condition_selectivity=3;
+CREATE TABLE t1 (f1 char(32)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES ('foo'),('bar'),('qux');
+ANALYZE TABLE t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+SELECT * FROM t1 WHERE f1 < 'm';
+f1
+foo
+bar
+EXPLAIN EXTENDED
+SELECT * FROM t1 WHERE f1 < 'm';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	3	72.09	Using where
+Warnings:
+Note	1003	select `test`.`t1`.`f1` AS `f1` from `test`.`t1` where (`test`.`t1`.`f1` < 'm')
+CREATE TABLE t2 (f1 TEXT) ENGINE=InnoDB;
+INSERT INTO t2 VALUES ('foo'),('bar'),('qux');
+ANALYZE TABLE t2;
+Table	Op	Msg_type	Msg_text
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	OK
+SELECT * FROM t2 WHERE f1 <> 'qux';
+f1
+foo
+bar
+EXPLAIN EXTENDED
+SELECT * FROM t2 WHERE f1 <> 'qux';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	t2	ALL	NULL	NULL	NULL	NULL	3	100.00	Using where
+Warnings:
+Note	1003	select `test`.`t2`.`f1` AS `f1` from `test`.`t2` where (`test`.`t2`.`f1` <> 'qux')
+DROP TABLE t1,t2;
+#
 # End of 10.0 tests
 #
 set use_stat_tables= @tmp_ust;
diff --git a/mysql-test/r/type_uint.result b/mysql-test/r/type_uint.result
index 10aa2f2f39396..c970f2ff8969c 100644
--- a/mysql-test/r/type_uint.result
+++ b/mysql-test/r/type_uint.result
@@ -14,6 +14,25 @@ this
 0
 4294967295
 drop table t1;
+create table t1 (a bigint unsigned, b mediumint unsigned);
+insert t1 values (1,2),(0xffffffffffffffff,0xffffff);
+select coalesce(a,b), coalesce(b,a) from t1;
+coalesce(a,b)	coalesce(b,a)
+1	2
+18446744073709551615	16777215
+create table t2 as select a from t1 union select b from t1;
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `a` bigint(20) unsigned DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+select * from t2;
+a
+1
+18446744073709551615
+2
+16777215
+drop table t1, t2;
 #
 # Start of 10.0 tests
 #
diff --git a/mysql-test/r/view.result b/mysql-test/r/view.result
index 52c379d03aff6..924b3a11fef1c 100644
--- a/mysql-test/r/view.result
+++ b/mysql-test/r/view.result
@@ -5432,6 +5432,7 @@ DROP FUNCTION f1;
 DROP VIEW v1;
 DROP TABLE t1, t2;
 create view v1 as select 1;
+FOUND /mariadb-version/ in v1.frm
 drop view v1;
 #
 # MDEV-7260: Crash in get_best_combination when executing multi-table
diff --git a/mysql-test/r/wait_timeout_not_windows.result b/mysql-test/r/wait_timeout_not_windows.result
index df70aa9922160..867787a8ed340 100644
--- a/mysql-test/r/wait_timeout_not_windows.result
+++ b/mysql-test/r/wait_timeout_not_windows.result
@@ -1,3 +1,4 @@
 set global log_warnings=2;
 set @@wait_timeout=1;
+FOUND /Aborted.*Got timeout reading communication packets/ in mysqld.1.err
 set global log_warnings=@@log_warnings;
diff --git a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result
index cc2a0373444f8..07e13008e2718 100644
--- a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result
+++ b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result
@@ -33,6 +33,7 @@ INSERT INTO t1 VALUES(1,'X',1);
 SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace';
 SELECT b FROM t1 LIMIT 3;
 ERROR HY000: Lost connection to MySQL server during query
+FOUND /Wrote log record for ibuf update in place operation/ in my_restart.err
 CHECK TABLE t1;
 Table	Op	Msg_type	Msg_text
 test.t1	check	status	OK
diff --git a/mysql-test/suite/innodb/r/innodb_bug54044.result b/mysql-test/suite/innodb/r/innodb_bug54044.result
index d80c451c84128..7d6133adb74e8 100644
--- a/mysql-test/suite/innodb/r/innodb_bug54044.result
+++ b/mysql-test/suite/innodb/r/innodb_bug54044.result
@@ -6,7 +6,8 @@ table_54044	CREATE TEMPORARY TABLE `table_54044` (
   `IF(NULL  IS NOT NULL, NULL, NULL)` binary(0) DEFAULT NULL
 ) ENGINE=InnoDB DEFAULT CHARSET=latin1
 DROP TABLE table_54044;
-CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
+CREATE TABLE tmp ENGINE = INNODB
+AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
 SHOW CREATE TABLE tmp;
 Table	Create Table
 tmp	CREATE TABLE `tmp` (
diff --git a/mysql-test/suite/innodb/r/system_tables.result b/mysql-test/suite/innodb/r/system_tables.result
new file mode 100644
index 0000000000000..79a24f7e4556e
--- /dev/null
+++ b/mysql-test/suite/innodb/r/system_tables.result
@@ -0,0 +1,8 @@
+alter table mysql.time_zone_name engine=InnoDB;
+create table envois3 (starttime datetime) engine=InnoDB;
+insert envois3 values ('2008-08-11 22:43:00');
+select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3;
+starttime
+2008-08-12 02:43:00
+drop table envois3;
+alter table mysql.time_zone_name engine=MyISAM;
diff --git a/mysql-test/suite/innodb/t/innodb_bug54044.test b/mysql-test/suite/innodb/t/innodb_bug54044.test
index aa19c51018cad..61a09375ae174 100644
--- a/mysql-test/suite/innodb/t/innodb_bug54044.test
+++ b/mysql-test/suite/innodb/t/innodb_bug54044.test
@@ -10,7 +10,10 @@ CREATE TEMPORARY TABLE table_54044 ENGINE = INNODB
 SHOW CREATE TABLE table_54044;
 DROP TABLE table_54044;
 
-CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
+# This 'create table' should pass since it uses a Field_string of size 0.
+
+CREATE TABLE tmp ENGINE = INNODB
+ AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
 SHOW CREATE TABLE tmp;
 DROP TABLE tmp;
 
@@ -23,4 +26,3 @@ FLUSH TABLES;
 --error 1005
 CREATE TEMPORARY TABLE tmp ENGINE=InnoDB AS SELECT VALUES(a) FROM t1;
 DROP TABLE t1;
-
diff --git a/mysql-test/suite/innodb/t/system_tables.test b/mysql-test/suite/innodb/t/system_tables.test
new file mode 100644
index 0000000000000..90cb8c59fbd1d
--- /dev/null
+++ b/mysql-test/suite/innodb/t/system_tables.test
@@ -0,0 +1,12 @@
+--source include/have_innodb.inc
+
+#
+# MDEV-10775 System table in InnoDB format allowed in MariaDB could lead to crash
+#
+alter table mysql.time_zone_name engine=InnoDB;
+create table envois3 (starttime datetime) engine=InnoDB;
+insert envois3 values ('2008-08-11 22:43:00');
+--source include/restart_mysqld.inc
+select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3;
+drop table envois3;
+alter table mysql.time_zone_name engine=MyISAM;
diff --git a/mysql-test/suite/perfschema/r/aggregate.result b/mysql-test/suite/perfschema/r/aggregate.result
deleted file mode 100644
index c8fa1cc2b2412..0000000000000
--- a/mysql-test/suite/perfschema/r/aggregate.result
+++ /dev/null
@@ -1,121 +0,0 @@
-"General cleanup"
-set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval;
-set @@global.aria_checkpoint_interval= 0;
-drop table if exists t1;
-update performance_schema.setup_instruments set enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-truncate table performance_schema.file_summary_by_event_name;
-truncate table performance_schema.file_summary_by_instance;
-truncate table performance_schema.socket_summary_by_event_name;
-truncate table performance_schema.socket_summary_by_instance;
-truncate table performance_schema.events_waits_summary_global_by_event_name;
-truncate table performance_schema.events_waits_summary_by_instance;
-truncate table performance_schema.events_waits_summary_by_thread_by_event_name;
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-set enabled = 'YES', timed = 'YES';
-create table t1 (
-id INT PRIMARY KEY,
-b CHAR(100) DEFAULT 'initial value')
-ENGINE=MyISAM;
-insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8);
-update performance_schema.setup_instruments SET enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-set @dump_all=FALSE;
-"Verifying file aggregate consistency"
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-EVENT_NAME	COUNT_READ	SUM(i.COUNT_READ)
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-EVENT_NAME	COUNT_WRITE	SUM(i.COUNT_WRITE)
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-EVENT_NAME	COUNT_READ	SUM(i.COUNT_READ)
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-EVENT_NAME	COUNT_WRITE	SUM(i.COUNT_WRITE)
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ))
-OR @dump_all;
-EVENT_NAME	SUM_NUMBER_OF_BYTES_READ	SUM(i.SUM_NUMBER_OF_BYTES_READ)
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE))
-OR @dump_all;
-EVENT_NAME	SUM_NUMBER_OF_BYTES_WRITE	SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-"Verifying waits aggregate consistency (instance)"
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME	SUM_TIMER_WAIT	SUM(i.SUM_TIMER_WAIT)
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT))
-AND (MIN(i.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-EVENT_NAME	MIN_TIMER_WAIT	MIN(i.MIN_TIMER_WAIT)
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME	MAX_TIMER_WAIT	MAX(i.MAX_TIMER_WAIT)
-"Verifying waits aggregate consistency (thread)"
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME	SUM_TIMER_WAIT	SUM(t.SUM_TIMER_WAIT)
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT))
-AND (MIN(t.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-EVENT_NAME	MIN_TIMER_WAIT	MIN(t.MIN_TIMER_WAIT)
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT))
-OR @dump_all;
-EVENT_NAME	MAX_TIMER_WAIT	MAX(t.MAX_TIMER_WAIT)
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-set enabled = 'YES', timed = 'YES';
-drop table test.t1;
-set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save;
diff --git a/mysql-test/suite/perfschema/t/aggregate.test b/mysql-test/suite/perfschema/t/aggregate.test
deleted file mode 100644
index fe30a7b869765..0000000000000
--- a/mysql-test/suite/perfschema/t/aggregate.test
+++ /dev/null
@@ -1,197 +0,0 @@
-# Tests for PERFORMANCE_SCHEMA
-# Verify that statistics aggregated by different criteria are consistent.
-
---source include/not_embedded.inc
---source include/have_perfschema.inc
-
---echo "General cleanup"
-
-# MDEV-7187 - test fails sporadically in buildbot
-set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval;
-set @@global.aria_checkpoint_interval= 0;
-
---disable_warnings
-drop table if exists t1;
---enable_warnings
-
-update performance_schema.setup_instruments set enabled = 'NO';
-update performance_schema.setup_consumers set enabled = 'NO';
-
-# Cleanup statistics
-truncate table performance_schema.file_summary_by_event_name;
-truncate table performance_schema.file_summary_by_instance;
-truncate table performance_schema.socket_summary_by_event_name;
-truncate table performance_schema.socket_summary_by_instance;
-truncate table performance_schema.events_waits_summary_global_by_event_name;
-truncate table performance_schema.events_waits_summary_by_instance;
-truncate table performance_schema.events_waits_summary_by_thread_by_event_name;
-
-# Start recording data
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-  set enabled = 'YES', timed = 'YES';
-
-
-create table t1 (
-  id INT PRIMARY KEY,
-  b CHAR(100) DEFAULT 'initial value')
-  ENGINE=MyISAM;
-
-insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8);
-
-# Stop recording data, so the select below don't add noise.
-update performance_schema.setup_instruments SET enabled = 'NO';
-# Disable all consumers, for long standing waits
-update performance_schema.setup_consumers set enabled = 'NO';
-
-# Helper to debug
-set @dump_all=FALSE;
-
-# Note that in general:
-# - COUNT/SUM/MAX(file_summary_by_event_name) >=
-#   COUNT/SUM/MAX(file_summary_by_instance).
-# - MIN(file_summary_by_event_name) <=
-#   MIN(file_summary_by_instance).
-# There will be equality only when file instances are not removed,
-# aka when a file is not deleted from the file system,
-# because doing so removes a row in file_summary_by_instance.
-
-# Likewise:
-# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >=
-#   COUNT/SUM/MAX(events_waits_summary_by_instance)
-# - MIN(events_waits_summary_global_by_event_name) <=
-#   MIN(events_waits_summary_by_instance)
-# There will be equality only when an instrument instance
-# is not removed, which is next to impossible to predictably guarantee
-# in the server.
-# For example, a MyISAM table removed from the table cache
-# will cause a mysql_mutex_destroy on myisam/MYISAM_SHARE::intern_lock.
-# Another example, a thread terminating will cause a mysql_mutex_destroy
-# on sql/LOCK_delete
-# Both cause a row to be deleted from events_waits_summary_by_instance.
-
-# Likewise:
-# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >=
-#   COUNT/SUM/MAX(events_waits_summary_by_thread_by_event_name)
-# - MIN(events_waits_summary_global_by_event_name) <=
-#   MIN(events_waits_summary_by_thread_by_event_name)
-# There will be equality only when no thread is removed,
-# that is if no thread disconnects, or no sub thread (for example insert
-# delayed) ever completes.
-# A thread completing will cause rows in
-# events_waits_summary_by_thread_by_event_name to be removed.
-
---echo "Verifying file aggregate consistency"
-
-# Since the code generating the load in this test does:
-# - create table
-# - insert
-# - does not cause temporary tables to be used
-# we can test for equality here for file aggregates.
-
-# If any of these queries returns data, the test failed.
-
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
-FROM performance_schema.socket_summary_by_event_name AS e
-JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
-FROM performance_schema.file_summary_by_event_name AS e
-JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE))
-OR @dump_all;
-
---echo "Verifying waits aggregate consistency (instance)"
-
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT))
-AND (MIN(i.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT))
-OR @dump_all;
-
---echo "Verifying waits aggregate consistency (thread)"
-
-SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT))
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT))
-AND (MIN(t.MIN_TIMER_WAIT) != 0)
-OR @dump_all;
-
-SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT)
-FROM performance_schema.events_waits_summary_global_by_event_name AS e
-JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
-USING (EVENT_NAME)
-GROUP BY EVENT_NAME
-HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT))
-OR @dump_all;
-
-
-# Cleanup
-
-update performance_schema.setup_consumers set enabled = 'YES';
-update performance_schema.setup_instruments
-  set enabled = 'YES', timed = 'YES';
-
-drop table test.t1;
-
-set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save;
-
diff --git a/mysql-test/suite/plugins/r/server_audit.result b/mysql-test/suite/plugins/r/server_audit.result
index 83b88ed048059..ceb75176b4357 100644
--- a/mysql-test/suite/plugins/r/server_audit.result
+++ b/mysql-test/suite/plugins/r/server_audit.result
@@ -8,7 +8,6 @@ server_audit_file_rotate_now	OFF
 server_audit_file_rotate_size	1000000
 server_audit_file_rotations	9
 server_audit_incl_users	
-server_audit_loc_info	
 server_audit_logging	OFF
 server_audit_mode	0
 server_audit_output_type	file
@@ -72,7 +71,6 @@ server_audit_file_rotate_now	OFF
 server_audit_file_rotate_size	1000000
 server_audit_file_rotations	9
 server_audit_incl_users	odin, root, dva, tri
-server_audit_loc_info	
 server_audit_logging	ON
 server_audit_mode	0
 server_audit_output_type	file
@@ -218,7 +216,6 @@ server_audit_file_rotate_now	OFF
 server_audit_file_rotate_size	1000000
 server_audit_file_rotations	9
 server_audit_incl_users	odin, root, dva, tri
-server_audit_loc_info	
 server_audit_logging	ON
 server_audit_mode	1
 server_audit_output_type	file
diff --git a/mysql-test/suite/plugins/r/thread_pool_server_audit.result b/mysql-test/suite/plugins/r/thread_pool_server_audit.result
index 83b88ed048059..ceb75176b4357 100644
--- a/mysql-test/suite/plugins/r/thread_pool_server_audit.result
+++ b/mysql-test/suite/plugins/r/thread_pool_server_audit.result
@@ -8,7 +8,6 @@ server_audit_file_rotate_now	OFF
 server_audit_file_rotate_size	1000000
 server_audit_file_rotations	9
 server_audit_incl_users	
-server_audit_loc_info	
 server_audit_logging	OFF
 server_audit_mode	0
 server_audit_output_type	file
@@ -72,7 +71,6 @@ server_audit_file_rotate_now	OFF
 server_audit_file_rotate_size	1000000
 server_audit_file_rotations	9
 server_audit_incl_users	odin, root, dva, tri
-server_audit_loc_info	
 server_audit_logging	ON
 server_audit_mode	0
 server_audit_output_type	file
@@ -218,7 +216,6 @@ server_audit_file_rotate_now	OFF
 server_audit_file_rotate_size	1000000
 server_audit_file_rotations	9
 server_audit_incl_users	odin, root, dva, tri
-server_audit_loc_info	
 server_audit_logging	ON
 server_audit_mode	1
 server_audit_output_type	file
diff --git a/mysql-test/suite/rpl/r/rpl_checksum.result b/mysql-test/suite/rpl/r/rpl_checksum.result
index 94d215e596a92..9e37fbf40b18e 100644
--- a/mysql-test/suite/rpl/r/rpl_checksum.result
+++ b/mysql-test/suite/rpl/r/rpl_checksum.result
@@ -143,6 +143,7 @@ SET debug_dbug= @old_dbug;
 INSERT INTO t4 VALUES (2);
 include/wait_for_slave_sql_error.inc [errno=1590]
 Last_SQL_Error = 'The incident LOST_EVENTS occurred on the master. Message: error writing to the binary log'
+FOUND /Slave SQL: The incident LOST_EVENTS occurred on the master\. Message: error writing to the binary log, Internal MariaDB error code: 1590/ in mysqld.2.err
 SELECT * FROM t4 ORDER BY a;
 a
 1
diff --git a/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result b/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result
index 204615201d95d..e247ea9c2a714 100644
--- a/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result
+++ b/mysql-test/suite/rpl/r/rpl_gtid_errorlog.result
@@ -38,5 +38,7 @@ a
 3
 4
 5
+FOUND /Slave SQL: Error 'Duplicate entry .* on query\. .*Query: '.*', Gtid 0-1-100, Internal MariaDB error code:|Slave SQL: Could not execute Write_rows.*table test.t1; Duplicate entry.*, Gtid 0-1-100, Internal MariaDB error/ in mysqld.2.err
+FOUND /Slave SQL: The incident LOST_EVENTS occurred on the master\. Message: <none>, Internal MariaDB error code: 1590/ in mysqld.2.err
 DROP TABLE t1;
 include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/r/rpl_stop_slave_error.result b/mysql-test/suite/rpl/r/rpl_stop_slave_error.result
new file mode 100644
index 0000000000000..2bd372a9a915b
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_stop_slave_error.result
@@ -0,0 +1,6 @@
+include/master-slave.inc
+[connection master]
+include/stop_slave.inc
+NOT FOUND /Error reading packet from server: Lost connection/ in slave_log.err
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_drop_db.test b/mysql-test/suite/rpl/t/rpl_drop_db.test
index a67850a66ddbf..f66187b12f515 100644
--- a/mysql-test/suite/rpl/t/rpl_drop_db.test
+++ b/mysql-test/suite/rpl/t/rpl_drop_db.test
@@ -13,7 +13,7 @@ insert into mysqltest1.t1 values (1);
 select * from mysqltest1.t1 into outfile 'mysqltest1/f1.txt';
 create table mysqltest1.t2 (n int);
 create table mysqltest1.t3 (n int);
---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty"
+--replace_result \\ / 66 39 93 39 17 39 247 39 "File exists" "Directory not empty"
 --error 1010
 drop database mysqltest1;
 use mysqltest1;
@@ -30,7 +30,7 @@ while ($1)
 }
 --enable_query_log
 
---replace_result \\ / 66 39 17 39 "File exists" "Directory not empty"
+--replace_result \\ / 66 39 93 39 17 39 247 39 "File exists" "Directory not empty"
 --error 1010
 drop database mysqltest1;
 use mysqltest1;
diff --git a/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt b/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt
new file mode 100644
index 0000000000000..32c4527a91575
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_stop_slave_error-slave.opt
@@ -0,0 +1 @@
+--log-error=$MYSQLTEST_VARDIR/tmp/slave_log.err
diff --git a/mysql-test/suite/rpl/t/rpl_stop_slave_error.test b/mysql-test/suite/rpl/t/rpl_stop_slave_error.test
new file mode 100644
index 0000000000000..a88981c15c4fd
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_stop_slave_error.test
@@ -0,0 +1,17 @@
+#
+# MDEV-8345 STOP SLAVE should not cause an ERROR to be logged to the error log
+#
+source include/have_binlog_format_mixed.inc; # don't repeat the test three times
+source include/master-slave.inc;
+
+connection master;
+sync_slave_with_master;
+source include/stop_slave.inc;
+let SEARCH_FILE=$MYSQLTEST_VARDIR/tmp/slave_log.err;
+let SEARCH_PATTERN=Error reading packet from server: Lost connection;
+let SEARCH_RANGE= -50000;
+source include/search_pattern_in_file.inc;
+
+source include/start_slave.inc;
+source include/rpl_end.inc;
+
diff --git a/mysql-test/t/alter_table.test b/mysql-test/t/alter_table.test
index 05d915ec478b5..d2b8a6082a623 100644
--- a/mysql-test/t/alter_table.test
+++ b/mysql-test/t/alter_table.test
@@ -1712,3 +1712,28 @@ CREATE TABLE t1 (
 ALTER TABLE t1 ADD PRIMARY KEY IF NOT EXISTS event_id (event_id,market_id);
 DROP TABLE t1;
 
+--echo #
+--echo # MDEV-11126 Crash while altering persistent virtual column
+--echo #
+
+CREATE TABLE `tab1` (
+  `id` bigint(20) NOT NULL AUTO_INCREMENT,
+  `field2` set('option1','option2','option3','option4') NOT NULL,
+  `field3` set('option1','option2','option3','option4','option5') NOT NULL,
+  `field4` set('option1','option2','option3','option4') NOT NULL,
+  `field5` varchar(32) NOT NULL,
+  `field6` varchar(32) NOT NULL,
+  `field7` varchar(32) NOT NULL,
+  `field8` varchar(32) NOT NULL,
+  `field9` int(11) NOT NULL DEFAULT '1',
+  `field10` varchar(16) NOT NULL,
+  `field11` enum('option1','option2','option3') NOT NULL DEFAULT 'option1',
+  `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT,
+  PRIMARY KEY (`id`)
+) DEFAULT CHARSET=latin1;
+
+ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128);
+SHOW CREATE TABLE `tab1`;
+ALTER TABLE `tab1` CHANGE COLUMN v_col `v_col` varchar(128) AS (IF(field11='option1',CONCAT_WS(":","field1",field2,field3,field4,field5,field6,field7,field8,field9,field10), CONCAT_WS(":","field1",field11,field2,field3,field4,field5,field6,field7,field8,field9,field10))) PERSISTENT;
+SHOW CREATE TABLE `tab1`;
+DROP TABLE `tab1`;
diff --git a/mysql-test/t/create_or_replace.test b/mysql-test/t/create_or_replace.test
index 7bba2b341c043..b37417f39d0c8 100644
--- a/mysql-test/t/create_or_replace.test
+++ b/mysql-test/t/create_or_replace.test
@@ -386,3 +386,15 @@ drop table t1;
 # Cleanup
 #
 DROP TABLE t2;
+
+--echo #
+--echo # MDEV-10824 - Crash in CREATE OR REPLACE TABLE t1 AS SELECT spfunc()
+--echo #
+CREATE TABLE t1(a INT);
+CREATE FUNCTION f1() RETURNS VARCHAR(16383) RETURN 'test';
+CREATE OR REPLACE TABLE t1 AS SELECT f1();
+LOCK TABLE t1 WRITE;
+CREATE OR REPLACE TABLE t1 AS SELECT f1();
+UNLOCK TABLES;
+DROP FUNCTION f1;
+DROP TABLE t1;
diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test
index e6583f990cafb..600880d6be59a 100644
--- a/mysql-test/t/ctype_utf32.test
+++ b/mysql-test/t/ctype_utf32.test
@@ -889,6 +889,11 @@ SELECT CHAR_LENGTH(TRIM(BOTH 0x0001 FROM _utf32 0x00000061));
 SELECT CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061));
 SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));
 
+#
+# potential signedness issue
+#
+select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
+
 --echo #
 --echo # End of 5.5 tests
 --echo #
diff --git a/mysql-test/t/drop.test b/mysql-test/t/drop.test
index d9784bc819a86..a3e96953bac02 100644
--- a/mysql-test/t/drop.test
+++ b/mysql-test/t/drop.test
@@ -313,3 +313,12 @@ INSERT INTO table1 VALUES (1);
 DROP TABLE table1,table2;
 
 --echo # End BUG#34750
+
+--echo #
+--echo # MDEV-11105 Table named 'db' has weird side effect.
+--echo #
+
+CREATE DATABASE mysqltest;
+CREATE TABLE mysqltest.db(id INT);
+DROP DATABASE mysqltest;
+
diff --git a/mysql-test/t/group_min_max_innodb.test b/mysql-test/t/group_min_max_innodb.test
index 6967f84714729..91e0bd3279fb4 100644
--- a/mysql-test/t/group_min_max_innodb.test
+++ b/mysql-test/t/group_min_max_innodb.test
@@ -230,3 +230,16 @@ eval EXPLAIN $query;
 eval $query;
 
 DROP TABLE t0,t1,t2;
+
+--echo #
+--echo # MDEV-MariaDB daemon leaks memory with specific query
+--echo #
+
+CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL,
+  `language_id` int(11) unsigned NOT NULL DEFAULT '1'
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0',
+  `serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000));
+SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0;
+drop table t1,t2;
diff --git a/mysql-test/t/information_schema.test b/mysql-test/t/information_schema.test
index 30ae29844c223..ecfaa6510762a 100644
--- a/mysql-test/t/information_schema.test
+++ b/mysql-test/t/information_schema.test
@@ -612,13 +612,13 @@ select * from information_schema.schema_privileges order by grantee;
 select * from information_schema.user_privileges order by grantee;
 show grants;
 connection con4;
-select * from information_schema.column_privileges where grantee like '%user%'
+select * from information_schema.column_privileges where grantee like '\'user%'
 order by grantee;
-select * from information_schema.table_privileges where grantee like '%user%'
+select * from information_schema.table_privileges where grantee like '\'user%'
 order by grantee;
-select * from information_schema.schema_privileges where grantee like '%user%'
+select * from information_schema.schema_privileges where grantee like '\'user%'
 order by grantee;
-select * from information_schema.user_privileges where grantee like '%user%'
+select * from information_schema.user_privileges where grantee like '\'user%'
 order by grantee;
 show grants;
 connection default;
diff --git a/mysql-test/t/merge.test b/mysql-test/t/merge.test
index 77e896c7c05bc..0cf37a24f8ebe 100644
--- a/mysql-test/t/merge.test
+++ b/mysql-test/t/merge.test
@@ -2880,6 +2880,19 @@ drop tables m1, t1, t4;
 drop view t3;
 
 
+--echo #
+--echo # MDEV-10424 - Assertion `ticket == __null' failed in
+--echo #              MDL_request::set_type
+--echo #
+CREATE TABLE t1 (f1 INT) ENGINE=MyISAM;
+CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1);
+PREPARE stmt FROM "ANALYZE TABLE tmerge, t1";
+EXECUTE stmt;
+EXECUTE stmt;
+DEALLOCATE PREPARE stmt;
+DROP TABLE t1, tmerge;
+
+
 --echo End of 5.5 tests
 
 
diff --git a/mysql-test/t/mysql.test b/mysql-test/t/mysql.test
index 2b4b1e69ab6d7..263e1103e8b8a 100644
--- a/mysql-test/t/mysql.test
+++ b/mysql-test/t/mysql.test
@@ -586,8 +586,16 @@ DROP DATABASE connected_db;
 # USE and names with backticks
 #
 --write_file $MYSQLTEST_VARDIR/tmp/backticks.sql
+\u aa`bb``cc
+SELECT DATABASE();
+USE test
+SELECT DATABASE();
 USE aa`bb``cc
 SELECT DATABASE();
+USE test
+SELECT DATABASE();
+USE `aa``bb````cc`
+SELECT DATABASE();
 EOF
 create database `aa``bb````cc`;
 --exec $MYSQL < $MYSQLTEST_VARDIR/tmp/backticks.sql
diff --git a/mysql-test/t/mysql_not_windows.test b/mysql-test/t/mysql_not_windows.test
index 66853677f7bb6..591de74cbbf47 100644
--- a/mysql-test/t/mysql_not_windows.test
+++ b/mysql-test/t/mysql_not_windows.test
@@ -13,3 +13,12 @@
 
 --echo
 --echo End of tests
+
+# Multi-line exec
+exec $MYSQL \
+    test -e "select 1";
+exec $MYSQL test -e "select
+    2";
+let $query = select 3
+    as X;
+exec $MYSQL test -e "$query";
diff --git a/mysql-test/t/mysqldump-nl.test b/mysql-test/t/mysqldump-nl.test
new file mode 100644
index 0000000000000..311996e77c305
--- /dev/null
+++ b/mysql-test/t/mysqldump-nl.test
@@ -0,0 +1,38 @@
+#
+# New lines in identifiers
+#
+
+# embedded server doesn't support external clients
+--source include/not_embedded.inc
+# cmd.exe doesn't like new lines on the command line
+--source include/not_windows.inc
+
+create database `mysqltest1
+1tsetlqsym`;
+use `mysqltest1
+1tsetlqsym`;
+
+create table `t1
+1t` (`foobar
+raboof` int);
+create view `v1
+1v` as select * from `t1
+1t`;
+
+create procedure sp() select * from `v1
+1v`;
+
+flush tables;
+use test;
+
+exec $MYSQL_DUMP --compact --comment --routines --add-drop-database --databases 'mysqltest1
+1tsetlqsym';
+
+exec $MYSQL_DUMP --compact --comment --routines --add-drop-database --databases 'mysqltest1
+1tsetlqsym' | $MYSQL;
+
+show tables from `mysqltest1
+1tsetlqsym`;
+
+drop database `mysqltest1
+1tsetlqsym`;
diff --git a/mysql-test/t/mysqltest.test b/mysql-test/t/mysqltest.test
index ae59c713c3d04..e85d793b6282c 100644
--- a/mysql-test/t/mysqltest.test
+++ b/mysql-test/t/mysqltest.test
@@ -741,15 +741,6 @@ echo ;
 --error 1
 --exec echo "--exec " | $MYSQL_TEST 2>&1
 
-# Multi-line exec
-exec $MYSQL
-    test -e "select 1";
-exec $MYSQL test -e "select
-    2";
-let $query = select 3
-    as X;
-exec $MYSQL test -e "$query";
-
 # ----------------------------------------------------------------------------
 # Test let command
 # ----------------------------------------------------------------------------
diff --git a/mysql-test/t/ps.test b/mysql-test/t/ps.test
index c45b27281a79e..a6b238b84c66b 100644
--- a/mysql-test/t/ps.test
+++ b/mysql-test/t/ps.test
@@ -3653,5 +3653,32 @@ deallocate prepare stmt;
 SET SESSION sql_mode = @save_sql_mode;
 DROP TABLE t1,t2;
 
+--echo #
+--echo # MDEV-8833: Crash of server on prepared statement with
+--echo # conversion to semi-join
+--echo #
+
+CREATE TABLE t1 (column1 INT);
+INSERT INTO t1 VALUES (3),(9);
+
+CREATE TABLE t2 (column2 INT);
+INSERT INTO t2 VALUES (1),(4);
+
+CREATE TABLE t3 (column3 INT);
+INSERT INTO t3 VALUES (6),(8);
+
+CREATE TABLE t4 (column4 INT);
+INSERT INTO t4 VALUES (2),(5);
+
+PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1 
+FROM t1 AS table1
+WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 ) 
+) AS sq
+FROM t3 AS table3, t4 AS table4";
+EXECUTE stmt;
+EXECUTE stmt;
+deallocate prepare stmt;
+drop table t1,t2,t3,t4;
+
 
---echo # End of 10.0 tests
+--echo # End of 5.5 tests
diff --git a/mysql-test/t/selectivity.test b/mysql-test/t/selectivity.test
index c46ff69295fa7..8efc5216ba09c 100644
--- a/mysql-test/t/selectivity.test
+++ b/mysql-test/t/selectivity.test
@@ -970,6 +970,58 @@ set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivit
 
 DROP TABLE t1,t2;
 
+set use_stat_tables=@save_use_stat_tables;
+
+--echo #
+--echo # Bug mdev-11096: range condition over column without statistical data
+--echo #
+
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
+
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1 persistent for columns () indexes ();
+
+explain extended 
+select * from t1 where col1 > 'b' and col1 < 'e';
+select * from t1 where col1 > 'b' and col1 < 'e';
+
+drop table t1;
+
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+set use_stat_tables=@save_use_stat_tables;
+
+--echo #
+--echo # Bug mdev-9628: unindexed blob column without min-max statistics 
+--echo #                with optimizer_use_condition_selectivity=3
+--echo #
+
+set use_stat_tables='preferably';
+set optimizer_use_condition_selectivity=3;
 
+create table t1(col1 char(32));
+insert into t1 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t1;
+
+create table t2(col1 text);
+insert into t2 values ('a'),('b'),('c'),('d'), ('e'),('f'),('g'),('h');
+analyze table t2;
+
+select * from t1 where col1 > 'b' and col1 < 'd';
+explain extended 
+select * from t1 where col1 > 'b' and col1 < 'd';
+
+select * from t2 where col1 > 'b' and col1 < 'd';
+explain extended 
+select * from t2 where col1 > 'b' and col1 < 'd';
+
+select * from t2 where col1 < 'b' and col1 > 'd';
+explain extended 
+select * from t2 where col1 < 'b' and col1 > 'd';
+
+drop table t1,t2;
+
+set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
 set use_stat_tables=@save_use_stat_tables;
 
diff --git a/mysql-test/t/selectivity_innodb.test b/mysql-test/t/selectivity_innodb.test
index d6a77eac60004..25aa0abbc3b45 100644
--- a/mysql-test/t/selectivity_innodb.test
+++ b/mysql-test/t/selectivity_innodb.test
@@ -109,6 +109,31 @@ where t1.child_user_id=t3.id and t1.child_group_id is null and t2.lower_group_na
 
 drop table t1,t2,t3;
 
+--echo #
+--echo # MDEV-9187: duplicate of bug mdev-9628
+--echo #
+
+set use_stat_tables = preferably;
+set optimizer_use_condition_selectivity=3;
+ 
+CREATE TABLE t1 (f1 char(32)) ENGINE=InnoDB;
+INSERT INTO t1 VALUES ('foo'),('bar'),('qux');
+ANALYZE TABLE t1;
+
+SELECT * FROM t1 WHERE f1 < 'm';
+EXPLAIN EXTENDED
+SELECT * FROM t1 WHERE f1 < 'm';
+
+CREATE TABLE t2 (f1 TEXT) ENGINE=InnoDB;
+INSERT INTO t2 VALUES ('foo'),('bar'),('qux');
+ANALYZE TABLE t2;
+
+SELECT * FROM t2 WHERE f1 <> 'qux';
+EXPLAIN EXTENDED
+SELECT * FROM t2 WHERE f1 <> 'qux';
+
+DROP TABLE t1,t2;
+
 --echo #
 --echo # End of 10.0 tests
 --echo #
diff --git a/mysql-test/t/type_uint.test b/mysql-test/t/type_uint.test
index 3a949c5c47a93..84fca993d09eb 100644
--- a/mysql-test/t/type_uint.test
+++ b/mysql-test/t/type_uint.test
@@ -16,6 +16,13 @@ drop table t1;
 
 # End of 4.1 tests
 
+create table t1 (a bigint unsigned, b mediumint unsigned);
+insert t1 values (1,2),(0xffffffffffffffff,0xffffff);
+select coalesce(a,b), coalesce(b,a) from t1;
+create table t2 as select a from t1 union select b from t1;
+show create table t2;
+select * from t2;
+drop table t1, t2;
 
 --echo #
 --echo # Start of 10.0 tests
diff --git a/mysql-test/unstable-tests b/mysql-test/unstable-tests
index 6a46602eb0795..3e25115599fdc 100644
--- a/mysql-test/unstable-tests
+++ b/mysql-test/unstable-tests
@@ -23,77 +23,66 @@
 #
 ##############################################################################
 
-main.bootstrap                     : Modified on 2016-06-18 (MDEV-9969)
 main.create_delayed                : MDEV-10605 - failed with timeout
-main.create_or_replace             : Modified on 2016-06-23 (MDEV-9728)
-main.ctype_recoding                : Modified on 2016-06-10 (MDEV-10181)
-main.ctype_utf8                    : Modified on 2016-06-21 (merge)
-main.ctype_utf8mb4                 : Modified on 2016-06-21 (merge)
-main.events_1                      : Modified on 2016-06-21 (MDEV-9524)
+main.ctype_utf32                   : Modified on 2016-09-27 (merge)
 main.func_group                    : Modified on 2016-08-08 (MDEV-10468)
-main.func_in                       : Modified on 2016-06-20 (MDEV-10020)
 main.func_math                     : Modified on 2016-08-10 (merge)
 main.func_misc                     : Modified on 2016-08-10 (merge)
-main.grant2                        : Modified on 2016-07-18 (MDEV-8569)
-main.help                          : Modified on 2016-06-21 (MDEV-9524)
+main.group_min_max_innodb          : Modified on 2016-08-25 (MDEV-10595)
 main.host_cache_size_functionality : MDEV-10606 - sporadic failure on shutdown
 main.index_intersect_innodb        : MDEV-10643 - failed with timeout
-main.index_merge_innodb            : MDEV-7142 - sporadic wrong execution plan
+main.index_merge_myisam            : Modified on 2016-09-05 (include file changed)
+main.index_merge_innodb            : Modified on 2016-09-05 (MDEV-7142)
 main.information_schema_stats      : Modified on 2016-07-25 (MDEV-10428)
 main.innodb_mysql_lock             : MDEV-7861 - sporadic lock detection failure
-main.insert_innodb                 : Modified on 2016-06-14 (merge from upstream)
 main.loaddata                      : Modified on 2016-08-10 (merge)
-main.locale                        : Modified on 2016-06-21 (merge)
 main.mdev-504                      : MDEV-10607 - sporadic "can't connect"
 main.mdev375                       : MDEV-10607 - sporadic "can't connect"
 main.merge                         : MDEV-10607 - sporadic "can't connect"
-main.multi_update                  : Modified on 2016-06-20 (MDEV-5973)
 main.myisam_enable_keys-10506      : New test, added on 2016-08-10 (MDEV-10506)
 main.mysqlcheck                    : Modified on 2016-08-10 (merge)
 main.mysqldump                     : MDEV-10512 - sporadic assertion failure
+main.mysqlhotcopy_myisam           : MDEV-10995 - test hangs on debug build
 main.mysqltest                     : MDEV-9269 - fails on Alpha
 main.named_pipe                    : Modified on 2016-08-02 (MDEV-10383)
-main.openssl_1                     : Modified on 2016-07-11 (MDEV-10211)
-main.parser                        : Modified on 2016-06-21 (merge)
 main.pool_of_threads               : MDEV-10100 - sporadic error on detecting max connections
-main.ps_1general                   : Modified on 2016-07-12 (merge)
+main.ps                            : MDEV-11017 - sporadic wrong Prepared_stmt_count
 main.range                         : Modified on 2016-08-10 (merge)
 main.range_mrr_icp                 : Modified on 2016-08-10 (merge)
 main.query_cache                   : MDEV-10611 - sporadic mutex problem
-main.shutdown                      : MDEV-10612 - sporadic crashes
+main.shutdown                      : MDEV-10563 - sporadic crashes
 main.sp-prelocking                 : Modified on 2016-08-10 (merge)
 main.sp-security                   : MDEV-10607 - sporadic "can't connect"
-main.ssl                           : MDEV-10211 - different ciphers on some platforms
-main.ssl_ca                        : Modified on 2016-07-11 (MDEV-10211)
-main.ssl_compress                  : Modified on 2016-07-11 (MDEV-10211)
-main.ssl_timeout                   : Modified on 2016-07-11 (MDEV-10211)
+main.ssl_compress                  : MDEV-11110 - valgrind failures
 main.stat_tables_par_innodb        : MDEV-10515 - sporadic wrong results
-main.status_user                   : Modified on 2016-06-20 (MDEV-8633)
 main.subselect_innodb              : MDEV-10614 - sporadic wrong results
-main.temp_table                    : Modified on 2016-06-18 (MDEV-8569)
 main.type_date                     : Modified on 2016-08-10 (merge)
-main.type_datetime                 : Modified on 2016-06-16 (MDEV-9374)
+main.type_uint                     : Modified on 2016-09-27 (merge)
 main.view                          : Modified on 2016-08-10 (merge)
 main.xtradb_mrr                    : Modified on 2016-08-04 (MDEV-9946)
 
 #----------------------------------------------------------------
 
-archive.archive-big : MDEV-10615 - table is marked as crashed
-archive.discover    : MDEV-10510 - table is marked as crashed
+archive.archive-big          : MDEV-10615 - table is marked as crashed
+archive.discover             : MDEV-10510 - table is marked as crashed
+archive.mysqlhotcopy_archive : MDEV-10995 - test hangs on debug build
 
 #----------------------------------------------------------------
 
 binlog.binlog_commit_wait                 : MDEV-10150 - Error: too much time elapsed
-binlog.binlog_dmls_on_tmp_tables_readonly : New test, added on 2016-05-04 (upstream)
 binlog.binlog_xa_recover                  : MDEV-8517 - Extra checkpoint
 
 #----------------------------------------------------------------
 
 connect.tbl             : MDEV-9844, MDEV-10179 - sporadic crashes, valgrind warnings, wrong results
-connect.jdbc            : New test, added on 2016-07-15
-connect.jdbc-new        : New test, added on 2016-07-14
-connect.jdbc-oracle     : New test, added on 2016-07-13
-connect.jdbc-postgresql : New test, added on 2016-07-13
+
+#----------------------------------------------------------------
+
+engines/rr_trx.* : MDEV-10998 - tests not maintained
+
+#----------------------------------------------------------------
+
+extra/binlog_tests.database : Modified on 2016-10-21 (Upstream MIPS test fixes)
 
 #----------------------------------------------------------------
 
@@ -104,20 +93,19 @@ federated.federated_transactions : MDEV-10617, MDEV-10417 - Wrong checksum, time
 
 #----------------------------------------------------------------
 
-funcs_1.processlist_priv_no_prot : Include file modified on 2016-07-12 (merge)
-funcs_1.processlist_priv_ps      : Include file modified on 2016-07-12 (merge)
+funcs_2/charset.* : MDEV-10999 - test not maintained
 
 #----------------------------------------------------------------
 
 innodb.binlog_consistent              : MDEV-10618 - Server fails to start 
 innodb.innodb-alter-table             : MDEV-10619 - Testcase timeout
 innodb.innodb-alter-tempfile          : Modified on 2016-08-09 (MDEV-10469)
-innodb.innodb_corrupt_bit             : Modified on 2016-06-21 (merge)
 innodb.innodb_bug30423                : MDEV-7311 - Wrong number of rows in the plan
-innodb.innodb-fk-warnings             : Modified on 2016-07-18 (MDEV-8569)
-innodb.innodb-fkcheck                 : Modified on 2016-06-13 (MDEV-10083)
+innodb.innodb_bug54044                : Modified on 2016-09-27 (merge)
+innodb.innodb_monitor                 : MDEV-10939 - Testcase timeout
 innodb.innodb-wl5522                  : rdiff file modified on 2016-08-10 (merge)
 innodb.innodb-wl5522-debug-zip        : MDEV-10427 - Warning: database page corruption
+innodb.system_tables                  : Added on 2016-09-23 (MDEV-10775)
 
 #----------------------------------------------------------------
 
@@ -142,21 +130,16 @@ parts.partition_int_myisam   : MDEV-10621 - Testcase timeout
 
 #----------------------------------------------------------------
 
-perfschema.digest_table_full                 : Modified on 2016-06-21 (merge)
 perfschema.func_file_io                      : MDEV-5708 - fails for s390x
 perfschema.func_mutex                        : MDEV-5708 - fails for s390x
-perfschema.rpl_gtid_func                     : Modified on 2016-06-21 (merge)
-perfschema.sizing_low                        : Modified on 2016-04-26 (5.6.30 merge)
+perfschema.hostcache_ipv6_ssl                : MDEV-10696 - crash on shutdown
 perfschema.socket_summary_by_event_name_func : MDEV-10622 - Socket summary tables do not match
-perfschema.start_server_low_digest           : Modified on 2016-06-21 (merge)
-perfschema.statement_digest                  : Modified on 2016-06-21 (merge)
-perfschema.statement_digest_consumers        : Modified on 2016-06-21 (merge)
-perfschema.statement_digest_long_query       : Modified on 2016-06-21 (merge)
-perfschema.table_name                        : New test, added on 2016-04-26 (5.6.30 merge)
+
+perfschema_stress.* : MDEV-10996 - tests not maintained
 
 #----------------------------------------------------------------
 
-plugins.feedback_plugin_send     : MDEV-7932 - ssl failed for url
+plugins.feedback_plugin_send     : MDEV-7932 - ssl failed for url, MDEV-11112 - valgrind warnings
 plugins.pam                      : Modified on 2016-08-03 (MDEV-7329)
 plugins.pam_cleartext            : Modified on 2016-08-03
 plugins.server_audit             : MDEV-9562 - crashes on sol10-sparc
@@ -164,11 +147,6 @@ plugins.thread_pool_server_audit : MDEV-9562 - crashes on sol10-sparc
 
 #----------------------------------------------------------------
 
-roles.rpl_grant_revoke_current_role-8638 : New test, added on 2016-06-20 (MDEV-8638)
-roles.set_role-9614                      : New test, added on 2016-05-30 (MDEV-9614)
-
-#----------------------------------------------------------------
-
 rpl.last_insert_id                    : MDEV-10625 - warnings in error log
 rpl.rpl_auto_increment                : MDEV-10417 - Fails on Mips
 rpl.rpl_auto_increment_bug45679       : MDEV-10417 - Fails on Mips
@@ -177,11 +155,11 @@ rpl.rpl_binlog_index                  : MDEV-9501 - Warning: failed registering
 rpl.rpl_checksum_cache                : MDEV-10626 - Testcase timeout
 rpl.rpl_circular_for_4_hosts          : MDEV-10627 - Testcase timeout
 rpl.rpl_ddl                           : MDEV-10417 - Fails on Mips
+rpl.rpl_drop_db                       : Modified on 2016-10-21 (Upstream MIPS test fixes)
 rpl.rpl_gtid_crash                    : MDEV-9501 - Warning: failed registering on master
 rpl.rpl_gtid_master_promote           : MDEV-10628 - Timeout in sync_with_master
 rpl.rpl_gtid_stop_start               : MDEV-10629 - Crash on shutdown
 rpl.rpl_gtid_until                    : MDEV-10625 - warnings in error log
-rpl.rpl_ignore_table                  : Modified on 2016-06-22
 rpl.rpl_innodb_bug30888               : MDEV-10417 - Fails on Mips
 rpl.rpl_insert                        : MDEV-9329 - Fails on Ubuntu/s390x
 rpl.rpl_insert_delayed                : MDEV-9329 - Fails on Ubuntu/s390x
@@ -201,6 +179,8 @@ rpl.rpl_temporary_error2              : MDEV-10634 - Wrong number of retries
 rpl.sec_behind_master-5114            : MDEV-8518 - Wrong value of Seconds_Behind_Master
 rpl.rpl_skip_replication              : MDEV-9268 - Fails with timeout in sync_slave_with_master on Alpha
 
+rpl/extra/rpl_tests.*                 : MDEV-10994 - tests not maintained
+
 #----------------------------------------------------------------
 
 spider.* : MDEV-9329 - tests are too memory-consuming
@@ -214,6 +194,10 @@ spider/bg.vp_fixes              : MDEV-9329 - Fails on Ubuntu/s390x
 
 #----------------------------------------------------------------
 
+sphinx.* : MDEV-10747 - tests are not run in buildbot, they can't be stable
+
+#----------------------------------------------------------------
+
 stress.ddl_innodb : MDEV-10635 - Testcase timeout
 
 #----------------------------------------------------------------
@@ -229,11 +213,14 @@ tokudb.background_job_manager        : MDEV-10327 - Assertion failure on server
 tokudb.cluster_filter_unpack_varchar : MDEV-10636 - Wrong execution plan
 tokudb.*                             : MDEV-9891 - massive crashes on shutdown
 tokudb_alter_table.*                 : MDEV-9891 - massive crashes on shutdown
+tokudb_backup.*                      : MDEV-11001 - tests don't work
 tokudb_bugs.checkpoint_lock          : MDEV-10637 - Wrong processlist output
 tokudb_bugs.checkpoint_lock_3        : MDEV-10637 - Wrong processlist output
 tokudb_bugs.*                        : MDEV-9891 - massive crashes on shutdown
 tokudb_parts.*                       : MDEV-9891 - massive crashes on shutdown
-rpl-tokudb.*                         : MDEV-9891 - massive crashes on shutdown, also modified on 2016-06-10 (Merge)
+tokudb_rpl_suites.*                  : MDEV-11001 - tests don't work
+tokudb_sys_vars.*                    : MDEV-11001 - tests don't work
+rpl-tokudb.*                         : MDEV-9891 - massive crashes on shutdown
 tokudb/tokudb_add_index.*            : MDEV-9891 - massive crashes on shutdown
 tokudb/tokudb_backup.*               : MDEV-9891 - massive crashes on shutdown
 tokudb/tokudb_mariadb.*              : MDEV-9891 - massive crashes on shutdown
@@ -247,7 +234,6 @@ unit.ma_test_loghandler : MDEV-10638 - record read not ok
 
 #----------------------------------------------------------------
 
-vcol.charsets         : Added on 2016-06-23
 vcol.not_supported    : MDEV-10639 - Testcase timeout
 vcol.vcol_keys_innodb : MDEV-10639 - Testcase timeout
 
diff --git a/mysql-test/valgrind.supp b/mysql-test/valgrind.supp
index 1cc5d1779720c..5f1af1a09053c 100644
--- a/mysql-test/valgrind.supp
+++ b/mysql-test/valgrind.supp
@@ -1228,6 +1228,125 @@
   fun:dlopen@@GLIBC_2.2.5
 }
 
+# 
+# MDEV-11061: OpenSSL 0.9.8 problems
+#
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Cond
+   obj:*/libz.so*
+   ...
+   obj:*/libcrypto.so.0.9.8
+   ...
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Value8
+   obj:*/libz.so*
+   ...
+   obj:*/libcrypto.so.0.9.8
+   ...
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Cond
+   obj:*/libcrypto.so.0.9.8
+   ...
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Value8
+   obj:*/libcrypto.so.0.9.8
+   ...
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Cond
+   obj:*/libssl.so.0.9.8
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Value8
+   obj:*/libssl.so.0.9.8
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Cond
+   fun:memcpy
+   obj:*/libcrypto.so.0.9.8
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Value8
+   fun:memcpy
+   obj:*/libcrypto.so.0.9.8
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Cond
+   fun:is_overlap
+   fun:memcpy
+   obj:*/libcrypto.so.0.9.8
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Cond
+   fun:memset
+   obj:*/libcrypto.so.0.9.8
+   ...
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Value8
+   fun:memset
+   obj:*/libcrypto.so.0.9.8
+   ...
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
+{
+   MDEV-11061: OpenSSL 0.9.8
+   Memcheck:Param
+   write(buf)
+   obj:*/libpthread-2.9.so*
+   obj:*/libcrypto.so.0.9.8
+   ...
+   obj:*/libssl.so.0.9.8
+   ...
+}
+
 {
    GitHub codership/galera#330
    Memcheck:Leak
@@ -1316,7 +1435,7 @@
 }
 
 {
-g   codership/mysql-wsrep/issues#176
+   codership/mysql-wsrep/issues#176
    Memcheck:Leak
    fun:_Z16wsrep_set_paramsRN6galera10ReplicatorEPKc
 }
diff --git a/mysys/my_fopen.c b/mysys/my_fopen.c
index 409f1bfc9dd81..a24f516116826 100644
--- a/mysys/my_fopen.c
+++ b/mysys/my_fopen.c
@@ -102,6 +102,7 @@ static FILE *my_win_freopen(const char *path, const char *mode, FILE *stream)
   HANDLE osfh;
 
   DBUG_ASSERT(path && stream);
+  DBUG_ASSERT(strchr(mode, 'a')); /* We use FILE_APPEND_DATA below */
 
   /* Services don't have stdout/stderr on Windows, so _fileno returns -1. */
   if (fd < 0)
@@ -112,15 +113,14 @@ static FILE *my_win_freopen(const char *path, const char *mode, FILE *stream)
     fd= _fileno(stream);
   }
 
-  if ((osfh= CreateFile(path, GENERIC_READ | GENERIC_WRITE,
+  if ((osfh= CreateFile(path, GENERIC_READ | FILE_APPEND_DATA,
                         FILE_SHARE_READ | FILE_SHARE_WRITE |
                         FILE_SHARE_DELETE, NULL,
                         OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL,
                         NULL)) == INVALID_HANDLE_VALUE)
     return NULL;
 
-  if ((handle_fd= _open_osfhandle((intptr_t)osfh,
-                                  _O_APPEND | _O_TEXT)) == -1)
+  if ((handle_fd= _open_osfhandle((intptr_t)osfh, _O_TEXT)) == -1)
   {
     CloseHandle(osfh);
     return NULL;
diff --git a/mysys/my_redel.c b/mysys/my_redel.c
index 61e61b40791d7..976fc5a18c345 100644
--- a/mysys/my_redel.c
+++ b/mysys/my_redel.c
@@ -1,5 +1,5 @@
-/*
-   Copyright (c) 2000, 2010, Oracle and/or its affiliates
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates
+   Copyright (c) 2009, 2016, MariaDB
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -49,7 +49,8 @@ int my_redel(const char *org_name, const char *tmp_name,
   DBUG_PRINT("my",("org_name: '%s' tmp_name: '%s'  MyFlags: %lu",
 		   org_name,tmp_name,MyFlags));
 
-  if (my_copystat(org_name,tmp_name,MyFlags) < 0)
+  if (!my_disable_copystat_in_redel &&
+      my_copystat(org_name,tmp_name,MyFlags) < 0)
     goto end;
   if (MyFlags & MY_REDEL_MAKE_BACKUP)
   {
diff --git a/mysys/my_static.c b/mysys/my_static.c
index 4aca78e30a9ab..9236c1395fb86 100644
--- a/mysys/my_static.c
+++ b/mysys/my_static.c
@@ -98,3 +98,4 @@ my_bool my_disable_sync=0;
 my_bool my_disable_async_io=0;
 my_bool my_disable_flush_key_blocks=0;
 my_bool my_disable_symlinks=0;
+my_bool my_disable_copystat_in_redel=0;
diff --git a/plugin/feedback/utils.cc b/plugin/feedback/utils.cc
index b83b69be0ce17..dad3d59e76dcc 100644
--- a/plugin/feedback/utils.cc
+++ b/plugin/feedback/utils.cc
@@ -43,7 +43,11 @@ static const char *get_os_version_name(OSVERSIONINFOEX *ver)
 {
   DWORD major = ver->dwMajorVersion;
   DWORD minor = ver->dwMinorVersion;
-
+  if (major == 10 && minor == 0)
+  {
+    return (ver->wProductType == VER_NT_WORKSTATION) ?
+      "Windows 10" : "Windows Server 2016";
+  }
   if (major == 6 && minor == 3)
   {
     return (ver->wProductType == VER_NT_WORKSTATION)?
@@ -102,7 +106,12 @@ static int uname(struct utsname *buf)
   if(version_str && version_str[0])
     sprintf(buf->version, "%s %s",version_str, ver.szCSDVersion);
   else
-    sprintf(buf->version, "%s", ver.szCSDVersion);
+  {
+    /* Fallback for unknown versions, e.g. "Windows <major_ver>.<minor_ver>" */
+    sprintf(buf->version, "Windows %d.%d%s",
+      ver.dwMajorVersion, ver.dwMinorVersion,
+      (ver.wProductType == VER_NT_WORKSTATION ? "" : " Server"));
+  }
 
 #ifdef _WIN64
   strcpy(buf->machine, "x64");
diff --git a/plugin/server_audit/server_audit.c b/plugin/server_audit/server_audit.c
index b84f2b9480648..d48b6c37728db 100644
--- a/plugin/server_audit/server_audit.c
+++ b/plugin/server_audit/server_audit.c
@@ -427,9 +427,8 @@ static MYSQL_SYSVAR_UINT(query_log_limit, query_log_limit,
 char locinfo_ini_value[sizeof(struct connection_info)+4];
 
 static MYSQL_THDVAR_STR(loc_info,
-                        PLUGIN_VAR_READONLY | PLUGIN_VAR_MEMALLOC,
-                        "Auxiliary info.", NULL, NULL,
-                        locinfo_ini_value);
+                        PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_MEMALLOC,
+                        "Internal info", NULL, NULL, locinfo_ini_value);
 
 static const char *syslog_facility_names[]=
 {
diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh
index b8df320bf9325..9d8bbce48604a 100644
--- a/scripts/mysqld_safe.sh
+++ b/scripts/mysqld_safe.sh
@@ -717,6 +717,10 @@ else
   logging=syslog
 fi
 
+# close stdout and stderr, everything goes to $logging now
+exec 1>&-
+exec 2>&-
+
 USER_OPTION=""
 if test -w / -o "$USER" = "root"
 then
@@ -747,7 +751,7 @@ if [ ! -d $mysql_unix_port_dir ]
 then
   if ! `mkdir -p $mysql_unix_port_dir`
   then
-    echo "Fatal error Can't create database directory '$mysql_unix_port'"
+    log_error "Fatal error Can't create database directory '$mysql_unix_port'"
     exit 1
   fi
   chown $user $mysql_unix_port_dir
diff --git a/sql/contributors.h b/sql/contributors.h
index f52d3243453ae..0359ec5402289 100644
--- a/sql/contributors.h
+++ b/sql/contributors.h
@@ -46,6 +46,7 @@ struct show_table_contributors_st show_table_contributors[]= {
   {"Auttomattic", "https://automattic.com", "Bronze Sponsor of the MariaDB Foundation"},
   {"Verkkokauppa.com", "https://virtuozzo.com", "Bronze Sponsor of the MariaDB Foundation"},
   {"Virtuozzo", "https://virtuozzo.com/", "Bronze Sponsor of the MariaDB Foundation"},
+  {"Tencent Game DBA", "http://tencentdba.com/about/", "Bronze Sponsor of the MariaDB Foundation"},
 
   /* Sponsors of important features */
   {"Google", "USA", "Sponsoring encryption, parallel replication and GTID"},
diff --git a/sql/field.cc b/sql/field.cc
index eb017802da3bd..49989a4a4ed8a 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -355,7 +355,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]=
   //MYSQL_TYPE_NULL         MYSQL_TYPE_TIMESTAMP
     MYSQL_TYPE_LONGLONG,    MYSQL_TYPE_VARCHAR,
   //MYSQL_TYPE_LONGLONG     MYSQL_TYPE_INT24
-    MYSQL_TYPE_LONGLONG,    MYSQL_TYPE_LONG,
+    MYSQL_TYPE_LONGLONG,    MYSQL_TYPE_LONGLONG,
   //MYSQL_TYPE_DATE         MYSQL_TYPE_TIME
     MYSQL_TYPE_VARCHAR,     MYSQL_TYPE_VARCHAR,
   //MYSQL_TYPE_DATETIME     MYSQL_TYPE_YEAR
diff --git a/sql/item.cc b/sql/item.cc
index 6cdbccde72927..21c8b3f701e0f 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -2743,9 +2743,28 @@ void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref)
   if (context)
   {
     Name_resolution_context *ctx= new Name_resolution_context();
-    ctx->outer_context= NULL; // We don't build a complete name resolver
-    ctx->table_list= NULL;    // We rely on first_name_resolution_table instead
+    if (context->select_lex == new_parent)
+    {
+      /*
+        This field was pushed in and then pulled out again
+        (for example, the left part of IN)
+      */
+      ctx->outer_context= context->outer_context;
+    }
+    else if (context->outer_context)
+    {
+      /* just pull to the upper context */
+      ctx->outer_context= context->outer_context->outer_context;
+    }
+    else
+    {
+      /* No upper context (merging Derived/VIEW where context chain ends) */
+      ctx->outer_context= NULL;
+    }
+    ctx->table_list= context->first_name_resolution_table;
     ctx->select_lex= new_parent;
+    if (context->select_lex == NULL)
+      ctx->select_lex= NULL;
     ctx->first_name_resolution_table= context->first_name_resolution_table;
     ctx->last_name_resolution_table=  context->last_name_resolution_table;
     ctx->error_processor=             context->error_processor;
diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc
index 5cdfa427997bc..e70922bb5d3ca 100644
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@@ -2620,8 +2620,8 @@ static bool check_equality_for_exist2in(Item_func *func,
            args[0]->all_used_tables() == OUTER_REF_TABLE_BIT)
   {
     /* It is Item_field or Item_direct_view_ref) */
-    DBUG_ASSERT(args[0]->type() == Item::FIELD_ITEM ||
-                args[0]->type() == Item::REF_ITEM);
+    DBUG_ASSERT(args[1]->type() == Item::FIELD_ITEM ||
+                args[1]->type() == Item::REF_ITEM);
     *local_field= (Item_ident *)args[1];
     *outer_exp= args[0];
     return TRUE;
diff --git a/sql/log.cc b/sql/log.cc
index 2479208b395e2..7fd185aa5d3d7 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -3144,7 +3144,7 @@ bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
       if (! write_error)
       {
         write_error= 1;
-        sql_print_error(ER(ER_ERROR_ON_WRITE), name, error);
+        sql_print_error(ER(ER_ERROR_ON_WRITE), name, tmp_errno);
       }
     }
   }
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 9d33d822961a6..d43f14c176ba6 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -4084,6 +4084,7 @@ static int init_common_variables()
 
   max_system_variables.pseudo_thread_id= (ulong)~0;
   server_start_time= flush_status_time= my_time(0);
+  my_disable_copystat_in_redel= 1;
 
   global_rpl_filter= new Rpl_filter;
   binlog_filter= new Rpl_filter;
diff --git a/sql/net_serv.cc b/sql/net_serv.cc
index 3e17ced92baed..f52549bfd0b5f 100644
--- a/sql/net_serv.cc
+++ b/sql/net_serv.cc
@@ -1,5 +1,5 @@
-/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
-   Copyright (c) 2010, 2014, SkySQL Ab.
+/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
+   Copyright (c) 2012, 2016, MariaDB
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index e0ca43e6d726f..5d6891a1edf53 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3345,9 +3345,16 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
     {
       Field *field= *field_ptr;
       uint16 store_length;
+      uint16 max_key_part_length= (uint16) table->file->max_key_part_length();
       key_part->key= keys;
       key_part->part= 0;
-      key_part->length= (uint16) field->key_length();
+      if (field->flags & BLOB_FLAG)
+        key_part->length= max_key_part_length;
+      else
+      {
+        key_part->length= (uint16) field->key_length();
+        set_if_smaller(key_part->length, max_key_part_length);
+      }
       store_length= key_part->length;
       if (field->real_maybe_null())
         store_length+= HA_KEY_NULL_LENGTH;
diff --git a/sql/parse_file.h b/sql/parse_file.h
index 2a0266e98b7eb..83a8eabcf5f72 100644
--- a/sql/parse_file.h
+++ b/sql/parse_file.h
@@ -42,9 +42,9 @@ enum file_opt_type {
 
 struct File_option
 {
-  LEX_STRING name;		/**< Name of the option */
-  int offset;			/**< offset to base address of value */
-  file_opt_type type;		/**< Option type */
+  LEX_STRING name;              /**< Name of the option */
+  my_ptrdiff_t offset;          /**< offset to base address of value */
+  file_opt_type type;           /**< Option type */
 };
 
 
diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc
index fd6f62fa100f9..c3f25848e8a16 100644
--- a/sql/signal_handler.cc
+++ b/sql/signal_handler.cc
@@ -64,13 +64,13 @@ extern "C" sig_handler handle_fatal_signal(int sig)
   struct tm tm;
 #ifdef HAVE_STACKTRACE
   THD *thd;
-#endif
   /*
      This flag remembers if the query pointer was found invalid.
      We will try and print the query at the end of the signal handler, in case
      we're wrong.
   */
   bool print_invalid_query_pointer= false;
+#endif
 
   if (segfaulted)
   {
@@ -265,6 +265,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
       "\"mlockall\" bugs.\n");
   }
 
+#ifdef HAVE_STACKTRACE
   if (print_invalid_query_pointer)
   {
     my_safe_printf_stderr(
@@ -274,6 +275,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
     my_write_stderr(thd->query(), MY_MIN(65536U, thd->query_length()));
     my_safe_printf_stderr("\n\n");
   }
+#endif
 
 #ifdef HAVE_WRITE_CORE
   if (test_flags & TEST_CORE_ON_SIGNAL)
diff --git a/sql/slave.cc b/sql/slave.cc
index db6e9cbf0aa7c..359f4f8af9ce0 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -3126,8 +3126,13 @@ static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings)
       *suppress_warnings= TRUE;
     }
     else
-      sql_print_error("Error reading packet from server: %s ( server_errno=%d)",
-                      mysql_error(mysql), mysql_errno(mysql));
+    {
+      if (!mi->rli.abort_slave)
+      {
+        sql_print_error("Error reading packet from server: %s (server_errno=%d)",
+                        mysql_error(mysql), mysql_errno(mysql));
+      }
+    }
     DBUG_RETURN(packet_error);
   }
 
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
index d32b213a83874..e1c66bceedc0f 100644
--- a/sql/sql_admin.cc
+++ b/sql/sql_admin.cc
@@ -455,7 +455,19 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
       }
       thd->prepare_derived_at_open= FALSE;
 
-      table->next_global= save_next_global;
+      /*
+        MERGE engine may adjust table->next_global chain, thus we have to
+        append save_next_global after merge children.
+      */
+      if (save_next_global)
+      {
+        TABLE_LIST *table_list_iterator= table;
+        while (table_list_iterator->next_global)
+          table_list_iterator= table_list_iterator->next_global;
+        table_list_iterator->next_global= save_next_global;
+        save_next_global->prev_global= &table_list_iterator->next_global;
+      }
+
       table->next_local= save_next_local;
       thd->open_options&= ~extra_open_options;
 
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 4ff6903991768..efe9ac6f7f45d 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -9279,6 +9279,7 @@ open_system_tables_for_read(THD *thd, TABLE_LIST *table_list,
   */
   lex->reset_n_backup_query_tables_list(&query_tables_list_backup);
   thd->reset_n_backup_open_tables_state(backup);
+  thd->lex->sql_command= SQLCOM_SELECT;
 
   if (open_and_lock_tables(thd, table_list, FALSE,
                            MYSQL_OPEN_IGNORE_FLUSH |
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 10779aebbda55..b210a4d32dc4f 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -5666,9 +5666,11 @@ int THD::decide_logging_format(TABLE_LIST *tables)
     {
       static const char *prelocked_mode_name[] = {
         "NON_PRELOCKED",
+        "LOCK_TABLES",
         "PRELOCKED",
         "PRELOCKED_UNDER_LOCK_TABLES",
       };
+      compile_time_assert(array_elements(prelocked_mode_name) == LTM_always_last);
       DBUG_PRINT("debug", ("prelocked_mode: %s",
                            prelocked_mode_name[locked_tables_mode]));
     }
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 08f014d7f4806..56ae659797ca8 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -1203,7 +1203,8 @@ enum enum_locked_tables_mode
   LTM_NONE= 0,
   LTM_LOCK_TABLES,
   LTM_PRELOCKED,
-  LTM_PRELOCKED_UNDER_LOCK_TABLES
+  LTM_PRELOCKED_UNDER_LOCK_TABLES,
+  LTM_always_last
 };
 
 
@@ -4373,6 +4374,11 @@ class TMP_TABLE_PARAM :public Sql_alloc
       save_copy_field_end= copy_field_end= NULL;
     }
   }
+  void free_copy_field_data()
+  {
+    for (Copy_field *ptr= copy_field ; ptr != copy_field_end ; ptr++)
+      ptr->tmp.free();
+  }
 };
 
 class select_union :public select_result_interceptor
diff --git a/sql/sql_db.cc b/sql/sql_db.cc
index e89c3d9e74566..0a3ff64113f04 100644
--- a/sql/sql_db.cc
+++ b/sql/sql_db.cc
@@ -784,7 +784,7 @@ bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create_info)
 bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
 {
   ulong deleted_tables= 0;
-  bool error= true;
+  bool error= true, rm_mysql_schema;
   char	path[FN_REFLEN + 16];
   MY_DIR *dirp;
   uint length;
@@ -809,6 +809,18 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
   length= build_table_filename(path, sizeof(path) - 1, db, "", "", 0);
   strmov(path+length, MY_DB_OPT_FILE);		// Append db option file name
   del_dbopt(path);				// Remove dboption hash entry
+  /*
+     Now remove the db.opt file.
+     The 'find_db_tables_and_rm_known_files' doesn't remove this file
+     if there exists a table with the name 'db', so let's just do it
+     separately. The file must go anyway; ENOENT is tolerated below.
+  */
+  if (my_delete_with_symlink(path, MYF(0)) && my_errno != ENOENT)
+  {
+    my_error(EE_DELETE, MYF(0), path, my_errno);
+    DBUG_RETURN(true);
+  }
+    
   path[length]= '\0';				// Remove file name
 
   /* See if the directory exists */
@@ -835,7 +847,8 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
     Disable drop of enabled log tables, must be done before name locking.
     This check is only needed if we are dropping the "mysql" database.
   */
-  if ((my_strcasecmp(system_charset_info, MYSQL_SCHEMA_NAME.str, db) == 0))
+  if ((rm_mysql_schema=
+        (my_strcasecmp(system_charset_info, MYSQL_SCHEMA_NAME.str, db) == 0)))
   {
     for (table= tables; table; table= table->next_local)
       if (check_if_log_table(table, TRUE, "DROP"))
@@ -848,7 +861,7 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
       lock_db_routines(thd, dbnorm))
     goto exit;
 
-  if (!in_bootstrap)
+  if (!in_bootstrap && !rm_mysql_schema)
   {
     for (table= tables; table; table= table->next_local)
     {
@@ -893,10 +906,13 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent)
     ha_drop_database(path);
     tmp_disable_binlog(thd);
     query_cache_invalidate1(thd, dbnorm);
-    (void) sp_drop_db_routines(thd, dbnorm); /* @todo Do not ignore errors */
+    if (!rm_mysql_schema)
+    {
+      (void) sp_drop_db_routines(thd, dbnorm); /* @todo Do not ignore errors */
 #ifdef HAVE_EVENT_SCHEDULER
-    Events::drop_schema_events(thd, dbnorm);
+      Events::drop_schema_events(thd, dbnorm);
 #endif
+    }
     reenable_binlog(thd);
 
     /*
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index c1402b36737a4..4ed1b7a5323b0 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -3135,12 +3135,6 @@ case SQLCOM_PREPARE:
       create_info.table_charset= 0;
     }
 
-    /*
-      For CREATE TABLE we should not open the table even if it exists.
-      If the table exists, we should either not create it or replace it
-    */
-    lex->query_tables->open_strategy= TABLE_LIST::OPEN_STUB;
-
     /*
       If we are a slave, we should add OR REPLACE if we don't have
       IF EXISTS. This will help a slave to recover from
@@ -8887,12 +8881,6 @@ bool create_table_precheck(THD *thd, TABLE_LIST *tables,
   if (check_fk_parent_table_access(thd, &lex->create_info, &lex->alter_info, create_table->db))
     goto err;
 
-  /*
-    For CREATE TABLE we should not open the table even if it exists.
-    If the table exists, we should either not create it or replace it
-  */
-  lex->query_tables->open_strategy= TABLE_LIST::OPEN_STUB;
-
   error= FALSE;
 
 err:
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index 956e3c9e57f80..14f1107be3360 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -2810,6 +2810,22 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name,
 }
 
 
+static size_t var_storage_size(int flags)
+{
+  switch (flags & PLUGIN_VAR_TYPEMASK) {
+  case PLUGIN_VAR_BOOL:         return sizeof(my_bool);
+  case PLUGIN_VAR_INT:          return sizeof(int);
+  case PLUGIN_VAR_LONG:         return sizeof(long);
+  case PLUGIN_VAR_ENUM:         return sizeof(long);
+  case PLUGIN_VAR_LONGLONG:     return sizeof(ulonglong);
+  case PLUGIN_VAR_SET:          return sizeof(ulonglong);
+  case PLUGIN_VAR_STR:          return sizeof(char*);
+  case PLUGIN_VAR_DOUBLE:       return sizeof(double);
+  default: DBUG_ASSERT(0);      return 0;
+  }
+}
+
+
 /*
   returns a bookmark for thd-local variables, creating if neccessary.
   returns null for non thd-local variables.
@@ -2818,39 +2834,13 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name,
 static st_bookmark *register_var(const char *plugin, const char *name,
                                  int flags)
 {
-  uint length= strlen(plugin) + strlen(name) + 3, size= 0, offset, new_size;
+  uint length= strlen(plugin) + strlen(name) + 3, size, offset, new_size;
   st_bookmark *result;
   char *varname, *p;
 
-  if (!(flags & PLUGIN_VAR_THDLOCAL))
-    return NULL;
-
-  switch (flags & PLUGIN_VAR_TYPEMASK) {
-  case PLUGIN_VAR_BOOL:
-    size= sizeof(my_bool);
-    break;
-  case PLUGIN_VAR_INT:
-    size= sizeof(int);
-    break;
-  case PLUGIN_VAR_LONG:
-  case PLUGIN_VAR_ENUM:
-    size= sizeof(long);
-    break;
-  case PLUGIN_VAR_LONGLONG:
-  case PLUGIN_VAR_SET:
-    size= sizeof(ulonglong);
-    break;
-  case PLUGIN_VAR_STR:
-    size= sizeof(char*);
-    break;
-  case PLUGIN_VAR_DOUBLE:
-    size= sizeof(double);
-    break;
-  default:
-    DBUG_ASSERT(0);
-    return NULL;
-  };
+  DBUG_ASSERT(flags & PLUGIN_VAR_THDLOCAL);
 
+  size= var_storage_size(flags);
   varname= ((char*) my_alloca(length));
   strxmov(varname + 1, plugin, "_", name, NullS);
   for (p= varname + 1; *p; p++)
@@ -3052,25 +3042,17 @@ void sync_dynamic_session_variables(THD* thd, bool global_lock)
   */
   for (idx= 0; idx < bookmark_hash.records; idx++)
   {
-    sys_var_pluginvar *pi;
-    sys_var *var;
     st_bookmark *v= (st_bookmark*) my_hash_element(&bookmark_hash,idx);
 
     if (v->version <= thd->variables.dynamic_variables_version)
       continue; /* already in thd->variables */
 
-    if (!(var= intern_find_sys_var(v->key + 1, v->name_len)) ||
-        !(pi= var->cast_pluginvar()) ||
-        v->key[0] != plugin_var_bookmark_key(pi->plugin_var->flags))
-      continue;
-
     /* Here we do anything special that may be required of the data types */
 
-    if ((pi->plugin_var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
-        pi->plugin_var->flags & PLUGIN_VAR_MEMALLOC)
+    if ((v->key[0] & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
+         v->key[0] & BOOKMARK_MEMALLOC)
     {
-      int offset= ((thdvar_str_t *)(pi->plugin_var))->offset;
-      char **pp= (char**) (thd->variables.dynamic_variables_ptr + offset);
+      char **pp= (char**) (thd->variables.dynamic_variables_ptr + v->offset);
       if (*pp)
         *pp= my_strdup(*pp, MYF(MY_WME|MY_FAE));
     }
@@ -3331,69 +3313,58 @@ bool sys_var_pluginvar::session_update(THD *thd, set_var *var)
   return false;
 }
 
-bool sys_var_pluginvar::global_update(THD *thd, set_var *var)
+static const void *var_def_ptr(st_mysql_sys_var *pv)
 {
-  DBUG_ASSERT(!is_readonly());
-  mysql_mutex_assert_owner(&LOCK_global_system_variables);
-
-  void *tgt= real_value_ptr(thd, var->type);
-  const void *src= &var->save_result;
-
-  if (!var->value)
-  {
-    switch (plugin_var->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) {
+    switch (pv->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) {
     case PLUGIN_VAR_INT:
-      src= &((sysvar_uint_t*) plugin_var)->def_val;
-      break;
+      return &((sysvar_uint_t*) pv)->def_val;
     case PLUGIN_VAR_LONG:
-      src= &((sysvar_ulong_t*) plugin_var)->def_val;
-      break;
+      return &((sysvar_ulong_t*) pv)->def_val;
     case PLUGIN_VAR_LONGLONG:
-      src= &((sysvar_ulonglong_t*) plugin_var)->def_val;
-      break;
+      return &((sysvar_ulonglong_t*) pv)->def_val;
     case PLUGIN_VAR_ENUM:
-      src= &((sysvar_enum_t*) plugin_var)->def_val;
-      break;
+      return &((sysvar_enum_t*) pv)->def_val;
     case PLUGIN_VAR_SET:
-      src= &((sysvar_set_t*) plugin_var)->def_val;
-      break;
+      return &((sysvar_set_t*) pv)->def_val;
     case PLUGIN_VAR_BOOL:
-      src= &((sysvar_bool_t*) plugin_var)->def_val;
-      break;
+      return &((sysvar_bool_t*) pv)->def_val;
     case PLUGIN_VAR_STR:
-      src= &((sysvar_str_t*) plugin_var)->def_val;
-      break;
+      return &((sysvar_str_t*) pv)->def_val;
     case PLUGIN_VAR_DOUBLE:
-      src= &((sysvar_double_t*) plugin_var)->def_val;
-      break;
+      return &((sysvar_double_t*) pv)->def_val;
     case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL:
-      src= &((thdvar_uint_t*) plugin_var)->def_val;
-      break;
+      return &((thdvar_uint_t*) pv)->def_val;
     case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL:
-      src= &((thdvar_ulong_t*) plugin_var)->def_val;
-      break;
+      return &((thdvar_ulong_t*) pv)->def_val;
     case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL:
-      src= &((thdvar_ulonglong_t*) plugin_var)->def_val;
-      break;
+      return &((thdvar_ulonglong_t*) pv)->def_val;
     case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL:
-      src= &((thdvar_enum_t*) plugin_var)->def_val;
-      break;
+      return &((thdvar_enum_t*) pv)->def_val;
     case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL:
-      src= &((thdvar_set_t*) plugin_var)->def_val;
-      break;
+      return &((thdvar_set_t*) pv)->def_val;
     case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL:
-      src= &((thdvar_bool_t*) plugin_var)->def_val;
-      break;
+      return &((thdvar_bool_t*) pv)->def_val;
     case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL:
-      src= &((thdvar_str_t*) plugin_var)->def_val;
-      break;
+      return &((thdvar_str_t*) pv)->def_val;
     case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL:
-      src= &((thdvar_double_t*) plugin_var)->def_val;
-      break;
+      return &((thdvar_double_t*) pv)->def_val;
     default:
       DBUG_ASSERT(0);
+      return NULL;
     }
-  }
+}
+
+
+bool sys_var_pluginvar::global_update(THD *thd, set_var *var)
+{
+  DBUG_ASSERT(!is_readonly());
+  mysql_mutex_assert_owner(&LOCK_global_system_variables);
+
+  void *tgt= real_value_ptr(thd, var->type);
+  const void *src= &var->save_result;
+
+  if (!var->value)
+    src= var_def_ptr(plugin_var);
 
   plugin_var->update(thd, plugin_var, tgt, src);
   return false;
@@ -3749,7 +3720,18 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp,
       *(int*)(opt + 1)= offset= v->offset;
 
       if (opt->flags & PLUGIN_VAR_NOCMDOPT)
+      {
+        char *val= global_system_variables.dynamic_variables_ptr + offset;
+        if (((opt->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR) &&
+             (opt->flags & PLUGIN_VAR_MEMALLOC))
+        {
+          char *def_val= *(char**)var_def_ptr(opt);
+          *(char**)val= def_val ? my_strdup(def_val, MYF(0)) : NULL;
+        }
+        else
+          memcpy(val, var_def_ptr(opt), var_storage_size(opt->flags));
         continue;
+      }
 
       optname= (char*) memdup_root(mem_root, v->key + 1,
                                    (optnamelen= v->name_len) + 1);
@@ -3957,10 +3939,11 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp,
           *str->value= strdup_root(mem_root, *str->value);
       }
 
+      var= find_bookmark(plugin_name.str, o->name, o->flags);
       if (o->flags & PLUGIN_VAR_NOSYSVAR)
         continue;
       tmp_backup[tmp->nbackups++].save(&o->name);
-      if ((var= find_bookmark(plugin_name.str, o->name, o->flags)))
+      if (var)
         v= new (mem_root) sys_var_pluginvar(&chain, var->key + 1, o, tmp);
       else
       {
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 063fe22140386..0cfb964307d2a 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -9004,9 +9004,26 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
     We need to destruct the copy_field (allocated in create_tmp_table())
     before setting it to 0 if the join is not "reusable".
   */
-  if (!tmp_join || tmp_join != this) 
-    tmp_table_param.cleanup(); 
-  tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
+  if (!tmp_join || tmp_join != this)
+    tmp_table_param.cleanup();
+  else
+  {
+    /*
+      Free data buffered in copy_fields, but keep data pointed by copy_field
+      around for next iteration (possibly stored in save_copy_fields).
+
+      It would be logically simpler to not clear copy_field
+      below, but as we have loops that run over copy_field to
+      copy_field_end that should not be done anymore, it's simpler to
+      just clear the pointers.
+
+      Another option would be to just clear copy_field_end and not run
+      the loops if this is not set or to have tmp_table_param.cleanup()
+      to run cleanup on save_copy_field if copy_field is not set.
+    */
+    tmp_table_param.free_copy_field_data();
+    tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
+  }
   first_record= sort_and_group=0;
   send_records= (ha_rows) 0;
 
@@ -11687,7 +11704,7 @@ void JOIN::join_free()
 /**
   Free resources of given join.
 
-  @param fill   true if we should free all resources, call with full==1
+  @param full   true if we should free all resources, call with full==1
                 should be last, before it this function can be called with
                 full==0
 
@@ -11806,7 +11823,7 @@ void JOIN::cleanup(bool full)
     /*
       If we have tmp_join and 'this' JOIN is not tmp_join and
       tmp_table_param.copy_field's  of them are equal then we have to remove
-      pointer to  tmp_table_param.copy_field from tmp_join, because it qill
+      pointer to  tmp_table_param.copy_field from tmp_join, because it will
       be removed in tmp_table_param.cleanup().
     */
     if (tmp_join &&
@@ -15710,6 +15727,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
   case Item::VARBIN_ITEM:
   case Item::CACHE_ITEM:
   case Item::EXPR_CACHE_ITEM:
+  case Item::PARAM_ITEM:
     if (make_copy_field)
     {
       DBUG_ASSERT(((Item_result_field*)item)->result_field);
@@ -22240,7 +22258,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
  err:
   if (copy)
     delete [] param->copy_field;			// This is never 0
-  param->copy_field=0;
+  param->copy_field= 0;
 err2:
   DBUG_RETURN(TRUE);
 }
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 47a5a40ebeb44..70080a6b4f162 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -1003,11 +1003,13 @@ class Column_stat: public Stat_table
 
           switch (i) {
           case COLUMN_STAT_MIN_VALUE:
+	    table_field->read_stats->min_value->set_notnull();
             stat_field->val_str(&val);
             table_field->read_stats->min_value->store(val.ptr(), val.length(),
                                                       &my_charset_bin);
             break;
           case COLUMN_STAT_MAX_VALUE:
+	    table_field->read_stats->max_value->set_notnull();
             stat_field->val_str(&val);
             table_field->read_stats->max_value->store(val.ptr(), val.length(),
                                                       &my_charset_bin);
@@ -3659,17 +3661,8 @@ double get_column_range_cardinality(Field *field,
     {
       double avg_frequency= col_stats->get_avg_frequency();
       res= avg_frequency;   
-      /*
-        psergey-todo: what does check for min_value, max_value mean? 
-          min/max_value are set to NULL in alloc_statistics_for_table() and
-          alloc_statistics_for_table_share().  Both functions will immediately
-          call create_min_max_statistical_fields_for_table and 
-          create_min_max_statistical_fields_for_table_share() respectively,
-          which will set min/max_value to be valid pointers, unless OOM
-          occurs.
-      */
       if (avg_frequency > 1.0 + 0.000001 && 
-          col_stats->min_value && col_stats->max_value)
+          col_stats->min_max_values_are_provided())
       {
         Histogram *hist= &col_stats->histogram;
         if (hist->is_available())
@@ -3692,7 +3685,7 @@ double get_column_range_cardinality(Field *field,
   }  
   else 
   {
-    if (col_stats->min_value && col_stats->max_value)
+    if (col_stats->min_max_values_are_provided())
     {
       double sel, min_mp_pos, max_mp_pos;
 
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 46e5cef22d1ac..8e5f8107849a7 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -388,6 +388,11 @@ class Column_statistics
     avg_frequency= (ulong) (val * Scale_factor_avg_frequency);
   }
 
+  /* True only if both the min and the max column statistics are non-NULL. */
+  bool min_max_values_are_provided()
+  {
+    return !is_null(COLUMN_STAT_MIN_VALUE) && !is_null(COLUMN_STAT_MAX_VALUE);
+  }
 };
 
 
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 736827d378262..a2e51ca8e935e 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -2464,7 +2464,8 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists,
       if (table_type && table_type != view_pseudo_hton)
         ha_lock_engine(thd, table_type);
 
-      if (thd->locked_tables_mode)
+      if (thd->locked_tables_mode == LTM_LOCK_TABLES ||
+          thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES)
       {
         if (wait_while_table_is_used(thd, table->table, HA_EXTRA_NOT_USED))
         {
@@ -6336,6 +6337,7 @@ static bool fill_alter_inplace_info(THD *thd,
           (field->stored_in_db || field->vcol_info->is_in_partitioning_expr()))
       {
         if (is_equal == IS_EQUAL_NO ||
+            !new_field->vcol_info ||
             !field->vcol_info->is_equal(new_field->vcol_info))
           ha_alter_info->handler_flags|= Alter_inplace_info::ALTER_COLUMN_VCOL;
         else
diff --git a/sql/table_cache.cc b/sql/table_cache.cc
index 097f37d26d835..bdb7914c32b97 100644
--- a/sql/table_cache.cc
+++ b/sql/table_cache.cc
@@ -876,6 +876,8 @@ void tdc_release_share(TABLE_SHARE *share)
   }
   if (--share->tdc.ref_count)
   {
+    if (!share->is_view)
+      mysql_cond_broadcast(&share->tdc.COND_release);
     mysql_mutex_unlock(&share->tdc.LOCK_table_share);
     mysql_mutex_unlock(&LOCK_unused_shares);
     DBUG_VOID_RETURN;
diff --git a/sql/threadpool_common.cc b/sql/threadpool_common.cc
index ae8a81b1bcd7e..9d263038bc95f 100644
--- a/sql/threadpool_common.cc
+++ b/sql/threadpool_common.cc
@@ -73,17 +73,16 @@ struct Worker_thread_context
 
   void save()
   {
-#ifdef HAVE_PSI_INTERFACE
-    psi_thread=  PSI_server?PSI_server->get_thread():0;
+#ifdef HAVE_PSI_THREAD_INTERFACE
+    psi_thread = PSI_THREAD_CALL(get_thread)();
 #endif
     mysys_var= (st_my_thread_var *)pthread_getspecific(THR_KEY_mysys);
   }
 
   void restore()
   {
-#ifdef HAVE_PSI_INTERFACE
-    if (PSI_server)
-      PSI_server->set_thread(psi_thread);
+#ifdef HAVE_PSI_THREAD_INTERFACE
+    PSI_THREAD_CALL(set_thread)(psi_thread);
 #endif
     pthread_setspecific(THR_KEY_mysys,mysys_var);
     pthread_setspecific(THR_THD, 0);
@@ -92,6 +91,41 @@ struct Worker_thread_context
 };
 
 
+#ifdef HAVE_PSI_INTERFACE
+
+/*
+  The following fixes the PSI "idle" instrumentation.
+  The server assumes that a connection becomes idle
+  just before net_read_packet() and switches to active after it.
+  In our setup, the server becomes idle when async socket io is made.
+*/
+
+extern void net_before_header_psi(struct st_net *net, void *user_data, size_t);
+
+static void dummy_before_header(struct st_net *, void *, size_t)
+{
+}
+
+static void re_init_net_server_extension(THD *thd)
+{
+  thd->m_net_server_extension.m_before_header = dummy_before_header;
+}
+
+#else
+
+#define re_init_net_server_extension(thd)
+
+#endif /* HAVE_PSI_INTERFACE */
+
+
+static inline void set_thd_idle(THD *thd)
+{
+  thd->net.reading_or_writing= 1;
+#ifdef HAVE_PSI_INTERFACE
+  net_before_header_psi(&thd->net, thd, 0);
+#endif
+}
+
 /*
   Attach/associate the connection with the OS thread,
 */
@@ -100,10 +134,10 @@ static bool thread_attach(THD* thd)
   pthread_setspecific(THR_KEY_mysys,thd->mysys_var);
   thd->thread_stack=(char*)&thd;
   thd->store_globals();
-#ifdef HAVE_PSI_INTERFACE
-  if (PSI_server)
-    PSI_server->set_thread(thd->event_scheduler.m_psi);
+#ifdef HAVE_PSI_THREAD_INTERFACE
+  PSI_THREAD_CALL(set_thread)(thd->event_scheduler.m_psi);
 #endif
+  mysql_socket_set_thread_owner(thd->net.vio->mysql_socket);
   return 0;
 }
 
@@ -130,39 +164,38 @@ int threadpool_add_connection(THD *thd)
   }
 
   /* Create new PSI thread for use with the THD. */
-#ifdef HAVE_PSI_INTERFACE
-  if (PSI_server)
-  {
-    thd->event_scheduler.m_psi = 
-      PSI_server->new_thread(key_thread_one_connection, thd, thd->thread_id);
-  }
+#ifdef HAVE_PSI_THREAD_INTERFACE
+  thd->event_scheduler.m_psi=
+    PSI_THREAD_CALL(new_thread)(key_thread_one_connection, thd, thd->thread_id);
 #endif
 
 
   /* Login. */
   thread_attach(thd);
+  re_init_net_server_extension(thd);
   ulonglong now= microsecond_interval_timer();
   thd->prior_thr_create_utime= now;
   thd->start_utime= now;
   thd->thr_create_utime= now;
 
-  if (!setup_connection_thread_globals(thd))
-  {
-    if (!thd_prepare_connection(thd))
-    {
-      
-      /* 
-        Check if THD is ok, as prepare_new_connection_state()
-        can fail, for example if init command failed.
-      */
-      if (thd_is_connection_alive(thd))
-      {
-        retval= 0;
-        thd->net.reading_or_writing= 1;
-        thd->skip_wait_timeout= true;
-      }
-    }
-  }
+  if (setup_connection_thread_globals(thd))
+    goto end;
+
+  if (thd_prepare_connection(thd))
+    goto end;
+
+  /*
+    Check if THD is ok, as prepare_new_connection_state()
+    can fail, for example if init command failed.
+  */
+  if (!thd_is_connection_alive(thd))
+    goto end;
+
+  retval= 0;
+  thd->skip_wait_timeout= true;
+  set_thd_idle(thd);
+
+end:
   worker_context.restore();
   return retval;
 }
@@ -244,12 +277,13 @@ int threadpool_process_request(THD *thd)
       goto end;
     }
 
+    set_thd_idle(thd);
+
     vio= thd->net.vio;
     if (!vio->has_data(vio))
     { 
       /* More info on this debug sync is in sql_parse.cc*/
       DEBUG_SYNC(thd, "before_do_command_net_read");
-      thd->net.reading_or_writing= 1;
       goto end;
     }
   }
diff --git a/storage/connect/JdbcInterface.java b/storage/connect/JdbcInterface.java
index f765052915d72..34af8c4e013c2 100644
--- a/storage/connect/JdbcInterface.java
+++ b/storage/connect/JdbcInterface.java
@@ -340,6 +340,18 @@ public int GetMaxValue(int n) {
       return m;
     } // end of GetMaxValue
     
+    public String GetQuoteString() {
+      String qs = null;
+      
+      try {
+        qs = dbmd.getIdentifierQuoteString();
+      } catch(SQLException se) {
+    	SetErrmsg(se);  
+      } // end try/catch
+      
+      return qs;
+    } // end of GetQuoteString
+    
     public int GetColumns(String[] parms) {
       int ncol = -1;
       
@@ -680,11 +692,11 @@ public int TimestampField(int n, String name) {
 	  return 0;  
 	} // end of TimestampField
     
-    public String ObjectField(int n, String name) {
+    public Object ObjectField(int n, String name) {
 	  if (rs == null) {
 		System.out.println("No result set");
 	  } else try {
-	    return (n > 0) ? rs.getObject(n).toString() : rs.getObject(name).toString();
+	    return (n > 0) ? rs.getObject(n) : rs.getObject(name);
 	  } catch (SQLException se) {
 		SetErrmsg(se);
 	  } //end try/catch
diff --git a/storage/connect/filamdbf.cpp b/storage/connect/filamdbf.cpp
index 8afda72357809..a4557facbd869 100644
--- a/storage/connect/filamdbf.cpp
+++ b/storage/connect/filamdbf.cpp
@@ -383,7 +383,7 @@ DBFBASE::DBFBASE(DBFBASE *txfp)
 /*  and header length. Set Records, check that Reclen is equal to lrecl and */
 /*  return the header length or 0 in case of error.                         */
 /****************************************************************************/
-int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath)
+int DBFBASE::ScanHeader(PGLOBAL g, PSZ fn, int lrecl, int *rln, char *defpath)
   {
   int       rc;
   char      filename[_MAX_PATH];
@@ -393,7 +393,7 @@ int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath)
   /************************************************************************/
   /*  Open the input file.                                                */
   /************************************************************************/
-  PlugSetPath(filename, fname, defpath);
+  PlugSetPath(filename, fn, defpath);
 
   if (!(infile= global_fopen(g, MSGID_CANNOT_OPEN, filename, "rb")))
     return 0;              // Assume file does not exist
@@ -410,11 +410,7 @@ int DBFBASE::ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath)
   } else if (rc == RC_FX)
     return -1;
 
-  if ((int)header.Reclen() != lrecl) {
-    sprintf(g->Message, MSG(BAD_LRECL), lrecl, header.Reclen());
-    return -1;
-    } // endif Lrecl
-
+	*rln = (int)header.Reclen();
   Records = (int)header.Records();
   return (int)header.Headlen();
   } // end of ScanHeader
@@ -431,9 +427,27 @@ int DBFFAM::Cardinality(PGLOBAL g)
   if (!g)
     return 1;
 
-  if (!Headlen)
-    if ((Headlen = ScanHeader(g, To_File, Lrecl, Tdbp->GetPath())) < 0)
-      return -1;                // Error in ScanHeader
+	if (!Headlen) {
+		int rln = 0;								// Record length in the file header
+
+		Headlen = ScanHeader(g, To_File, Lrecl, &rln, Tdbp->GetPath());
+
+		if (Headlen < 0)
+			return -1;                // Error in ScanHeader
+
+		if (rln && Lrecl != rln) {
+			// This happens always on some Linux platforms
+			sprintf(g->Message, MSG(BAD_LRECL), Lrecl, rln);
+
+			if (Accept) {
+				Lrecl = rln;
+				PushWarning(g, Tdbp);
+			} else
+				return -1;
+
+		} // endif rln
+
+	}	// endif Headlen
 
   // Set number of blocks for later use
   Block = (Records > 0) ? (Records + Nrec - 1) / Nrec : 0;
@@ -565,7 +579,13 @@ bool DBFFAM::AllocateBuffer(PGLOBAL g)
 
       if (Lrecl != reclen) {
         sprintf(g->Message, MSG(BAD_LRECL), Lrecl, reclen);
-        return true;
+
+				if (Accept) {
+					Lrecl = reclen;
+					PushWarning(g, Tdbp);
+				}	else
+					return true;
+
         } // endif Lrecl
 
       hlen = HEADLEN * (n + 1) + 2;
@@ -641,8 +661,14 @@ bool DBFFAM::AllocateBuffer(PGLOBAL g)
     if ((rc = dbfhead(g, Stream, Tdbp->GetFile(g), &header)) == RC_OK) {
       if (Lrecl != (int)header.Reclen()) {
         sprintf(g->Message, MSG(BAD_LRECL), Lrecl, header.Reclen());
-        return true;
-        } // endif Lrecl
+
+				if (Accept) {
+					Lrecl = header.Reclen();
+					PushWarning(g, Tdbp);
+				} else
+					return true;
+
+			} // endif Lrecl
 
       Records = (int)header.Records();
       Headlen = (int)header.Headlen();
@@ -916,9 +942,27 @@ int DBMFAM::Cardinality(PGLOBAL g)
   if (!g)
     return 1;
 
-  if (!Headlen)
-    if ((Headlen = ScanHeader(g, To_File, Lrecl, Tdbp->GetPath())) < 0)
-      return -1;                // Error in ScanHeader
+	if (!Headlen) {
+		int rln = 0;								// Record length in the file header
+
+		Headlen = ScanHeader(g, To_File, Lrecl, &rln, Tdbp->GetPath());
+
+		if (Headlen < 0)
+			return -1;                // Error in ScanHeader
+
+		if (rln && Lrecl != rln) {
+			// This happens always on some Linux platforms
+			sprintf(g->Message, MSG(BAD_LRECL), Lrecl, rln);
+
+			if (Accept) {
+				Lrecl = rln;
+				PushWarning(g, Tdbp);
+			} else
+				return -1;
+
+		} // endif rln
+
+	}	// endif Headlen
 
   // Set number of blocks for later use
   Block = (Records > 0) ? (Records + Nrec - 1) / Nrec : 0;
@@ -961,8 +1005,14 @@ bool DBMFAM::AllocateBuffer(PGLOBAL g)
 
     if (Lrecl != (int)hp->Reclen()) {
       sprintf(g->Message, MSG(BAD_LRECL), Lrecl, hp->Reclen());
-      return true;
-      } // endif Lrecl
+
+			if (Accept) {
+				Lrecl = hp->Reclen();
+				PushWarning(g, Tdbp);
+			} else
+				return true;
+
+		} // endif Lrecl
 
     Records = (int)hp->Records();
     Headlen = (int)hp->Headlen();
diff --git a/storage/connect/filamdbf.h b/storage/connect/filamdbf.h
index da84d7685a8dd..66458a10eaad9 100644
--- a/storage/connect/filamdbf.h
+++ b/storage/connect/filamdbf.h
@@ -31,7 +31,7 @@ class DllExport DBFBASE {
   DBFBASE(PDBF txfp);
 
   // Implementation
-  int  ScanHeader(PGLOBAL g, PSZ fname, int lrecl, char *defpath);
+  int  ScanHeader(PGLOBAL g, PSZ fname, int lrecl, int *rlen, char *defpath);
 
  protected:
   // Default constructor, not to be used
diff --git a/storage/connect/ha_connect.cc b/storage/connect/ha_connect.cc
index ea6fb1b08c16d..cf945a73f4610 100644
--- a/storage/connect/ha_connect.cc
+++ b/storage/connect/ha_connect.cc
@@ -224,6 +224,7 @@ uint    GetWorkSize(void);
 void    SetWorkSize(uint);
 extern "C" const char *msglang(void);
 
+static void PopUser(PCONNECT xp);
 static PCONNECT GetUser(THD *thd, PCONNECT xp);
 static PGLOBAL  GetPlug(THD *thd, PCONNECT& lxp);
 
@@ -831,34 +832,43 @@ ha_connect::~ha_connect(void)
                          table ? table->s->table_name.str : "<null>",
                          xp, xp ? xp->count : 0);
 
-  if (xp) {
-    PCONNECT p;
+	PopUser(xp);
+} // end of ha_connect destructor
 
-    xp->count--;
 
-    for (p= user_connect::to_users; p; p= p->next)
-      if (p == xp)
-        break;
+/****************************************************************************/
+/*  Check whether this user can be removed.                                 */
+/****************************************************************************/
+static void PopUser(PCONNECT xp)
+{
+	if (xp) {
+		xp->count--;
 
-    if (p && !p->count) {
-      if (p->next)
-        p->next->previous= p->previous;
+		if (!xp->count) {
+			PCONNECT p;
 
-      if (p->previous)
-        p->previous->next= p->next;
-      else
-        user_connect::to_users= p->next;
+			for (p= user_connect::to_users; p; p= p->next)
+			  if (p == xp)
+				  break;
 
-      } // endif p
+		  if (p) {
+			  if (p->next)
+				  p->next->previous= p->previous;
 
-    if (!xp->count) {
-      PlugCleanup(xp->g, true);
-      delete xp;
-      } // endif count
+			  if (p->previous)
+				  p->previous->next= p->next;
+			  else
+				  user_connect::to_users= p->next;
 
-    } // endif xp
+		  } // endif p
 
-} // end of ha_connect destructor
+			PlugCleanup(xp->g, true);
+			delete xp;
+		} // endif count
+
+	} // endif xp
+
+} // end of PopUser
 
 
 /****************************************************************************/
@@ -866,7 +876,7 @@ ha_connect::~ha_connect(void)
 /****************************************************************************/
 static PCONNECT GetUser(THD *thd, PCONNECT xp)
 {
-  if (!thd)
+	if (!thd)
     return NULL;
 
   if (xp && thd == xp->thdp)
@@ -890,7 +900,6 @@ static PCONNECT GetUser(THD *thd, PCONNECT xp)
   return xp;
 } // end of GetUser
 
-
 /****************************************************************************/
 /*  Get the global pointer of the user of this handler.                     */
 /****************************************************************************/
@@ -5261,7 +5270,18 @@ static int connect_assisted_discovery(handlerton *, THD* thd,
   if (!(shm= (char*)db))
     db= table_s->db.str;                   // Default value
 
-  // Check table type
+	// Save stack and allocation environment and prepare error return
+	if (g->jump_level == MAX_JUMP) {
+		strcpy(g->Message, MSG(TOO_MANY_JUMPS));
+		goto jer;
+	} // endif jump_level
+
+	if ((rc= setjmp(g->jumper[++g->jump_level])) != 0) {
+		my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0));
+		goto err;
+	} // endif rc
+
+	// Check table type
   if (ttp == TAB_UNDEF) {
     topt->type= (src) ? "MYSQL" : (tab) ? "PROXY" : "DOS";
     ttp= GetTypeID(topt->type);
@@ -5270,20 +5290,9 @@ static int connect_assisted_discovery(handlerton *, THD* thd,
   } else if (ttp == TAB_NIY) {
     sprintf(g->Message, "Unsupported table type %s", topt->type);
     my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0));
-    return HA_ERR_INTERNAL_ERROR;
+		goto err;
   } // endif ttp
 
-  // Save stack and allocation environment and prepare error return
-  if (g->jump_level == MAX_JUMP) {
-    strcpy(g->Message, MSG(TOO_MANY_JUMPS));
-    return HA_ERR_INTERNAL_ERROR;
-    } // endif jump_level
-
-  if ((rc= setjmp(g->jumper[++g->jump_level])) != 0) {
-    my_message(ER_UNKNOWN_ERROR, g->Message, MYF(0));
-    goto err;
-    } // endif rc
-
   if (!tab) {
     if (ttp == TAB_TBL) {
       // Make tab the first table of the list
@@ -5843,6 +5852,7 @@ static int connect_assisted_discovery(handlerton *, THD* thd,
       rc= init_table_share(thd, table_s, create_info, &sql);
 
     g->jump_level--;
+		PopUser(xp);
     return rc;
     } // endif ok
 
@@ -5850,7 +5860,9 @@ static int connect_assisted_discovery(handlerton *, THD* thd,
 
  err:
   g->jump_level--;
-  return HA_ERR_INTERNAL_ERROR;
+ jer:
+	PopUser(xp);
+	return HA_ERR_INTERNAL_ERROR;
 } // end of connect_assisted_discovery
 
 /**
diff --git a/storage/connect/jdbconn.cpp b/storage/connect/jdbconn.cpp
index 3b8de3e975bfb..dca9bd0eac4e5 100644
--- a/storage/connect/jdbconn.cpp
+++ b/storage/connect/jdbconn.cpp
@@ -498,145 +498,6 @@ PQRYRES JDBCDrivers(PGLOBAL g, int maxres, bool info)
 	return qrp;
 } // end of JDBCDrivers
 
-#if 0
-/*************************************************************************/
-/*  JDBCDataSources: constructs the result blocks containing all JDBC    */
-/*  data sources available on the local host.                            */
-/*  Called with info=true to have result column names.                   */
-/*************************************************************************/
-PQRYRES JDBCDataSources(PGLOBAL g, int maxres, bool info)
-{
-	int      buftyp[] ={ TYPE_STRING, TYPE_STRING };
-	XFLD     fldtyp[] ={ FLD_NAME, FLD_REM };
-	unsigned int length[] ={ 0, 256 };
-	bool     b[] ={ false, true };
-	int      i, n = 0, ncol = 2;
-	PCOLRES  crp;
-	PQRYRES  qrp;
-	JDBConn *jcp = NULL;
-
-	/************************************************************************/
-	/*  Do an evaluation of the result size.                                */
-	/************************************************************************/
-	if (!info) {
-		jcp = new(g)JDBConn(g, NULL);
-		n = jcp->GetMaxValue(SQL_MAX_DSN_LENGTH);
-		length[0] = (n) ? (n + 1) : 256;
-
-		if (!maxres)
-			maxres = 512;         // Estimated max number of data sources
-
-	} else {
-		length[0] = 256;
-		maxres = 0;
-	} // endif info
-
-	if (trace)
-		htrc("JDBCDataSources: max=%d len=%d\n", maxres, length[0]);
-
-	/************************************************************************/
-	/*  Allocate the structures used to refer to the result set.            */
-	/************************************************************************/
-	qrp = PlgAllocResult(g, ncol, maxres, IDS_DSRC,
-		buftyp, fldtyp, length, false, true);
-
-	for (i = 0, crp = qrp->Colresp; crp; i++, crp = crp->Next)
-		if (b[i])
-			crp->Kdata->SetNullable(true);
-
-	/************************************************************************/
-	/*  Now get the results into blocks.                                    */
-	/************************************************************************/
-	if (!info && qrp && jcp->GetDataSources(qrp))
-		qrp = NULL;
-
-	/************************************************************************/
-	/*  Return the result pointer for use by GetData routines.              */
-	/************************************************************************/
-	return qrp;
-} // end of JDBCDataSources
-
-/**************************************************************************/
-/*  PrimaryKeys: constructs the result blocks containing all the          */
-/*  JDBC catalog information concerning primary keys.                     */
-/**************************************************************************/
-PQRYRES JDBCPrimaryKeys(PGLOBAL g, JDBConn *op, char *dsn, char *table)
-{
-	static int buftyp[] ={ TYPE_STRING, TYPE_STRING, TYPE_STRING,
-		TYPE_STRING, TYPE_SHORT, TYPE_STRING };
-	static unsigned int length[] ={ 0, 0, 0, 0, 6, 128 };
-	int      n, ncol = 5;
-	int     maxres;
-	PQRYRES  qrp;
-	JCATPARM *cap;
-	JDBConn *jcp = op;
-
-	if (!op) {
-		/**********************************************************************/
-		/*  Open the connection with the JDBC data source.                    */
-		/**********************************************************************/
-		jcp = new(g)JDBConn(g, NULL);
-
-		if (jcp->Open(dsn, 2) < 1)        // 2 is openReadOnly
-			return NULL;
-
-	} // endif op
-
-	/************************************************************************/
-	/*  Do an evaluation of the result size.                                */
-	/************************************************************************/
-	n = jcp->GetMaxValue(SQL_MAX_COLUMNS_IN_TABLE);
-	maxres = (n) ? (int)n : 250;
-	n = jcp->GetMaxValue(SQL_MAX_CATALOG_NAME_LEN);
-	length[0] = (n) ? (n + 1) : 128;
-	n = jcp->GetMaxValue(SQL_MAX_SCHEMA_NAME_LEN);
-	length[1] = (n) ? (n + 1) : 128;
-	n = jcp->GetMaxValue(SQL_MAX_TABLE_NAME_LEN);
-	length[2] = (n) ? (n + 1) : 128;
-	n = jcp->GetMaxValue(SQL_MAX_COLUMN_NAME_LEN);
-	length[3] = (n) ? (n + 1) : 128;
-
-	if (trace)
-		htrc("JDBCPrimaryKeys: max=%d len=%d,%d,%d\n",
-		maxres, length[0], length[1], length[2]);
-
-	/************************************************************************/
-	/*  Allocate the structure used to refer to the result set.             */
-	/************************************************************************/
-	qrp = PlgAllocResult(g, ncol, maxres, IDS_PKEY,
-		buftyp, NULL, length, false, true);
-
-	if (trace)
-		htrc("Getting pkey results ncol=%d\n", qrp->Nbcol);
-
-	cap = AllocCatInfo(g, CAT_KEY, NULL, table, qrp);
-
-	/************************************************************************/
-	/*  Now get the results into blocks.                                    */
-	/************************************************************************/
-	if ((n = jcp->GetCatInfo(cap)) >= 0) {
-		qrp->Nblin = n;
-		//  ResetNullValues(cap);
-
-		if (trace)
-			htrc("PrimaryKeys: NBCOL=%d NBLIN=%d\n", qrp->Nbcol, qrp->Nblin);
-
-	} else
-		qrp = NULL;
-
-	/************************************************************************/
-	/*  Close any local connection.                                         */
-	/************************************************************************/
-	if (!op)
-		jcp->Close();
-
-	/************************************************************************/
-	/*  Return the result pointer for use by GetData routines.              */
-	/************************************************************************/
-	return qrp;
-} // end of JDBCPrimaryKeys
-#endif // 0
-
 /***********************************************************************/
 /*  JDBConn construction/destruction.                                  */
 /***********************************************************************/
@@ -651,7 +512,7 @@ JDBConn::JDBConn(PGLOBAL g, TDBJDBC *tdbp)
 	xqid = xuid = xid = grs = readid = fetchid = typid = errid = nullptr;
 	prepid = xpid = pcid = nullptr;
 	chrfldid = intfldid = dblfldid = fltfldid = bigfldid = nullptr;
-	datfldid = timfldid = tspfldid = nullptr;
+	objfldid = datfldid = timfldid = tspfldid = nullptr;
 	//m_LoginTimeout = DEFAULT_LOGIN_TIMEOUT;
 //m_QueryTimeout = DEFAULT_QUERY_TIMEOUT;
 //m_UpdateOptions = 0;
@@ -739,60 +600,6 @@ bool  JDBConn::gmID(PGLOBAL g, jmethodID& mid, const char *name, const char *sig
 
 } // end of gmID
 
-#if 0
-/***********************************************************************/
-/*  Utility routine.                                                   */
-/***********************************************************************/
-PSZ JDBConn::GetStringInfo(ushort infotype)
-{
-	//ASSERT(m_hdbc != SQL_NULL_HDBC);
-	char   *p, buffer[MAX_STRING_INFO];
-	SWORD   result;
-	RETCODE rc;
-
-	rc = SQLGetInfo(m_hdbc, infotype, buffer, sizeof(buffer), &result);
-
-	if (!Check(rc)) {
-		ThrowDJX(rc, "SQLGetInfo");  // Temporary
-		//  *buffer = '\0';
-	} // endif rc
-
-	p = PlugDup(m_G, buffer);
-	return p;
-} // end of GetStringInfo
-
-/***********************************************************************/
-/*  Utility routines.                                                  */
-/***********************************************************************/
-void JDBConn::OnSetOptions(HSTMT hstmt)
-{
-	RETCODE rc;
-	ASSERT(m_hdbc != SQL_NULL_HDBC);
-
-	if ((signed)m_QueryTimeout != -1) {
-		// Attempt to set query timeout.  Ignore failure
-		rc = SQLSetStmtOption(hstmt, SQL_QUERY_TIMEOUT, m_QueryTimeout);
-
-		if (!Check(rc))
-			// don't attempt it again
-			m_QueryTimeout = (DWORD)-1;
-
-	} // endif m_QueryTimeout
-
-	if (m_RowsetSize > 0) {
-		// Attempt to set rowset size.
-		// In case of failure reset it to 0 to use Fetch.
-		rc = SQLSetStmtOption(hstmt, SQL_ROWSET_SIZE, m_RowsetSize);
-
-		if (!Check(rc))
-			// don't attempt it again
-			m_RowsetSize = 0;
-
-	} // endif m_RowsetSize
-
-} // end of OnSetOptions
-#endif // 0
-
 /***********************************************************************/
 /*  Utility routine.                                                   */
 /***********************************************************************/
@@ -1007,7 +814,7 @@ int JDBConn::Open(PJPARM sop)
 #define N 1
 #endif
 
-		// Java source will be compiled as ajar file installed in the plugin dir
+		// Java source will be compiled as a jar file installed in the plugin dir
 		jpop->Append(sep);
 		jpop->Append(GetPluginDir());
 		jpop->Append("JdbcInterface.jar");
@@ -1204,6 +1011,21 @@ int JDBConn::Open(PJPARM sop)
 		return RC_FX;
 	}	// endif Msg
 
+	jmethodID qcid = nullptr;
+
+	if (!gmID(g, qcid, "GetQuoteString", "()Ljava/lang/String;")) {
+		jstring s = (jstring)env->CallObjectMethod(job, qcid);
+
+		if (s != nullptr) {
+			char *qch = (char*)env->GetStringUTFChars(s, (jboolean)false);
+			m_IDQuoteChar[0] = *qch;
+		} else {
+			s = (jstring)env->CallObjectMethod(job, errid);
+			Msg = (char*)env->GetStringUTFChars(s, (jboolean)false);
+		}	// endif s
+
+	}	// endif qcid
+
 	if (gmID(g, typid, "ColumnType", "(ILjava/lang/String;)I"))
 		return RC_FX;
 	else
@@ -1345,9 +1167,10 @@ void JDBConn::Close()
 /***********************************************************************/
 void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val)
 {
-	PGLOBAL&   g = m_G;
-	jint       ctyp;
-	jstring    cn, jn = nullptr;
+	PGLOBAL& g = m_G;
+	jint     ctyp;
+	jstring  cn, jn = nullptr;
+	jobject  jb = nullptr;
 
 	if (rank == 0)
 		if (!name || (jn = env->NewStringUTF(name)) == nullptr) {
@@ -1363,21 +1186,32 @@ void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val)
 		longjmp(g->jumper[g->jump_level], TYPE_AM_JDBC);
 	} // endif Check
 
+	if (val->GetNullable())
+		if (!gmID(g, objfldid, "ObjectField", "(ILjava/lang/String;)Ljava/lang/Object;")) {
+			jb = env->CallObjectMethod(job, objfldid, (jint)rank, jn);
+
+			if (jb == nullptr) {
+				val->Reset();
+				val->SetNull(true);
+				goto chk;
+			}	// endif jb
+
+		}	// endif objfldid
+
 	switch (ctyp) {
 	case 12:          // VARCHAR
 	case -1:          // LONGVARCHAR
 	case 1:           // CHAR
-		if (!gmID(g, chrfldid, "StringField", "(ILjava/lang/String;)Ljava/lang/String;")) {
+		if (jb)
+			cn = (jstring)jb;
+		else if (!gmID(g, chrfldid, "StringField", "(ILjava/lang/String;)Ljava/lang/String;"))
 			cn = (jstring)env->CallObjectMethod(job, chrfldid, (jint)rank, jn);
+		else
+			cn = nullptr;
 
-			if (cn) {
-				const char *field = env->GetStringUTFChars(cn, (jboolean)false);
-				val->SetValue_psz((PSZ)field);
-			} else {
-				val->Reset();
-				val->SetNull(true);
-			} // endif cn
-
+		if (cn) {
+			const char *field = env->GetStringUTFChars(cn, (jboolean)false);
+			val->SetValue_psz((PSZ)field);
 		} else
 			val->Reset();
 
@@ -1449,6 +1283,7 @@ void JDBConn::SetColumnValue(int rank, PSZ name, PVAL val)
 		val->Reset();
 	} // endswitch Type
 
+ chk:
 	if (Check()) {
 		if (rank == 0)
 			env->DeleteLocalRef(jn);
diff --git a/storage/connect/jdbconn.h b/storage/connect/jdbconn.h
index 095b1565bd276..0a1c52d4576d1 100644
--- a/storage/connect/jdbconn.h
+++ b/storage/connect/jdbconn.h
@@ -165,6 +165,7 @@ class JDBConn : public BLOCK {
 	jmethodID xpid;										  // The ExecutePrep method ID
 	jmethodID pcid;										  // The ClosePrepStmt method ID
 	jmethodID errid;										// The GetErrmsg method ID
+	jmethodID objfldid;									// The ObjectField method ID
 	jmethodID chrfldid;									// The StringField method ID
 	jmethodID intfldid;									// The IntField method ID
 	jmethodID dblfldid;									// The DoubleField method ID
diff --git a/storage/connect/json.cpp b/storage/connect/json.cpp
index 3558c5762bba2..c45630129f120 100644
--- a/storage/connect/json.cpp
+++ b/storage/connect/json.cpp
@@ -595,7 +595,7 @@ PSZ Serialize(PGLOBAL g, PJSON jsp, char *fn, int pretty)
 		fputs(EL, fs);
     fclose(fs);
 		str = (err) ? NULL : strcpy(g->Message, "Ok");
-	} else if (!err) {
+  } else if (!err) {
     str = ((JOUTSTR*)jp)->Strp;
     jp->WriteChr('\0');
     PlugSubAlloc(g, NULL, ((JOUTSTR*)jp)->N);
diff --git a/storage/connect/reldef.cpp b/storage/connect/reldef.cpp
index 2c8ada52e6f5c..ac2327212e0fa 100644
--- a/storage/connect/reldef.cpp
+++ b/storage/connect/reldef.cpp
@@ -294,7 +294,7 @@ int TABDEF::GetColCatInfo(PGLOBAL g)
 				nlg+= nof;
       case TAB_DIR:
       case TAB_XML:
-        poff= loff + 1;
+				poff= loff + (pcf->Flags & U_VIRTUAL ? 0 : 1);
         break;
       case TAB_INI:
       case TAB_MAC:
@@ -440,7 +440,11 @@ int TABDEF::GetColCatInfo(PGLOBAL g)
       } // endswitch tc
 
 		// lrecl must be at least recln to avoid buffer overflow
-		recln= MY_MAX(recln, Hc->GetIntegerOption("Lrecl"));
+		if (trace)
+			htrc("Lrecl: Calculated=%d defined=%d\n", 
+			  recln, Hc->GetIntegerOption("Lrecl"));
+
+		recln = MY_MAX(recln, Hc->GetIntegerOption("Lrecl"));
 		Hc->SetIntegerOption("Lrecl", recln);
 		((PDOSDEF)this)->SetLrecl(recln);
 		} // endif Lrecl
diff --git a/storage/connect/tabjdbc.cpp b/storage/connect/tabjdbc.cpp
index 86fd831b26201..e398523892f2e 100644
--- a/storage/connect/tabjdbc.cpp
+++ b/storage/connect/tabjdbc.cpp
@@ -686,6 +686,9 @@ bool TDBJDBC::MakeInsert(PGLOBAL g)
 	else
 		Prepared = true;
 
+	if (trace)
+		htrc("Insert=%s\n", Query->GetStr());
+
 	return false;
 } // end of MakeInsert
 
@@ -733,17 +736,18 @@ bool TDBJDBC::MakeCommand(PGLOBAL g)
 	// If so, it must be quoted in the original query
 	strlwr(strcat(strcat(strcpy(name, " "), Name), " "));
 
-	if (!strstr(" update delete low_priority ignore quick from ", name))
-		strlwr(strcpy(name, Name));     // Not a keyword
-	else
+	if (strstr(" update delete low_priority ignore quick from ", name)) {
 		strlwr(strcat(strcat(strcpy(name, qc), Name), qc));
+		k += 2;
+	} else
+		strlwr(strcpy(name, Name));     // Not a keyword
 
 	if ((p = strstr(qrystr, name))) {
 		for (i = 0; i < p - qrystr; i++)
 			stmt[i] = (Qrystr[i] == '`') ? *qc : Qrystr[i];
 
 		stmt[i] = 0;
-		k = i + (int)strlen(Name);
+		k += i + (int)strlen(Name);
 
 		if (qtd && *(p-1) == ' ')
 			strcat(strcat(strcat(stmt, qc), TableName), qc);
@@ -765,6 +769,9 @@ bool TDBJDBC::MakeCommand(PGLOBAL g)
 		return NULL;
 	} // endif p
 
+	if (trace)
+		htrc("Command=%s\n", stmt);
+
 	Query = new(g)STRING(g, 0, stmt);
 	return (!Query->GetSize());
 } // end of MakeCommand
@@ -1214,6 +1221,10 @@ int TDBJDBC::WriteDB(PGLOBAL g)
 	} // endif oom
 
 	Query->RepLast(')');
+
+	if (trace > 1)
+		htrc("Inserting: %s\n", Query->GetStr());
+
 	rc = Jcp->ExecuteUpdate(Query->GetStr());
 	Query->Truncate(len);     // Restore query
 
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index b073398f8ecf7..a4aa43651f8a5 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -673,7 +673,10 @@ void
 dict_stats_copy(
 /*============*/
 	dict_table_t*		dst,	/*!< in/out: destination table */
-	const dict_table_t*	src)	/*!< in: source table */
+	const dict_table_t*	src,	/*!< in: source table */
+	bool reset_ignored_indexes)	/*!< in: if true, set ignored indexes
+                                             to have the same statistics as if 
+                                             the table was empty */
 {
 	dst->stats_last_recalc = src->stats_last_recalc;
 	dst->stat_n_rows = src->stat_n_rows;
@@ -692,7 +695,16 @@ dict_stats_copy(
 	      && (src_idx = dict_table_get_next_index(src_idx)))) {
 
 		if (dict_stats_should_ignore_index(dst_idx)) {
-			continue;
+			if (reset_ignored_indexes) {
+				/* Reset index statistics for all ignored indexes,
+				unless they are FT indexes (these have no statistics)*/
+				if (dst_idx->type & DICT_FTS) {
+					continue;
+				}
+				dict_stats_empty_index(dst_idx);
+			} else {
+				continue;
+			}
 		}
 
 		ut_ad(!dict_index_is_univ(dst_idx));
@@ -782,7 +794,7 @@ dict_stats_snapshot_create(
 
 	t = dict_stats_table_clone_create(table);
 
-	dict_stats_copy(t, table);
+	dict_stats_copy(t, table, false);
 
 	t->stat_persistent = table->stat_persistent;
 	t->stats_auto_recalc = table->stats_auto_recalc;
@@ -3240,13 +3252,10 @@ dict_stats_update(
 
 			dict_table_stats_lock(table, RW_X_LATCH);
 
-			/* Initialize all stats to dummy values before
-			copying because dict_stats_table_clone_create() does
-			skip corrupted indexes so our dummy object 't' may
-			have less indexes than the real object 'table'. */
-			dict_stats_empty_table(table);
-
-			dict_stats_copy(table, t);
+			/* Pass reset_ignored_indexes=true as parameter
+			to dict_stats_copy. This will cause statistics
+			for corrupted indexes to be set to empty values */
+			dict_stats_copy(table, t, true);
 
 			dict_stats_assert_initialized(table);
 
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index 99a25fead8442..6059c28eabc60 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t	fts_pll_tokenize_mutex_key;
 /** variable to record innodb_fts_internal_tbl_name for information
 schema table INNODB_FTS_INSERTED etc. */
 UNIV_INTERN char* fts_internal_tbl_name		= NULL;
+UNIV_INTERN char* fts_internal_tbl_name2	= NULL;
 
 /** InnoDB default stopword list:
 There are different versions of stopwords, the stop words listed
@@ -6570,6 +6571,36 @@ fts_check_corrupt_index(
 	return(0);
 }
 
+/* Get parent table name if it's a fts aux table
+@param[in]	aux_table_name	aux table name
+@param[in]	aux_table_len	length of aux_table_name
+@return copy of the parent table name (from mem_strdupl()), or NULL */
+char*
+fts_get_parent_table_name(
+	const char*	aux_table_name,
+	ulint		aux_table_len)
+{
+	fts_aux_table_t	aux_table;
+	char*		parent_table_name = NULL;
+
+	if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
+		dict_table_t*	parent_table;
+
+		parent_table = dict_table_open_on_id(
+			aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
+
+		if (parent_table != NULL) {
+			parent_table_name = mem_strdupl(
+				parent_table->name,
+				strlen(parent_table->name));
+
+			dict_table_close(parent_table, TRUE, FALSE);
+		}
+	}
+
+	return(parent_table_name);
+}
+
 /** Check the validity of the parent table.
 @param[in]	aux_table	auxiliary table
 @return true if it is a valid table or false if it is not */
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 3b438d140c734..21bb795f5d97b 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -16217,7 +16217,12 @@ innodb_internal_table_update(
 		my_free(old);
 	}
 
-	fts_internal_tbl_name = *(char**) var_ptr;
+	fts_internal_tbl_name2 = *(char**) var_ptr;
+	if (fts_internal_tbl_name2 == NULL) {
+		fts_internal_tbl_name = const_cast<char*>("default");
+	} else {
+		fts_internal_tbl_name = fts_internal_tbl_name2;
+	}
 }
 
 /****************************************************************//**
@@ -18319,7 +18324,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
   "Whether to disable OS system file cache for sort I/O",
   NULL, NULL, FALSE);
 
-static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name,
+static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2,
   PLUGIN_VAR_NOCMDARG,
   "FTS internal auxiliary table to be checked",
   innodb_internal_table_validate,
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 13bc757454298..e164d7e279c90 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -213,7 +213,10 @@ innobase_need_rebuild(
 	const Alter_inplace_info*	ha_alter_info,
 	const TABLE*			altered_table)
 {
-	if (ha_alter_info->handler_flags
+	Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags =
+		ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE);
+
+	if (alter_inplace_flags
 	    == Alter_inplace_info::CHANGE_CREATE_OPTION
 	    && !(ha_alter_info->create_info->used_fields
 		 & (HA_CREATE_USED_ROW_FORMAT
@@ -3937,7 +3940,7 @@ ha_innobase::prepare_inplace_alter_table(
 	}
 
 	if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)
-	    || (ha_alter_info->handler_flags
+	    || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
 		== Alter_inplace_info::CHANGE_CREATE_OPTION
 		&& !innobase_need_rebuild(ha_alter_info, table))) {
 
@@ -4111,7 +4114,7 @@ ha_innobase::inplace_alter_table(
 		DBUG_RETURN(false);
 	}
 
-	if (ha_alter_info->handler_flags
+	if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
 	    == Alter_inplace_info::CHANGE_CREATE_OPTION
 	    && !innobase_need_rebuild(ha_alter_info, table)) {
 		goto ok_exit;
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index c5310e1369f04..50340e05860b0 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -3981,6 +3981,8 @@ i_s_fts_config_fill(
 		DBUG_RETURN(0);
 	}
 
+	DEBUG_SYNC_C("i_s_fts_config_fille_check");
+
 	fields = table->field;
 
 	/* Prevent DDL to drop fts aux tables. */
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index 87b5787d416be..3e2f359bbebef 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -375,6 +375,7 @@ extern bool		fts_need_sync;
 /** Variable specifying the table that has Fulltext index to display its
 content through information schema table */
 extern char*		fts_internal_tbl_name;
+extern char*		fts_internal_tbl_name2;
 
 #define	fts_que_graph_free(graph)			\
 do {							\
@@ -823,6 +824,15 @@ void
 fts_drop_orphaned_tables(void);
 /*==========================*/
 
+/* Get parent table name if it's a fts aux table
+@param[in]	aux_table_name	aux table name
+@param[in]	aux_table_len	length of aux_table_name
+@return copy of the parent table name (from mem_strdupl()), or NULL */
+char*
+fts_get_parent_table_name(
+	const char*	aux_table_name,
+	ulint		aux_table_len);
+
 /******************************************************************//**
 Since we do a horizontal split on the index table, we need to drop
 all the split tables.
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 7c98e2877d5fc..e6ddf45fabaa9 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -44,7 +44,7 @@ Created 1/20/1994 Heikki Tuuri
 
 #define INNODB_VERSION_MAJOR	5
 #define INNODB_VERSION_MINOR	6
-#define INNODB_VERSION_BUGFIX	32
+#define INNODB_VERSION_BUGFIX	33
 
 /* The following is the InnoDB version as shown in
 SELECT plugin_version FROM information_schema.plugins;
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 59382455cea9b..77cca37ddd130 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -613,7 +613,7 @@ row_log_table_delete(
 		&old_pk_extra_size);
 	ut_ad(old_pk_extra_size < 0x100);
 
-	mrec_size = 4 + old_pk_size;
+	mrec_size = 6 + old_pk_size;
 
 	/* Log enough prefix of the BLOB unless both the
 	old and new table are in COMPACT or REDUNDANT format,
@@ -643,8 +643,8 @@ row_log_table_delete(
 		*b++ = static_cast<byte>(old_pk_extra_size);
 
 		/* Log the size of external prefix we saved */
-		mach_write_to_2(b, ext_size);
-		b += 2;
+		mach_write_to_4(b, ext_size);
+		b += 4;
 
 		rec_convert_dtuple_to_temp(
 			b + old_pk_extra_size, new_index,
@@ -2268,14 +2268,14 @@ row_log_table_apply_op(
 		break;
 
 	case ROW_T_DELETE:
-		/* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */
-		if (mrec + 4 >= mrec_end) {
+		/* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */
+		if (mrec + 6 >= mrec_end) {
 			return(NULL);
 		}
 
 		extra_size = *mrec++;
-		ext_size = mach_read_from_2(mrec);
-		mrec += 2;
+		ext_size = mach_read_from_4(mrec);
+		mrec += 4;
 		ut_ad(mrec < mrec_end);
 
 		/* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 122c03b7cc9fe..07b841de3007e 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -2715,6 +2715,10 @@ row_drop_tables_for_mysql_in_background(void)
 		return(n_tables + n_tables_dropped);
 	}
 
+	DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep",
+		os_thread_sleep(5000000);
+	);
+
 	table = dict_table_open_on_name(drop->table_name, FALSE, FALSE,
 					DICT_ERR_IGNORE_NONE);
 
@@ -2725,6 +2729,16 @@ row_drop_tables_for_mysql_in_background(void)
 		goto already_dropped;
 	}
 
+	if (!table->to_be_dropped) {
+		/* The old table may have been dropped just after
+		it was added to the drop list, and a new table with
+		the same name created; in that case we must not
+		drop the new table in the background. */
+		dict_table_close(table, FALSE, FALSE);
+
+		goto already_dropped;
+	}
+
 	ut_a(!table->can_be_evicted);
 
 	dict_table_close(table, FALSE, FALSE);
@@ -3992,6 +4006,13 @@ row_drop_table_for_mysql(
 		}
 	}
 
+
+	DBUG_EXECUTE_IF("row_drop_table_add_to_background",
+		row_add_table_to_background_drop_list(table->name);
+		err = DB_SUCCESS;
+		goto funct_exit;
+	);
+
 	/* TODO: could we replace the counter n_foreign_key_checks_running
 	with lock checks on the table? Acquire here an exclusive lock on the
 	table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
@@ -4608,6 +4629,19 @@ row_drop_database_for_mysql(
 	row_mysql_lock_data_dictionary(trx);
 
 	while ((table_name = dict_get_first_table_name_in_db(name))) {
+		/* Drop parent table if it is a fts aux table, to
+		avoid accessing dropped fts aux tables in information
+		schema when parent table still exists.
+		Note: Dropping the parent table will drop fts aux tables. */
+		char*	parent_table_name;
+		parent_table_name = fts_get_parent_table_name(
+				table_name, strlen(table_name));
+
+		if (parent_table_name != NULL) {
+			mem_free(table_name);
+			table_name = parent_table_name;
+		}
+
 		ut_a(memcmp(table_name, name, namelen) == 0);
 
 		table = dict_table_open_on_name(
diff --git a/storage/oqgraph/graphcore.cc b/storage/oqgraph/graphcore.cc
index 4346b94805c7e..7c8ca53c0969c 100644
--- a/storage/oqgraph/graphcore.cc
+++ b/storage/oqgraph/graphcore.cc
@@ -485,7 +485,7 @@ namespace open_query
   optional<Vertex>
   oqgraph_share::find_vertex(VertexID id) const
   {
-    return ::boost::find_vertex(id, g);
+    return oqgraph3::find_vertex(id, g);
   }
 
 #if 0
diff --git a/storage/oqgraph/oqgraph_shim.h b/storage/oqgraph/oqgraph_shim.h
index af240b88ebdcb..004d7f0f7c509 100644
--- a/storage/oqgraph/oqgraph_shim.h
+++ b/storage/oqgraph/oqgraph_shim.h
@@ -274,6 +274,33 @@ namespace boost
   };
 #endif
 
+  template<>
+  struct property_map<oqgraph3::graph, edge_weight_t>
+  {
+    typedef void type;
+    typedef oqgraph3::edge_weight_property_map const_type;
+  };
+
+  template<>
+  struct property_map<oqgraph3::graph, vertex_index_t>
+  {
+    typedef void type;
+    typedef oqgraph3::vertex_index_property_map const_type;
+  };
+
+  template<>
+  struct property_map<oqgraph3::graph, edge_index_t>
+  {
+    typedef void type;
+    typedef oqgraph3::edge_index_property_map const_type;
+  };
+
+}
+
+namespace oqgraph3
+{
+  using namespace boost;
+
   inline graph_traits<oqgraph3::graph>::vertex_descriptor
   source(
       const graph_traits<oqgraph3::graph>::edge_descriptor& e,
@@ -401,27 +428,6 @@ namespace boost
     return count;
   }
 
-  template<>
-  struct property_map<oqgraph3::graph, edge_weight_t>
-  {
-    typedef void type;
-    typedef oqgraph3::edge_weight_property_map const_type;
-  };
-
-  template<>
-  struct property_map<oqgraph3::graph, vertex_index_t>
-  {
-    typedef void type;
-    typedef oqgraph3::vertex_index_property_map const_type;
-  };
-
-  template<>
-  struct property_map<oqgraph3::graph, edge_index_t>
-  {
-    typedef void type;
-    typedef oqgraph3::edge_index_property_map const_type;
-  };
-
   inline property_map<
       oqgraph3::graph,
       edge_weight_t>::const_type::reference
diff --git a/storage/perfschema/ha_perfschema.cc b/storage/perfschema/ha_perfschema.cc
index 444de87ba8ba3..2445bd80927ad 100644
--- a/storage/perfschema/ha_perfschema.cc
+++ b/storage/perfschema/ha_perfschema.cc
@@ -205,7 +205,7 @@ maria_declare_plugin(perfschema)
   0x0001,
   pfs_status_vars,
   NULL,
-  "5.6.32",
+  "5.6.33",
   MariaDB_PLUGIN_MATURITY_STABLE
 }
 maria_declare_plugin_end;
diff --git a/storage/tokudb/CMakeLists.txt b/storage/tokudb/CMakeLists.txt
index 8a9058bbb3e42..dad90fe96ebf7 100644
--- a/storage/tokudb/CMakeLists.txt
+++ b/storage/tokudb/CMakeLists.txt
@@ -1,4 +1,4 @@
-SET(TOKUDB_VERSION 5.6.31-77.0)
+SET(TOKUDB_VERSION 5.6.33-79.0)
 # PerconaFT only supports x86-64 and cmake-2.8.9+
 IF(CMAKE_VERSION VERSION_LESS "2.8.9")
   MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB")
diff --git a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
index 4b62703480f16..7ede78b3c0db3 100644
--- a/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
+++ b/storage/tokudb/PerconaFT/buildheader/make_tdb.cc
@@ -367,8 +367,8 @@ static void print_db_env_struct (void) {
                              "int (*checkpointing_get_period)             (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic checkpoints.  0 means disabled. */",
                              "int (*cleaner_set_period)                   (DB_ENV*, uint32_t) /* Change the delay between automatic cleaner attempts.  0 means disabled. */",
                              "int (*cleaner_get_period)                   (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts.  0 means disabled. */",
-                             "int (*cleaner_set_iterations)               (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation.  0 means disabled. */",
-                             "int (*cleaner_get_iterations)               (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation.  0 means disabled. */",
+                             "int (*cleaner_set_iterations)               (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invocation.  0 means disabled. */",
+                             "int (*cleaner_get_iterations)               (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invocation.  0 means disabled. */",
                              "int (*evictor_set_enable_partial_eviction)  (DB_ENV*, bool) /* Enables or disabled partial eviction of nodes from cachetable. */",
                              "int (*evictor_get_enable_partial_eviction)  (DB_ENV*, bool*) /* Retrieve the status of partial eviction of nodes from cachetable. */",
                              "int (*checkpointing_postpone)               (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */",
@@ -422,6 +422,9 @@ static void print_db_env_struct (void) {
                              "int (*set_checkpoint_pool_threads)(DB_ENV *, uint32_t)",
                              "void (*set_check_thp)(DB_ENV *, bool new_val)",
                              "bool (*get_check_thp)(DB_ENV *)",
+                             "bool (*set_dir_per_db)(DB_ENV *, bool new_val)",
+                             "bool (*get_dir_per_db)(DB_ENV *)",
+                             "const char *(*get_data_dir)(DB_ENV *env)",
                              NULL};
 
         sort_and_dump_fields("db_env", true, extra);
diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
index 4c5004cd6a538..883f35041e2fd 100644
--- a/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
+++ b/storage/tokudb/PerconaFT/cmake_modules/TokuFeatureDetection.cmake
@@ -97,7 +97,7 @@ if (NOT HAVE_BACKTRACE_WITHOUT_EXECINFO)
   endif ()
 endif ()
 
-if(HAVE_CLOCK_REALTIME)
+if(HAVE_CLOCK_REALTIME AND (NOT APPLE))
   list(APPEND EXTRA_SYSTEM_LIBS rt)
 else()
   list(APPEND EXTRA_SYSTEM_LIBS System)
@@ -109,6 +109,8 @@ check_function_exists(pthread_rwlockattr_setkind_np HAVE_PTHREAD_RWLOCKATTR_SETK
 ## check for the right way to yield using pthreads
 check_function_exists(pthread_yield HAVE_PTHREAD_YIELD)
 check_function_exists(pthread_yield_np HAVE_PTHREAD_YIELD_NP)
+## check if we have pthread_threadid_np() (i.e. osx)
+check_function_exists(pthread_threadid_np HAVE_PTHREAD_THREADID_NP)
 ## check if we have pthread_getthreadid_np() (i.e. freebsd)
 check_function_exists(pthread_getthreadid_np HAVE_PTHREAD_GETTHREADID_NP)
 check_function_exists(sched_getcpu HAVE_SCHED_GETCPU)
diff --git a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake
index 6082178da75b6..769bdffa5d99c 100644
--- a/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake
+++ b/storage/tokudb/PerconaFT/cmake_modules/TokuSetupCompiler.cmake
@@ -98,11 +98,10 @@ set_cflags_if_supported(
   -Wno-error=address-of-array-temporary
   -Wno-error=tautological-constant-out-of-range-compare
   -Wno-error=maybe-uninitialized
-  -Wno-ignored-attributes
   -Wno-error=extern-c-compat
-  -Wno-pointer-bool-conversion
   -fno-rtti
   -fno-exceptions
+  -Wno-error=nonnull-compare
   )
 ## set_cflags_if_supported_named("-Weffc++" -Weffcpp)
 
diff --git a/storage/tokudb/PerconaFT/ft/CMakeLists.txt b/storage/tokudb/PerconaFT/ft/CMakeLists.txt
index 11091073ac2a1..6696c26ecc00c 100644
--- a/storage/tokudb/PerconaFT/ft/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/ft/CMakeLists.txt
@@ -55,8 +55,8 @@ set(FT_SOURCES
   msg_buffer
   node
   pivotkeys
+  serialize/rbtree_mhs
   serialize/block_allocator
-  serialize/block_allocator_strategy
   serialize/block_table
   serialize/compress
   serialize/ft_node-serialize
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h
index dc6aec9226d8f..05fb771de0871 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h
+++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable-internal.h
@@ -138,6 +138,8 @@ struct cachefile {
     // nor attempt to open any cachefile with the same fname (dname)
     // until this cachefile has been fully closed and unlinked.
     bool unlink_on_close;
+    // If set then fclose will not be logged in recovery log.
+    bool skip_log_recover_on_close;
     int fd;       /* Bug: If a file is opened read-only, then it is stuck in read-only.  If it is opened read-write, then subsequent writers can write to it too. */
     CACHETABLE cachetable;
     struct fileid fileid;
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
index 5bba977de1a33..6d753805fa9fa 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
+++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc
@@ -467,6 +467,10 @@ toku_cachefile_fname_in_env (CACHEFILE cf) {
     return cf->fname_in_env;
 }
 
+void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env) {
+    cf->fname_in_env = new_fname_in_env;
+}
+
 int 
 toku_cachefile_get_fd (CACHEFILE cf) {
     return cf->fd;
@@ -2903,6 +2907,18 @@ bool toku_cachefile_is_unlink_on_close(CACHEFILE cf) {
     return cf->unlink_on_close;
 }
 
+void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf) {
+    cf->skip_log_recover_on_close = true;
+}
+
+void toku_cachefile_do_log_recover_on_close(CACHEFILE cf) {
+    cf->skip_log_recover_on_close = false;
+}
+
+bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf) {
+    return cf->skip_log_recover_on_close;
+}
+
 uint64_t toku_cachefile_size(CACHEFILE cf) {
     int64_t file_size;
     int fd = toku_cachefile_get_fd(cf);
diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h
index 148326562ab81..3b3cb0a2d4691 100644
--- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h
+++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.h
@@ -500,12 +500,18 @@ int toku_cachefile_get_fd (CACHEFILE);
 // Return the filename
 char * toku_cachefile_fname_in_env (CACHEFILE cf);
 
+void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env);
+
 // Make it so when the cachefile closes, the underlying file is unlinked
 void toku_cachefile_unlink_on_close(CACHEFILE cf);
 
 // is this cachefile marked as unlink on close?
 bool toku_cachefile_is_unlink_on_close(CACHEFILE cf);
 
+void toku_cachefile_skip_log_recover_on_close(CACHEFILE cf);
+void toku_cachefile_do_log_recover_on_close(CACHEFILE cf);
+bool toku_cachefile_is_skip_log_recover_on_close(CACHEFILE cf);
+
 // Return the logger associated with the cachefile
 struct tokulogger *toku_cachefile_logger(CACHEFILE cf);
 
diff --git a/storage/tokudb/PerconaFT/ft/ft-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-flusher.cc
index fb456ea6a18e3..e6452f60cfcc6 100644
--- a/storage/tokudb/PerconaFT/ft/ft-flusher.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-flusher.cc
@@ -496,7 +496,7 @@ handle_split_of_child(
 
     // We never set the rightmost blocknum to be the root.
     // Instead, we wait for the root to split and let promotion initialize the rightmost
-    // blocknum to be the first non-root leaf node on the right extreme to recieve an insert.
+    // blocknum to be the first non-root leaf node on the right extreme to receive an insert.
     BLOCKNUM rightmost_blocknum = toku_unsafe_fetch(&ft->rightmost_blocknum);
     invariant(ft->h->root_blocknum.b != rightmost_blocknum.b);
     if (childa->blocknum.b == rightmost_blocknum.b) {
@@ -1470,7 +1470,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
     // It is possible after reading in the entire child,
     // that we now know that the child is not reactive
     // if so, we can unpin parent right now
-    // we wont be splitting/merging child
+    // we won't be splitting/merging child
     // and we have already replaced the bnc
     // for the root with a fresh one
     enum reactivity child_re = toku_ftnode_get_reactivity(ft, child);
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc
index 8f61bc67339fa..30a8710d7aa1e 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc
@@ -149,22 +149,23 @@ basement nodes, bulk fetch,  and partial fetch:
 
 #include "ft/cachetable/checkpoint.h"
 #include "ft/cursor.h"
-#include "ft/ft.h"
 #include "ft/ft-cachetable-wrappers.h"
 #include "ft/ft-flusher.h"
 #include "ft/ft-internal.h"
-#include "ft/msg.h"
+#include "ft/ft.h"
 #include "ft/leafentry.h"
 #include "ft/logger/log-internal.h"
+#include "ft/msg.h"
 #include "ft/node.h"
 #include "ft/serialize/block_table.h"
-#include "ft/serialize/sub_block.h"
 #include "ft/serialize/ft-serialize.h"
 #include "ft/serialize/ft_layout_version.h"
 #include "ft/serialize/ft_node-serialize.h"
+#include "ft/serialize/sub_block.h"
 #include "ft/txn/txn_manager.h"
-#include "ft/ule.h"
 #include "ft/txn/xids.h"
+#include "ft/ule.h"
+#include "src/ydb-internal.h"
 
 #include <toku_race_tools.h>
 
@@ -179,6 +180,7 @@ basement nodes, bulk fetch,  and partial fetch:
 
 #include <stdint.h>
 
+#include <memory>
 /* Status is intended for display to humans to help understand system behavior.
  * It does not need to be perfectly thread-safe.
  */
@@ -598,15 +600,12 @@ void toku_ftnode_checkpoint_complete_callback(void *value_data) {
     }
 }
 
-void toku_ftnode_clone_callback(
-    void* value_data,
-    void** cloned_value_data,
-    long* clone_size,
-    PAIR_ATTR* new_attr,
-    bool for_checkpoint,
-    void* write_extraargs
-    )
-{
+void toku_ftnode_clone_callback(void *value_data,
+                                void **cloned_value_data,
+                                long *clone_size,
+                                PAIR_ATTR *new_attr,
+                                bool for_checkpoint,
+                                void *write_extraargs) {
     FTNODE node = static_cast<FTNODE>(value_data);
     toku_ftnode_assert_fully_in_memory(node);
     FT ft = static_cast<FT>(write_extraargs);
@@ -618,13 +617,16 @@ void toku_ftnode_clone_callback(
         toku_ftnode_leaf_rebalance(node, ft->h->basementnodesize);
     }
 
-    cloned_node->oldest_referenced_xid_known = node->oldest_referenced_xid_known;
-    cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk;
+    cloned_node->oldest_referenced_xid_known =
+        node->oldest_referenced_xid_known;
+    cloned_node->max_msn_applied_to_node_on_disk =
+        node->max_msn_applied_to_node_on_disk;
     cloned_node->flags = node->flags;
     cloned_node->blocknum = node->blocknum;
     cloned_node->layout_version = node->layout_version;
     cloned_node->layout_version_original = node->layout_version_original;
-    cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk;
+    cloned_node->layout_version_read_from_disk =
+        node->layout_version_read_from_disk;
     cloned_node->build_id = node->build_id;
     cloned_node->height = node->height;
     cloned_node->dirty = node->dirty;
@@ -649,38 +651,39 @@ void toku_ftnode_clone_callback(
     // set new pair attr if necessary
     if (node->height == 0) {
         *new_attr = make_ftnode_pair_attr(node);
-    }
-    else {
+        for (int i = 0; i < node->n_children; i++) {
+            BLB(node, i)->logical_rows_delta = 0;
+            BLB(cloned_node, i)->logical_rows_delta = 0;
+        }
+    } else {
         new_attr->is_valid = false;
     }
     *clone_size = ftnode_memory_size(cloned_node);
     *cloned_value_data = cloned_node;
 }
 
-void toku_ftnode_flush_callback(
-    CACHEFILE UU(cachefile),
-    int fd,
-    BLOCKNUM blocknum,
-    void *ftnode_v,
-    void** disk_data,
-    void *extraargs,
-    PAIR_ATTR size __attribute__((unused)),
-    PAIR_ATTR* new_size,
-    bool write_me,
-    bool keep_me,
-    bool for_checkpoint,
-    bool is_clone
-    )
-{
-    FT ft = (FT) extraargs;
-    FTNODE ftnode = (FTNODE) ftnode_v;
-    FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data;
+void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
+                                int fd,
+                                BLOCKNUM blocknum,
+                                void *ftnode_v,
+                                void **disk_data,
+                                void *extraargs,
+                                PAIR_ATTR size __attribute__((unused)),
+                                PAIR_ATTR *new_size,
+                                bool write_me,
+                                bool keep_me,
+                                bool for_checkpoint,
+                                bool is_clone) {
+    FT ft = (FT)extraargs;
+    FTNODE ftnode = (FTNODE)ftnode_v;
+    FTNODE_DISK_DATA *ndd = (FTNODE_DISK_DATA *)disk_data;
     assert(ftnode->blocknum.b == blocknum.b);
     int height = ftnode->height;
     if (write_me) {
         toku_ftnode_assert_fully_in_memory(ftnode);
         if (height > 0 && !is_clone) {
-            // cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback()
+            // cloned nodes already had their stale messages moved, see
+            // toku_ftnode_clone_callback()
             toku_move_ftnode_messages_to_stale(ft, ftnode);
         } else if (height == 0) {
             toku_ftnode_leaf_run_gc(ft, ftnode);
@@ -688,7 +691,8 @@ void toku_ftnode_flush_callback(
                 toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint);
             }
         }
-        int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint);
+        int r = toku_serialize_ftnode_to(
+            fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint);
         assert_zero(r);
         ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION;
     }
@@ -703,20 +707,22 @@ void toku_ftnode_flush_callback(
                 FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size);
             }
             toku_free(*disk_data);
-        }
-        else {
+        } else {
             if (ftnode->height == 0) {
                 for (int i = 0; i < ftnode->n_children; i++) {
-                    if (BP_STATE(ftnode,i) == PT_AVAIL) {
+                    if (BP_STATE(ftnode, i) == PT_AVAIL) {
                         BASEMENTNODE bn = BLB(ftnode, i);
-                        toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta);
+                        toku_ft_decrease_stats(&ft->in_memory_stats,
+                                               bn->stat64_delta);
+                        if (!ftnode->dirty)
+                            toku_ft_adjust_logical_row_count(
+                                ft, -bn->logical_rows_delta);
                     }
                 }
             }
         }
         toku_ftnode_free(&ftnode);
-    }
-    else {
+    } else {
         *new_size = make_ftnode_pair_attr(ftnode);
     }
 }
@@ -845,10 +851,13 @@ static void compress_internal_node_partition(FTNODE node, int i, enum toku_compr
 }
 
 // callback for partially evicting a node
-int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs,
-                            void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra) {
-    FTNODE node = (FTNODE) ftnode_pv;
-    FT ft = (FT) write_extraargs;
+int toku_ftnode_pe_callback(void *ftnode_pv,
+                            PAIR_ATTR old_attr,
+                            void *write_extraargs,
+                            void (*finalize)(PAIR_ATTR new_attr, void *extra),
+                            void *finalize_extra) {
+    FTNODE node = (FTNODE)ftnode_pv;
+    FT ft = (FT)write_extraargs;
     int num_partial_evictions = 0;
 
     // Hold things we intend to destroy here.
@@ -866,7 +875,8 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext
     }
     // Don't partially evict nodes whose partitions can't be read back
     // from disk individually
-    if (node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
+    if (node->layout_version_read_from_disk <
+        FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
         goto exit;
     }
     //
@@ -874,77 +884,77 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext
     //
     if (node->height > 0) {
         for (int i = 0; i < node->n_children; i++) {
-            if (BP_STATE(node,i) == PT_AVAIL) {
-                if (BP_SHOULD_EVICT(node,i)) {
+            if (BP_STATE(node, i) == PT_AVAIL) {
+                if (BP_SHOULD_EVICT(node, i)) {
                     NONLEAF_CHILDINFO bnc = BNC(node, i);
                     if (ft_compress_buffers_before_eviction &&
-                        // We may not serialize and compress a partition in memory if its
-                        // in memory layout version is different than what's on disk (and
-                        // therefore requires upgrade).
+                        // We may not serialize and compress a partition in
+                        // memory if its in memory layout version is different
+                        // than what's on disk (and therefore requires upgrade).
                         //
-                        // Auto-upgrade code assumes that if a node's layout version read
-                        // from disk is not current, it MUST require upgrade. Breaking
-                        // this rule would cause upgrade code to upgrade this partition
-                        // again after we serialize it as the current version, which is bad.
-                        node->layout_version == node->layout_version_read_from_disk) {
+                        // Auto-upgrade code assumes that if a node's layout
+                        // version read from disk is not current, it MUST
+                        // require upgrade.
+                        // Breaking this rule would cause upgrade code to
+                        // upgrade this partition again after we serialize it as
+                        // the current version, which is bad.
+                        node->layout_version ==
+                            node->layout_version_read_from_disk) {
                         toku_ft_bnc_move_messages_to_stale(ft, bnc);
                         compress_internal_node_partition(
                             node,
                             i,
                             // Always compress with quicklz
-                            TOKU_QUICKLZ_METHOD
-                            );
+                            TOKU_QUICKLZ_METHOD);
                     } else {
                         // We're not compressing buffers before eviction. Simply
-                        // detach the buffer and set the child's state to on-disk.
+                        // detach the buffer and set the child's state to
+                        // on-disk.
                         set_BNULL(node, i);
                         BP_STATE(node, i) = PT_ON_DISK;
                     }
                     buffers_to_destroy[num_buffers_to_destroy++] = bnc;
                     num_partial_evictions++;
+                } else {
+                    BP_SWEEP_CLOCK(node, i);
                 }
-                else {
-                    BP_SWEEP_CLOCK(node,i);
-                }
-            }
-            else {
+            } else {
                 continue;
             }
         }
-    }
-    //
-    // partial eviction strategy for basement nodes:
-    //  if the bn is compressed, evict it
-    //  else: check if it requires eviction, if it does, evict it, if not, sweep the clock count
-    //
-    else {
+    } else {
+        //
+        // partial eviction strategy for basement nodes:
+        //  if the bn is compressed, evict it
+        //  else: check if it requires eviction, if it does, evict it, if not,
+        //  sweep the clock count
+        //
         for (int i = 0; i < node->n_children; i++) {
             // Get rid of compressed stuff no matter what.
-            if (BP_STATE(node,i) == PT_COMPRESSED) {
+            if (BP_STATE(node, i) == PT_COMPRESSED) {
                 SUB_BLOCK sb = BSB(node, i);
                 pointers_to_free[num_pointers_to_free++] = sb->compressed_ptr;
                 pointers_to_free[num_pointers_to_free++] = sb;
                 set_BNULL(node, i);
-                BP_STATE(node,i) = PT_ON_DISK;
+                BP_STATE(node, i) = PT_ON_DISK;
                 num_partial_evictions++;
-            }
-            else if (BP_STATE(node,i) == PT_AVAIL) {
-                if (BP_SHOULD_EVICT(node,i)) {
+            } else if (BP_STATE(node, i) == PT_AVAIL) {
+                if (BP_SHOULD_EVICT(node, i)) {
                     BASEMENTNODE bn = BLB(node, i);
                     basements_to_destroy[num_basements_to_destroy++] = bn;
-                    toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta);
+                    toku_ft_decrease_stats(&ft->in_memory_stats,
+                                           bn->stat64_delta);
+                    toku_ft_adjust_logical_row_count(ft,
+                                                     -bn->logical_rows_delta);
                     set_BNULL(node, i);
                     BP_STATE(node, i) = PT_ON_DISK;
                     num_partial_evictions++;
+                } else {
+                    BP_SWEEP_CLOCK(node, i);
                 }
-                else {
-                    BP_SWEEP_CLOCK(node,i);
-                }
-            }
-            else if (BP_STATE(node,i) == PT_ON_DISK) {
+            } else if (BP_STATE(node, i) == PT_ON_DISK) {
                 continue;
-            }
-            else {
+            } else {
                 abort();
             }
         }
@@ -2378,12 +2388,16 @@ ft_send_update_msg(FT_HANDLE ft_h, const ft_msg &msg, TOKUTXN txn) {
     toku_ft_root_put_msg(ft_h->ft, msg, &gc_info);
 }
 
-void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra,
-                      TOKUTXN txn, bool oplsn_valid, LSN oplsn,
-                      bool do_logging) {
+void toku_ft_maybe_update(FT_HANDLE ft_h,
+                          const DBT *key,
+                          const DBT *update_function_extra,
+                          TOKUTXN txn,
+                          bool oplsn_valid,
+                          LSN oplsn,
+                          bool do_logging) {
     TXNID_PAIR xid = toku_txn_get_txnid(txn);
     if (txn) {
-        BYTESTRING keybs = { key->size, (char *) key->data };
+        BYTESTRING keybs = {key->size, (char *)key->data};
         toku_logger_save_rollback_cmdupdate(
             txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs);
         toku_txn_maybe_note_ft(txn, ft_h->ft);
@@ -2392,22 +2406,33 @@ void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_func
     TOKULOGGER logger;
     logger = toku_txn_logger(txn);
     if (do_logging && logger) {
-        BYTESTRING keybs = {.len=key->size, .data=(char *) key->data};
-        BYTESTRING extrabs = {.len=update_function_extra->size,
-                              .data = (char *) update_function_extra->data};
-        toku_log_enq_update(logger, NULL, 0, txn,
-                                toku_cachefile_filenum(ft_h->ft->cf),
-                                xid, keybs, extrabs);
+        BYTESTRING keybs = {.len = key->size, .data = (char *)key->data};
+        BYTESTRING extrabs = {.len = update_function_extra->size,
+                              .data = (char *)update_function_extra->data};
+        toku_log_enq_update(logger,
+                            NULL,
+                            0,
+                            txn,
+                            toku_cachefile_filenum(ft_h->ft->cf),
+                            xid,
+                            keybs,
+                            extrabs);
     }
 
     LSN treelsn;
-    if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
+    if (oplsn_valid &&
+        oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
         // do nothing
     } else {
-        XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids();
-        ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids);
+        XIDS message_xids =
+            txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids();
+        ft_msg msg(
+            key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids);
         ft_send_update_msg(ft_h, msg, txn);
     }
+    // updates get converted to insert messages, which should do a -1 on the
+    // logical row count when the messages are permanently applied
+    toku_ft_adjust_logical_row_count(ft_h->ft, 1);
 }
 
 void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra,
@@ -2570,12 +2595,104 @@ static inline int ft_open_maybe_direct(const char *filename, int oflag, int mode
 
 static const mode_t file_mode = S_IRUSR+S_IWUSR+S_IRGRP+S_IWGRP+S_IROTH+S_IWOTH;
 
+inline bool toku_file_is_root(const char *path, const char *last_slash) {
+    return last_slash == path;  // true when last_slash is path's first char
+}
+
+static std::unique_ptr<char[], decltype(&toku_free)> toku_file_get_parent_dir(
+    const char *path) {  /* parent dir of path; null if none or root */
+    std::unique_ptr<char[], decltype(&toku_free)> result(nullptr, &toku_free);
+
+    bool has_trailing_slash = false;
+
+    /* Find the last slash in the path. */
+    const char *last_slash = strrchr(path, OS_PATH_SEPARATOR);
+
+    if (!last_slash) {
+        /* No slash in the path: there is no parent, return null. */
+        return result;
+    }
+
+    /* Ok, there is a slash. Is it the final character of the path? */
+    if (static_cast<size_t>(last_slash - path + 1) == strlen(path)) {
+        has_trailing_slash = true;
+    }
+
+    /* Collapse repetitive slashes: move to the first slash of the run. */
+    while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) {
+        last_slash--;
+    }
+
+    /* The slash is the first character of the path: treat it as root. */
+    if (toku_file_is_root(path, last_slash)) {
+        return result;
+    }
+
+    /* If a trailing slash prevented the first strrchr() from trimming
+       the last component of the path, trim that component now. */
+    if (has_trailing_slash) {
+        /* Back up to the previous slash (or to the start of the path). */
+        last_slash--;
+        while (last_slash > path && last_slash[0] != OS_PATH_SEPARATOR) {
+            last_slash--;
+        }
+
+        /* Collapse repetitive slashes again. */
+        while (last_slash > path && last_slash[-1] == OS_PATH_SEPARATOR) {
+            last_slash--;
+        }
+    }
+
+    /* Re-check after trimming: nothing is left in front of last_slash. */
+    if (toku_file_is_root(path, last_slash)) {
+        return result;
+    }
+
+    result.reset(toku_strndup(path, last_slash - path));
+    return result;
+}
+
+static bool toku_create_subdirs_if_needed(const char *path) {
+    // Creates path's missing parent dirs. On false the caller reads errno.
+    static const mode_t dir_mode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP |
+                                   S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH;
+    toku_struct_stat stat;
+    bool subdir_exists = true;
+    auto subdir = toku_file_get_parent_dir(path);
+
+    if (!subdir.get())
+        return true;  // no parent component, nothing to create
+
+    if (toku_stat(subdir.get(), &stat) == -1) {
+        if (ENOENT == get_error_errno())
+            subdir_exists = false;
+        else
+            return false;
+    }
+
+    if (subdir_exists) {
+        if (S_ISDIR(stat.st_mode))
+            return true;
+        errno = ENOTDIR;  // no call failed above, so report the cause here
+        return false;
+    }
+    // Create the ancestors first, then the parent itself.
+    if (!toku_create_subdirs_if_needed(subdir.get()))
+        return false;
+
+    if (toku_os_mkdir(subdir.get(), dir_mode))
+        return false;
+
+    return true;
+}
+
 // open a file for use by the ft
 // Requires:  File does not exist.
 static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) {
     int r;
     int fd;
     int er;
+    if (!toku_create_subdirs_if_needed(fname))
+        return get_error_errno();
     fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode);
     assert(fd==-1);
     if ((er = get_maybe_error_errno()) != ENOENT) {
@@ -4404,6 +4521,55 @@ void toku_ft_unlink(FT_HANDLE handle) {
     toku_cachefile_unlink_on_close(cf);
 }
 
+int toku_ft_rename_iname(DB_TXN *txn,
+                         const char *data_dir,
+                         const char *old_iname,
+                         const char *new_iname,
+                         CACHETABLE ct) {
+    int r = 0;
+    // Renames old_iname to new_iname; full paths are built from data_dir.
+    std::unique_ptr<char[], decltype(&toku_free)> new_iname_full(nullptr,
+                                                                 &toku_free);
+    std::unique_ptr<char[], decltype(&toku_free)> old_iname_full(nullptr,
+                                                                 &toku_free);
+
+    new_iname_full.reset(toku_construct_full_name(2, data_dir, new_iname));
+    old_iname_full.reset(toku_construct_full_name(2, data_dir, old_iname));
+    // With a txn: retarget the cachefile found for old_iname, then log.
+    if (txn) {
+        BYTESTRING bs_old_name = {static_cast<uint32_t>(strlen(old_iname) + 1),
+                                  const_cast<char *>(old_iname)};
+        BYTESTRING bs_new_name = {static_cast<uint32_t>(strlen(new_iname) + 1),
+                                  const_cast<char *>(new_iname)};
+        FILENUM filenum = FILENUM_NONE;  // logged as-is if no cachefile found
+        {
+            CACHEFILE cf;
+            r = toku_cachefile_of_iname_in_env(ct, old_iname, &cf);
+            if (r != ENOENT) {  // NOTE(review): cf unset on other errors?
+                char *old_fname_in_cf = toku_cachefile_fname_in_env(cf);
+                toku_cachefile_set_fname_in_env(cf, toku_xstrdup(new_iname));
+                toku_free(old_fname_in_cf);  // old name was replaced above
+                filenum = toku_cachefile_filenum(cf);
+            }
+        }
+        toku_logger_save_rollback_frename(
+            db_txn_struct_i(txn)->tokutxn, &bs_old_name, &bs_new_name);
+        toku_log_frename(db_txn_struct_i(txn)->tokutxn->logger,
+                         (LSN *)0,
+                         0,
+                         toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn),
+                         bs_old_name,
+                         filenum,
+                         bs_new_name);
+    }
+    // Returns a non-zero rename result, else toku_fsync_directory()'s result.
+    r = toku_os_rename(old_iname_full.get(), new_iname_full.get());
+    if (r != 0)
+        return r;
+    r = toku_fsync_directory(new_iname_full.get());
+    return r;
+}
+
 int toku_ft_get_fragmentation(FT_HANDLE ft_handle, TOKU_DB_FRAGMENTATION report) {
     int fd = toku_cachefile_get_fd(ft_handle->ft->cf);
     toku_ft_lock(ft_handle->ft);
diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.h b/storage/tokudb/PerconaFT/ft/ft-ops.h
index 313a74628ea17..70cf045d43c1c 100644
--- a/storage/tokudb/PerconaFT/ft/ft-ops.h
+++ b/storage/tokudb/PerconaFT/ft/ft-ops.h
@@ -48,6 +48,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #include "ft/msg.h"
 #include "util/dbt.h"
 
+#define OS_PATH_SEPARATOR '/'
+
 typedef struct ft_handle *FT_HANDLE;
 
 int toku_open_ft_handle (const char *fname, int is_create, FT_HANDLE *, int nodesize, int basementnodesize, enum toku_compression_method compression_method, CACHETABLE, TOKUTXN, int(*)(DB *,const DBT*,const DBT*)) __attribute__ ((warn_unused_result));
diff --git a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
index adac96f488213..e31d80772d56e 100644
--- a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
+++ b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc
@@ -73,30 +73,20 @@ static bool recount_rows_interrupt(void* extra, uint64_t deleted_rows) {
     return rre->_cancelled =
         rre->_progress_callback(rre->_keys, deleted_rows, rre->_progress_extra);
 }
-int toku_ft_recount_rows(
-    FT_HANDLE ft,
-    int (*progress_callback)(
-        uint64_t count,
-        uint64_t deleted,
-        void* progress_extra),
-    void* progress_extra) {
-
+int toku_ft_recount_rows(FT_HANDLE ft,
+                         int (*progress_callback)(uint64_t count,
+                                                  uint64_t deleted,
+                                                  void* progress_extra),
+                         void* progress_extra) {
     int ret = 0;
-    recount_rows_extra_t rre = {
-        progress_callback,
-        progress_extra,
-        0,
-        false
-        };
+    recount_rows_extra_t rre = {progress_callback, progress_extra, 0, false};
 
     ft_cursor c;
     ret = toku_ft_cursor_create(ft, &c, nullptr, C_READ_ANY, false, false);
-    if (ret) return ret;
+    if (ret)
+        return ret;
 
-    toku_ft_cursor_set_check_interrupt_cb(
-        &c,
-        recount_rows_interrupt,
-        &rre);
+    toku_ft_cursor_set_check_interrupt_cb(&c, recount_rows_interrupt, &rre);
 
     ret = toku_ft_cursor_first(&c, recount_rows_found, &rre);
     while (FT_LIKELY(ret == 0)) {
@@ -108,6 +98,7 @@ int toku_ft_recount_rows(
     if (rre._cancelled == false) {
         // update ft count
         toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys);
+        ft->ft->h->dirty = 1;
         ret = 0;
     }
 
diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc
index 93d21233bf73d..7c94b4c59d345 100644
--- a/storage/tokudb/PerconaFT/ft/ft.cc
+++ b/storage/tokudb/PerconaFT/ft/ft.cc
@@ -253,7 +253,19 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val
             char* fname_in_env = toku_cachefile_fname_in_env(cachefile);
             assert(fname_in_env);
             BYTESTRING bs = {.len=(uint32_t) strlen(fname_in_env), .data=fname_in_env};
-            toku_log_fclose(logger, &lsn, ft->h->dirty, bs, toku_cachefile_filenum(cachefile)); // flush the log on close (if new header is being written), otherwise it might not make it out.
+            if (!toku_cachefile_is_skip_log_recover_on_close(cachefile)) {
+                toku_log_fclose(
+                    logger,
+                    &lsn,
+                    ft->h->dirty,
+                    bs,
+                    toku_cachefile_filenum(cachefile));  // flush the log on
+                                                         // close (if new header
+                                                         // is being written),
+                                                         // otherwise it might
+                                                         // not make it out.
+                toku_cachefile_do_log_recover_on_close(cachefile);
+            }
         }
     }
     if (ft->h->dirty) {               // this is the only place this bit is tested (in currentheader)
@@ -903,6 +915,9 @@ void toku_ft_adjust_logical_row_count(FT ft, int64_t delta) {
     // must be returned in toku_ft_stat64.
     if (delta != 0 && ft->in_memory_logical_rows != (uint64_t)-1) {
         toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), delta);
+        if (ft->in_memory_logical_rows == (uint64_t)-1) {
+            toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), 1);
+        }
     }
 }
 
diff --git a/storage/tokudb/PerconaFT/ft/ft.h b/storage/tokudb/PerconaFT/ft/ft.h
index d600e093bdcf0..7a3c4fa783cb9 100644
--- a/storage/tokudb/PerconaFT/ft/ft.h
+++ b/storage/tokudb/PerconaFT/ft/ft.h
@@ -53,6 +53,12 @@ typedef struct ft_options *FT_OPTIONS;
 void toku_ft_unlink(FT_HANDLE handle);
 void toku_ft_unlink_on_commit(FT_HANDLE handle, TOKUTXN txn);
 
+int toku_ft_rename_iname(DB_TXN *txn,
+                         const char *data_dir,
+                         const char *old_iname,
+                         const char *new_iname,
+                         CACHETABLE ct);
+
 void toku_ft_init_reflock(FT ft);
 void toku_ft_destroy_reflock(FT ft);
 void toku_ft_grab_reflock(FT ft);
diff --git a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h
index dd070373e26d8..1aa2c20383150 100644
--- a/storage/tokudb/PerconaFT/ft/loader/loader-internal.h
+++ b/storage/tokudb/PerconaFT/ft/loader/loader-internal.h
@@ -301,7 +301,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
 
 void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error);
 
-// For test purposes only.  (In production, the rowset size is determined by negotation with the cachetable for some memory.  See #2613.)
+// For test purposes only.  (In production, the rowset size is determined by negotiation with the cachetable for some memory.  See #2613.)
 uint64_t toku_ft_loader_get_rowset_budget_for_testing (void);
 
 int toku_ft_loader_finish_extractor(FTLOADER bl);
diff --git a/storage/tokudb/PerconaFT/ft/loader/loader.cc b/storage/tokudb/PerconaFT/ft/loader/loader.cc
index 20f9363da1efa..528c86a8f79bf 100644
--- a/storage/tokudb/PerconaFT/ft/loader/loader.cc
+++ b/storage/tokudb/PerconaFT/ft/loader/loader.cc
@@ -91,7 +91,7 @@ toku_ft_loader_set_size_factor(uint32_t factor) {
 
 uint64_t
 toku_ft_loader_get_rowset_budget_for_testing (void)
-// For test purposes only.  In production, the rowset size is determined by negotation with the cachetable for some memory.  (See #2613).
+// For test purposes only.  In production, the rowset size is determined by negotiation with the cachetable for some memory.  (See #2613).
 {
     return 16ULL*size_factor*1024ULL;
 }
diff --git a/storage/tokudb/PerconaFT/ft/logger/logformat.cc b/storage/tokudb/PerconaFT/ft/logger/logformat.cc
index 6f3baa81c86b2..49b611388038d 100644
--- a/storage/tokudb/PerconaFT/ft/logger/logformat.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/logformat.cc
@@ -90,6 +90,10 @@ const struct logtype rollbacks[] = {
     {"fcreate", 'F', FA{{"FILENUM", "filenum", 0},
                         {"BYTESTRING", "iname", 0},
                         NULLFIELD}, LOG_BEGIN_ACTION_NA},
+    //rename file
+    {"frename", 'n', FA{{"BYTESTRING",    "old_iname", 0},
+                        {"BYTESTRING",    "new_iname", 0},
+                        NULLFIELD}, LOG_BEGIN_ACTION_NA},
     // cmdinsert is used to insert a key-value pair into a DB.  For rollback we don't need the data.
     {"cmdinsert", 'i', FA{
                           {"FILENUM", "filenum", 0},
@@ -195,6 +199,11 @@ const struct logtype logtypes[] = {
     {"fdelete", 'U', FA{{"TXNID_PAIR",      "xid", 0},
                         {"FILENUM", "filenum", 0},
                         NULLFIELD}, SHOULD_LOG_BEGIN},
+    {"frename", 'n', FA{{"TXNID_PAIR",      "xid", 0},
+                        {"BYTESTRING",    "old_iname", 0},
+                        {"FILENUM",       "old_filenum",   0},
+                        {"BYTESTRING",    "new_iname", 0},
+                        NULLFIELD}, IGNORE_LOG_BEGIN},
     {"enq_insert", 'I', FA{{"FILENUM",    "filenum", 0},
                            {"TXNID_PAIR",      "xid", 0},
                            {"BYTESTRING", "key", 0},
diff --git a/storage/tokudb/PerconaFT/ft/logger/recover.cc b/storage/tokudb/PerconaFT/ft/logger/recover.cc
index 38f29773bd630..a9c30c0e37a91 100644
--- a/storage/tokudb/PerconaFT/ft/logger/recover.cc
+++ b/storage/tokudb/PerconaFT/ft/logger/recover.cc
@@ -36,6 +36,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
 
+#include <memory>
 #include "ft/cachetable/cachetable.h"
 #include "ft/cachetable/checkpoint.h"
 #include "ft/ft.h"
@@ -935,6 +936,83 @@ static int toku_recover_backward_fdelete (struct logtype_fdelete *UU(l), RECOVER
     return 0;
 }
 
+static int toku_recover_frename(struct logtype_frename *l, RECOVER_ENV renv) {
+    assert(renv);
+    assert(renv->env);
+    // Redo the logged rename old_iname -> new_iname; returns 0 on success, else 1.
+    toku_struct_stat stat;
+    const char *data_dir = renv->env->get_data_dir(renv->env);
+    bool old_exist = true;
+    bool new_exist = true;
+
+    assert(data_dir);
+
+    struct file_map_tuple *tuple;
+    // Full names built from data_dir; toku_free'd automatically on return.
+    std::unique_ptr<char[], decltype(&toku_free)> old_iname_full(
+        toku_construct_full_name(2, data_dir, l->old_iname.data), &toku_free);
+    std::unique_ptr<char[], decltype(&toku_free)> new_iname_full(
+        toku_construct_full_name(2, data_dir, l->new_iname.data), &toku_free);
+    // ENOENT just means the file is absent; any other stat failure is an error.
+    if (toku_stat(old_iname_full.get(), &stat) == -1) {
+        if (ENOENT == errno)
+            old_exist = false;
+        else
+            return 1;
+    }
+
+    if (toku_stat(new_iname_full.get(), &stat) == -1) {
+        if (ENOENT == errno)
+            new_exist = false;
+        else
+            return 1;
+    }
+
+    // Both the old and the new file can exist at this point if:
+    // - the rename() had not completed, or
+    // - an fcreate was replayed earlier during recovery.
+    // The 'stalled cachefiles' container (cachefile_list::m_stale_fileid)
+    // holds closed but not yet evicted cachefiles, keyed by an fs-dependent
+    // file id - a (device id, inode number) pair. Since the new file is
+    // assumed not to have been created yet during recovery, that container
+    // can only hold the cachefile of the old file.
+    // To preserve the old file's id, and so keep its cachefile valid in the
+    // 'stalled cachefiles' container, the new file is removed and the old
+    // file is renamed over it.
+    if (old_exist && new_exist &&
+        (toku_os_unlink(new_iname_full.get()) == -1 ||
+         toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 ||
+         toku_fsync_directory(old_iname_full.get()) == -1 ||
+         toku_fsync_directory(new_iname_full.get()) == -1))
+        return 1;
+    // Only the old file exists: just perform the rename and the fsyncs.
+    if (old_exist && !new_exist &&
+        (toku_os_rename(old_iname_full.get(), new_iname_full.get()) == -1 ||
+         toku_fsync_directory(old_iname_full.get()) == -1 ||
+         toku_fsync_directory(new_iname_full.get()) == -1))
+        return 1;
+    // If old_filenum is in renv->fmap, make its entry carry the new iname.
+    if (file_map_find(&renv->fmap, l->old_filenum, &tuple) != DB_NOTFOUND) {
+        if (tuple->iname)
+            toku_free(tuple->iname);
+        tuple->iname = toku_xstrdup(l->new_iname.data);
+    }
+    // Record a rollback entry for the rename if xid resolves to a transaction.
+    TOKUTXN txn = NULL;
+    toku_txnid2txn(renv->logger, l->xid, &txn);
+
+    if (txn)
+        toku_logger_save_rollback_frename(txn, &l->old_iname, &l->new_iname);
+
+    return 0;
+}
+
+static int toku_recover_backward_frename(struct logtype_frename *UU(l),
+                                         RECOVER_ENV UU(renv)) {
+    // Intentionally a no-op: both arguments are unused and 0 is returned.
+    return 0;
+}
+
 static int toku_recover_enq_insert (struct logtype_enq_insert *l, RECOVER_ENV renv) {
     int r;
     TOKUTXN txn = NULL;
diff --git a/storage/tokudb/PerconaFT/ft/node.cc b/storage/tokudb/PerconaFT/ft/node.cc
index 58ba675eb7c24..12e5fda226e95 100644
--- a/storage/tokudb/PerconaFT/ft/node.cc
+++ b/storage/tokudb/PerconaFT/ft/node.cc
@@ -373,52 +373,48 @@ find_bounds_within_message_tree(
     }
 }
 
-/**
- * For each message in the ancestor's buffer (determined by childnum) that
- * is key-wise between lower_bound_exclusive and upper_bound_inclusive,
- * apply the message to the basement node.  We treat the bounds as minus
- * or plus infinity respectively if they are NULL.  Do not mark the node
- * as dirty (preserve previous state of 'dirty' bit).
- */
+// For each message in the ancestor's buffer (determined by childnum) that
+// is key-wise between lower_bound_exclusive and upper_bound_inclusive,
+// apply the message to the basement node.  We treat the bounds as minus
+// or plus infinity respectively if they are NULL.  Do not mark the node
+// as dirty (preserve previous state of 'dirty' bit).
 static void bnc_apply_messages_to_basement_node(
-    FT_HANDLE t,             // used for comparison function
-    BASEMENTNODE bn,   // where to apply messages
+    FT_HANDLE t,      // used for comparison function
+    BASEMENTNODE bn,  // where to apply messages
     FTNODE ancestor,  // the ancestor node where we can find messages to apply
-    int childnum,      // which child buffer of ancestor contains messages we want
-    const pivot_bounds &bounds,  // contains pivot key bounds of this basement node
-    txn_gc_info* gc_info,
-    bool* msgs_applied) {
-
+    int childnum,  // which child buffer of ancestor contains messages we want
+    const pivot_bounds &
+        bounds,  // contains pivot key bounds of this basement node
+    txn_gc_info *gc_info,
+    bool *msgs_applied) {
     int r;
     NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum);
 
     // Determine the offsets in the message trees between which we need to
     // apply messages from this buffer
-    STAT64INFO_S stats_delta = {0,0};
+    STAT64INFO_S stats_delta = {0, 0};
     uint64_t workdone_this_ancestor = 0;
     int64_t logical_rows_delta = 0;
 
     uint32_t stale_lbi, stale_ube;
     if (!bn->stale_ancestor_messages_applied) {
-        find_bounds_within_message_tree(
-            t->ft->cmp,
-            bnc->stale_message_tree,
-            &bnc->msg_buffer,
-            bounds,
-            &stale_lbi,
-            &stale_ube);
+        find_bounds_within_message_tree(t->ft->cmp,
+                                        bnc->stale_message_tree,
+                                        &bnc->msg_buffer,
+                                        bounds,
+                                        &stale_lbi,
+                                        &stale_ube);
     } else {
         stale_lbi = 0;
         stale_ube = 0;
     }
     uint32_t fresh_lbi, fresh_ube;
-    find_bounds_within_message_tree(
-        t->ft->cmp,
-        bnc->fresh_message_tree,
-        &bnc->msg_buffer,
-        bounds,
-        &fresh_lbi,
-        &fresh_ube);
+    find_bounds_within_message_tree(t->ft->cmp,
+                                    bnc->fresh_message_tree,
+                                    &bnc->msg_buffer,
+                                    bounds,
+                                    &fresh_lbi,
+                                    &fresh_ube);
 
     // We now know where all the messages we must apply are, so one of the
     // following 4 cases will do the application, depending on which of
@@ -432,44 +428,53 @@ static void bnc_apply_messages_to_basement_node(
         // We have messages in multiple trees, so we grab all
         // the relevant messages' offsets and sort them by MSN, then apply
         // them in MSN order.
-        const int buffer_size = ((stale_ube - stale_lbi) +
-                                 (fresh_ube - fresh_lbi) +
-                                 bnc->broadcast_list.size());
+        const int buffer_size =
+            ((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) +
+             bnc->broadcast_list.size());
         toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t));
         int32_t *offsets = reinterpret_cast<int32_t *>(offsets_buf.get());
-        struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 };
+        struct store_msg_buffer_offset_extra sfo_extra = {.offsets = offsets,
+                                                          .i = 0};
 
         // Populate offsets array with offsets to stale messages
-        r = bnc->stale_message_tree.iterate_on_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(stale_lbi, stale_ube, &sfo_extra);
+        r = bnc->stale_message_tree
+                .iterate_on_range<struct store_msg_buffer_offset_extra,
+                                  store_msg_buffer_offset>(
+                    stale_lbi, stale_ube, &sfo_extra);
         assert_zero(r);
 
         // Then store fresh offsets, and mark them to be moved to stale later.
-        r = bnc->fresh_message_tree.iterate_and_mark_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(fresh_lbi, fresh_ube, &sfo_extra);
+        r = bnc->fresh_message_tree
+                .iterate_and_mark_range<struct store_msg_buffer_offset_extra,
+                                        store_msg_buffer_offset>(
+                    fresh_lbi, fresh_ube, &sfo_extra);
         assert_zero(r);
 
         // Store offsets of all broadcast messages.
-        r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(&sfo_extra);
+        r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra,
+                                        store_msg_buffer_offset>(&sfo_extra);
         assert_zero(r);
         invariant(sfo_extra.i == buffer_size);
 
         // Sort by MSN.
-        toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::mergesort_r(offsets, buffer_size, bnc->msg_buffer);
+        toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::
+            mergesort_r(offsets, buffer_size, bnc->msg_buffer);
 
         // Apply the messages in MSN order.
         for (int i = 0; i < buffer_size; ++i) {
             *msgs_applied = true;
-            do_bn_apply_msg(
-                t,
-                bn,
-                &bnc->msg_buffer,
-                offsets[i],
-                gc_info,
-                &workdone_this_ancestor,
-                &stats_delta,
-                &logical_rows_delta);
+            do_bn_apply_msg(t,
+                            bn,
+                            &bnc->msg_buffer,
+                            offsets[i],
+                            gc_info,
+                            &workdone_this_ancestor,
+                            &stats_delta,
+                            &logical_rows_delta);
         }
     } else if (stale_lbi == stale_ube) {
-        // No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later.
+        // No stale messages to apply, we just apply fresh messages, and mark
+        // them to be moved to stale later.
         struct iterate_do_bn_apply_msg_extra iter_extra = {
             .t = t,
             .bn = bn,
@@ -477,16 +482,20 @@ static void bnc_apply_messages_to_basement_node(
             .gc_info = gc_info,
             .workdone = &workdone_this_ancestor,
             .stats_to_update = &stats_delta,
-            .logical_rows_delta = &logical_rows_delta
-        };
-        if (fresh_ube - fresh_lbi > 0) *msgs_applied = true;
-        r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(fresh_lbi, fresh_ube, &iter_extra);
+            .logical_rows_delta = &logical_rows_delta};
+        if (fresh_ube - fresh_lbi > 0)
+            *msgs_applied = true;
+        r = bnc->fresh_message_tree
+                .iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra,
+                                        iterate_do_bn_apply_msg>(
+                    fresh_lbi, fresh_ube, &iter_extra);
         assert_zero(r);
     } else {
         invariant(fresh_lbi == fresh_ube);
         // No fresh messages to apply, we just apply stale messages.
 
-        if (stale_ube - stale_lbi > 0) *msgs_applied = true;
+        if (stale_ube - stale_lbi > 0)
+            *msgs_applied = true;
         struct iterate_do_bn_apply_msg_extra iter_extra = {
             .t = t,
             .bn = bn,
@@ -494,22 +503,26 @@ static void bnc_apply_messages_to_basement_node(
             .gc_info = gc_info,
             .workdone = &workdone_this_ancestor,
             .stats_to_update = &stats_delta,
-            .logical_rows_delta = &logical_rows_delta
-        };
+            .logical_rows_delta = &logical_rows_delta};
 
-        r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(stale_lbi, stale_ube, &iter_extra);
+        r = bnc->stale_message_tree
+                .iterate_on_range<struct iterate_do_bn_apply_msg_extra,
+                                  iterate_do_bn_apply_msg>(
+                    stale_lbi, stale_ube, &iter_extra);
         assert_zero(r);
     }
     //
     // update stats
     //
     if (workdone_this_ancestor > 0) {
-        (void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor);
+        (void)toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum),
+                                      workdone_this_ancestor);
     }
     if (stats_delta.numbytes || stats_delta.numrows) {
         toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
     }
     toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
+    bn->logical_rows_delta += logical_rows_delta;
 }
 
 static void
diff --git a/storage/tokudb/PerconaFT/ft/node.h b/storage/tokudb/PerconaFT/ft/node.h
index ad0298e81c5de..52eefec0936ae 100644
--- a/storage/tokudb/PerconaFT/ft/node.h
+++ b/storage/tokudb/PerconaFT/ft/node.h
@@ -199,6 +199,7 @@ struct ftnode_leaf_basement_node {
     MSN max_msn_applied;            // max message sequence number applied
     bool stale_ancestor_messages_applied;
     STAT64INFO_S stat64_delta;      // change in stat64 counters since basement was last written to disk
+    int64_t logical_rows_delta;
 };
 typedef struct ftnode_leaf_basement_node *BASEMENTNODE;
 
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc
index 1355f3739ee1b..19811373d1636 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.cc
@@ -46,415 +46,214 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #include "portability/toku_stdlib.h"
 
 #include "ft/serialize/block_allocator.h"
-#include "ft/serialize/block_allocator_strategy.h"
+#include "ft/serialize/rbtree_mhs.h"
 
 #if TOKU_DEBUG_PARANOID
-#define VALIDATE() validate()
+#define VALIDATE() Validate()
 #else
 #define VALIDATE()
 #endif
 
-static FILE *ba_trace_file = nullptr;
-
-void block_allocator::maybe_initialize_trace(void) {
-    const char *ba_trace_path = getenv("TOKU_BA_TRACE_PATH");        
-    if (ba_trace_path != nullptr) {
-        ba_trace_file = toku_os_fopen(ba_trace_path, "w");
-        if (ba_trace_file == nullptr) {
-            fprintf(stderr, "tokuft: error: block allocator trace path found in environment (%s), "
-                            "but it could not be opened for writing (errno %d)\n",
-                            ba_trace_path, get_maybe_error_errno());
-        } else {
-            fprintf(stderr, "tokuft: block allocator tracing enabled, path: %s\n", ba_trace_path);
-        }
-    }
-}
-
-void block_allocator::maybe_close_trace() {
-    if (ba_trace_file != nullptr) {
-        int r = toku_os_fclose(ba_trace_file);
-        if (r != 0) {
-            fprintf(stderr, "tokuft: error: block allocator trace file did not close properly (r %d, errno %d)\n",
-                            r, get_maybe_error_errno());
-        } else {
-            fprintf(stderr, "tokuft: block allocator tracing finished, file closed successfully\n");
-        }
-    }
-}
-
-void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) {
-    // the alignment must be at least 512 and aligned with 512 to work with direct I/O
-    assert(alignment >= 512 && (alignment % 512) == 0);
+void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning,
+                                    uint64_t alignment) {
+    // the alignment must be at least 512 and aligned with 512 to work with
+    // direct I/O
+    invariant(alignment >= 512 && (alignment % 512) == 0);
 
     _reserve_at_beginning = reserve_at_beginning;
     _alignment = alignment;
     _n_blocks = 0;
-    _blocks_array_size = 1;
-    XMALLOC_N(_blocks_array_size, _blocks_array);
     _n_bytes_in_use = reserve_at_beginning;
-    _strategy = BA_STRATEGY_FIRST_FIT;
-
-    memset(&_trace_lock, 0, sizeof(toku_mutex_t));
-    toku_mutex_init(&_trace_lock, nullptr);
+    _tree = new MhsRbTree::Tree(alignment);
+}
 
+void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) {
+    CreateInternal(reserve_at_beginning, alignment);
+    _tree->Insert({reserve_at_beginning, MAX_BYTE});
     VALIDATE();
 }
 
-void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) {
-    _create_internal(reserve_at_beginning, alignment);
-    _trace_create();
+void BlockAllocator::Destroy() {
+    delete _tree;
 }
 
-void block_allocator::destroy() {
-    toku_free(_blocks_array);
-    _trace_destroy();
-    toku_mutex_destroy(&_trace_lock);
-}
+void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning,
+                                          uint64_t alignment,
+                                          struct BlockPair *translation_pairs,
+                                          uint64_t n_blocks) {
+    CreateInternal(reserve_at_beginning, alignment);
+    _n_blocks = n_blocks;
 
-void block_allocator::set_strategy(enum allocation_strategy strategy) {
-    _strategy = strategy;
-}
+    struct BlockPair *XMALLOC_N(n_blocks, pairs);
+    memcpy(pairs, translation_pairs, n_blocks * sizeof(struct BlockPair));
+    std::sort(pairs, pairs + n_blocks);
 
-void block_allocator::grow_blocks_array_by(uint64_t n_to_add) {
-    if (_n_blocks + n_to_add > _blocks_array_size) {
-        uint64_t new_size = _n_blocks + n_to_add;
-        uint64_t at_least = _blocks_array_size * 2;
-        if (at_least > new_size) {
-            new_size = at_least;
-        }
-        _blocks_array_size = new_size;
-        XREALLOC_N(_blocks_array_size, _blocks_array);
+    if (pairs[0]._offset > reserve_at_beginning) {
+        _tree->Insert(
+            {reserve_at_beginning, pairs[0]._offset - reserve_at_beginning});
     }
-}
-
-void block_allocator::grow_blocks_array() {
-    grow_blocks_array_by(1);
-}
-
-void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
-                                             struct blockpair *pairs, uint64_t n_blocks) {
-    _create_internal(reserve_at_beginning, alignment);
-
-    _n_blocks = n_blocks;
-    grow_blocks_array_by(_n_blocks);
-    memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair));
-    std::sort(_blocks_array, _blocks_array + _n_blocks);
     for (uint64_t i = 0; i < _n_blocks; i++) {
-        // Allocator does not support size 0 blocks. See block_allocator_free_block.
-        invariant(_blocks_array[i].size > 0);
-        invariant(_blocks_array[i].offset >= _reserve_at_beginning);
-        invariant(_blocks_array[i].offset % _alignment == 0);
-
-        _n_bytes_in_use += _blocks_array[i].size;
+        // Allocator does not support size 0 blocks. See
+        // BlockAllocator::FreeBlock.
+        invariant(pairs[i]._size > 0);
+        invariant(pairs[i]._offset >= _reserve_at_beginning);
+        invariant(pairs[i]._offset % _alignment == 0);
+
+        _n_bytes_in_use += pairs[i]._size;
+
+        MhsRbTree::OUUInt64 free_size(MAX_BYTE);
+        MhsRbTree::OUUInt64 free_offset(pairs[i]._offset + pairs[i]._size);
+        if (i < n_blocks - 1) {
+            MhsRbTree::OUUInt64 next_offset(pairs[i + 1]._offset);
+            invariant(next_offset >= free_offset);
+            free_size = next_offset - free_offset;
+            if (free_size == 0)
+                continue;
+        }
+        _tree->Insert({free_offset, free_size});
     }
-
+    toku_free(pairs);
     VALIDATE();
-
-    _trace_create_from_blockpairs();
 }
 
 // Effect: align a value by rounding up.
-static inline uint64_t align(uint64_t value, uint64_t ba_alignment) {
+static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) {
     return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
 }
 
-struct block_allocator::blockpair *
-block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) {
-    switch (_strategy) {
-    case BA_STRATEGY_FIRST_FIT:
-        return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment);
-    case BA_STRATEGY_BEST_FIT:
-        return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment);
-    case BA_STRATEGY_HEAT_ZONE:
-        return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat);
-    case BA_STRATEGY_PADDED_FIT:
-        return block_allocator_strategy::padded_fit(_blocks_array, _n_blocks, size, _alignment);
-    default:
-        abort();
-    }
-}
-
-// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512).
-void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) {
-    struct blockpair *bp;
-
+// Effect: Allocate a block. The resulting block must be aligned on the
+// ba->alignment (which to make direct_io happy must be a positive multiple of
+// 512).
+void BlockAllocator::AllocBlock(uint64_t size,
+                                uint64_t *offset) {
     // Allocator does not support size 0 blocks. See block_allocator_free_block.
     invariant(size > 0);
 
-    grow_blocks_array();
     _n_bytes_in_use += size;
+    *offset = _tree->Remove(size);
 
-    uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment);
-
-    if (_n_blocks == 0) {
-        // First and only block
-        assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use
-        _blocks_array[0].offset = align(_reserve_at_beginning, _alignment);
-        _blocks_array[0].size = size;
-        *offset = _blocks_array[0].offset;
-        goto done;
-    } else if (end_of_reserve + size <= _blocks_array[0].offset ) {
-        // Check to see if the space immediately after the reserve is big enough to hold the new block.
-        bp = &_blocks_array[0];
-        memmove(bp + 1, bp, _n_blocks * sizeof(*bp));
-        bp[0].offset = end_of_reserve;
-        bp[0].size = size;
-        *offset = end_of_reserve;
-        goto done;
-    }
-
-    bp = choose_block_to_alloc_after(size, heat);
-    if (bp != nullptr) {
-        // our allocation strategy chose the space after `bp' to fit the new block
-        uint64_t answer_offset = align(bp->offset + bp->size, _alignment);
-        uint64_t blocknum = bp - _blocks_array;
-        invariant(&_blocks_array[blocknum] == bp);
-        invariant(blocknum < _n_blocks);
-        memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp));
-        bp[1].offset = answer_offset;
-        bp[1].size = size;
-        *offset = answer_offset;
-    } else {
-        // It didn't fit anywhere, so fit it on the end.
-        assert(_n_blocks < _blocks_array_size);
-        bp = &_blocks_array[_n_blocks];
-        uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment);
-        bp->offset = answer_offset;
-        bp->size = size;
-        *offset = answer_offset;
-    }
-
-done:
     _n_blocks++;
     VALIDATE();
-
-    _trace_alloc(size, heat, *offset);
-}
-
-// Find the index in the blocks array that has a particular offset.  Requires that the block exist.
-// Use binary search so it runs fast.
-int64_t block_allocator::find_block(uint64_t offset) {
-    VALIDATE();
-    if (_n_blocks == 1) {
-        assert(_blocks_array[0].offset == offset);
-        return 0;
-    }
-
-    uint64_t lo = 0;
-    uint64_t hi = _n_blocks;
-    while (1) {
-        assert(lo < hi); // otherwise no such block exists.
-        uint64_t mid = (lo + hi) / 2;
-        uint64_t thisoff = _blocks_array[mid].offset;
-        if (thisoff < offset) {
-            lo = mid + 1;
-        } else if (thisoff > offset) {
-            hi = mid;
-        } else {
-            return mid;
-        }
-    }
 }
 
-// To support 0-sized blocks, we need to include size as an input to this function.
+// To support 0-sized blocks, we need to include size as an input to this
+// function.
 // All 0-sized blocks at the same offset can be considered identical, but
 // a 0-sized block can share offset with a non-zero sized block.
-// The non-zero sized block is not exchangable with a zero sized block (or vice versa),
-// so inserting 0-sized blocks can cause corruption here.
-void block_allocator::free_block(uint64_t offset) {
+// The non-zero sized block is not exchangeable with a zero sized block (or
+// vice versa), so inserting 0-sized blocks can cause corruption here.
+void BlockAllocator::FreeBlock(uint64_t offset, uint64_t size) {
     VALIDATE();
-    int64_t bn = find_block(offset);
-    assert(bn >= 0); // we require that there is a block with that offset.
-    _n_bytes_in_use -= _blocks_array[bn].size;
-    memmove(&_blocks_array[bn], &_blocks_array[bn + 1],
-            (_n_blocks - bn - 1) * sizeof(struct blockpair));
+    _n_bytes_in_use -= size;
+    _tree->Insert({offset, size});
     _n_blocks--;
     VALIDATE();
-    
-    _trace_free(offset);
-}
-
-uint64_t block_allocator::block_size(uint64_t offset) {
-    int64_t bn = find_block(offset);
-    assert(bn >=0); // we require that there is a block with that offset.
-    return _blocks_array[bn].size;
 }
 
-uint64_t block_allocator::allocated_limit() const {
-    if (_n_blocks == 0) {
-        return _reserve_at_beginning;
-    } else {
-        struct blockpair *last = &_blocks_array[_n_blocks - 1];
-        return last->offset + last->size;
-    }
+uint64_t BlockAllocator::AllocatedLimit() const {
+    MhsRbTree::Node *max_node = _tree->MaxNode();
+    return rbn_offset(max_node).ToInt();
 }
 
-// Effect: Consider the blocks in sorted order.  The reserved block at the beginning is number 0.  The next one is number 1 and so forth.
+// Effect: Consider the blocks in sorted order.  The reserved block at the
+// beginning is number 0.  The next one is number 1 and so forth.
 // Return the offset and size of the block with that number.
 // Return 0 if there is a block that big, return nonzero if b is too big.
-int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size) {
-    if (b ==0 ) {
+int BlockAllocator::NthBlockInLayoutOrder(uint64_t b,
+                                          uint64_t *offset,
+                                          uint64_t *size) {
+    MhsRbTree::Node *x, *y;
+    if (b == 0) {
         *offset = 0;
         *size = _reserve_at_beginning;
-        return  0;
+        return 0;
     } else if (b > _n_blocks) {
         return -1;
     } else {
-        *offset =_blocks_array[b - 1].offset;
-        *size =_blocks_array[b - 1].size;
+        x = _tree->MinNode();
+        for (uint64_t i = 1; i <= b; i++) {
+            y = x;
+            x = _tree->Successor(x);
+        }
+        *size = (rbn_offset(x) - (rbn_offset(y) + rbn_size(y))).ToInt();
+        *offset = (rbn_offset(y) + rbn_size(y)).ToInt();
         return 0;
     }
 }
 
+struct VisUnusedExtra {
+    TOKU_DB_FRAGMENTATION _report;  // report whose unused_* fields are updated
+    uint64_t _align;                // alignment applied to each node's offset
+};
+// Tree-visitor callback: adds one node's aligned free space to the report.
+static void VisUnusedCollector(void *extra,
+                               MhsRbTree::Node *node,
+                               uint64_t UU(depth)) {
+    struct VisUnusedExtra *v_e = (struct VisUnusedExtra *)extra;
+    TOKU_DB_FRAGMENTATION report = v_e->_report;
+    uint64_t alignm = v_e->_align;
+    // Only space from the node's offset rounded up to alignm onward is counted.
+    MhsRbTree::OUUInt64 offset = rbn_offset(node);
+    MhsRbTree::OUUInt64 size = rbn_size(node);
+    MhsRbTree::OUUInt64 answer_offset(Align(offset.ToInt(), alignm));
+    uint64_t free_space = (offset + size - answer_offset).ToInt();
+    if (free_space > 0) {
+        report->unused_bytes += free_space;
+        report->unused_blocks++;
+        if (free_space > report->largest_unused_block) {
+            report->largest_unused_block = free_space;
+        }
+    }
+}
 // Requires: report->file_size_bytes is filled in
 // Requires: report->data_bytes is filled in
 // Requires: report->checkpoint_bytes_additional is filled in
-void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) {
-    assert(_n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional);
+void BlockAllocator::UnusedStatistics(TOKU_DB_FRAGMENTATION report) {
+    invariant(_n_bytes_in_use ==
+              report->data_bytes + report->checkpoint_bytes_additional);
 
     report->unused_bytes = 0;
     report->unused_blocks = 0;
     report->largest_unused_block = 0;
-    if (_n_blocks > 0) {
-        //Deal with space before block 0 and after reserve:
-        {
-            struct blockpair *bp = &_blocks_array[0];
-            assert(bp->offset >= align(_reserve_at_beginning, _alignment));
-            uint64_t free_space = bp->offset - align(_reserve_at_beginning, _alignment);
-            if (free_space > 0) {
-                report->unused_bytes += free_space;
-                report->unused_blocks++;
-                if (free_space > report->largest_unused_block) {
-                    report->largest_unused_block = free_space;
-                }
-            }
-        }
-
-        //Deal with space between blocks:
-        for (uint64_t blocknum = 0; blocknum +1 < _n_blocks; blocknum ++) {
-            // Consider the space after blocknum
-            struct blockpair *bp = &_blocks_array[blocknum];
-            uint64_t this_offset = bp[0].offset;
-            uint64_t this_size   = bp[0].size;
-            uint64_t end_of_this_block = align(this_offset+this_size, _alignment);
-            uint64_t next_offset = bp[1].offset;
-            uint64_t free_space  = next_offset - end_of_this_block;
-            if (free_space > 0) {
-                report->unused_bytes += free_space;
-                report->unused_blocks++;
-                if (free_space > report->largest_unused_block) {
-                    report->largest_unused_block = free_space;
-                }
-            }
-        }
-
-        //Deal with space after last block
-        {
-            struct blockpair *bp = &_blocks_array[_n_blocks-1];
-            uint64_t this_offset = bp[0].offset;
-            uint64_t this_size   = bp[0].size;
-            uint64_t end_of_this_block = align(this_offset+this_size, _alignment);
-            if (end_of_this_block < report->file_size_bytes) {
-                uint64_t free_space  = report->file_size_bytes - end_of_this_block;
-                assert(free_space > 0);
-                report->unused_bytes += free_space;
-                report->unused_blocks++;
-                if (free_space > report->largest_unused_block) {
-                    report->largest_unused_block = free_space;
-                }
-            }
-        }
-    } else {
-        // No blocks.  Just the reserve.
-        uint64_t end_of_this_block = align(_reserve_at_beginning, _alignment);
-        if (end_of_this_block < report->file_size_bytes) {
-            uint64_t free_space  = report->file_size_bytes - end_of_this_block;
-            assert(free_space > 0);
-            report->unused_bytes += free_space;
-            report->unused_blocks++;
-            if (free_space > report->largest_unused_block) {
-                report->largest_unused_block = free_space;
-            }
-        }
-    }
+    struct VisUnusedExtra extra = {report, _alignment};
+    _tree->InOrderVisitor(VisUnusedCollector, &extra);
 }
 
-void block_allocator::get_statistics(TOKU_DB_FRAGMENTATION report) {
-    report->data_bytes = _n_bytes_in_use; 
-    report->data_blocks = _n_blocks; 
+void BlockAllocator::Statistics(TOKU_DB_FRAGMENTATION report) {
+    report->data_bytes = _n_bytes_in_use;
+    report->data_blocks = _n_blocks;
     report->file_size_bytes = 0;
     report->checkpoint_bytes_additional = 0;
-    get_unused_statistics(report);
+    UnusedStatistics(report);
 }
 
-void block_allocator::validate() const {
-    uint64_t n_bytes_in_use = _reserve_at_beginning;
-    for (uint64_t i = 0; i < _n_blocks; i++) {
-        n_bytes_in_use += _blocks_array[i].size;
-        if (i > 0) {
-            assert(_blocks_array[i].offset >  _blocks_array[i - 1].offset);
-            assert(_blocks_array[i].offset >= _blocks_array[i - 1].offset + _blocks_array[i - 1].size );
-        }
-    }
-    assert(n_bytes_in_use == _n_bytes_in_use);
-}
-
-// Tracing
-
-void block_allocator::_trace_create(void) {
-    if (ba_trace_file != nullptr) {
-        toku_mutex_lock(&_trace_lock);
-        fprintf(ba_trace_file, "ba_trace_create %p %" PRIu64 " %" PRIu64 "\n",
-                this, _reserve_at_beginning, _alignment);
-        toku_mutex_unlock(&_trace_lock);
-
-        fflush(ba_trace_file);
-    }
-}
-
-void block_allocator::_trace_create_from_blockpairs(void) {
-    if (ba_trace_file != nullptr) {
-        toku_mutex_lock(&_trace_lock);
-        fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %" PRIu64 " %" PRIu64 " ",
-                this, _reserve_at_beginning, _alignment);
-        for (uint64_t i = 0; i < _n_blocks; i++) {
-            fprintf(ba_trace_file, "[%" PRIu64 " %" PRIu64 "] ",
-                    _blocks_array[i].offset, _blocks_array[i].size);
-        }
-        fprintf(ba_trace_file, "\n");
-        toku_mutex_unlock(&_trace_lock);
-
-        fflush(ba_trace_file);
-    }
-}
-
-void block_allocator::_trace_destroy(void) {
-    if (ba_trace_file != nullptr) {
-        toku_mutex_lock(&_trace_lock);
-        fprintf(ba_trace_file, "ba_trace_destroy %p\n", this);
-        toku_mutex_unlock(&_trace_lock);
-
-        fflush(ba_trace_file);
-    }
-}
-
-void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) {
-    if (ba_trace_file != nullptr) {
-        toku_mutex_lock(&_trace_lock);
-        fprintf(ba_trace_file, "ba_trace_alloc %p %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-                this, size, heat, offset);
-        toku_mutex_unlock(&_trace_lock);
-
-        fflush(ba_trace_file);
+struct ValidateExtra {
+    uint64_t _bytes;  // running total of bytes lying outside the visited nodes
+    MhsRbTree::Node *_pre_node;  // node seen by the previous callback, or null
+};
+static void VisUsedBlocksInOrder(void *extra,
+                                 MhsRbTree::Node *cur_node,
+                                 uint64_t UU(depth)) {
+    struct ValidateExtra *v_e = (struct ValidateExtra *)extra;
+    MhsRbTree::Node *pre_node = v_e->_pre_node;
+    // verify no overlaps; the gap between consecutive nodes is added to _bytes
+    if (pre_node) {
+        invariant(rbn_size(pre_node) > 0);
+        invariant(rbn_offset(cur_node) >
+                  rbn_offset(pre_node) + rbn_size(pre_node));
+        MhsRbTree::OUUInt64 used_space =
+            rbn_offset(cur_node) - (rbn_offset(pre_node) + rbn_size(pre_node));
+        v_e->_bytes += used_space.ToInt();
+    } else {
+        v_e->_bytes += rbn_offset(cur_node).ToInt();  // first node: all bytes before it
     }
+    v_e->_pre_node = cur_node;
 }
 
-void block_allocator::_trace_free(uint64_t offset) {
-    if (ba_trace_file != nullptr) {
-        toku_mutex_lock(&_trace_lock);
-        fprintf(ba_trace_file, "ba_trace_free %p %" PRIu64 "\n", this, offset);
-        toku_mutex_unlock(&_trace_lock);
-
-        fflush(ba_trace_file);
-    }
+void BlockAllocator::Validate() const {
+    _tree->ValidateBalance();
+    _tree->ValidateMhs();
+    struct ValidateExtra extra = {0, nullptr};
+    _tree->InOrderVisitor(VisUsedBlocksInOrder, &extra);
+    invariant(extra._bytes == _n_bytes_in_use);
 }
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h
index 9b2c1553e7f93..648ea9a9ef2c8 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_allocator.h
@@ -43,6 +43,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #include "portability/toku_pthread.h"
 #include "portability/toku_stdint.h"
 #include "portability/toku_stdlib.h"
+#include "ft/serialize/rbtree_mhs.h"
 
 // Block allocator.
 //
@@ -51,151 +52,128 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 // The allocation of block numbers is handled elsewhere.
 //
 // When creating a block allocator we also specify a certain-sized
-// block at the beginning that is preallocated (and cannot be allocated or freed)
+// block at the beginning that is preallocated (and cannot be allocated or
+// freed)
 //
 // We can allocate blocks of a particular size at a particular location.
-// We can allocate blocks of a particular size at a location chosen by the allocator.
 // We can free blocks.
 // We can determine the size of a block.
-
-class block_allocator {
-public:
+#define MAX_BYTE 0xffffffffffffffff  // all 64 bits set, i.e. 2^64 - 1
+class BlockAllocator {
+   public:
     static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096;
 
     // How much must be reserved at the beginning for the block?
-    //  The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root.
+    //  The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1
+    //  pointer for each root.
     //  So 4096 should be enough.
     static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096;
-    
-    static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0,
+
+    static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT ==
+                      0,
                   "block allocator header must have proper alignment");
 
-    static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2;
+    static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE =
+        BLOCK_ALLOCATOR_HEADER_RESERVE * 2;
 
-    enum allocation_strategy {
-        BA_STRATEGY_FIRST_FIT = 1,
-        BA_STRATEGY_BEST_FIT,
-        BA_STRATEGY_PADDED_FIT,
-        BA_STRATEGY_HEAT_ZONE
-    };
-
-    struct blockpair {
-        uint64_t offset;
-        uint64_t size;
-        blockpair(uint64_t o, uint64_t s) :
-            offset(o), size(s) {
-        }
-        int operator<(const struct blockpair &rhs) const {
-            return offset < rhs.offset;
-        }
-        int operator<(const uint64_t &o) const {
-            return offset < o;
+    struct BlockPair {
+        uint64_t _offset;
+        uint64_t _size;
+        BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {}
+        int operator<(const struct BlockPair &rhs) const {
+            return _offset < rhs._offset;
         }
+        int operator<(const uint64_t &o) const { return _offset < o; }
     };
 
-    // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
-    //         The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
+    // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING
+    // bytes are not put into a block.
+    //         The allocator starts out empty; use CreateFromBlockPairs to
+    //         seed it with already-allocated blocks.
     //  All blocks be start on a multiple of ALIGNMENT.
     //  Aborts if we run out of memory.
     // Parameters
-    //  reserve_at_beginning (IN)        Size of reserved block at beginning.  This size does not have to be aligned.
+    //  reserve_at_beginning (IN)        Size of reserved block at beginning.
+    //                                   This size does not have to be aligned.
     //  alignment (IN)                   Block alignment.
-    void create(uint64_t reserve_at_beginning, uint64_t alignment);
+    void Create(uint64_t reserve_at_beginning, uint64_t alignment);
 
-    // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
-    //         The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
-    //         The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs'
+    // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING
+    // bytes are not put into a block.
+    //         The allocator is initialized to contain `n_blocks' of BlockPairs,
+    //         taken from `pairs'
     //  All blocks be start on a multiple of ALIGNMENT.
     //  Aborts if we run out of memory.
     // Parameters
     //  pairs,                           unowned array of pairs to copy
     //  n_blocks,                        Size of pairs array
-    //  reserve_at_beginning (IN)        Size of reserved block at beginning.  This size does not have to be aligned.
+    //  reserve_at_beginning (IN)        Size of reserved block at beginning.
+    //                                   This size does not have to be aligned.
     //  alignment (IN)                   Block alignment.
-    void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
-                                struct blockpair *pairs, uint64_t n_blocks);
+    void CreateFromBlockPairs(uint64_t reserve_at_beginning,
+                              uint64_t alignment,
+                              struct BlockPair *pairs,
+                              uint64_t n_blocks);
 
     // Effect: Destroy this block allocator
-    void destroy();
-
-    // Effect: Set the allocation strategy that the allocator should use
-    // Requires: No other threads are operating on this block allocator
-    void set_strategy(enum allocation_strategy strategy);
+    void Destroy();
 
-    // Effect: Allocate a block of the specified size at an address chosen by the allocator.
+    // Effect: Allocate a block of the specified size at an address chosen by
+    // the allocator.
     //  Aborts if anything goes wrong.
     //  The block address will be a multiple of the alignment.
     // Parameters:
-    //  size (IN):    The size of the block.  (The size does not have to be aligned.)
+    //  size (IN):    The size of the block.  (The size does not have to be
+    //                aligned.)
     //  offset (OUT): The location of the block.
-    //  heat (IN):    A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint)
-    //                Heat values are lexiographically ordered (like integers), but their specific values are arbitrary
-    void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset);
+    // Note: the old per-allocation `heat' hint is gone; AllocBlock takes
+    //       only the requested size and reports the chosen location
+    //       through offset.
+    void AllocBlock(uint64_t size, uint64_t *offset);
 
     // Effect: Free the block at offset.
     // Requires: There must be a block currently allocated at that offset.
     // Parameters:
     //  offset (IN): The offset of the block.
-    void free_block(uint64_t offset);
+    void FreeBlock(uint64_t offset, uint64_t size);  // size (IN): presumably that block's size -- TODO confirm
 
-    // Effect: Return the size of the block that starts at offset.
-    // Requires: There must be a block currently allocated at that offset.
-    // Parameters:
-    //  offset (IN): The offset of the block.
-    uint64_t block_size(uint64_t offset);
-
-    // Effect: Check to see if the block allocator is OK.  This may take a long time.
+    // Effect: Check to see if the block allocator is OK.  This may take a long
+    // time.
     // Usage Hints: Probably only use this for unit tests.
     // TODO: Private?
-    void validate() const;
+    void Validate() const;
 
     // Effect: Return the unallocated block address of "infinite" size.
-    //  That is, return the smallest address that is above all the allocated blocks.
-    uint64_t allocated_limit() const;
+    //  That is, return the smallest address that is above all the allocated
+    //  blocks.
+    uint64_t AllocatedLimit() const;
 
-    // Effect: Consider the blocks in sorted order.  The reserved block at the beginning is number 0.  The next one is number 1 and so forth.
+    // Effect: Consider the blocks in sorted order.  The reserved block at the
+    // beginning is number 0.  The next one is number 1 and so forth.
     //  Return the offset and size of the block with that number.
     //  Return 0 if there is a block that big, return nonzero if b is too big.
     // Rationale: This is probably useful only for tests.
-    int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size);
+    int NthBlockInLayoutOrder(uint64_t b, uint64_t *offset, uint64_t *size);
 
     // Effect:  Fill in report to indicate how the file is used.
-    // Requires: 
+    // Requires:
     //  report->file_size_bytes is filled in
     //  report->data_bytes is filled in
     //  report->checkpoint_bytes_additional is filled in
-    void get_unused_statistics(TOKU_DB_FRAGMENTATION report);
+    void UnusedStatistics(TOKU_DB_FRAGMENTATION report);
 
     // Effect: Fill in report->data_bytes with the number of bytes in use
-    //         Fill in report->data_blocks with the number of blockpairs in use
+    //         Fill in report->data_blocks with the number of BlockPairs in use
     //         Fill in unused statistics using this->get_unused_statistics()
     // Requires:
     //  report->file_size is ignored on return
     //  report->checkpoint_bytes_additional is ignored on return
-    void get_statistics(TOKU_DB_FRAGMENTATION report);
-
-    // Block allocator tracing.
-    // - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file
-    //   should be written to.
-    // - Trace may be replayed by ba_trace_replay tool in tools/ directory
-    //   eg: "cat mytracefile | ba_trace_replay"
-    static void maybe_initialize_trace();
-    static void maybe_close_trace();
-
-private:
-    void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment);
-    void grow_blocks_array_by(uint64_t n_to_add);
-    void grow_blocks_array();
-    int64_t find_block(uint64_t offset);
-    struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat);
-
-    // Tracing
-    toku_mutex_t _trace_lock;
-    void _trace_create(void);
-    void _trace_create_from_blockpairs(void);
-    void _trace_destroy(void);
-    void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset);
-    void _trace_free(uint64_t offset);
+    void Statistics(TOKU_DB_FRAGMENTATION report);
+
+    virtual ~BlockAllocator(){};
+
+   private:
+    void CreateInternal(uint64_t reserve_at_beginning, uint64_t alignment);
 
     // How much to reserve at the beginning
     uint64_t _reserve_at_beginning;
@@ -203,12 +181,8 @@ class block_allocator {
     uint64_t _alignment;
     // How many blocks
     uint64_t _n_blocks;
-    // How big is the blocks_array.  Must be >= n_blocks.
-    uint64_t _blocks_array_size;
-    // These blocks are sorted by address.
-    struct blockpair *_blocks_array;
-    // Including the reserve_at_beginning
     uint64_t _n_bytes_in_use;
-    // The allocation strategy are we using
-    enum allocation_strategy _strategy;
+
+    // These blocks are sorted by address.
+    MhsRbTree::Tree *_tree;
 };
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc b/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc
deleted file mode 100644
index 62bb8fc4a87bc..0000000000000
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.cc
+++ /dev/null
@@ -1,224 +0,0 @@
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
-#ident "$Id$"
-/*======
-This file is part of PerconaFT.
-
-
-Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
-
-    PerconaFT is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License, version 2,
-    as published by the Free Software Foundation.
-
-    PerconaFT is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
-
-----------------------------------------
-
-    PerconaFT is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Affero General Public License, version 3,
-    as published by the Free Software Foundation.
-
-    PerconaFT is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Affero General Public License for more details.
-
-    You should have received a copy of the GNU Affero General Public License
-    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
-======= */
-
-#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-
-#include <algorithm>
-
-#include <string.h>
-
-#include "portability/toku_assert.h"
-
-#include "ft/serialize/block_allocator_strategy.h"
-
-static uint64_t _align(uint64_t value, uint64_t ba_alignment) {
-    return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
-}
-
-static uint64_t _roundup_to_power_of_two(uint64_t value) {
-    uint64_t r = 4096;
-    while (r < value) {
-        r *= 2;
-        invariant(r > 0);
-    }
-    return r;
-}
-
-// First fit block allocation
-static struct block_allocator::blockpair *
-_first_fit(struct block_allocator::blockpair *blocks_array,
-           uint64_t n_blocks, uint64_t size, uint64_t alignment,
-           uint64_t max_padding) {
-    if (n_blocks == 1) {
-        // won't enter loop, can't underflow the direction < 0 case
-        return nullptr;
-    }
-
-    struct block_allocator::blockpair *bp = &blocks_array[0];
-    for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
-         n_spaces_to_check--, bp++) {
-        // Consider the space after bp
-        uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
-        uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
-        if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1]
-            invariant(bp - blocks_array < (int64_t) n_blocks);
-            return bp;
-        }
-    }
-    return nullptr;
-}
-
-static struct block_allocator::blockpair *
-_first_fit_bw(struct block_allocator::blockpair *blocks_array,
-           uint64_t n_blocks, uint64_t size, uint64_t alignment,
-           uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) {
-    if (n_blocks == 1) {
-        // won't enter loop, can't underflow the direction < 0 case
-        return nullptr;
-    }
-
-    struct block_allocator::blockpair *bp = &blocks_array[-1];
-    for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
-         n_spaces_to_check--, bp--) {
-        // Consider the space after bp
-        uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
-        uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
-        if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) {
-            invariant(blocks_array - bp < (int64_t) n_blocks);
-            return bp;
-        }
-    }
-    return nullptr;
-}
-
-struct block_allocator::blockpair *
-block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array,
-                                    uint64_t n_blocks, uint64_t size, uint64_t alignment) {
-    return _first_fit(blocks_array, n_blocks, size, alignment, 0);
-}
-
-// Best fit block allocation
-struct block_allocator::blockpair *
-block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array,
-                                   uint64_t n_blocks, uint64_t size, uint64_t alignment) {
-    struct block_allocator::blockpair *best_bp = nullptr;
-    uint64_t best_hole_size = 0;
-    for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) {
-        // Consider the space after blocknum
-        struct block_allocator::blockpair *bp = &blocks_array[blocknum];
-        uint64_t possible_offset = _align(bp->offset + bp->size, alignment);
-        uint64_t possible_end_offset = possible_offset + size;
-        if (possible_end_offset <= bp[1].offset) {
-            // It fits here. Is it the best fit?
-            uint64_t hole_size = bp[1].offset - possible_end_offset;
-            if (best_bp == nullptr || hole_size < best_hole_size) {
-                best_hole_size = hole_size;
-                best_bp = bp;
-            }
-        }
-    }
-    return best_bp;
-}
-
-static uint64_t padded_fit_alignment = 4096;
-
-// TODO: These compiler specific directives should be abstracted in a portability header
-//       portability/toku_compiler.h?
-__attribute__((__constructor__))
-static void determine_padded_fit_alignment_from_env(void) {
-    // TODO: Should be in portability as 'toku_os_getenv()?'
-    const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT");
-    if (s != nullptr && strlen(s) > 0) {
-        const int64_t alignment = strtoll(s, nullptr, 10);
-        if (alignment <= 0) {
-            fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), "
-                            "but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n",
-                            s, padded_fit_alignment);
-        } else {
-            padded_fit_alignment = _roundup_to_power_of_two(alignment);
-            fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n",
-                    padded_fit_alignment);
-        }
-    }
-}
-
-// First fit into a block that is oversized by up to max_padding.
-// The hope is that if we purposefully waste a bit of space at allocation
-// time we'll be more likely to reuse this block later.
-struct block_allocator::blockpair *
-block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array,
-                                     uint64_t n_blocks, uint64_t size, uint64_t alignment) {
-    return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment);
-}
-
-static double hot_zone_threshold = 0.85;
-
-// TODO: These compiler specific directives should be abstracted in a portability header
-//       portability/toku_compiler.h?
-__attribute__((__constructor__))
-static void determine_hot_zone_threshold_from_env(void) {
-    // TODO: Should be in portability as 'toku_os_getenv()?'
-    const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD");
-    if (s != nullptr && strlen(s) > 0) {
-        const double hot_zone = strtod(s, nullptr);
-        if (hot_zone < 1 || hot_zone > 99) {
-            fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), "
-                            "but it's out of range (should be an integer 1 through 99). defaulting to 85\n", s);
-            hot_zone_threshold = 85 / 100;
-        } else {
-            fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s);
-            hot_zone_threshold = hot_zone / 100;
-        }
-    }
-}
-
-struct block_allocator::blockpair *
-block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array,
-                                    uint64_t n_blocks, uint64_t size, uint64_t alignment,
-                                    uint64_t heat) {
-    if (heat > 0) {
-        struct block_allocator::blockpair *bp, *boundary_bp;
-
-        // Hot allocation. Find the beginning of the hot zone.
-        boundary_bp = &blocks_array[n_blocks - 1];
-        uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment);
-        uint64_t hot_zone_offset = static_cast<uint64_t>(hot_zone_threshold * highest_offset);
-
-        boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset);
-        uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp;
-        uint64_t blocks_outside_zone = boundary_bp - blocks_array;
-        invariant(blocks_in_zone + blocks_outside_zone == n_blocks);
-
-        if (blocks_in_zone > 0) {
-            // Find the first fit in the hot zone, going forward.
-            bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0);
-            if (bp != nullptr) {
-                return bp;
-            }
-        }
-        if (blocks_outside_zone > 0) {
-            // Find the first fit in the cold zone, going backwards.
-            bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]);
-            if (bp != nullptr) {
-                return bp;
-            }
-        }
-    } else {
-        // Cold allocations are simply first-fit from the beginning.
-        return _first_fit(blocks_array, n_blocks, size, alignment, 0);
-    }
-    return nullptr;
-}
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
index 7101ba9f58c17..d2532134d96cf 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc
@@ -46,31 +46,27 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #include "ft/ft-internal.h"
 
 // TODO: reorganize this dependency (FT-303)
-#include "ft/ft-ops.h" // for toku_maybe_truncate_file
+#include "ft/ft-ops.h"  // for toku_maybe_truncate_file
 #include "ft/serialize/block_table.h"
 #include "ft/serialize/rbuf.h"
 #include "ft/serialize/wbuf.h"
 #include "ft/serialize/block_allocator.h"
-
 #include "util/nb_mutex.h"
 #include "util/scoped_malloc.h"
 
 // indicates the end of a freelist
-static const BLOCKNUM freelist_null = { -1 };
+static const BLOCKNUM freelist_null = {-1};
 
 // value of block_translation_pair.size if blocknum is unused
-static const DISKOFF size_is_free = (DISKOFF) -1;
+static const DISKOFF size_is_free = (DISKOFF)-1;
 
-// value of block_translation_pair.u.diskoff if blocknum is used but does not yet have a diskblock
-static const DISKOFF diskoff_unused = (DISKOFF) -2;
+// value of block_translation_pair.u.diskoff if blocknum is used but does not
+// yet have a diskblock
+static const DISKOFF diskoff_unused = (DISKOFF)-2;
 
-void block_table::_mutex_lock() {
-    toku_mutex_lock(&_mutex);
-}
+void block_table::_mutex_lock() { toku_mutex_lock(&_mutex); }
 
-void block_table::_mutex_unlock() {
-    toku_mutex_unlock(&_mutex);
-}
+void block_table::_mutex_unlock() { toku_mutex_unlock(&_mutex); }
 
 // TODO: Move lock to FT
 void toku_ft_lock(FT ft) {
@@ -85,13 +81,16 @@ void toku_ft_unlock(FT ft) {
     bt->_mutex_unlock();
 }
 
-// There are two headers: the reserve must fit them both and be suitably aligned.
-static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE %
-              block_allocator::BLOCK_ALLOCATOR_ALIGNMENT == 0,
+// There are two headers: the reserve must fit them both and be suitably
+// aligned.
+static_assert(BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE %
+                      BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT ==
+                  0,
               "Block allocator's header reserve must be suitibly aligned");
-static_assert(block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 ==
-              block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
-              "Block allocator's total header reserve must exactly fit two headers");
+static_assert(
+    BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 ==
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
+    "Block allocator's total header reserve must exactly fit two headers");
 
 // does NOT initialize the block allocator: the caller is responsible
 void block_table::_create_internal() {
@@ -99,25 +98,30 @@ void block_table::_create_internal() {
     memset(&_inprogress, 0, sizeof(struct translation));
     memset(&_checkpointed, 0, sizeof(struct translation));
     memset(&_mutex, 0, sizeof(_mutex));
+    _bt_block_allocator = new BlockAllocator();
     toku_mutex_init(&_mutex, nullptr);
     nb_mutex_init(&_safe_file_size_lock);
 }
 
-// Fill in the checkpointed translation from buffer, and copy checkpointed to current.
-// The one read from disk is the last known checkpointed one, so we are keeping it in 
-// place and then setting current (which is never stored on disk) for current use.
-// The translation_buffer has translation only, we create the rest of the block_table.
-int block_table::create_from_buffer(int fd,
-                                    DISKOFF location_on_disk, //Location of translation_buffer
-                                    DISKOFF size_on_disk,
-                                    unsigned char *translation_buffer) {
+// Fill in the checkpointed translation from the buffer, then copy it to
+// current.
+// The translation read from disk is the last known checkpointed one, so it
+// is kept in place as the checkpointed translation, and current (which is
+// never stored on disk) is set up from it for ongoing use.
+// The translation_buffer holds the translation only; the rest of the
+// block_table is created here.
+// A nonzero result from deserializing the buffer is returned to the caller.
+int block_table::create_from_buffer(
+    int fd,
+    DISKOFF location_on_disk,  // Location of translation_buffer
+    DISKOFF size_on_disk,
+    unsigned char *translation_buffer) {
     // Does not initialize the block allocator
     _create_internal();
 
     // Deserialize the translation and copy it to current
-    int r = _translation_deserialize_from_buffer(&_checkpointed,
-                                                 location_on_disk, size_on_disk,
-                                                 translation_buffer);
+    int r = _translation_deserialize_from_buffer(
+        &_checkpointed, location_on_disk, size_on_disk, translation_buffer);
     if (r != 0) {
         return r;
     }
@@ -130,22 +134,26 @@ int block_table::create_from_buffer(int fd,
     invariant(file_size >= 0);
     _safe_file_size = file_size;
 
-    // Gather the non-empty translations and use them to create the block allocator
+    // Gather the non-empty translations and use them to create the block
+    // allocator
     toku::scoped_malloc pairs_buf(_checkpointed.smallest_never_used_blocknum.b *
-                                  sizeof(struct block_allocator::blockpair));
-    struct block_allocator::blockpair *CAST_FROM_VOIDP(pairs, pairs_buf.get());
+                                  sizeof(struct BlockAllocator::BlockPair));
+    struct BlockAllocator::BlockPair *CAST_FROM_VOIDP(pairs, pairs_buf.get());
     uint64_t n_pairs = 0;
     for (int64_t i = 0; i < _checkpointed.smallest_never_used_blocknum.b; i++) {
         struct block_translation_pair pair = _checkpointed.block_translation[i];
         if (pair.size > 0) {
             invariant(pair.u.diskoff != diskoff_unused);
-            pairs[n_pairs++] = block_allocator::blockpair(pair.u.diskoff, pair.size);
+            pairs[n_pairs++] =
+                BlockAllocator::BlockPair(pair.u.diskoff, pair.size);
         }
     }
 
-    _bt_block_allocator.create_from_blockpairs(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
-                                               block_allocator::BLOCK_ALLOCATOR_ALIGNMENT,
-                                               pairs, n_pairs);
+    _bt_block_allocator->CreateFromBlockPairs(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
+        BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT,
+        pairs,
+        n_pairs);
 
     return 0;
 }
@@ -155,8 +163,10 @@ void block_table::create() {
     _create_internal();
 
     _checkpointed.type = TRANSLATION_CHECKPOINTED;
-    _checkpointed.smallest_never_used_blocknum = make_blocknum(RESERVED_BLOCKNUMS);
-    _checkpointed.length_of_array = _checkpointed.smallest_never_used_blocknum.b;
+    _checkpointed.smallest_never_used_blocknum =
+        make_blocknum(RESERVED_BLOCKNUMS);
+    _checkpointed.length_of_array =
+        _checkpointed.smallest_never_used_blocknum.b;
     _checkpointed.blocknum_freelist_head = freelist_null;
     XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation);
     for (int64_t i = 0; i < _checkpointed.length_of_array; i++) {
@@ -164,12 +174,13 @@ void block_table::create() {
         _checkpointed.block_translation[i].u.diskoff = diskoff_unused;
     }
 
-    // we just created a default checkpointed, now copy it to current.  
+    // we just created a default checkpointed, now copy it to current.
     _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT);
 
     // Create an empty block allocator.
-    _bt_block_allocator.create(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
-                               block_allocator::BLOCK_ALLOCATOR_ALIGNMENT);
+    _bt_block_allocator->Create(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
+        BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT);
 }
 
 // TODO: Refactor with FT-303
@@ -185,20 +196,24 @@ static void ft_set_dirty(FT ft, bool for_checkpoint) {
 
 void block_table::_maybe_truncate_file(int fd, uint64_t size_needed_before) {
     toku_mutex_assert_locked(&_mutex);
-    uint64_t new_size_needed = _bt_block_allocator.allocated_limit();
-    //Save a call to toku_os_get_file_size (kernel call) if unlikely to be useful.
-    if (new_size_needed < size_needed_before && new_size_needed < _safe_file_size) {
+    uint64_t new_size_needed = _bt_block_allocator->AllocatedLimit();
+    // Save a call to toku_os_get_file_size (kernel call) if unlikely to be
+    // useful.
+    if (new_size_needed < size_needed_before &&
+        new_size_needed < _safe_file_size) {
         nb_mutex_lock(&_safe_file_size_lock, &_mutex);
 
         // Must hold _safe_file_size_lock to change _safe_file_size.
         if (new_size_needed < _safe_file_size) {
             int64_t safe_file_size_before = _safe_file_size;
-            // Not safe to use the 'to-be-truncated' portion until truncate is done.
+            // Not safe to use the 'to-be-truncated' portion until truncate is
+            // done.
             _safe_file_size = new_size_needed;
             _mutex_unlock();
 
             uint64_t size_after;
-            toku_maybe_truncate_file(fd, new_size_needed, safe_file_size_before, &size_after);
+            toku_maybe_truncate_file(
+                fd, new_size_needed, safe_file_size_before, &size_after);
             _mutex_lock();
 
             _safe_file_size = size_after;
@@ -213,26 +228,35 @@ void block_table::maybe_truncate_file_on_open(int fd) {
     _mutex_unlock();
 }
 
-void block_table::_copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype) {
-    // We intend to malloc a fresh block, so the incoming translation should be empty
+void block_table::_copy_translation(struct translation *dst,
+                                    struct translation *src,
+                                    enum translation_type newtype) {
+    // We intend to malloc a fresh block, so the incoming translation should be
+    // empty
     invariant_null(dst->block_translation);
 
     invariant(src->length_of_array >= src->smallest_never_used_blocknum.b);
     invariant(newtype == TRANSLATION_DEBUG ||
-              (src->type == TRANSLATION_CURRENT && newtype == TRANSLATION_INPROGRESS) ||
-              (src->type == TRANSLATION_CHECKPOINTED && newtype == TRANSLATION_CURRENT));
+              (src->type == TRANSLATION_CURRENT &&
+               newtype == TRANSLATION_INPROGRESS) ||
+              (src->type == TRANSLATION_CHECKPOINTED &&
+               newtype == TRANSLATION_CURRENT));
     dst->type = newtype;
     dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum;
-    dst->blocknum_freelist_head = src->blocknum_freelist_head; 
+    dst->blocknum_freelist_head = src->blocknum_freelist_head;
 
-    // destination btt is of fixed size. Allocate + memcpy the exact length necessary.
+    // destination btt is of fixed size. Allocate + memcpy the exact length
+    // necessary.
     dst->length_of_array = dst->smallest_never_used_blocknum.b;
     XMALLOC_N(dst->length_of_array, dst->block_translation);
-    memcpy(dst->block_translation, src->block_translation, dst->length_of_array * sizeof(*dst->block_translation));
+    memcpy(dst->block_translation,
+           src->block_translation,
+           dst->length_of_array * sizeof(*dst->block_translation));
 
     // New version of btt is not yet stored on disk.
     dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0;
-    dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused;
+    dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff =
+        diskoff_unused;
 }
 
 int64_t block_table::get_blocks_in_use_unlocked() {
@@ -240,8 +264,9 @@ int64_t block_table::get_blocks_in_use_unlocked() {
     struct translation *t = &_current;
     int64_t num_blocks = 0;
     {
-        //Reserved blocknums do not get upgraded; They are part of the header.
-        for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) {
+        // Reserved blocknums do not get upgraded; They are part of the header.
+        for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b;
+             b.b++) {
             if (t->block_translation[b.b].size != size_is_free) {
                 num_blocks++;
             }
@@ -251,38 +276,43 @@ int64_t block_table::get_blocks_in_use_unlocked() {
 }
 
 void block_table::_maybe_optimize_translation(struct translation *t) {
-    //Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just
-    //on a free list.  Doing so requires us to regenerate the free list.
-    //This is O(n) work, so do it only if you're already doing that.
+    // Reduce 'smallest_never_used_blocknum.b' (completely free blocknums
+    // instead of just leaving them on a free list).  Doing so requires us
+    // to regenerate the free list.
+    // This is O(n) work, so do it only if you're already doing that.
 
     BLOCKNUM b;
     paranoid_invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS);
-    //Calculate how large the free suffix is.
+    // Calculate how large the free suffix is.
     int64_t freed;
     {
-        for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS; b.b--) {
-            if (t->block_translation[b.b-1].size != size_is_free) {
+        for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS;
+             b.b--) {
+            if (t->block_translation[b.b - 1].size != size_is_free) {
                 break;
             }
         }
         freed = t->smallest_never_used_blocknum.b - b.b;
     }
-    if (freed>0) {
+    if (freed > 0) {
         t->smallest_never_used_blocknum.b = b.b;
-        if (t->length_of_array/4 > t->smallest_never_used_blocknum.b) {
-            //We're using more memory than necessary to represent this now.  Reduce.
+        if (t->length_of_array / 4 > t->smallest_never_used_blocknum.b) {
+            // We're using more memory than necessary to represent this now.
+            // Reduce.
             uint64_t new_length = t->smallest_never_used_blocknum.b * 2;
             XREALLOC_N(new_length, t->block_translation);
             t->length_of_array = new_length;
-            //No need to zero anything out. 
+            // No need to zero anything out.
         }
 
-        //Regenerate free list.
+        // Regenerate free list.
         t->blocknum_freelist_head.b = freelist_null.b;
-        for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) {
+        for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b;
+             b.b++) {
             if (t->block_translation[b.b].size == size_is_free) {
-                t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head;
-                t->blocknum_freelist_head                      = b;
+                t->block_translation[b.b].u.next_free_blocknum =
+                    t->blocknum_freelist_head;
+                t->blocknum_freelist_head = b;
             }
         }
     }
@@ -303,14 +333,16 @@ void block_table::note_start_checkpoint_unlocked() {
 }
 
 void block_table::note_skipped_checkpoint() {
-    //Purpose, alert block translation that the checkpoint was skipped, e.x. for a non-dirty header
+    // Purpose: alert block translation that the checkpoint was skipped, e.g.
+    // for a non-dirty header
     _mutex_lock();
     paranoid_invariant_notnull(_inprogress.block_translation);
     _checkpoint_skipped = true;
     _mutex_unlock();
 }
 
-// Purpose: free any disk space used by previous checkpoint that isn't in use by either
+// Purpose: free any disk space used by previous checkpoint that isn't in use by
+// either
 //           - current state
 //           - in-progress checkpoint
 //          capture inprogress as new checkpointed.
@@ -323,7 +355,7 @@ void block_table::note_skipped_checkpoint() {
 void block_table::note_end_checkpoint(int fd) {
     // Free unused blocks
     _mutex_lock();
-    uint64_t allocated_limit_at_start = _bt_block_allocator.allocated_limit();
+    uint64_t allocated_limit_at_start = _bt_block_allocator->AllocatedLimit();
     paranoid_invariant_notnull(_inprogress.block_translation);
     if (_checkpoint_skipped) {
         toku_free(_inprogress.block_translation);
@@ -331,17 +363,23 @@ void block_table::note_end_checkpoint(int fd) {
         goto end;
     }
 
-    //Make certain inprogress was allocated space on disk
-    assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0);
-    assert(_inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff > 0);
+    // Make certain inprogress was allocated space on disk
+    invariant(
+        _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0);
+    invariant(
+        _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff >
+        0);
 
     {
         struct translation *t = &_checkpointed;
         for (int64_t i = 0; i < t->length_of_array; i++) {
             struct block_translation_pair *pair = &t->block_translation[i];
-            if (pair->size > 0 && !_translation_prevents_freeing(&_inprogress, make_blocknum(i), pair)) {
-                assert(!_translation_prevents_freeing(&_current, make_blocknum(i), pair));
-                _bt_block_allocator.free_block(pair->u.diskoff);
+            if (pair->size > 0 &&
+                !_translation_prevents_freeing(
+                    &_inprogress, make_blocknum(i), pair)) {
+                invariant(!_translation_prevents_freeing(
+                              &_current, make_blocknum(i), pair));
+                _bt_block_allocator->FreeBlock(pair->u.diskoff, pair->size);
             }
         }
         toku_free(_checkpointed.block_translation);
@@ -359,53 +397,65 @@ bool block_table::_is_valid_blocknum(struct translation *t, BLOCKNUM b) {
     return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b;
 }
 
-void block_table::_verify_valid_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) {
+void block_table::_verify_valid_blocknum(struct translation *UU(t),
+                                         BLOCKNUM UU(b)) {
     invariant(_is_valid_blocknum(t, b));
 }
 
-bool block_table::_is_valid_freeable_blocknum(struct translation *t, BLOCKNUM b) {
+bool block_table::_is_valid_freeable_blocknum(struct translation *t,
+                                              BLOCKNUM b) {
     invariant(t->length_of_array >= t->smallest_never_used_blocknum.b);
     return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b;
 }
 
 // should be freeable
-void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t), BLOCKNUM UU(b)) {
+void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t),
+                                                  BLOCKNUM UU(b)) {
     invariant(_is_valid_freeable_blocknum(t, b));
 }
 
 // Also used only in ft-serialize-test.
-void block_table::block_free(uint64_t offset) {
+void block_table::block_free(uint64_t offset, uint64_t size) {
     _mutex_lock();
-    _bt_block_allocator.free_block(offset);
+    _bt_block_allocator->FreeBlock(offset, size);
     _mutex_unlock();
 }
 
 int64_t block_table::_calculate_size_on_disk(struct translation *t) {
-    return 8 + // smallest_never_used_blocknum
-           8 + // blocknum_freelist_head
-           t->smallest_never_used_blocknum.b * 16 + // Array
-           4; // 4 for checksum
+    return 8 +  // smallest_never_used_blocknum
+           8 +  // blocknum_freelist_head
+           t->smallest_never_used_blocknum.b * 16 +  // Array
+           4;                                        // 4 for checksum
 }
 
-// We cannot free the disk space allocated to this blocknum if it is still in use by the given translation table.
-bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair) {
-    return t->block_translation &&
-           b.b < t->smallest_never_used_blocknum.b &&
+// We cannot free the disk space allocated to this blocknum if it is still in
+// use by the given translation table.
+bool block_table::_translation_prevents_freeing(
+    struct translation *t,
+    BLOCKNUM b,
+    struct block_translation_pair *old_pair) {
+    return t->block_translation && b.b < t->smallest_never_used_blocknum.b &&
            old_pair->u.diskoff == t->block_translation[b.b].u.diskoff;
 }
 
-void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint, uint64_t heat) {
+void block_table::_realloc_on_disk_internal(BLOCKNUM b,
+                                            DISKOFF size,
+                                            DISKOFF *offset,
+                                            FT ft,
+                                            bool for_checkpoint) {
     toku_mutex_assert_locked(&_mutex);
     ft_set_dirty(ft, for_checkpoint);
 
     struct translation *t = &_current;
     struct block_translation_pair old_pair = t->block_translation[b.b];
-    //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint
-    bool cannot_free = (bool)
-        ((!for_checkpoint && _translation_prevents_freeing(&_inprogress,   b, &old_pair)) ||
-         _translation_prevents_freeing(&_checkpointed, b, &old_pair));
-    if (!cannot_free && old_pair.u.diskoff!=diskoff_unused) {
-        _bt_block_allocator.free_block(old_pair.u.diskoff);
+    // Free the old block if it is not still in use by the checkpoint in
+    // progress or the previous checkpoint
+    bool cannot_free =
+        (!for_checkpoint &&
+         _translation_prevents_freeing(&_inprogress, b, &old_pair)) ||
+        _translation_prevents_freeing(&_checkpointed, b, &old_pair);
+    if (!cannot_free && old_pair.u.diskoff != diskoff_unused) {
+        _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size);
     }
 
     uint64_t allocator_offset = diskoff_unused;
@@ -413,19 +463,22 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *o
     if (size > 0) {
         // Allocate a new block if the size is greater than 0,
         // if the size is just 0, offset will be set to diskoff_unused
-        _bt_block_allocator.alloc_block(size, heat, &allocator_offset);
+        _bt_block_allocator->AllocBlock(size, &allocator_offset);
     }
     t->block_translation[b.b].u.diskoff = allocator_offset;
     *offset = allocator_offset;
 
-    //Update inprogress btt if appropriate (if called because Pending bit is set).
+    // Update inprogress btt if appropriate (if called because Pending bit is
+    // set).
     if (for_checkpoint) {
         paranoid_invariant(b.b < _inprogress.length_of_array);
         _inprogress.block_translation[b.b] = t->block_translation[b.b];
     }
 }
 
-void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset) {
+void block_table::_ensure_safe_write_unlocked(int fd,
+                                              DISKOFF block_size,
+                                              DISKOFF block_offset) {
     // Requires: holding _mutex
     uint64_t size_needed = block_size + block_offset;
     if (size_needed > _safe_file_size) {
@@ -435,7 +488,8 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF
             _mutex_unlock();
 
             int64_t size_after;
-            toku_maybe_preallocate_in_file(fd, size_needed, _safe_file_size, &size_after);
+            toku_maybe_preallocate_in_file(
+                fd, size_needed, _safe_file_size, &size_after);
 
             _mutex_lock();
             _safe_file_size = size_after;
@@ -444,11 +498,16 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF
     }
 }
 
-void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint, uint64_t heat) {
+void block_table::realloc_on_disk(BLOCKNUM b,
+                                  DISKOFF size,
+                                  DISKOFF *offset,
+                                  FT ft,
+                                  int fd,
+                                  bool for_checkpoint) {
     _mutex_lock();
     struct translation *t = &_current;
     _verify_valid_freeable_blocknum(t, b);
-    _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint, heat);
+    _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint);
 
     _ensure_safe_write_unlocked(fd, size, *offset);
     _mutex_unlock();
@@ -458,70 +517,97 @@ bool block_table::_pair_is_unallocated(struct block_translation_pair *pair) {
     return pair->size == 0 && pair->u.diskoff == diskoff_unused;
 }
 
-// Effect: figure out where to put the inprogress btt on disk, allocate space for it there.
-//   The space must be 512-byte aligned (both the starting address and the size).
-//   As a result, the allcoated space may be a little bit bigger (up to the next 512-byte boundary) than the actual btt.
+// Effect: figure out where to put the inprogress btt on disk, allocate space
+// for it there.
+//   The space must be 512-byte aligned (both the starting address and the
+//   size).
+//   As a result, the allocated space may be a little bit bigger (up to the next
+//   512-byte boundary) than the actual btt.
 void block_table::_alloc_inprogress_translation_on_disk_unlocked() {
     toku_mutex_assert_locked(&_mutex);
 
     struct translation *t = &_inprogress;
     paranoid_invariant_notnull(t->block_translation);
     BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
-    //Each inprogress is allocated only once
+    // Each inprogress is allocated only once
     paranoid_invariant(_pair_is_unallocated(&t->block_translation[b.b]));
 
-    //Allocate a new block
+    // Allocate a new block
     int64_t size = _calculate_size_on_disk(t);
     uint64_t offset;
-    _bt_block_allocator.alloc_block(size, 0, &offset);
+    _bt_block_allocator->AllocBlock(size, &offset);
     t->block_translation[b.b].u.diskoff = offset;
-    t->block_translation[b.b].size      = size;
+    t->block_translation[b.b].size = size;
 }
 
 // Effect: Serializes the blocktable to a wbuf (which starts uninitialized)
-//   A clean shutdown runs checkpoint start so that current and inprogress are copies.
-//   The resulting wbuf buffer is guaranteed to be be 512-byte aligned and the total length is a multiple of 512 (so we pad with zeros at the end if needd)
-//   The address is guaranteed to be 512-byte aligned, but the size is not guaranteed.
-//   It *is* guaranteed that we can read up to the next 512-byte boundary, however
-void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w,
-                                                int64_t *address, int64_t *size) {
+//   A clean shutdown runs checkpoint start so that current and inprogress are
+//   copies.
+//   The resulting wbuf buffer is guaranteed to be 512-byte aligned and the
+//   total length is a multiple of 512 (so we pad with zeros at the end if
+//   needed)
+//   The address is guaranteed to be 512-byte aligned, but the size is not
+//   guaranteed.
+//   It *is* guaranteed that we can read up to the next 512-byte boundary,
+//   however
+void block_table::serialize_translation_to_wbuf(int fd,
+                                                struct wbuf *w,
+                                                int64_t *address,
+                                                int64_t *size) {
     _mutex_lock();
     struct translation *t = &_inprogress;
 
     BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
-    _alloc_inprogress_translation_on_disk_unlocked(); // The allocated block must be 512-byte aligned to make O_DIRECT happy.
+    _alloc_inprogress_translation_on_disk_unlocked();  // The allocated block
+                                                       // must be 512-byte
+                                                       // aligned to make
+                                                       // O_DIRECT happy.
     uint64_t size_translation = _calculate_size_on_disk(t);
-    uint64_t size_aligned     = roundup_to_multiple(512, size_translation);
-    assert((int64_t)size_translation==t->block_translation[b.b].size);
+    uint64_t size_aligned = roundup_to_multiple(512, size_translation);
+    invariant((int64_t)size_translation == t->block_translation[b.b].size);
     {
-        //Init wbuf
+        // Init wbuf
         if (0)
-            printf("%s:%d writing translation table of size_translation %" PRIu64 " at %" PRId64 "\n", __FILE__, __LINE__, size_translation, t->block_translation[b.b].u.diskoff);
+            printf(
+                "%s:%d writing translation table of size_translation %" PRIu64
+                " at %" PRId64 "\n",
+                __FILE__,
+                __LINE__,
+                size_translation,
+                t->block_translation[b.b].u.diskoff);
         char *XMALLOC_N_ALIGNED(512, size_aligned, buf);
-        for (uint64_t i=size_translation; i<size_aligned; i++) buf[i]=0; // fill in the end of the buffer with zeros.
+        for (uint64_t i = size_translation; i < size_aligned; i++)
+            buf[i] = 0;  // fill in the end of the buffer with zeros.
         wbuf_init(w, buf, size_aligned);
     }
-    wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum); 
-    wbuf_BLOCKNUM(w, t->blocknum_freelist_head); 
+    wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum);
+    wbuf_BLOCKNUM(w, t->blocknum_freelist_head);
     int64_t i;
-    for (i=0; i<t->smallest_never_used_blocknum.b; i++) {
+    for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
         if (0)
-            printf("%s:%d %" PRId64 ",%" PRId64 "\n", __FILE__, __LINE__, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+            printf("%s:%d %" PRId64 ",%" PRId64 "\n",
+                   __FILE__,
+                   __LINE__,
+                   t->block_translation[i].u.diskoff,
+                   t->block_translation[i].size);
         wbuf_DISKOFF(w, t->block_translation[i].u.diskoff);
         wbuf_DISKOFF(w, t->block_translation[i].size);
     }
     uint32_t checksum = toku_x1764_finish(&w->checksum);
     wbuf_int(w, checksum);
     *address = t->block_translation[b.b].u.diskoff;
-    *size    = size_translation;
-    assert((*address)%512 == 0);
+    *size = size_translation;
+    invariant((*address) % 512 == 0);
 
     _ensure_safe_write_unlocked(fd, size_aligned, *address);
     _mutex_unlock();
 }
 
-// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?)
-void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) {
+// Perhaps rename: purpose is to get the disk address of a block, given its
+// blocknum (blockid?)
+void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b,
+                                                              DISKOFF *offset,
+                                                              DISKOFF *size) {
     struct translation *t = &_current;
     _verify_valid_blocknum(t, b);
     if (offset) {
@@ -532,8 +618,11 @@ void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOF
     }
 }
 
-// Perhaps rename: purpose is get disk address of a block, given its blocknum (blockid?)
-void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size) {
+// Perhaps rename: purpose is to get the disk address of a block, given its
+// blocknum (blockid?)
+void block_table::translate_blocknum_to_offset_size(BLOCKNUM b,
+                                                    DISKOFF *offset,
+                                                    DISKOFF *size) {
     _mutex_lock();
     _translate_blocknum_to_offset_size_unlocked(b, offset, size);
     _mutex_unlock();
@@ -544,13 +633,13 @@ void block_table::translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset,
 // given that one more never-used blocknum will soon be used.
 void block_table::_maybe_expand_translation(struct translation *t) {
     if (t->length_of_array <= t->smallest_never_used_blocknum.b) {
-        //expansion is necessary
+        // expansion is necessary
         uint64_t new_length = t->smallest_never_used_blocknum.b * 2;
         XREALLOC_N(new_length, t->block_translation);
         uint64_t i;
         for (i = t->length_of_array; i < new_length; i++) {
             t->block_translation[i].u.next_free_blocknum = freelist_null;
-            t->block_translation[i].size                 = size_is_free;
+            t->block_translation[i].size = size_is_free;
         }
         t->length_of_array = new_length;
     }
@@ -563,7 +652,8 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) {
     if (t->blocknum_freelist_head.b == freelist_null.b) {
         // no previously used blocknums are available
         // use a never used blocknum
-        _maybe_expand_translation(t); //Ensure a never used blocknums is available
+        _maybe_expand_translation(
+            t);  // Ensure a never used blocknums is available
         result = t->smallest_never_used_blocknum;
         t->smallest_never_used_blocknum.b++;
     } else {  // reuse a previously used blocknum
@@ -571,11 +661,11 @@ void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) {
         BLOCKNUM next = t->block_translation[result.b].u.next_free_blocknum;
         t->blocknum_freelist_head = next;
     }
-    //Verify the blocknum is free
+    // Verify the blocknum is free
     paranoid_invariant(t->block_translation[result.b].size == size_is_free);
-    //blocknum is not free anymore
+    // blocknum is not free anymore
     t->block_translation[result.b].u.diskoff = diskoff_unused;
-    t->block_translation[result.b].size    = 0;
+    t->block_translation[result.b].size = 0;
     _verify_valid_freeable_blocknum(t, result);
     *res = result;
     ft_set_dirty(ft, false);
@@ -587,42 +677,46 @@ void block_table::allocate_blocknum(BLOCKNUM *res, FT ft) {
     _mutex_unlock();
 }
 
-void block_table::_free_blocknum_in_translation(struct translation *t, BLOCKNUM b) {
+void block_table::_free_blocknum_in_translation(struct translation *t,
+                                                BLOCKNUM b) {
     _verify_valid_freeable_blocknum(t, b);
     paranoid_invariant(t->block_translation[b.b].size != size_is_free);
 
-    t->block_translation[b.b].size                 = size_is_free;
+    t->block_translation[b.b].size = size_is_free;
     t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head;
-    t->blocknum_freelist_head                      = b;
+    t->blocknum_freelist_head = b;
 }
 
 // Effect: Free a blocknum.
 // If the blocknum holds the only reference to a block on disk, free that block
-void block_table::_free_blocknum_unlocked(BLOCKNUM *bp, FT ft, bool for_checkpoint) {
+void block_table::_free_blocknum_unlocked(BLOCKNUM *bp,
+                                          FT ft,
+                                          bool for_checkpoint) {
     toku_mutex_assert_locked(&_mutex);
     BLOCKNUM b = *bp;
-    bp->b = 0; //Remove caller's reference.
+    bp->b = 0;  // Remove caller's reference.
 
     struct block_translation_pair old_pair = _current.block_translation[b.b];
 
     _free_blocknum_in_translation(&_current, b);
     if (for_checkpoint) {
-        paranoid_invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS);
+        paranoid_invariant(ft->checkpoint_header->type ==
+                           FT_CHECKPOINT_INPROGRESS);
         _free_blocknum_in_translation(&_inprogress, b);
     }
 
-    //If the size is 0, no disk block has ever been assigned to this blocknum.
+    // If the size is 0, no disk block has ever been assigned to this blocknum.
     if (old_pair.size > 0) {
-        //Free the old block if it is not still in use by the checkpoint in progress or the previous checkpoint
-        bool cannot_free = (bool)
-            (_translation_prevents_freeing(&_inprogress,   b, &old_pair) ||
-             _translation_prevents_freeing(&_checkpointed, b, &old_pair));
+        // Free the old block if it is not still in use by the checkpoint in
+        // progress or the previous checkpoint
+        bool cannot_free =
+            _translation_prevents_freeing(&_inprogress, b, &old_pair) ||
+            _translation_prevents_freeing(&_checkpointed, b, &old_pair);
         if (!cannot_free) {
-            _bt_block_allocator.free_block(old_pair.u.diskoff);
+            _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size);
         }
-    }
-    else {
-        paranoid_invariant(old_pair.size==0);
+    } else {
+        paranoid_invariant(old_pair.size == 0);
         paranoid_invariant(old_pair.u.diskoff == diskoff_unused);
     }
     ft_set_dirty(ft, for_checkpoint);
@@ -644,13 +738,14 @@ void block_table::verify_no_free_blocknums() {
 void block_table::free_unused_blocknums(BLOCKNUM root) {
     _mutex_lock();
     int64_t smallest = _current.smallest_never_used_blocknum.b;
-    for (int64_t i=RESERVED_BLOCKNUMS; i < smallest; i++) {
+    for (int64_t i = RESERVED_BLOCKNUMS; i < smallest; i++) {
         if (i == root.b) {
             continue;
         }
         BLOCKNUM b = make_blocknum(i);
         if (_current.block_translation[b.b].size == 0) {
-            invariant(_current.block_translation[b.b].u.diskoff == diskoff_unused);
+            invariant(_current.block_translation[b.b].u.diskoff ==
+                      diskoff_unused);
             _free_blocknum_in_translation(&_current, b);
         }
     }
@@ -675,13 +770,14 @@ bool block_table::_no_data_blocks_except_root(BLOCKNUM root) {
             goto cleanup;
         }
     }
- cleanup:
+cleanup:
     _mutex_unlock();
     return ok;
 }
 
 // Verify there are no data blocks except root.
-// TODO(leif): This actually takes a lock, but I don't want to fix all the callers right now.
+// TODO(leif): This actually takes a lock, but I don't want to fix all the
+// callers right now.
 void block_table::verify_no_data_blocks_except_root(BLOCKNUM UU(root)) {
     paranoid_invariant(_no_data_blocks_except_root(root));
 }
@@ -705,13 +801,24 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) {
     if (t->block_translation) {
         BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
         fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array);
-        fprintf(f, " smallest_never_used_blocknum[%" PRId64 "]", t->smallest_never_used_blocknum.b);
-        fprintf(f, " blocknum_free_list_head[%" PRId64 "]", t->blocknum_freelist_head.b);
-        fprintf(f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size);
-        fprintf(f, " location_on_disk[%" PRId64 "]\n", t->block_translation[b.b].u.diskoff);
+        fprintf(f,
+                " smallest_never_used_blocknum[%" PRId64 "]",
+                t->smallest_never_used_blocknum.b);
+        fprintf(f,
+                " blocknum_free_list_head[%" PRId64 "]",
+                t->blocknum_freelist_head.b);
+        fprintf(
+            f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size);
+        fprintf(f,
+                " location_on_disk[%" PRId64 "]\n",
+                t->block_translation[b.b].u.diskoff);
         int64_t i;
-        for (i=0; i<t->length_of_array; i++) {
-            fprintf(f, " %" PRId64 ": %" PRId64 " %" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+        for (i = 0; i < t->length_of_array; i++) {
+            fprintf(f,
+                    " %" PRId64 ": %" PRId64 " %" PRId64 "\n",
+                    i,
+                    t->block_translation[i].u.diskoff,
+                    t->block_translation[i].size);
         }
         fprintf(f, "\n");
     } else {
@@ -724,9 +831,13 @@ void block_table::_dump_translation_internal(FILE *f, struct translation *t) {
 void block_table::dump_translation_table_pretty(FILE *f) {
     _mutex_lock();
     struct translation *t = &_checkpointed;
-    assert(t->block_translation != nullptr);
+    invariant(t->block_translation != nullptr);
     for (int64_t i = 0; i < t->length_of_array; ++i) {
-        fprintf(f, "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n", i, t->block_translation[i].u.diskoff, t->block_translation[i].size);
+        fprintf(f,
+                "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n",
+                i,
+                t->block_translation[i].u.diskoff,
+                t->block_translation[i].size);
     }
     _mutex_unlock();
 }
@@ -750,7 +861,10 @@ void block_table::blocknum_dump_translation(BLOCKNUM b) {
     struct translation *t = &_current;
     if (b.b < t->length_of_array) {
         struct block_translation_pair *bx = &t->block_translation[b.b];
-        printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n", b.b, bx->u.diskoff, bx->size);
+        printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n",
+               b.b,
+               bx->u.diskoff,
+               bx->size);
     }
     _mutex_unlock();
 }
@@ -763,26 +877,31 @@ void block_table::destroy(void) {
     toku_free(_inprogress.block_translation);
     toku_free(_checkpointed.block_translation);
 
-    _bt_block_allocator.destroy();
+    _bt_block_allocator->Destroy();
+    delete _bt_block_allocator;
     toku_mutex_destroy(&_mutex);
     nb_mutex_destroy(&_safe_file_size_lock);
 }
 
-int block_table::_translation_deserialize_from_buffer(struct translation *t,
-                                                      DISKOFF location_on_disk,
-                                                      uint64_t size_on_disk,
-                                                      // out: buffer with serialized translation
-                                                      unsigned char *translation_buffer) {
+int block_table::_translation_deserialize_from_buffer(
+    struct translation *t,
+    DISKOFF location_on_disk,
+    uint64_t size_on_disk,
+    // in: buffer with serialized translation
+    unsigned char *translation_buffer) {
     int r = 0;
-    assert(location_on_disk != 0);
+    invariant(location_on_disk != 0);
     t->type = TRANSLATION_CHECKPOINTED;
 
     // check the checksum
     uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4);
     uint64_t offset = size_on_disk - 4;
-    uint32_t stored_x1764 = toku_dtoh32(*(int*)(translation_buffer + offset));
+    uint32_t stored_x1764 = toku_dtoh32(*(int *)(translation_buffer + offset));
     if (x1764 != stored_x1764) {
-        fprintf(stderr, "Translation table checksum failure: calc=0x%08x read=0x%08x\n", x1764, stored_x1764);
+        fprintf(stderr,
+                "Translation table checksum failure: calc=0x%08x read=0x%08x\n",
+                x1764,
+                stored_x1764);
         r = TOKUDB_BAD_CHECKSUM;
         goto exit;
     }
@@ -790,42 +909,47 @@ int block_table::_translation_deserialize_from_buffer(struct translation *t,
     struct rbuf rb;
     rb.buf = translation_buffer;
     rb.ndone = 0;
-    rb.size = size_on_disk-4;//4==checksum
+    rb.size = size_on_disk - 4;  // 4==checksum
 
-    t->smallest_never_used_blocknum = rbuf_blocknum(&rb); 
+    t->smallest_never_used_blocknum = rbuf_blocknum(&rb);
     t->length_of_array = t->smallest_never_used_blocknum.b;
     invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS);
-    t->blocknum_freelist_head = rbuf_blocknum(&rb); 
+    t->blocknum_freelist_head = rbuf_blocknum(&rb);
     XMALLOC_N(t->length_of_array, t->block_translation);
     for (int64_t i = 0; i < t->length_of_array; i++) {
         t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rb);
         t->block_translation[i].size = rbuf_DISKOFF(&rb);
     }
-    invariant(_calculate_size_on_disk(t) == (int64_t) size_on_disk);
-    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size == (int64_t) size_on_disk);
-    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff == location_on_disk);
+    invariant(_calculate_size_on_disk(t) == (int64_t)size_on_disk);
+    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size ==
+              (int64_t)size_on_disk);
+    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff ==
+              location_on_disk);
 
 exit:
     return r;
 }
 
 int block_table::iterate(enum translation_type type,
-                         BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only) {
+                         BLOCKTABLE_CALLBACK f,
+                         void *extra,
+                         bool data_only,
+                         bool used_only) {
     struct translation *src;
-    
+
     int r = 0;
     switch (type) {
-    case TRANSLATION_CURRENT:
-        src = &_current;
-        break;
-    case TRANSLATION_INPROGRESS:
-        src = &_inprogress;
-        break;
-    case TRANSLATION_CHECKPOINTED:
-        src = &_checkpointed;
-        break;
-    default:
-        r = EINVAL;
+        case TRANSLATION_CURRENT:
+            src = &_current;
+            break;
+        case TRANSLATION_INPROGRESS:
+            src = &_inprogress;
+            break;
+        case TRANSLATION_CHECKPOINTED:
+            src = &_checkpointed;
+            break;
+        default:
+            r = EINVAL;
     }
 
     struct translation fakecurrent;
@@ -839,12 +963,15 @@ int block_table::iterate(enum translation_type type,
             src->block_translation[RESERVED_BLOCKNUM_TRANSLATION];
         _mutex_unlock();
         int64_t i;
-        for (i=0; i<t->smallest_never_used_blocknum.b; i++) {
+        for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
             struct block_translation_pair pair = t->block_translation[i];
-            if (data_only && i< RESERVED_BLOCKNUMS) continue;
-            if (used_only && pair.size <= 0) continue;
+            if (data_only && i < RESERVED_BLOCKNUMS)
+                continue;
+            if (used_only && pair.size <= 0)
+                continue;
             r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra);
-            if (r!=0) break;
+            if (r != 0)
+                break;
         }
         toku_free(t->block_translation);
     }
@@ -856,8 +983,11 @@ typedef struct {
     int64_t total_space;
 } frag_extra;
 
-static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extra) {
-    frag_extra *info = (frag_extra *) extra;
+static int frag_helper(BLOCKNUM UU(b),
+                       int64_t size,
+                       int64_t address,
+                       void *extra) {
+    frag_extra *info = (frag_extra *)extra;
 
     if (size + address > info->total_space)
         info->total_space = size + address;
@@ -865,22 +995,30 @@ static int frag_helper(BLOCKNUM UU(b), int64_t size, int64_t address, void *extr
     return 0;
 }
 
-void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep) {
-    frag_extra info = { 0, 0 };
+void block_table::internal_fragmentation(int64_t *total_sizep,
+                                         int64_t *used_sizep) {
+    frag_extra info = {0, 0};
     int r = iterate(TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true);
-    assert_zero(r);
+    invariant_zero(r);
 
-    if (total_sizep) *total_sizep = info.total_space;
-    if (used_sizep)  *used_sizep  = info.used_space;
+    if (total_sizep)
+        *total_sizep = info.total_space;
+    if (used_sizep)
+        *used_sizep = info.used_space;
 }
 
-void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) {
+void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size,
+                                                       DISKOFF *offset,
+                                                       FT ft) {
     toku_mutex_assert_locked(&_mutex);
     BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR);
-    _realloc_on_disk_internal(b, size, offset, ft, false, 0);
+    _realloc_on_disk_internal(b, size, offset, ft, false);
 }
 
-void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) {
+void block_table::realloc_descriptor_on_disk(DISKOFF size,
+                                             DISKOFF *offset,
+                                             FT ft,
+                                             int fd) {
     _mutex_lock();
     _realloc_descriptor_on_disk_unlocked(size, offset, ft);
     _ensure_safe_write_unlocked(fd, size, *offset);
@@ -897,11 +1035,12 @@ void block_table::get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size) {
 void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) {
     // Requires:  blocktable lock is held.
     // Requires:  report->file_size_bytes is already filled in.
-    
+
     // Count the headers.
-    report->data_bytes = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+    report->data_bytes = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
     report->data_blocks = 1;
-    report->checkpoint_bytes_additional = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+    report->checkpoint_bytes_additional =
+        BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
     report->checkpoint_blocks_additional = 1;
 
     struct translation *current = &_current;
@@ -915,30 +1054,34 @@ void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) {
 
     struct translation *checkpointed = &_checkpointed;
     for (int64_t i = 0; i < checkpointed->length_of_array; i++) {
-        struct block_translation_pair *pair = &checkpointed->block_translation[i];
-        if (pair->size > 0 && !(i < current->length_of_array &&
-                                current->block_translation[i].size > 0 &&
-                                current->block_translation[i].u.diskoff == pair->u.diskoff)) {
-                report->checkpoint_bytes_additional += pair->size;
-                report->checkpoint_blocks_additional++;
+        struct block_translation_pair *pair =
+            &checkpointed->block_translation[i];
+        if (pair->size > 0 &&
+            !(i < current->length_of_array &&
+              current->block_translation[i].size > 0 &&
+              current->block_translation[i].u.diskoff == pair->u.diskoff)) {
+            report->checkpoint_bytes_additional += pair->size;
+            report->checkpoint_blocks_additional++;
         }
     }
 
     struct translation *inprogress = &_inprogress;
     for (int64_t i = 0; i < inprogress->length_of_array; i++) {
         struct block_translation_pair *pair = &inprogress->block_translation[i];
-        if (pair->size > 0 && !(i < current->length_of_array &&
-                                current->block_translation[i].size > 0 &&
-                                current->block_translation[i].u.diskoff == pair->u.diskoff) &&
-                              !(i < checkpointed->length_of_array &&
-                                checkpointed->block_translation[i].size > 0 &&
-                                checkpointed->block_translation[i].u.diskoff == pair->u.diskoff)) {
+        if (pair->size > 0 &&
+            !(i < current->length_of_array &&
+              current->block_translation[i].size > 0 &&
+              current->block_translation[i].u.diskoff == pair->u.diskoff) &&
+            !(i < checkpointed->length_of_array &&
+              checkpointed->block_translation[i].size > 0 &&
+              checkpointed->block_translation[i].u.diskoff ==
+                  pair->u.diskoff)) {
             report->checkpoint_bytes_additional += pair->size;
             report->checkpoint_blocks_additional++;
         }
     }
 
-    _bt_block_allocator.get_unused_statistics(report);
+    _bt_block_allocator->UnusedStatistics(report);
 }
 
 void block_table::get_info64(struct ftinfo64 *s) {
@@ -967,25 +1110,38 @@ void block_table::get_info64(struct ftinfo64 *s) {
     _mutex_unlock();
 }
 
-int block_table::iterate_translation_tables(uint64_t checkpoint_count,
-                                            int (*iter)(uint64_t checkpoint_count,
-                                                        int64_t total_num_rows,
-                                                        int64_t blocknum,
-                                                        int64_t diskoff,
-                                                        int64_t size,
-                                                        void *extra),
-                                            void *iter_extra) {
+int block_table::iterate_translation_tables(
+    uint64_t checkpoint_count,
+    int (*iter)(uint64_t checkpoint_count,
+                int64_t total_num_rows,
+                int64_t blocknum,
+                int64_t diskoff,
+                int64_t size,
+                void *extra),
+    void *iter_extra) {
     int error = 0;
     _mutex_lock();
 
-    int64_t total_num_rows = _current.length_of_array + _checkpointed.length_of_array;
+    int64_t total_num_rows =
+        _current.length_of_array + _checkpointed.length_of_array;
     for (int64_t i = 0; error == 0 && i < _current.length_of_array; ++i) {
         struct block_translation_pair *block = &_current.block_translation[i];
-        error = iter(checkpoint_count, total_num_rows, i, block->u.diskoff, block->size, iter_extra);
+        error = iter(checkpoint_count,
+                     total_num_rows,
+                     i,
+                     block->u.diskoff,
+                     block->size,
+                     iter_extra);
     }
     for (int64_t i = 0; error == 0 && i < _checkpointed.length_of_array; ++i) {
-        struct block_translation_pair *block = &_checkpointed.block_translation[i];
-        error = iter(checkpoint_count - 1, total_num_rows, i, block->u.diskoff, block->size, iter_extra);
+        struct block_translation_pair *block =
+            &_checkpointed.block_translation[i];
+        error = iter(checkpoint_count - 1,
+                     total_num_rows,
+                     i,
+                     block->u.diskoff,
+                     block->size,
+                     iter_extra);
     }
 
     _mutex_unlock();
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.h b/storage/tokudb/PerconaFT/ft/serialize/block_table.h
index 8d39167454094..dd732d4f3726c 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_table.h
+++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.h
@@ -62,13 +62,16 @@ enum {
     RESERVED_BLOCKNUMS
 };
 
-typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra);
+typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b,
+                                   int64_t size,
+                                   int64_t address,
+                                   void *extra);
 
 static inline BLOCKNUM make_blocknum(int64_t b) {
-    BLOCKNUM result = { .b = b };
+    BLOCKNUM result = {.b = b};
     return result;
 }
-static const BLOCKNUM ROLLBACK_NONE = { .b = 0 };
+static const BLOCKNUM ROLLBACK_NONE = {.b = 0};
 
 /**
  *  There are three copies of the translation table (btt) in the block table:
@@ -80,18 +83,20 @@ static const BLOCKNUM ROLLBACK_NONE = { .b = 0 };
  *
  *    inprogress     Is only filled by copying from current,
  *                   and is the only version ever serialized to disk.
- *                   (It is serialized to disk on checkpoint and clean shutdown.)
+ *                   (It is serialized to disk on checkpoint and clean
+ *                   shutdown.)
  *                   At end of checkpoint it replaces 'checkpointed'.
  *                   During a checkpoint, any 'pending' dirty writes will update
  *                   inprogress.
  *
  *    current        Is initialized by copying from checkpointed,
- *                   is the only version ever modified while the database is in use, 
+ *                   is the only version ever modified while the database is in
+ *                   use,
  *                   and is the only version ever copied to inprogress.
  *                   It is never stored on disk.
  */
 class block_table {
-public:
+   public:
     enum translation_type {
         TRANSLATION_NONE = 0,
         TRANSLATION_CURRENT,
@@ -102,7 +107,10 @@ class block_table {
 
     void create();
 
-    int create_from_buffer(int fd, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer);
+    int create_from_buffer(int fd,
+                           DISKOFF location_on_disk,
+                           DISKOFF size_on_disk,
+                           unsigned char *translation_buffer);
 
     void destroy();
 
@@ -114,11 +122,21 @@ class block_table {
 
     // Blocknums
     void allocate_blocknum(BLOCKNUM *res, struct ft *ft);
-    void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat);
+    void realloc_on_disk(BLOCKNUM b,
+                         DISKOFF size,
+                         DISKOFF *offset,
+                         struct ft *ft,
+                         int fd,
+                         bool for_checkpoint);
     void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint);
-    void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
+    void translate_blocknum_to_offset_size(BLOCKNUM b,
+                                           DISKOFF *offset,
+                                           DISKOFF *size);
     void free_unused_blocknums(BLOCKNUM root);
-    void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, int fd);
+    void realloc_descriptor_on_disk(DISKOFF size,
+                                    DISKOFF *offset,
+                                    struct ft *ft,
+                                    int fd);
     void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size);
 
     // External verfication
@@ -127,15 +145,22 @@ class block_table {
     void verify_no_free_blocknums();
 
     // Serialization
-    void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, int64_t *size);
+    void serialize_translation_to_wbuf(int fd,
+                                       struct wbuf *w,
+                                       int64_t *address,
+                                       int64_t *size);
 
     // DEBUG ONLY (ftdump included), tests included
     void blocknum_dump_translation(BLOCKNUM b);
     void dump_translation_table_pretty(FILE *f);
     void dump_translation_table(FILE *f);
-    void block_free(uint64_t offset);
+    void block_free(uint64_t offset, uint64_t size);
 
-    int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only); 
+    int iterate(enum translation_type type,
+                BLOCKTABLE_CALLBACK f,
+                void *extra,
+                bool data_only,
+                bool used_only);
     void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep);
 
     // Requires: blocktable lock is held.
@@ -146,13 +171,16 @@ class block_table {
 
     void get_info64(struct ftinfo64 *);
 
-    int iterate_translation_tables(uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *);
+    int iterate_translation_tables(
+        uint64_t,
+        int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *),
+        void *);
 
-private:
+   private:
     struct block_translation_pair {
         // If in the freelist, use next_free_blocknum, otherwise diskoff.
         union {
-            DISKOFF  diskoff; 
+            DISKOFF diskoff;
             BLOCKNUM next_free_blocknum;
         } u;
 
@@ -173,7 +201,8 @@ class block_table {
     struct translation {
         enum translation_type type;
 
-        // Number of elements in array (block_translation).  always >= smallest_never_used_blocknum
+        // Number of elements in array (block_translation).  always >=
+        // smallest_never_used_blocknum
         int64_t length_of_array;
         BLOCKNUM smallest_never_used_blocknum;
 
@@ -181,20 +210,28 @@ class block_table {
         BLOCKNUM blocknum_freelist_head;
         struct block_translation_pair *block_translation;
 
-        // size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size
-        // location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff
+        // size_on_disk is stored in
+        // block_translation[RESERVED_BLOCKNUM_TRANSLATION].size
+        // location_on_disk is stored in
+        // block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff
     };
 
     void _create_internal();
-    int _translation_deserialize_from_buffer(struct translation *t,    // destination into which to deserialize
-                                             DISKOFF location_on_disk, // location of translation_buffer
-                                             uint64_t size_on_disk,
-                                             unsigned char * translation_buffer);   // buffer with serialized translation
-
-    void _copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype);
+    int _translation_deserialize_from_buffer(
+        struct translation *t,     // destination into which to deserialize
+        DISKOFF location_on_disk,  // location of translation_buffer
+        uint64_t size_on_disk,
+        unsigned char *
+            translation_buffer);  // buffer with serialized translation
+
+    void _copy_translation(struct translation *dst,
+                           struct translation *src,
+                           enum translation_type newtype);
     void _maybe_optimize_translation(struct translation *t);
     void _maybe_expand_translation(struct translation *t);
-    bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair);
+    bool _translation_prevents_freeing(struct translation *t,
+                                       BLOCKNUM b,
+                                       struct block_translation_pair *old_pair);
     void _free_blocknum_in_translation(struct translation *t, BLOCKNUM b);
     int64_t _calculate_size_on_disk(struct translation *t);
     bool _pair_is_unallocated(struct block_translation_pair *pair);
@@ -203,14 +240,26 @@ class block_table {
 
     // Blocknum management
     void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft);
-    void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint);
-    void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft);
-    void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat);
-    void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
+    void _free_blocknum_unlocked(BLOCKNUM *bp,
+                                 struct ft *ft,
+                                 bool for_checkpoint);
+    void _realloc_descriptor_on_disk_unlocked(DISKOFF size,
+                                              DISKOFF *offset,
+                                              struct ft *ft);
+    void _realloc_on_disk_internal(BLOCKNUM b,
+                                   DISKOFF size,
+                                   DISKOFF *offset,
+                                   struct ft *ft,
+                                   bool for_checkpoint);
+    void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b,
+                                                     DISKOFF *offset,
+                                                     DISKOFF *size);
 
     // File management
     void _maybe_truncate_file(int fd, uint64_t size_needed_before);
-    void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset);
+    void _ensure_safe_write_unlocked(int fd,
+                                     DISKOFF block_size,
+                                     DISKOFF block_offset);
 
     // Verification
     bool _is_valid_blocknum(struct translation *t, BLOCKNUM b);
@@ -220,29 +269,33 @@ class block_table {
     bool _no_data_blocks_except_root(BLOCKNUM root);
     bool _blocknum_allocated(BLOCKNUM b);
 
-    // Locking 
+    // Locking
     //
     // TODO: Move the lock to the FT
     void _mutex_lock();
     void _mutex_unlock();
 
-    // The current translation is the one used by client threads. 
+    // The current translation is the one used by client threads.
     // It is not represented on disk.
     struct translation _current;
 
-    // The translation used by the checkpoint currently in progress. 
-    // If the checkpoint thread allocates a block, it must also update the current translation.
+    // The translation used by the checkpoint currently in progress.
+    // If the checkpoint thread allocates a block, it must also update the
+    // current translation.
     struct translation _inprogress;
 
-    // The translation for the data that shall remain inviolate on disk until the next checkpoint finishes,
+    // The translation for the data that shall remain inviolate on disk until
+    // the next checkpoint finishes,
     // after which any blocks used only in this translation can be freed.
     struct translation _checkpointed;
 
-    // The in-memory data structure for block allocation. 
+    // The in-memory data structure for block allocation.
     // There is no on-disk data structure for block allocation.
-    // Note: This is *allocation* not *translation* - the block allocator is unaware of which
-    //       blocks are used for which translation, but simply allocates and deallocates blocks.
-    block_allocator _bt_block_allocator;
+    // Note: This is *allocation* not *translation* - the block
+    //       allocator is unaware of which blocks are used for
+    //       which translation, but simply allocates and
+    //       deallocates blocks.
+    BlockAllocator *_bt_block_allocator;
     toku_mutex_t _mutex;
     struct nb_mutex _safe_file_size_lock;
     bool _checkpoint_skipped;
@@ -257,16 +310,16 @@ class block_table {
 
 #include "ft/serialize/wbuf.h"
 
-static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
+static inline void wbuf_BLOCKNUM(struct wbuf *w, BLOCKNUM b) {
     wbuf_ulonglong(w, b.b);
 }
 
-static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
+static inline void wbuf_nocrc_BLOCKNUM(struct wbuf *w, BLOCKNUM b) {
     wbuf_nocrc_ulonglong(w, b.b);
 }
 
 static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off) {
-    wbuf_ulonglong(wb, (uint64_t) off);
+    wbuf_ulonglong(wb, (uint64_t)off);
 }
 
 #include "ft/serialize/rbuf.h"
@@ -280,6 +333,8 @@ static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb) {
     return result;
 }
 
-static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma), BLOCKNUM *blocknum) {
+static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb,
+                                    memarena *UU(ma),
+                                    BLOCKNUM *blocknum) {
     *blocknum = rbuf_blocknum(rb);
 }
diff --git a/storage/tokudb/PerconaFT/ft/serialize/compress.cc b/storage/tokudb/PerconaFT/ft/serialize/compress.cc
index 1719b6b7cb51d..c2f815c6cf22c 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/compress.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/compress.cc
@@ -235,7 +235,7 @@ void toku_decompress (Bytef       *dest,   uLongf destLen,
         strm.zalloc = Z_NULL;
         strm.zfree = Z_NULL;
         strm.opaque = Z_NULL;
-        char windowBits = source[1];
+        int8_t windowBits = source[1];
         int r = inflateInit2(&strm, windowBits);
         lazy_assert(r == Z_OK);
         strm.next_out = dest;
diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
index 49d4368a3ab83..8fcb529341205 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc
@@ -217,8 +217,8 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
             // translation table itself won't fit in main memory.
             ssize_t readsz = toku_os_pread(fd, tbuf, size_to_read,
                                            translation_address_on_disk);
-            assert(readsz >= translation_size_on_disk);
-            assert(readsz <= (ssize_t)size_to_read);
+            invariant(readsz >= translation_size_on_disk);
+            invariant(readsz <= (ssize_t)size_to_read);
         }
         // Create table and read in data.
         r = ft->blocktable.create_from_buffer(fd,
@@ -411,73 +411,90 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
     return r;
 }
 
-static size_t
-serialize_ft_min_size (uint32_t version) {
+static size_t serialize_ft_min_size(uint32_t version) {
     size_t size = 0;
 
-    switch(version) {
-    case FT_LAYOUT_VERSION_29:
-        size += sizeof(uint64_t); // logrows in ft
-    case FT_LAYOUT_VERSION_28:
-        size += sizeof(uint32_t); // fanout in ft
-    case FT_LAYOUT_VERSION_27:
-    case FT_LAYOUT_VERSION_26:
-    case FT_LAYOUT_VERSION_25:
-    case FT_LAYOUT_VERSION_24:
-    case FT_LAYOUT_VERSION_23:
-    case FT_LAYOUT_VERSION_22:
-    case FT_LAYOUT_VERSION_21:
-        size += sizeof(MSN);       // max_msn_in_ft
-    case FT_LAYOUT_VERSION_20:
-    case FT_LAYOUT_VERSION_19:
-        size += 1; // compression method
-        size += sizeof(MSN);       // highest_unused_msn_for_upgrade
-    case FT_LAYOUT_VERSION_18:
-        size += sizeof(uint64_t);  // time_of_last_optimize_begin
-        size += sizeof(uint64_t);  // time_of_last_optimize_end
-        size += sizeof(uint32_t);  // count_of_optimize_in_progress
-        size += sizeof(MSN);       // msn_at_start_of_last_completed_optimize
-        size -= 8;                 // removed num_blocks_to_upgrade_14
-        size -= 8;                 // removed num_blocks_to_upgrade_13
-    case FT_LAYOUT_VERSION_17:
-        size += 16;
-        invariant(sizeof(STAT64INFO_S) == 16);
-    case FT_LAYOUT_VERSION_16:
-    case FT_LAYOUT_VERSION_15:
-        size += 4;  // basement node size
-        size += 8;  // num_blocks_to_upgrade_14 (previously num_blocks_to_upgrade, now one int each for upgrade from 13, 14
-        size += 8;  // time of last verification
-    case FT_LAYOUT_VERSION_14:
-        size += 8;  //TXNID that created
-    case FT_LAYOUT_VERSION_13:
-        size += ( 4 // build_id
-                  +4 // build_id_original
-                  +8 // time_of_creation
-                  +8 // time_of_last_modification
-            );
+    switch (version) {
+        case FT_LAYOUT_VERSION_29:
+            size += sizeof(uint64_t);  // logrows in ft
+        case FT_LAYOUT_VERSION_28:
+            size += sizeof(uint32_t);  // fanout in ft
+        case FT_LAYOUT_VERSION_27:
+        case FT_LAYOUT_VERSION_26:
+        case FT_LAYOUT_VERSION_25:
+        case FT_LAYOUT_VERSION_24:
+        case FT_LAYOUT_VERSION_23:
+        case FT_LAYOUT_VERSION_22:
+        case FT_LAYOUT_VERSION_21:
+            size += sizeof(MSN);  // max_msn_in_ft
+        case FT_LAYOUT_VERSION_20:
+        case FT_LAYOUT_VERSION_19:
+            size += 1;            // compression method
+            size += sizeof(MSN);  // highest_unused_msn_for_upgrade
+        case FT_LAYOUT_VERSION_18:
+            size += sizeof(uint64_t);  // time_of_last_optimize_begin
+            size += sizeof(uint64_t);  // time_of_last_optimize_end
+            size += sizeof(uint32_t);  // count_of_optimize_in_progress
+            size += sizeof(MSN);  // msn_at_start_of_last_completed_optimize
+            size -= 8;            // removed num_blocks_to_upgrade_14
+            size -= 8;            // removed num_blocks_to_upgrade_13
+        case FT_LAYOUT_VERSION_17:
+            size += 16;
+            invariant(sizeof(STAT64INFO_S) == 16);
+        case FT_LAYOUT_VERSION_16:
+        case FT_LAYOUT_VERSION_15:
+            size += 4;  // basement node size
+            size += 8;  // num_blocks_to_upgrade_14 (previously
+                        // num_blocks_to_upgrade, now one int each for upgrade
+                        // from 13, 14)
+            size += 8;  // time of last verification
+        case FT_LAYOUT_VERSION_14:
+            size += 8;  // TXNID that created
+        case FT_LAYOUT_VERSION_13:
+            size += (4  // build_id
+                     +
+                     4  // build_id_original
+                     +
+                     8  // time_of_creation
+                     +
+                     8  // time_of_last_modification
+                     );
         // fall through
-    case FT_LAYOUT_VERSION_12:
-        size += (+8 // "tokudata"
-                 +4 // version
-                 +4 // original_version
-                 +4 // size
-                 +8 // byte order verification
-                 +8 // checkpoint_count
-                 +8 // checkpoint_lsn
-                 +4 // tree's nodesize
-                 +8 // translation_size_on_disk
-                 +8 // translation_address_on_disk
-                 +4 // checksum
-                 +8 // Number of blocks in old version.
-                 +8 // diskoff
-                 +4 // flags
-            );
-        break;
-    default:
-        abort();
-    }
-
-    lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
+        case FT_LAYOUT_VERSION_12:
+            size += (+8  // "tokudata"
+                     +
+                     4  // version
+                     +
+                     4  // original_version
+                     +
+                     4  // size
+                     +
+                     8  // byte order verification
+                     +
+                     8  // checkpoint_count
+                     +
+                     8  // checkpoint_lsn
+                     +
+                     4  // tree's nodesize
+                     +
+                     8  // translation_size_on_disk
+                     +
+                     8  // translation_address_on_disk
+                     +
+                     4  // checksum
+                     +
+                     8  // Number of blocks in old version.
+                     +
+                     8  // diskoff
+                     +
+                     4  // flags
+                     );
+            break;
+        default:
+            abort();
+    }
+
+    lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
     return size;
 }
 
@@ -486,7 +503,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
                                      struct rbuf *rb,
                                      uint64_t *checkpoint_count,
                                      LSN *checkpoint_lsn,
-                                     uint32_t * version_p)
+                                     uint32_t *version_p)
 // Effect: Read and parse the header of a fractalal tree
 //
 //  Simply reading the raw bytes of the header into an rbuf is insensitive
@@ -496,18 +513,18 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
 //  file AND the header is useless
 {
     int r = 0;
-    const int64_t prefix_size = 8 + // magic ("tokudata")
-                                4 + // version
-                                4 + // build_id
-                                4;  // size
+    const int64_t prefix_size = 8 +  // magic ("tokudata")
+                                4 +  // version
+                                4 +  // build_id
+                                4;   // size
     const int64_t read_size = roundup_to_multiple(512, prefix_size);
     unsigned char *XMALLOC_N_ALIGNED(512, read_size, prefix);
     rb->buf = NULL;
     int64_t n = toku_os_pread(fd, prefix, read_size, offset_of_header);
     if (n != read_size) {
-        if (n==0) {
+        if (n == 0) {
             r = TOKUDB_DICTIONARY_NO_HEADER;
-        } else if (n<0) {
+        } else if (n < 0) {
             r = get_error_errno();
         } else {
             r = EINVAL;
@@ -518,95 +535,102 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
 
     rbuf_init(rb, prefix, prefix_size);
 
-    //Check magic number
+    // Check magic number
     const void *magic;
     rbuf_literal_bytes(rb, &magic, 8);
-    if (memcmp(magic,"tokudata",8)!=0) {
-        if ((*(uint64_t*)magic) == 0) {
+    if (memcmp(magic, "tokudata", 8) != 0) {
+        if ((*(uint64_t *)magic) == 0) {
             r = TOKUDB_DICTIONARY_NO_HEADER;
         } else {
-            r = EINVAL; //Not a tokudb file! Do not use.
+            r = EINVAL;  // Not a tokudb file! Do not use.
         }
         goto exit;
     }
 
-    //Version MUST be in network order regardless of disk order.
+    // Version MUST be in network order regardless of disk order.
     uint32_t version;
     version = rbuf_network_int(rb);
     *version_p = version;
     if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) {
-        r = TOKUDB_DICTIONARY_TOO_OLD; //Cannot use
+        r = TOKUDB_DICTIONARY_TOO_OLD;  // Cannot use
         goto exit;
     } else if (version > FT_LAYOUT_VERSION) {
-        r = TOKUDB_DICTIONARY_TOO_NEW; //Cannot use
+        r = TOKUDB_DICTIONARY_TOO_NEW;  // Cannot use
         goto exit;
     }
 
-    //build_id MUST be in network order regardless of disk order.
+    // build_id MUST be in network order regardless of disk order.
     uint32_t build_id __attribute__((__unused__));
     build_id = rbuf_network_int(rb);
     int64_t min_header_size;
     min_header_size = serialize_ft_min_size(version);
 
-    //Size MUST be in network order regardless of disk order.
+    // Size MUST be in network order regardless of disk order.
     uint32_t size;
     size = rbuf_network_int(rb);
-    //If too big, it is corrupt.  We would probably notice during checksum
-    //but may have to do a multi-gigabyte malloc+read to find out.
-    //If its too small reading rbuf would crash, so verify.
-    if (size > block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) {
+    // If too big, it is corrupt.  We would probably notice during checksum
+    // but may have to do a multi-gigabyte malloc+read to find out.
+    // If its too small reading rbuf would crash, so verify.
+    if (size > BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE ||
+        size < min_header_size) {
         r = TOKUDB_DICTIONARY_NO_HEADER;
         goto exit;
     }
 
-    lazy_assert(rb->ndone==prefix_size);
+    lazy_assert(rb->ndone == prefix_size);
     rb->size = size;
     {
         toku_free(rb->buf);
         uint32_t size_to_read = roundup_to_multiple(512, size);
         XMALLOC_N_ALIGNED(512, size_to_read, rb->buf);
 
-        assert(offset_of_header%512==0);
+        invariant(offset_of_header % 512 == 0);
         n = toku_os_pread(fd, rb->buf, size_to_read, offset_of_header);
         if (n != size_to_read) {
             if (n < 0) {
                 r = get_error_errno();
             } else {
-                r = EINVAL; //Header might be useless (wrong size) or could be a disk read error.
+                r = EINVAL;  // Header might be useless (wrong size) or could be
+                             // a disk read error.
             }
             goto exit;
         }
     }
-    //It's version 14 or later.  Magic looks OK.
-    //We have an rbuf that represents the header.
-    //Size is within acceptable bounds.
+    // It's version 14 or later.  Magic looks OK.
+    // We have an rbuf that represents the header.
+    // Size is within acceptable bounds.
 
-    //Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function changed)
+    // Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function
+    // changed)
     uint32_t calculated_x1764;
-    calculated_x1764 = toku_x1764_memory(rb->buf, rb->size-4);
+    calculated_x1764 = toku_x1764_memory(rb->buf, rb->size - 4);
     uint32_t stored_x1764;
-    stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4));
+    stored_x1764 = toku_dtoh32(*(int *)(rb->buf + rb->size - 4));
     if (calculated_x1764 != stored_x1764) {
-        r = TOKUDB_BAD_CHECKSUM; //Header useless
-        fprintf(stderr, "Header checksum failure: calc=0x%08x read=0x%08x\n", calculated_x1764, stored_x1764);
+        r = TOKUDB_BAD_CHECKSUM;  // Header useless
+        fprintf(stderr,
+                "Header checksum failure: calc=0x%08x read=0x%08x\n",
+                calculated_x1764,
+                stored_x1764);
         goto exit;
     }
 
-    //Verify byte order
+    // Verify byte order
     const void *tmp_byte_order_check;
     lazy_assert((sizeof toku_byte_order_host) == 8);
-    rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order
+    rbuf_literal_bytes(
+        rb, &tmp_byte_order_check, 8);  // Must not translate byte order
     int64_t byte_order_stored;
-    byte_order_stored = *(int64_t*)tmp_byte_order_check;
+    byte_order_stored = *(int64_t *)tmp_byte_order_check;
     if (byte_order_stored != toku_byte_order_host) {
-        r = TOKUDB_DICTIONARY_NO_HEADER; //Cannot use dictionary
+        r = TOKUDB_DICTIONARY_NO_HEADER;  // Cannot use dictionary
         goto exit;
     }
 
-    //Load checkpoint count
+    // Load checkpoint count
     *checkpoint_count = rbuf_ulonglong(rb);
     *checkpoint_lsn = rbuf_LSN(rb);
-    //Restart at beginning during regular deserialization
+    // Restart at beginning during regular deserialization
     rb->ndone = 0;
 
 exit:
@@ -620,11 +644,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
 // Read ft from file into struct.  Read both headers and use one.
 // We want the latest acceptable header whose checkpoint_lsn is no later
 // than max_acceptable_lsn.
-int
-toku_deserialize_ft_from(int fd,
-                         LSN max_acceptable_lsn,
-                         FT *ft)
-{
+int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft) {
     struct rbuf rb_0;
     struct rbuf rb_1;
     uint64_t checkpoint_count_0 = 0;
@@ -638,13 +658,23 @@ toku_deserialize_ft_from(int fd,
     int r0, r1, r;
 
     toku_off_t header_0_off = 0;
-    r0 = deserialize_ft_from_fd_into_rbuf(fd, header_0_off, &rb_0, &checkpoint_count_0, &checkpoint_lsn_0, &version_0);
+    r0 = deserialize_ft_from_fd_into_rbuf(fd,
+                                          header_0_off,
+                                          &rb_0,
+                                          &checkpoint_count_0,
+                                          &checkpoint_lsn_0,
+                                          &version_0);
     if (r0 == 0 && checkpoint_lsn_0.lsn <= max_acceptable_lsn.lsn) {
         h0_acceptable = true;
     }
 
-    toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
-    r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, &version_1);
+    toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+    r1 = deserialize_ft_from_fd_into_rbuf(fd,
+                                          header_1_off,
+                                          &rb_1,
+                                          &checkpoint_count_1,
+                                          &checkpoint_lsn_1,
+                                          &version_1);
     if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) {
         h1_acceptable = true;
     }
@@ -655,24 +685,29 @@ toku_deserialize_ft_from(int fd,
         // We were unable to read either header or at least one is too
         // new.  Certain errors are higher priority than others. Order of
         // these if/else if is important.
-        if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) {
+        if (r0 == TOKUDB_DICTIONARY_TOO_NEW ||
+            r1 == TOKUDB_DICTIONARY_TOO_NEW) {
             r = TOKUDB_DICTIONARY_TOO_NEW;
-        } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || r1 == TOKUDB_DICTIONARY_TOO_OLD) {
+        } else if (r0 == TOKUDB_DICTIONARY_TOO_OLD ||
+                   r1 == TOKUDB_DICTIONARY_TOO_OLD) {
             r = TOKUDB_DICTIONARY_TOO_OLD;
         } else if (r0 == TOKUDB_BAD_CHECKSUM && r1 == TOKUDB_BAD_CHECKSUM) {
             fprintf(stderr, "Both header checksums failed.\n");
             r = TOKUDB_BAD_CHECKSUM;
-        } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || r1 == TOKUDB_DICTIONARY_NO_HEADER) {
+        } else if (r0 == TOKUDB_DICTIONARY_NO_HEADER ||
+                   r1 == TOKUDB_DICTIONARY_NO_HEADER) {
             r = TOKUDB_DICTIONARY_NO_HEADER;
         } else {
-            r = r0 ? r0 : r1; //Arbitrarily report the error from the
-                              //first header, unless it's readable
+            r = r0 ? r0 : r1;  // Arbitrarily report the error from the
+            // first header, unless it's readable
         }
 
-        // it should not be possible for both headers to be later than the max_acceptable_lsn
-        invariant(!((r0==0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) &&
-                    (r1==0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn)));
-        invariant(r!=0);
+        // it should not be possible for both headers to be later than the
+        // max_acceptable_lsn
+        invariant(
+            !((r0 == 0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) &&
+              (r1 == 0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn)));
+        invariant(r != 0);
         goto exit;
     }
 
@@ -682,8 +717,7 @@ toku_deserialize_ft_from(int fd,
             invariant(version_0 >= version_1);
             rb = &rb_0;
             version = version_0;
-        }
-        else {
+        } else {
             invariant(checkpoint_count_1 == checkpoint_count_0 + 1);
             invariant(version_1 >= version_0);
             rb = &rb_1;
@@ -692,14 +726,18 @@ toku_deserialize_ft_from(int fd,
     } else if (h0_acceptable) {
         if (r1 == TOKUDB_BAD_CHECKSUM) {
             // print something reassuring
-            fprintf(stderr, "Header 2 checksum failed, but header 1 ok.  Proceeding.\n");
+            fprintf(
+                stderr,
+                "Header 2 checksum failed, but header 1 ok.  Proceeding.\n");
         }
         rb = &rb_0;
         version = version_0;
     } else if (h1_acceptable) {
         if (r0 == TOKUDB_BAD_CHECKSUM) {
             // print something reassuring
-            fprintf(stderr, "Header 1 checksum failed, but header 2 ok.  Proceeding.\n");
+            fprintf(
+                stderr,
+                "Header 1 checksum failed, but header 2 ok.  Proceeding.\n");
         }
         rb = &rb_1;
         version = version_1;
@@ -718,15 +756,13 @@ toku_deserialize_ft_from(int fd,
     return r;
 }
 
-
-size_t toku_serialize_ft_size (FT_HEADER h) {
+size_t toku_serialize_ft_size(FT_HEADER h) {
     size_t size = serialize_ft_min_size(h->layout_version);
-    //There is no dynamic data.
-    lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
+    // There is no dynamic data.
+    lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
     return size;
 }
 
-
 void toku_serialize_ft_to_wbuf (
     struct wbuf *wbuf, 
     FT_HEADER h, 
@@ -771,52 +807,60 @@ void toku_serialize_ft_to_wbuf (
 }
 
 void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) {
-    lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS);
+    lazy_assert(h->type == FT_CHECKPOINT_INPROGRESS);
     struct wbuf w_translation;
     int64_t size_translation;
     int64_t address_translation;
 
     // Must serialize translation first, to get address,size for header.
-    bt->serialize_translation_to_wbuf(fd, &w_translation,
-                                      &address_translation,
-                                      &size_translation);
-    assert(size_translation == w_translation.ndone);
+    bt->serialize_translation_to_wbuf(
+        fd, &w_translation, &address_translation, &size_translation);
+    invariant(size_translation == w_translation.ndone);
 
-    // the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized.
-    assert(w_translation.size % 512 == 0);
+    // the number of bytes available in the buffer is 0 mod 512, and those last
+    // bytes are all initialized.
+    invariant(w_translation.size % 512 == 0);
 
     struct wbuf w_main;
-    size_t size_main       = toku_serialize_ft_size(h);
+    size_t size_main = toku_serialize_ft_size(h);
     size_t size_main_aligned = roundup_to_multiple(512, size_main);
-    assert(size_main_aligned<block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
+    invariant(size_main_aligned <
+              BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
     char *XMALLOC_N_ALIGNED(512, size_main_aligned, mainbuf);
-    for (size_t i=size_main; i<size_main_aligned; i++) mainbuf[i]=0; // initialize the end of the buffer with zeros
+    for (size_t i = size_main; i < size_main_aligned; i++)
+        mainbuf[i] = 0;  // initialize the end of the buffer with zeros
     wbuf_init(&w_main, mainbuf, size_main);
-    toku_serialize_ft_to_wbuf(&w_main, h, address_translation, size_translation);
+    toku_serialize_ft_to_wbuf(
+        &w_main, h, address_translation, size_translation);
     lazy_assert(w_main.ndone == size_main);
 
     // Actually write translation table
-    // This write is guaranteed to read good data at the end of the buffer, since the
+    // This write is guaranteed to read good data at the end of the buffer,
+    // since the
     // w_translation.buf is padded with zeros to a 512-byte boundary.
-    toku_os_full_pwrite(fd, w_translation.buf, roundup_to_multiple(512, size_translation), address_translation);
-
-    //Everything but the header MUST be on disk before header starts.
-    //Otherwise we will think the header is good and some blocks might not
-    //yet be on disk.
-    //If the header has a cachefile we need to do cachefile fsync (to
-    //prevent crash if we redirected to dev null)
-    //If there is no cachefile we still need to do an fsync.
+    toku_os_full_pwrite(fd,
+                        w_translation.buf,
+                        roundup_to_multiple(512, size_translation),
+                        address_translation);
+
+    // Everything but the header MUST be on disk before header starts.
+    // Otherwise we will think the header is good and some blocks might not
+    // yet be on disk.
+    // If the header has a cachefile we need to do cachefile fsync (to
+    // prevent crash if we redirected to dev null)
+    // If there is no cachefile we still need to do an fsync.
     if (cf) {
         toku_cachefile_fsync(cf);
-    }
-    else {
+    } else {
         toku_file_fsync(fd);
     }
 
-    //Alternate writing header to two locations:
+    // Alternate writing header to two locations:
     //   Beginning (0) or BLOCK_ALLOCATOR_HEADER_RESERVE
     toku_off_t main_offset;
-    main_offset = (h->checkpoint_count & 0x1) ? 0 : block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+    main_offset = (h->checkpoint_count & 0x1)
+                      ? 0
+                      : BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
     toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset);
     toku_free(w_main.buf);
     toku_free(w_translation.buf);
diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
index c4f4886b6a03b..5914f8a1050e9 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
+++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc
@@ -99,13 +99,11 @@ void toku_ft_serialize_layer_init(void) {
     num_cores = toku_os_get_number_active_processors();
     int r = toku_thread_pool_create(&ft_pool, num_cores);
     lazy_assert_zero(r);
-    block_allocator::maybe_initialize_trace();
     toku_serialize_in_parallel = false;
 }
 
 void toku_ft_serialize_layer_destroy(void) {
     toku_thread_pool_destroy(&ft_pool);
-    block_allocator::maybe_close_trace();
 }
 
 enum { FILE_CHANGE_INCREMENT = (16 << 20) };
@@ -773,19 +771,23 @@ int toku_serialize_ftnode_to_memory(FTNODE node,
     return 0;
 }
 
-int
-toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint) {
-
+int toku_serialize_ftnode_to(int fd,
+                             BLOCKNUM blocknum,
+                             FTNODE node,
+                             FTNODE_DISK_DATA *ndd,
+                             bool do_rebalancing,
+                             FT ft,
+                             bool for_checkpoint) {
     size_t n_to_write;
     size_t n_uncompressed_bytes;
     char *compressed_buf = nullptr;
 
-    // because toku_serialize_ftnode_to is only called for 
+    // because toku_serialize_ftnode_to is only called for
     // in toku_ftnode_flush_callback, we pass false
     // for in_parallel. The reasoning is that when we write
-    // nodes to disk via toku_ftnode_flush_callback, we 
+    // nodes to disk via toku_ftnode_flush_callback, we
     // assume that it is being done on a non-critical
-    // background thread (probably for checkpointing), and therefore 
+    // background thread (probably for checkpointing), and therefore
     // should not hog CPU,
     //
     // Should the above facts change, we may want to revisit
@@ -802,32 +804,32 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
         toku_unsafe_fetch(&toku_serialize_in_parallel),
         &n_to_write,
         &n_uncompressed_bytes,
-        &compressed_buf
-        );
+        &compressed_buf);
     if (r != 0) {
         return r;
     }
 
-    // If the node has never been written, then write the whole buffer, including the zeros
-    invariant(blocknum.b>=0);
+    // If the node has never been written, then write the whole buffer,
+    // including the zeros
+    invariant(blocknum.b >= 0);
     DISKOFF offset;
 
     // Dirties the ft
-    ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
-                                   ft, fd, for_checkpoint,
-                                   // Allocations for nodes high in the tree are considered 'hot',
-                                   // as they are likely to move again in the next checkpoint.
-                                   node->height);
+    ft->blocktable.realloc_on_disk(
+        blocknum, n_to_write, &offset, ft, fd, for_checkpoint);
 
     tokutime_t t0 = toku_time_now();
     toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
     tokutime_t t1 = toku_time_now();
 
     tokutime_t io_time = t1 - t0;
-    toku_ft_status_update_flush_reason(node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
+    toku_ft_status_update_flush_reason(
+        node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
 
     toku_free(compressed_buf);
-    node->dirty = 0;  // See #1957.   Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
+    node->dirty = 0;  // See #1957.   Must set the node to be clean after
+                      // serializing it so that it doesn't get written again on
+                      // the next checkpoint or eviction.
     return 0;
 }
 
@@ -994,6 +996,7 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) {
     bn->seqinsert = orig_bn->seqinsert;
     bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied;
     bn->stat64_delta = orig_bn->stat64_delta;
+    bn->logical_rows_delta = orig_bn->logical_rows_delta;
     bn->data_buffer.clone(&orig_bn->data_buffer);
     return bn;
 }
@@ -1004,6 +1007,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
     bn->seqinsert = 0;
     bn->stale_ancestor_messages_applied = false;
     bn->stat64_delta = ZEROSTATS;
+    bn->logical_rows_delta = 0;
     bn->data_buffer.init_zero();
     return bn;
 }
@@ -1897,7 +1901,7 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum,
                                             /* out */ int *layout_version_p);
 
 // This function upgrades a version 14 or 13 ftnode to the current
-// verison. NOTE: This code assumes the first field of the rbuf has
+// version. NOTE: This code assumes the first field of the rbuf has
 // already been read from the buffer (namely the layout_version of the
 // ftnode.)
 static int
@@ -2488,9 +2492,12 @@ toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIAL
     serialized->blocknum = log->blocknum;
 }
 
-int
-toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized,
-                                FT ft, bool for_checkpoint) {
+int toku_serialize_rollback_log_to(int fd,
+                                   ROLLBACK_LOG_NODE log,
+                                   SERIALIZED_ROLLBACK_LOG_NODE serialized_log,
+                                   bool is_serialized,
+                                   FT ft,
+                                   bool for_checkpoint) {
     size_t n_to_write;
     char *compressed_buf;
     struct serialized_rollback_log_node serialized_local;
@@ -2511,21 +2518,21 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA
                                            serialized_log->n_sub_blocks,
                                            serialized_log->sub_block,
                                            ft->h->compression_method,
-                                           &n_to_write, &compressed_buf);
+                                           &n_to_write,
+                                           &compressed_buf);
 
     // Dirties the ft
     DISKOFF offset;
-    ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
-                                   ft, fd, for_checkpoint,
-                                   // We consider rollback log flushing the hottest possible allocation,
-                                   // since rollback logs are short-lived compared to FT nodes.
-                                   INT_MAX);
+    ft->blocktable.realloc_on_disk(
+        blocknum, n_to_write, &offset, ft, fd, for_checkpoint);
 
     toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
     toku_free(compressed_buf);
     if (!is_serialized) {
         toku_static_serialized_rollback_log_destroy(&serialized_local);
-        log->dirty = 0;  // See #1957.   Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
+        log->dirty = 0;  // See #1957.   Must set the node to be clean after
+                         // serializing it so that it doesn't get written again
+                         // on the next checkpoint or eviction.
     }
     return 0;
 }
@@ -2704,7 +2711,7 @@ decompress_from_raw_block_into_rbuf(uint8_t *raw_block, size_t raw_block_size, s
 }
 
 static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) {
-    // This function exists solely to accomodate future changes in compression.
+    // This function exists solely to accommodate future changes in compression.
     int r = 0;
     if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) ||
         (FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) ||
diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc
new file mode 100644
index 0000000000000..922850fb3e096
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc
@@ -0,0 +1,833 @@
+/*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#include "ft/serialize/rbtree_mhs.h"
+#include "portability/toku_assert.h"
+#include "portability/toku_portability.h"
+#include <algorithm>
+
+namespace MhsRbTree {
+
+    Tree::Tree() : _root(NULL), _align(1) {}  // empty tree, alignment of 1
+
+    Tree::Tree(uint64_t align) : _root(NULL), _align(align) {}  // empty tree
+
+    Tree::~Tree() { Destroy(); }  // teardown is delegated to Destroy()
+
+    void Tree::PreOrder(Node *tree) const {
+        if (tree == NULL)
+            return;  // empty subtree: nothing to print
+        fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());  // node first
+        PreOrder(tree->_left);
+        PreOrder(tree->_right);
+    }
+
+    void Tree::PreOrder() { PreOrder(_root); }  // whole tree, starting at _root
+
+    void Tree::InOrder(Node *tree) const {
+        if (tree == NULL)
+            return;  // empty subtree: nothing to print
+        InOrder(tree->_left);
+        fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());  // node in between
+        InOrder(tree->_right);
+    }
+
+    // In-order walk calling f(extra, node, depth); children get depth + 1.
+    void Tree::InOrderVisitor(Node *tree,
+                              void (*f)(void *, Node *, uint64_t),
+                              void *extra,
+                              uint64_t depth) {
+        if (tree == NULL)
+            return;  // empty subtree: the callback is not invoked
+        InOrderVisitor(tree->_left, f, extra, depth + 1);
+        f(extra, tree, depth);
+        InOrderVisitor(tree->_right, f, extra, depth + 1);
+    }
+
+    void Tree::InOrderVisitor(void (*f)(void *, Node *, uint64_t),
+                              void *extra) {
+        InOrderVisitor(_root, f, extra, 0);  // whole tree; the root is depth 0
+    }
+
+    void Tree::InOrder() { InOrder(_root); }  // whole tree, starting at _root
+
+    void Tree::PostOrder(Node *tree) const {
+        if (tree == NULL)
+            return;  // empty subtree: nothing to print
+        PostOrder(tree->_left);
+        PostOrder(tree->_right);
+        fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());  // node last
+    }
+
+    void Tree::PostOrder() { PostOrder(_root); }  // whole tree, starting at _root
+
+    Node *Tree::SearchByOffset(uint64_t offset) {
+        // Descend from the root comparing offsets; NULL when no node matches.
+        Node *cur = _root;
+        while (cur != NULL) {
+            const uint64_t cur_offset = rbn_offset(cur).ToInt();
+            if (cur_offset == offset)
+                break;
+            cur = (offset < cur_offset) ? cur->_left : cur->_right;
+        }
+        return cur;
+    }
+
+    // Mostly for testing: first-fit lookup, nullptr if nothing can hold size.
+    Node *Tree::SearchFirstFitBySize(uint64_t size) {
+        // NOTE(review): _root is used without a NULL check - presumably callers
+        // never search an empty tree; confirm.
+        const bool fits = EffectiveSize(_root) >= size ||
+                          rbn_left_mhs(_root) >= size ||
+                          rbn_right_mhs(_root) >= size;
+        return fits ? SearchFirstFitBySizeHelper(_root, size) : nullptr;
+    }
+
+    Node *Tree::SearchFirstFitBySizeHelper(Node *x, uint64_t size) {
+        // Prefers a fitting hole in the left subtree, then x itself, then the
+        // right subtree; a node where none of the three fits is a corrupt tree.
+        for (;;) {
+            if (rbn_left_mhs(x) >= size) {
+                x = x->_left;
+            } else if (EffectiveSize(x) >= size) {
+                return x;
+            } else if (rbn_right_mhs(x) >= size) {
+                x = x->_right;
+            } else {
+                break;
+            }
+        }
+        // this is an invalid state
+        Dump();
+        ValidateBalance();
+        ValidateMhs();
+        invariant(0);
+        return NULL;
+    }
+
+    Node *Tree::MinNode(Node *tree) {
+        // Leftmost node of the subtree rooted at tree; NULL if it is empty.
+        Node *leftmost = tree;
+        while (leftmost != NULL && leftmost->_left != NULL)
+            leftmost = leftmost->_left;
+
+        return leftmost;
+    }
+
+    Node *Tree::MinNode() { return MinNode(_root); }  // leftmost node overall
+
+    Node *Tree::MaxNode(Node *tree) {
+        // Rightmost node of the subtree rooted at tree; NULL if it is empty.
+        Node *rightmost = tree;
+        while (rightmost != NULL && rightmost->_right != NULL)
+            rightmost = rightmost->_right;
+
+        return rightmost;
+    }
+
+    Node *Tree::MaxNode() { return MaxNode(_root); }  // rightmost node overall
+
+    Node *Tree::SuccessorHelper(Node *y, Node *x) {
+        // Climb (x = child, y = parent) while x is y's right child; returns
+        // the first ancestor not entered from its right, or NULL at the top.
+        for (; y != NULL && x == y->_right; y = y->_parent)
+            x = y;
+        return y;
+    }
+    Node *Tree::Successor(Node *x) {
+        // Next node in order: the minimum of the right subtree when there is
+        // one, otherwise the ancestor located by SuccessorHelper.
+        if (x->_right == NULL)
+            return SuccessorHelper(x->_parent, x);
+        return MinNode(x->_right);
+    }
+
+    Node *Tree::PredecessorHelper(Node *y, Node *x) {
+        // Climb (x = child, y = parent) while x is y's left child; returns
+        // the first ancestor not entered from its left, or NULL at the top.
+        for (; y != NULL && x == y->_left; y = y->_parent)
+            x = y;
+
+        return y;
+    }
+    Node *Tree::Predecessor(Node *x) {
+        // Previous node in order: max of the left subtree, else climb past
+        // left children with PredecessorHelper (was SuccessorHelper: a bug).
+        if (x->_left != NULL)
+            return MaxNode(x->_left);
+        return PredecessorHelper(x->_parent, x);
+    }
+
+    /*
+    *      px                              px
+    *     /                               /
+    *    x                               y
+    *   /  \      --(left rotation)-->  / \               #
+    *  lx   y                          x  ry
+    *     /   \                       /  \
+    *    ly   ry                      lx  ly
+    *  max_hole_size updates are pretty local
+    */
+
+    void Tree::LeftRotate(Node *&root, Node *x) {
+        Node *y = x->_right;  // y takes x's place; dereferenced below, so non-NULL
+
+        x->_right = y->_left;  // y's left subtree (ly) becomes x's right subtree
+        rbn_right_mhs(x) = rbn_left_mhs(y);  // its cached mhs moves along with it
+
+        if (y->_left != NULL)
+            y->_left->_parent = x;
+
+        y->_parent = x->_parent;
+
+        if (x->_parent == NULL) {
+            root = y;  // x had no parent, so y is the new root
+        } else {
+            if (x->_parent->_left == x) {
+                x->_parent->_left = y;
+            } else {
+                x->_parent->_right = y;
+            }
+        }
+        y->_left = x;
+        rbn_left_mhs(y) = mhs_of_subtree(x);  // x's subtree changed: recompute
+
+        x->_parent = y;
+    }
+
+    /*            py                               py
+     *           /                                /
+     *          y                                x
+     *         /  \      --(right rotate)-->    /  \                     #
+     *        x   ry                           lx   y
+     *       / \                                   / \                   #
+     *      lx  rx                                rx  ry
+     *  Mirror image of LeftRotate: only the labels of x and y are
+     *  rewritten; `root` is replaced when y had no parent.
+     */
+    void Tree::RightRotate(Node *&root, Node *y) {
+        Node *x = y->_left;
+        // rx moves under y, so y takes x's right label as its left label
+        y->_left = x->_right;
+        rbn_left_mhs(y) = rbn_right_mhs(x);
+
+        if (x->_right != NULL)
+            x->_right->_parent = y;
+        // hook x into y's former position
+        x->_parent = y->_parent;
+
+        if (y->_parent == NULL) {
+            root = x;
+        } else {
+            if (y == y->_parent->_right)
+                y->_parent->_right = x;
+            else
+                y->_parent->_left = x;
+        }
+
+        x->_right = y;
+        rbn_right_mhs(x) = mhs_of_subtree(y);  // both labels of y are set by now
+        y->_parent = x;
+    }
+
+    // Propagates a changed max-hole-size (mhs) from `node` towards the root.
+    // Call it whenever the node's size or its left/right mhs has changed.
+    // Prerequisite: the labels of the node's children are already correct.
+    // The climb ends at the root, at the first ancestor whose label is
+    // already right, or when `node` is not linked as a child of its parent.
+    void Tree::RecalculateMhs(Node *node) {
+        Node *up = node->_parent;
+        while (up != NULL) {
+            const uint64_t fresh = mhs_of_subtree(node);
+            // pick the parent's label that describes this subtree
+            uint64_t *slot = NULL;
+            if (up->_left == node)
+                slot = &rbn_left_mhs(up);
+            else if (up->_right == node)
+                slot = &rbn_right_mhs(up);
+            if (slot == NULL || *slot == fresh)
+                return;
+            // record the new value and repeat one level up
+            *slot = fresh;
+            node = up;
+            up = node->_parent;
+        }
+    }
+
+    // For each neighbour that exists, stores whether `pair` exactly abuts it
+    // (true) or leaves a gap (false); an overlap trips an invariant.
+    void Tree::IsNewNodeMergable(Node *pred,
+                                 Node *succ,
+                                 Node::BlockPair pair,
+                                 bool *left_merge,
+                                 bool *right_merge) {
+        if (pred != NULL) {
+            const OUUInt64 pred_end = rbn_size(pred) + rbn_offset(pred);
+            const bool gap_before = pred_end < pair._offset;
+            if (!gap_before) {
+                invariant(pred_end == pair._offset);
+            }
+            *left_merge = !gap_before;
+        }
+        if (succ != NULL) {
+            const OUUInt64 succ_begin = rbn_offset(succ);
+            const OUUInt64 pair_end = pair._offset + pair._size;
+            const bool gap_after = pair_end < succ_begin;
+            if (!gap_after) {
+                invariant(pair_end == succ_begin);
+            }
+            *right_merge = !gap_after;
+        }
+    }
+
+    void Tree::AbsorbNewNode(Node *pred,  // merges `pair` into adjacent hole(s)
+                             Node *succ,
+                             Node::BlockPair pair,
+                             bool left_merge,   // pair abuts the end of pred
+                             bool right_merge,  // pair abuts the start of succ
+                             bool is_right_child) {  // Insert passes true when pred is the would-be parent
+        invariant(left_merge || right_merge);
+        if (left_merge && right_merge) {
+            // pair bridges pred and succ: one absorbs it all, the other goes
+            if (!is_right_child) {
+                rbn_size(succ) += pair._size;
+                rbn_offset(succ) = pair._offset;
+                // succ now covers pair; fold all of it into pred
+                rbn_size(pred) += rbn_size(succ);
+                // to keep the invariant of the tree - no overlapping holes
+                rbn_offset(succ) += rbn_size(succ);
+                rbn_size(succ) = 0;
+                RecalculateMhs(succ);
+                RecalculateMhs(pred);
+                // succ is now an empty hole at the end of pred's range and
+                // its labels were refreshed above, separately from pred's;
+                // remove succ
+                RawRemove(_root, succ);
+            } else {
+                rbn_size(pred) += pair._size;  // pred now covers pair
+                rbn_offset(succ) = rbn_offset(pred);  // succ takes over pred's start ...
+                rbn_size(succ) += rbn_size(pred);  // ... and its whole length
+                rbn_offset(pred) += rbn_size(pred);  // pred becomes an empty hole
+                rbn_size(pred) = 0;
+                RecalculateMhs(pred);
+                RecalculateMhs(succ);
+                // now remove pred
+                RawRemove(_root, pred);
+            }
+        } else if (left_merge) {
+            rbn_size(pred) += pair._size;  // extend pred to the right
+            RecalculateMhs(pred);
+        } else if (right_merge) {
+            rbn_offset(succ) -= pair._size;  // extend succ to the left
+            rbn_size(succ) += pair._size;
+            RecalculateMhs(succ);
+        }
+    }
+    // Inserts the free range `pair`; tedious, but not complicated:
+    // 1. descend by offset to find the empty slot for the pair
+    // 2. if both the pred and succ touch the pair, merge with both; either
+    //    pred or succ is then removed
+    // 3. if only left-mergeable or right-mergeable, just merge
+    // 4. non-mergeable case: link a new node and run the fixup
+    // Returns 0 (also InsertFixup's result), or -1 from the `!node` checks.
+
+    int Tree::Insert(Node *&root, Node::BlockPair pair) {
+        Node *x = _root;  // NOTE(review): descends from _root, not `root`; the callers visible here pass _root
+        Node *y = NULL;
+        bool left_merge = false;
+        bool right_merge = false;
+        Node *node = NULL;
+
+        while (x != NULL) {
+            y = x;
+            if (pair._offset < rbn_key(x))
+                x = x->_left;
+            else
+                x = x->_right;
+        }
+
+        // y is now the would-be parent (NULL for an empty tree). Work out
+        // the in-order pred and succ of the empty slot for possible merges:
+        // one of them is y itself, the other comes from the helper walk up
+        // from y.
+        Node *pred, *succ;
+        if (y != NULL) {
+            if (pair._offset < rbn_key(y)) {
+                // as the left child
+                pred = PredecessorHelper(y->_parent, y);
+                succ = y;
+                IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge);
+                if (left_merge || right_merge) {
+                    AbsorbNewNode(
+                        pred, succ, pair, left_merge, right_merge, false);
+                } else {
+                    // construct the node (recoloured RED before the fixup)
+                    Node::Pair mhsp {0, 0};
+                    node =
+                        new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
+                    if (!node)  // NOTE(review): plain new normally throws instead of returning NULL - confirm
+                        return -1;
+                    y->_left = node;
+                    node->_parent = y;
+                    RecalculateMhs(node);
+                }
+
+            } else {
+                // as the right child
+                pred = y;
+                succ = SuccessorHelper(y->_parent, y);
+                IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge);
+                if (left_merge || right_merge) {
+                    AbsorbNewNode(
+                        pred, succ, pair, left_merge, right_merge, true);
+                } else {
+                    // construct the node (recoloured RED before the fixup)
+                    Node::Pair mhsp {0, 0};
+                    node =
+                        new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
+                    if (!node)
+                        return -1;
+                    y->_right = node;
+                    node->_parent = y;
+                    RecalculateMhs(node);
+                }
+            }
+        } else {
+            Node::Pair mhsp {0, 0};  // empty tree: the new node becomes the root
+            node = new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
+            if (!node)
+                return -1;
+            root = node;
+        }
+        if (!left_merge && !right_merge) {  // a node was actually linked in
+            invariant_notnull(node);
+            node->_color = EColor::RED;
+            return InsertFixup(root, node);
+        }
+        return 0;
+    }
+
+    int Tree::InsertFixup(Node *&root, Node *node) {  // recolours/rotates after a red insert; always returns 0
+        Node *parent, *gparent;
+        while ((parent = rbn_parent(node)) && rbn_is_red(parent)) {  // red node under a red parent
+            gparent = rbn_parent(parent);
+            if (parent == gparent->_left) {
+                {
+                    Node *uncle = gparent->_right;
+                    if (uncle && rbn_is_red(uncle)) {  // red uncle: recolour and continue from gparent
+                        rbn_set_black(uncle);
+                        rbn_set_black(parent);
+                        rbn_set_red(gparent);
+                        node = gparent;
+                        continue;
+                    }
+                }
+                // black or missing uncle: straighten a zig-zag, then rotate
+                if (parent->_right == node) {
+                    Node *tmp;
+                    LeftRotate(root, parent);
+                    tmp = parent;  // swap the roles of node and parent
+                    parent = node;
+                    node = tmp;
+                }
+
+                rbn_set_black(parent);
+                rbn_set_red(gparent);
+                RightRotate(root, gparent);
+            } else {
+                {
+                    Node *uncle = gparent->_left;
+                    if (uncle && rbn_is_red(uncle)) {  // red uncle: recolour and continue from gparent
+                        rbn_set_black(uncle);
+                        rbn_set_black(parent);
+                        rbn_set_red(gparent);
+                        node = gparent;
+                        continue;
+                    }
+                }
+                // black or missing uncle, mirrored
+                if (parent->_left == node) {
+                    Node *tmp;
+                    RightRotate(root, parent);
+                    tmp = parent;  // swap the roles of node and parent
+                    parent = node;
+                    node = tmp;
+                }
+                rbn_set_black(parent);
+                rbn_set_red(gparent);
+                LeftRotate(root, gparent);
+            }
+        }
+        rbn_set_black(root);  // the root is forced black on every exit
+        return 0;
+    }
+
+    int Tree::Insert(Node::BlockPair pair) { return Insert(_root, pair); }
+    // Takes `size` bytes from the hole chosen by SearchFirstFitBySize and
+    // returns the offset handed out by Remove(root, node, size).
+    uint64_t Tree::Remove(size_t size) {
+        return Remove(_root, SearchFirstFitBySize(size), size);
+    }
+
+    void Tree::RawRemove(Node *&root, Node *node) {  // unlinks `node`, rebalances, deletes it
+        Node *child, *parent;
+        EColor color;
+        // two children: splice the leftmost node of the right subtree in
+        if ((node->_left != NULL) && (node->_right != NULL)) {
+            Node *replace = node;
+            replace = replace->_right;
+            while (replace->_left != NULL)
+                replace = replace->_left;
+            // point node's parent (or the root) at the replacement
+            if (rbn_parent(node)) {
+                if (rbn_parent(node)->_left == node)
+                    rbn_parent(node)->_left = replace;
+                else
+                    rbn_parent(node)->_right = replace;
+            } else {
+                root = replace;
+            }
+            child = replace->_right;
+            parent = rbn_parent(replace);
+            color = rbn_color(replace);  // colour leaving replace's old position
+            // replace == node->_right: it simply keeps its own right subtree
+            if (parent == node) {
+                parent = replace;
+            } else {
+                if (child)
+                    rbn_parent(child) = parent;
+
+                parent->_left = child;
+                rbn_left_mhs(parent) = rbn_right_mhs(replace);
+                RecalculateMhs(parent);  // climbs while labels change; node->_right is still linked under node
+                replace->_right = node->_right;
+                rbn_set_parent(node->_right, replace);
+                rbn_right_mhs(replace) = rbn_right_mhs(node);
+            }
+            // replace takes over node's parent link, colour and left subtree
+            replace->_parent = node->_parent;
+            replace->_color = node->_color;
+            replace->_left = node->_left;
+            rbn_left_mhs(replace) = rbn_left_mhs(node);
+            node->_left->_parent = replace;
+            RecalculateMhs(replace);
+            if (color == EColor::BLACK)
+                RawRemoveFixup(root, child, parent);
+            delete node;
+            return;
+        }
+        // at most one child: lift it (possibly NULL) into node's place
+        if (node->_left != NULL)
+            child = node->_left;
+        else
+            child = node->_right;
+
+        parent = node->_parent;
+        color = node->_color;
+
+        if (child)
+            child->_parent = parent;
+
+        if (parent) {
+            if (parent->_left == node) {
+                parent->_left = child;
+                rbn_left_mhs(parent) = child ? mhs_of_subtree(child) : 0;
+            } else {
+                parent->_right = child;
+                rbn_right_mhs(parent) = child ? mhs_of_subtree(child) : 0;
+            }
+            RecalculateMhs(parent);
+        } else
+            root = child;
+        if (color == EColor::BLACK)  // only a removed black node triggers the fixup
+            RawRemoveFixup(root, child, parent);
+        delete node;
+    }
+
+    // Deletes the node returned by SearchByOffset(offset) from the tree.
+    void Tree::RawRemove(uint64_t offset) {
+        RawRemove(_root, SearchByOffset(offset));
+    }
+    static inline uint64_t align(uint64_t value, uint64_t ba_alignment) {  // rounds value up to a multiple of ba_alignment
+        return (value + (ba_alignment - 1)) / ba_alignment * ba_alignment;
+    }
+    uint64_t Tree::Remove(Node *&root, Node *node, size_t size) {  // NOTE(review): node is dereferenced unchecked - confirm the search never yields NULL
+        OUUInt64 n_offset = rbn_offset(node);
+        OUUInt64 n_size = rbn_size(node);
+        OUUInt64 answer_offset(align(rbn_offset(node).ToInt(), _align));
+        // the aligned request must fit inside the hole
+        invariant((answer_offset + size) <= (n_offset + n_size));
+        if (answer_offset == n_offset) {  // carve from the front of the hole
+            rbn_offset(node) += size;
+            rbn_size(node) -= size;
+            RecalculateMhs(node);
+            if (rbn_size(node) == 0) {
+                RawRemove(root, node);
+            }
+
+        } else {
+            if (answer_offset + size == n_offset + n_size) {  // carve from the tail
+                rbn_size(node) -= size;
+                RecalculateMhs(node);
+            } else {
+                // cut in the middle: node keeps the head, the tail is re-inserted
+                rbn_size(node) = answer_offset - n_offset;
+                RecalculateMhs(node);
+                Insert(_root,  // return value is not checked here
+                       {(answer_offset + size),
+                        (n_offset + n_size) - (answer_offset + size)});
+            }
+        }
+        return answer_offset.ToInt();  // aligned start of the allocated range
+    }
+
+    void Tree::RawRemoveFixup(Node *&root, Node *node, Node *parent) {  // node may be NULL, hence the explicit parent
+        Node *other;  // sibling of node, "w" in the case comments
+        while ((!node || rbn_is_black(node)) && node != root) {
+            if (parent->_left == node) {
+                other = parent->_right;
+                if (rbn_is_red(other)) {
+                    // Case 1: the sibling of x, w, is red
+                    rbn_set_black(other);
+                    rbn_set_red(parent);
+                    LeftRotate(root, parent);
+                    other = parent->_right;
+                }
+                if ((!other->_left || rbn_is_black(other->_left)) &&
+                    (!other->_right || rbn_is_black(other->_right))) {
+                    // Case 2: w is black and both of w's children are black
+                    rbn_set_red(other);
+                    node = parent;
+                    parent = rbn_parent(node);
+                } else {
+                    if (!other->_right || rbn_is_black(other->_right)) {
+                        // Case 3: w is black and left child of w is red but
+                        // right
+                        // child is black
+                        rbn_set_black(other->_left);
+                        rbn_set_red(other);
+                        RightRotate(root, other);
+                        other = parent->_right;
+                    }
+                    // Case 4: w is black and right child of w is red,
+                    // regardless of
+                    // left child's color
+                    rbn_set_color(other, rbn_color(parent));
+                    rbn_set_black(parent);
+                    rbn_set_black(other->_right);
+                    LeftRotate(root, parent);
+                    node = root;
+                    break;
+                }
+            } else {
+                other = parent->_left;
+                if (rbn_is_red(other)) {
+                    // Case 1: w is red
+                    rbn_set_black(other);
+                    rbn_set_red(parent);
+                    RightRotate(root, parent);
+                    other = parent->_left;
+                }
+                if ((!other->_left || rbn_is_black(other->_left)) &&
+                    (!other->_right || rbn_is_black(other->_right))) {
+                    // Case 2: w is black and both children are black
+                    rbn_set_red(other);
+                    node = parent;
+                    parent = rbn_parent(node);
+                } else {
+                    if (!other->_left || rbn_is_black(other->_left)) {
+                        // Case 3 (mirrored): w is black and right child of w
+                        // is red whereas left child is black
+                        rbn_set_black(other->_right);
+                        rbn_set_red(other);
+                        LeftRotate(root, other);
+                        other = parent->_left;
+                    }
+                    // Case 4 (mirrored): w is black and left child of w is
+                    // red, regardless of
+                    // the right child's color
+                    rbn_set_color(other, rbn_color(parent));
+                    rbn_set_black(parent);
+                    rbn_set_black(other->_left);
+                    RightRotate(root, parent);
+                    node = root;
+                    break;
+                }
+            }
+        }
+        if (node)
+            rbn_set_black(node);
+    }
+
+    // Frees every node of the subtree, children first, and resets the
+    // caller's pointer to NULL. Does nothing for an empty subtree.
+    void Tree::Destroy(Node *&tree) {
+        if (tree != NULL) {
+            // Destroy() itself ignores NULL, so children need no pre-check
+            Destroy(tree->_left);
+            Destroy(tree->_right);
+            delete tree;
+            tree = NULL;
+        }
+    }
+
+    // Releases the whole tree; _root is NULL afterwards.
+    void Tree::Destroy() { Destroy(_root); }
+
+    void Tree::Dump(Node *tree, Node::BlockPair pair, EDirection dir) {  // pre-order print to stderr; `pair` is the parent's hole
+        if (tree != NULL) {
+            if (dir == EDirection::NONE)  // NONE marks the node the dump starts from
+                fprintf(stderr,
+                        "(%" PRIu64 ",%" PRIu64 ", mhs:(%" PRIu64 ",%" PRIu64
+                        "))(B) is root\n",
+                        rbn_offset(tree).ToInt(),
+                        rbn_size(tree).ToInt(),
+                        rbn_left_mhs(tree),
+                        rbn_right_mhs(tree));
+            else
+                fprintf(stderr,
+                        "(%" PRIu64 ",%" PRIu64 ",mhs:(%" PRIu64 ",%" PRIu64
+                        "))(%c) is %" PRIu64 "'s %s\n",
+                        rbn_offset(tree).ToInt(),
+                        rbn_size(tree).ToInt(),
+                        rbn_left_mhs(tree),
+                        rbn_right_mhs(tree),
+                        rbn_is_red(tree) ? 'R' : 'B',
+                        pair._offset.ToInt(),
+                        dir == EDirection::RIGHT ? "right child" : "left child");
+            // recurse with this node's hole so the children can name their parent
+            Dump(tree->_left, tree->_hole, EDirection::LEFT);
+            Dump(tree->_right, tree->_hole, EDirection::RIGHT);
+        }
+    }
+
+    // Usable bytes of the node's hole for an aligned allocation: the distance
+    // from its offset rounded up to a multiple of _align to its end, or 0
+    // when the rounded-up offset lies beyond the end.
+    uint64_t Tree::EffectiveSize(Node *node) {
+        const OUUInt64 hole_end = rbn_offset(node) + rbn_size(node);
+        const OUUInt64 usable_from(align(rbn_offset(node).ToInt(), _align));
+        if (usable_from > hole_end)
+            return 0;
+        return (hole_end - usable_from).ToInt();
+    }
+
+    // Prints every node to stderr starting at the root; no-op when empty.
+    void Tree::Dump() {
+        if (_root) Dump(_root, _root->_hole, EDirection::NONE);
+    }
+
+    // InOrderVisitor callback for ValidateBalance. `extra` is a
+    // uint64_t *[2] holding pointers to the running {min, max} depth of
+    // nodes that lack at least one child; also checks parent back-links.
+    static void vis_bal_f(void *extra, Node *node, uint64_t depth) {
+        uint64_t **p = (uint64_t **)extra;
+        uint64_t *min_depth = p[0];
+        uint64_t *max_depth = p[1];
+        if (node->_left) {
+            invariant(node == node->_left->_parent);
+        }
+        if (node->_right) {
+            invariant(node == node->_right->_parent);
+        }
+        if (!node->_left || !node->_right) {
+            // Track both bounds independently: with `else if`, a node that
+            // lowered the minimum could never raise the maximum as well.
+            if (*min_depth > depth)
+                *min_depth = depth;
+            if (*max_depth < depth)
+                *max_depth = depth;
+        }
+    }
+
+    // Asserts (min + 1) * 2 >= max + 1 for the depths vis_bal_f collects.
+    void Tree::ValidateBalance() {
+        if (_root == NULL)
+            return;
+        uint64_t shallowest = 0xffffffffffffffff;
+        uint64_t deepest = 0;
+        uint64_t *bounds[2] = {&shallowest, &deepest};
+        InOrderVisitor(vis_bal_f, bounds);
+        invariant((shallowest + 1) * 2 >= deepest + 1);
+    }
+
+    // InOrderVisitor callback: `extra` points at a cursor into an array of
+    // expected pairs; checks the node's offset against it, then advances.
+    static void vis_cmp_f(void *extra, Node *node, uint64_t UU(depth)) {
+        Node::BlockPair *&cursor = *(Node::BlockPair **)extra;
+        invariant_notnull(cursor);
+        invariant(cursor->_offset == node->_hole._offset);
+        ++cursor;
+    }
+    // Asserts that an in-order walk meets the offsets of `pairs` entry by
+    // entry; one array element is read per visited node.
+    void Tree::ValidateInOrder(Node::BlockPair *pairs) {
+        InOrderVisitor(vis_cmp_f, &pairs);
+    }
+
+    // Recomputes the subtree's max hole size bottom-up and asserts that both
+    // stored labels of `node` match; returns that mhs (0 for no node). On a
+    // mismatch the computed value and the subtree are printed first.
+    uint64_t Tree::ValidateMhs(Node *node) {
+        if (node == NULL)
+            return 0;
+        const uint64_t left_mhs = ValidateMhs(node->_left);
+        const uint64_t right_mhs = ValidateMhs(node->_right);
+        if (left_mhs != rbn_left_mhs(node)) {
+            printf("assert failure: mhs_left = %" PRIu64 "\n", left_mhs);
+            Dump(node, node->_hole, EDirection::NONE);
+        }
+        invariant(left_mhs == rbn_left_mhs(node));
+        if (right_mhs != rbn_right_mhs(node)) {
+            printf("assert failure: mhs_right = %" PRIu64 "\n", right_mhs);
+            Dump(node, node->_hole, EDirection::NONE);
+        }
+        invariant(right_mhs == rbn_right_mhs(node));
+        return std::max(EffectiveSize(node), std::max(left_mhs, right_mhs));
+    }
+
+    void Tree::ValidateMhs() {  // checks all labels; no-op on an empty tree
+        if (_root != NULL) {
+            const uint64_t left_mhs = ValidateMhs(_root->_left);
+            const uint64_t right_mhs = ValidateMhs(_root->_right);
+            invariant(left_mhs == rbn_left_mhs(_root));
+            invariant(right_mhs == rbn_right_mhs(_root));
+        }
+    }
+
+}  // namespace MhsRbTree
diff --git a/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
new file mode 100644
index 0000000000000..eb8c953b08c13
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
@@ -0,0 +1,355 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#pragma once
+
+#include <db.h>
+
+#include "portability/toku_pthread.h"
+#include "portability/toku_stdint.h"
+#include "portability/toku_stdlib.h"
+
+// RBTree(Red-black tree) with max hole sizes for subtrees.
+
+// This is a tentative data structure to improve the block allocation
+// time complexity from linear time to logarithmic time.
+// Please note that this data structure only supports first-fit for now.
+// It would actually be easier to implement best-fit (just sort by
+// size).
+
+// RBTree is a classic data structure with O(log(n)) insertion, deletion and
+// search. It has proven its efficiency over many years.
+
+// a *hole* is the representation of an available BlockPair for allocation,
+// defined as (start_address, size) or (offset, size) interchangeably.
+
+// each node has a *label* to indicate a pair of the max hole sizes for its
+// subtree.
+
+// We are implementing a RBTree with max hole sizes for subtrees. It is a red
+// black tree that is sorted by the start_address but also labeled with the max
+// hole sizes of the subtrees.
+
+//        [(6,3)]  -> [(offset, size)], the hole
+//        [{2,5}]  -> [{mhs_of_left, mhs_of_right}], the label
+/*        /     \           */
+// [(0, 1)]    [(10,  5)]
+// [{0, 2}]    [{0,   0}]
+/*        \                 */
+//       [(3,  2)]
+//       [{0,  0}]
+// request of allocation size=2 goes from root to [(3,2)].
+
+// above example shows a simplified RBTree_max_holes.
+// it is easier to tell the search time is O(log(n)) as we can make a decision
+// on each descent until we get to the target.
+
+// the only question is if we can keep the maintenance cost low -- and i think
+// it is not a problem because an insertion/deletion is only going to update the
+// max_hole_sizes of the nodes along the path from the root to the node to be
+// deleted/inserted. The path can be cached and search is anyway O(log(n)).
+
+// unlike the typical rbtree, Tree has to handle the inserts and deletes
+// with more care: an allocation that triggers the delete might leave some
+// unused space, for which we can simply update the start_addr and size
+// without worrying about overlap. A free might mean not only an insertion
+// but also *merging* with the adjacent holes.
+
+namespace MhsRbTree {
+
+#define offset_t uint64_t  // type of the addr argument of SearchByOffset(Node *, offset_t)
+    enum class EColor { RED, BLACK };  // node colour
+    enum class EDirection { NONE = 0, LEFT, RIGHT };  // child side reported by Dump(); NONE marks the root
+
+    // I am a bit tired of fixing overflow/underflow, so this is a quickly
+    // crafted uint64_t wrapper:
+    // MHS_MAX_VAL acts as an infinity-like value that absorbs + and -, and
+    // every other overflow or underflow trips an invariant. If you got a
+    // file offset larger than MHS_MAX_VAL, it is not a problem here. - JYM
+    class OUUInt64 {
+       public:
+        static const uint64_t MHS_MAX_VAL = 0xffffffffffffffff;
+        OUUInt64() : _value(0) {}
+        OUUInt64(uint64_t s) : _value(s) {}  // implicit on purpose: plain integers mix with OUUInt64
+        OUUInt64(const OUUInt64& o) : _value(o._value) {}
+        bool operator<(const OUUInt64 &r) const {  // ordering two infinities is rejected
+            invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+            return _value < r.ToInt();
+        }
+        bool operator>(const OUUInt64 &r) const {
+            invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+            return _value > r.ToInt();
+        }
+        bool operator<=(const OUUInt64 &r) const {
+            invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+            return _value <= r.ToInt();
+        }
+        bool operator>=(const OUUInt64 &r) const {
+            invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
+            return _value >= r.ToInt();
+        }
+        OUUInt64 operator+(const OUUInt64 &r) const {
+            if (_value == MHS_MAX_VAL || r.ToInt() == MHS_MAX_VAL) {  // infinity + x == infinity
+                OUUInt64 tmp(MHS_MAX_VAL);
+                return tmp;
+            } else {
+                // detecting overflow
+                invariant((MHS_MAX_VAL - _value) >= r.ToInt());
+                uint64_t plus = _value + r.ToInt();
+                OUUInt64 tmp(plus);
+                return tmp;
+            }
+        }
+        OUUInt64 operator-(const OUUInt64 &r) const {
+            invariant(r.ToInt() != MHS_MAX_VAL);  // subtracting infinity is rejected
+            if (_value == MHS_MAX_VAL) {  // infinity - x == infinity
+                return *this;
+            } else {
+                invariant(_value >= r.ToInt());  // detecting underflow
+                uint64_t minus = _value - r.ToInt();
+                OUUInt64 tmp(minus);
+                return tmp;
+            }
+        }
+        OUUInt64 operator-=(const OUUInt64 &r) {  // returns a copy, not a reference
+            if (_value != MHS_MAX_VAL) {  // infinity is left untouched, r is not even checked
+                invariant(r.ToInt() != MHS_MAX_VAL);
+                invariant(_value >= r.ToInt());
+                _value -= r.ToInt();
+            }
+            return *this;
+        }
+        OUUInt64 operator+=(const OUUInt64 &r) {  // returns a copy, not a reference
+            if (_value != MHS_MAX_VAL) {
+                if (r.ToInt() == MHS_MAX_VAL) {
+                    _value = MHS_MAX_VAL;
+                } else {
+                    invariant((MHS_MAX_VAL - _value) >= r.ToInt());
+                    this->_value += r.ToInt();
+                }
+            }
+            return *this;
+        }
+        bool operator==(const OUUInt64 &r) const {  // equality has no infinity check
+            return _value == r.ToInt();
+        }
+        bool operator!=(const OUUInt64 &r) const {
+            return _value != r.ToInt();
+        }
+        OUUInt64 operator=(const OUUInt64 &r) {  // returns a copy, not a reference
+            _value = r.ToInt();
+            return *this;
+        }
+        uint64_t ToInt() const { return _value; }  // raw value, MHS_MAX_VAL included
+
+       private:
+        uint64_t _value;
+    };
+
+    class Node {  // one tree node: a hole, its colour, subtree labels and links
+       public:
+        class BlockPair {  // an (offset, size) range
+           public:
+            OUUInt64 _offset;
+            OUUInt64 _size;
+
+            BlockPair() : _offset(0), _size(0) {}
+            BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {}
+            BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {}
+            BlockPair(const BlockPair &o)
+                : _offset(o._offset), _size(o._size) {}
+            // pairs are ordered by offset only; the size does not take part
+            int operator<(const BlockPair &rhs) const {
+                return _offset < rhs._offset;
+            }
+            int operator<(const uint64_t &o) const { return _offset < o; }
+        };
+        // the label: max hole sizes of the left and right subtrees
+        struct Pair {
+            uint64_t _left;
+            uint64_t _right;
+            Pair(uint64_t l, uint64_t r) : _left(l), _right(r) {}
+        };
+
+        EColor _color;
+        BlockPair _hole;  // the free range this node represents
+        Pair _label;  // accessed through rbn_left_mhs / rbn_right_mhs
+        Node *_left;
+        Node *_right;
+        Node *_parent;  // NULL for the root
+        // stores every argument as given; no links are adjusted here
+        Node(EColor c,
+             Node::BlockPair h,
+             Pair lb,
+             Node *l,
+             Node *r,
+             Node *p)
+            : _color(c),
+              _hole(h),
+              _label(lb),
+              _left(l),
+              _right(r),
+              _parent(p) {}
+    };
+
+    class Tree {  // red-black tree of holes keyed by offset, labelled with subtree max hole sizes
+       private:
+        Node *_root;  // NULL while the tree is empty
+        uint64_t _align;  // alignment handed to align() by Remove() and EffectiveSize()
+
+       public:
+        Tree();
+        Tree(uint64_t);
+        ~Tree();
+
+        void PreOrder();
+        void InOrder();
+        void PostOrder();
+        // immutable operations
+        Node *SearchByOffset(uint64_t addr);
+        Node *SearchFirstFitBySize(uint64_t size);
+
+        Node *MinNode();
+        Node *MaxNode();
+
+        Node *Successor(Node *);
+        Node *Predecessor(Node *);
+
+        // mapped from tree_allocator::free_block
+        int Insert(Node::BlockPair pair);
+        // mapped from tree_allocator::alloc_block
+        uint64_t Remove(size_t size);
+        // mapped from tree_allocator::alloc_block_after
+
+        void RawRemove(uint64_t offset);
+        void Destroy();
+        // print the tree
+        void Dump();
+        // validation
+        // balance
+        void ValidateBalance();
+        void ValidateInOrder(Node::BlockPair *);
+        void InOrderVisitor(void (*f)(void *, Node *, uint64_t), void *);
+        void ValidateMhs();
+
+       private:
+        void PreOrder(Node *node) const;
+        void InOrder(Node *node) const;
+        void PostOrder(Node *node) const;
+        Node *SearchByOffset(Node *node, offset_t addr) const;
+        Node *SearchFirstFitBySize(Node *node, size_t size) const;
+
+        Node *MinNode(Node *node);
+        Node *MaxNode(Node *node);
+
+        // rotations to fix up. we will have to update the labels too.
+        void LeftRotate(Node *&root, Node *x);
+        void RightRotate(Node *&root, Node *y);
+
+        int Insert(Node *&root, Node::BlockPair pair);
+        int InsertFixup(Node *&root, Node *node);
+
+        void RawRemove(Node *&root, Node *node);
+        uint64_t Remove(Node *&root, Node *node, size_t size);
+        void RawRemoveFixup(Node *&root, Node *node, Node *parent);
+
+        void Destroy(Node *&tree);
+        void Dump(Node *tree, Node::BlockPair pair, EDirection dir);
+        void RecalculateMhs(Node *node);
+        void IsNewNodeMergable(Node *, Node *, Node::BlockPair, bool *, bool *);
+        void AbsorbNewNode(Node *, Node *, Node::BlockPair, bool, bool, bool);
+        Node *SearchFirstFitBySizeHelper(Node *x, uint64_t size);
+
+        Node *SuccessorHelper(Node *y, Node *x);
+
+        Node *PredecessorHelper(Node *y, Node *x);
+
+        void InOrderVisitor(Node *,
+                            void (*f)(void *, Node *, uint64_t),
+                            void *,
+                            uint64_t);
+        uint64_t ValidateMhs(Node *);
+
+        uint64_t EffectiveSize(Node *);
+// node accessor macros; every setter takes (node, value), rbn_set_offset too
+#define rbn_parent(r) ((r)->_parent)
+#define rbn_color(r) ((r)->_color)
+#define rbn_is_red(r) ((r)->_color == EColor::RED)
+#define rbn_is_black(r) ((r)->_color == EColor::BLACK)
+#define rbn_set_black(r)     \
+    do {                     \
+        (r)->_color = EColor::BLACK; \
+    } while (0)
+#define rbn_set_red(r)     \
+    do {                   \
+        (r)->_color = EColor::RED; \
+    } while (0)
+#define rbn_set_parent(r, p) \
+    do {                     \
+        (r)->_parent = (p);  \
+    } while (0)
+#define rbn_set_color(r, c) \
+    do {                    \
+        (r)->_color = (c);  \
+    } while (0)
+#define rbn_set_offset(r, c)      \
+    do {                          \
+        (r)->_hole._offset = (c); \
+    } while (0)
+#define rbn_set_size(r, c)      \
+    do {                        \
+        (r)->_hole._size = (c); \
+    } while (0)
+#define rbn_set_left_mhs(r, c)   \
+    do {                         \
+        (r)->_label._left = (c); \
+    } while (0)
+#define rbn_set_right_mhs(r, c)   \
+    do {                          \
+        (r)->_label._right = (c); \
+    } while (0)
+#define rbn_size(r) ((r)->_hole._size)
+#define rbn_offset(r) ((r)->_hole._offset)
+#define rbn_key(r) ((r)->_hole._offset)
+#define rbn_left_mhs(r) ((r)->_label._left)
+#define rbn_right_mhs(r) ((r)->_label._right)
+#define mhs_of_subtree(y) \
+    (std::max(std::max(rbn_left_mhs(y), rbn_right_mhs(y)), EffectiveSize(y)))
+    };
+
+}  // namespace MhsRbTree
diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc
deleted file mode 100644
index 3670ef81cc2f7..0000000000000
--- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_strategy_test.cc
+++ /dev/null
@@ -1,126 +0,0 @@
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
-#ident "$Id$"
-/*======
-This file is part of PerconaFT.
-
-
-Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
-
-    PerconaFT is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License, version 2,
-    as published by the Free Software Foundation.
-
-    PerconaFT is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
-
-----------------------------------------
-
-    PerconaFT is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Affero General Public License, version 3,
-    as published by the Free Software Foundation.
-
-    PerconaFT is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Affero General Public License for more details.
-
-    You should have received a copy of the GNU Affero General Public License
-    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
-======= */
-
-#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-
-#include "ft/tests/test.h"
-
-#include "ft/serialize/block_allocator_strategy.h"
-
-static const uint64_t alignment = 4096;
-
-static void test_first_vs_best_fit(void) {
-    struct block_allocator::blockpair pairs[] = {
-        block_allocator::blockpair(1 * alignment, 6 * alignment),
-        // hole between 7x align -> 8x align
-        block_allocator::blockpair(8 * alignment, 4 * alignment),
-        // hole between 12x align -> 16x align
-        block_allocator::blockpair(16 * alignment, 1 * alignment),
-        block_allocator::blockpair(17 * alignment, 2 * alignment),
-        // hole between 19 align -> 21x align
-        block_allocator::blockpair(21 * alignment, 2 * alignment),
-    };
-    const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
-    
-    block_allocator::blockpair *bp;
-
-    // first fit
-    bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment);
-    assert(bp == &pairs[0]);
-    bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment);
-    assert(bp == &pairs[0]);
-    bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment);
-    assert(bp == &pairs[1]);
-    bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment);
-    assert(bp == nullptr);
-
-    // best fit
-    bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment);
-    assert(bp == &pairs[0]);
-    bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, alignment);
-    assert(bp == &pairs[3]);
-    bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment);
-    assert(bp == &pairs[1]);
-    bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment);
-    assert(bp == nullptr);
-}
-
-static void test_padded_fit(void) {
-    struct block_allocator::blockpair pairs[] = {
-        block_allocator::blockpair(1 * alignment, 1 * alignment),
-        // 4096 byte hole after bp[0]
-        block_allocator::blockpair(3 * alignment, 1 * alignment),
-        // 8192 byte hole after bp[1]
-        block_allocator::blockpair(6 * alignment, 1 * alignment),
-        // 16384 byte hole after bp[2]
-        block_allocator::blockpair(11 * alignment, 1 * alignment),
-        // 32768 byte hole after bp[3]
-        block_allocator::blockpair(17 * alignment, 1 * alignment),
-        // 116kb hole after bp[4]
-        block_allocator::blockpair(113 * alignment, 1 * alignment),
-        // 256kb hole after bp[5]
-        block_allocator::blockpair(371 * alignment, 1 * alignment),
-    };
-    const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
-    
-    block_allocator::blockpair *bp;
-
-    // padding for a 100 byte allocation will be < than standard alignment,
-    // so it should fit in the first 4096 byte hole.
-    bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment);
-    assert(bp == &pairs[0]);
-
-    // Even padded, a 12kb alloc will fit in a 16kb hole
-    bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment);
-    assert(bp == &pairs[2]);
-
-    // would normally fit in the 116kb hole but the padding will bring it over
-    bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment);
-    assert(bp == &pairs[5]);
-
-    bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment);
-    assert(bp == &pairs[5]);
-}
-
-int test_main(int argc, const char *argv[]) {
-    (void) argc;
-    (void) argv;
-
-    test_first_vs_best_fit();
-    test_padded_fit();
-
-    return 0;
-}
diff --git a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc
index d80ee83cbc95f..3eff52b915d66 100644
--- a/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/block_allocator_test.cc
@@ -38,253 +38,243 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
 #include "test.h"
 
-static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) {
-    ba->validate();
+static void ba_alloc(BlockAllocator *ba, uint64_t size, uint64_t *answer) {
+    ba->Validate();
     uint64_t actual_answer;
-    const uint64_t heat = random() % 2;
-    ba->alloc_block(512 * size, heat, &actual_answer);
-    ba->validate();
+    ba->AllocBlock(512 * size, &actual_answer);
+    ba->Validate();
 
-    assert(actual_answer%512==0);
-    *answer = actual_answer/512;
+    invariant(actual_answer % 512 == 0);
+    *answer = actual_answer / 512;
 }
 
-static void ba_free(block_allocator *ba, uint64_t offset) {
-    ba->validate();
-    ba->free_block(offset * 512);
-    ba->validate();
+static void ba_free(BlockAllocator *ba, uint64_t offset, uint64_t size) {
+    ba->Validate();
+    ba->FreeBlock(offset * 512, 512 * size);
+    ba->Validate();
 }
 
-static void ba_check_l(block_allocator *ba, uint64_t blocknum_in_layout_order,
-                       uint64_t expected_offset, uint64_t expected_size) {
+static void ba_check_l(BlockAllocator *ba,
+                       uint64_t blocknum_in_layout_order,
+                       uint64_t expected_offset,
+                       uint64_t expected_size) {
     uint64_t actual_offset, actual_size;
-    int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size);
-    assert(r==0);
-    assert(expected_offset*512 == actual_offset);
-    assert(expected_size  *512 == actual_size);
+    int r = ba->NthBlockInLayoutOrder(
+        blocknum_in_layout_order, &actual_offset, &actual_size);
+    invariant(r == 0);
+    invariant(expected_offset * 512 == actual_offset);
+    invariant(expected_size * 512 == actual_size);
 }
 
-static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order) {
+static void ba_check_none(BlockAllocator *ba,
+                          uint64_t blocknum_in_layout_order) {
     uint64_t actual_offset, actual_size;
-    int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size);
-    assert(r==-1);
+    int r = ba->NthBlockInLayoutOrder(
+        blocknum_in_layout_order, &actual_offset, &actual_size);
+    invariant(r == -1);
 }
 
-
 // Simple block allocator test
-static void test_ba0(block_allocator::allocation_strategy strategy) {
-    block_allocator allocator;
-    block_allocator *ba = &allocator;
-    ba->create(100*512, 1*512);
-    ba->set_strategy(strategy);
-    assert(ba->allocated_limit()==100*512);
+static void test_ba0() {
+    BlockAllocator allocator;
+    BlockAllocator *ba = &allocator;
+    ba->Create(100 * 512, 1 * 512);
+    invariant(ba->AllocatedLimit() == 100 * 512);
 
     uint64_t b2, b3, b4, b5, b6, b7;
-    ba_alloc(ba, 100, &b2);     
-    ba_alloc(ba, 100, &b3);     
-    ba_alloc(ba, 100, &b4);     
-    ba_alloc(ba, 100, &b5);     
-    ba_alloc(ba, 100, &b6);     
-    ba_alloc(ba, 100, &b7);     
-    ba_free(ba, b2);
-    ba_alloc(ba, 100, &b2);  
-    ba_free(ba, b4);         
-    ba_free(ba, b6);         
+    ba_alloc(ba, 100, &b2);
+    ba_alloc(ba, 100, &b3);
+    ba_alloc(ba, 100, &b4);
+    ba_alloc(ba, 100, &b5);
+    ba_alloc(ba, 100, &b6);
+    ba_alloc(ba, 100, &b7);
+    ba_free(ba, b2, 100);
+    ba_alloc(ba, 100, &b2);
+    ba_free(ba, b4, 100);
+    ba_free(ba, b6, 100);
     uint64_t b8, b9;
-    ba_alloc(ba, 100, &b4);    
-    ba_free(ba, b2);           
-    ba_alloc(ba, 100, &b6);    
-    ba_alloc(ba, 100, &b8);    
-    ba_alloc(ba, 100, &b9);    
-    ba_free(ba, b6);           
-    ba_free(ba, b7);           
-    ba_free(ba, b8);           
-    ba_alloc(ba, 100, &b6);    
-    ba_alloc(ba, 100, &b7);    
-    ba_free(ba, b4);           
-    ba_alloc(ba, 100, &b4);    
-
-    ba->destroy();
+    ba_alloc(ba, 100, &b4);
+    ba_free(ba, b2, 100);
+    ba_alloc(ba, 100, &b6);
+    ba_alloc(ba, 100, &b8);
+    ba_alloc(ba, 100, &b9);
+    ba_free(ba, b6, 100);
+    ba_free(ba, b7, 100);
+    ba_free(ba, b8, 100);
+    ba_alloc(ba, 100, &b6);
+    ba_alloc(ba, 100, &b7);
+    ba_free(ba, b4, 100);
+    ba_alloc(ba, 100, &b4);
+
+    ba->Destroy();
 }
 
 // Manually to get coverage of all the code in the block allocator.
-static void
-test_ba1(block_allocator::allocation_strategy strategy, int n_initial) {
-    block_allocator allocator;
-    block_allocator *ba = &allocator;
-    ba->create(0*512, 1*512);
-    ba->set_strategy(strategy);
-
-    int n_blocks=0;
+static void test_ba1(int n_initial) {
+    BlockAllocator allocator;
+    BlockAllocator *ba = &allocator;
+    ba->Create(0 * 512, 1 * 512);
+
+    int n_blocks = 0;
     uint64_t blocks[1000];
     for (int i = 0; i < 1000; i++) {
-	if (i < n_initial || random() % 2 == 0) {
-	    if (n_blocks < 1000) {
-		ba_alloc(ba, 1, &blocks[n_blocks]);
-		//printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]);
-		n_blocks++;
-	    } 
-	} else {
-	    if (n_blocks > 0) {
-		int blocknum = random()%n_blocks;
-		//printf("F[%d]%ld\n", blocknum, blocks[blocknum]);
-		ba_free(ba, blocks[blocknum]);
-		blocks[blocknum]=blocks[n_blocks-1];
-		n_blocks--;
-	    }
-	}
+        if (i < n_initial || random() % 2 == 0) {
+            if (n_blocks < 1000) {
+                ba_alloc(ba, 1, &blocks[n_blocks]);
+                // printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]);
+                n_blocks++;
+            }
+        } else {
+            if (n_blocks > 0) {
+                int blocknum = random() % n_blocks;
+                // printf("F[%d]=%ld\n", blocknum, blocks[blocknum]);
+                ba_free(ba, blocks[blocknum], 1);
+                blocks[blocknum] = blocks[n_blocks - 1];
+                n_blocks--;
+            }
+        }
     }
-    
-    ba->destroy();
+
+    ba->Destroy();
 }
-    
+
 // Check to see if it is first fit or best fit.
-static void
-test_ba2 (void)
-{
-    block_allocator allocator;
-    block_allocator *ba = &allocator;
+static void test_ba2(void) {
+    BlockAllocator allocator;
+    BlockAllocator *ba = &allocator;
     uint64_t b[6];
     enum { BSIZE = 1024 };
-    ba->create(100*512, BSIZE*512);
-    ba->set_strategy(block_allocator::BA_STRATEGY_FIRST_FIT);
-    assert(ba->allocated_limit()==100*512);
-
-    ba_check_l    (ba, 0, 0, 100);
-    ba_check_none (ba, 1);
-
-    ba_alloc (ba, 100, &b[0]);
-    ba_check_l    (ba, 0, 0, 100);
-    ba_check_l    (ba, 1, BSIZE, 100);
-    ba_check_none (ba, 2);
-
-    ba_alloc (ba, BSIZE + 100, &b[1]);
-    ba_check_l    (ba, 0, 0, 100);
-    ba_check_l    (ba, 1,   BSIZE,       100);
-    ba_check_l    (ba, 2, 2*BSIZE, BSIZE + 100);
-    ba_check_none (ba, 3);
-
-    ba_alloc (ba, 100, &b[2]);
-    ba_check_l    (ba, 0, 0, 100);
-    ba_check_l    (ba, 1,   BSIZE,       100);
-    ba_check_l    (ba, 2, 2*BSIZE, BSIZE + 100);
-    ba_check_l    (ba, 3, 4*BSIZE,       100);
-    ba_check_none (ba, 4);
-
-    ba_alloc (ba, 100, &b[3]);
-    ba_alloc (ba, 100, &b[4]);
-    ba_alloc (ba, 100, &b[5]);
-    ba_check_l    (ba, 0, 0, 100);
-    ba_check_l    (ba, 1,   BSIZE,       100);
-    ba_check_l    (ba, 2, 2*BSIZE, BSIZE + 100);
-    ba_check_l    (ba, 3, 4*BSIZE,       100);
-    ba_check_l    (ba, 4, 5*BSIZE,       100);
-    ba_check_l    (ba, 5, 6*BSIZE,       100);
-    ba_check_l    (ba, 6, 7*BSIZE,       100);
-    ba_check_none (ba, 7);
-   
-    ba_free (ba, 4*BSIZE);
-    ba_check_l    (ba, 0, 0, 100);
-    ba_check_l    (ba, 1,   BSIZE,       100);
-    ba_check_l    (ba, 2, 2*BSIZE, BSIZE + 100);
-    ba_check_l    (ba, 3, 5*BSIZE,       100);
-    ba_check_l    (ba, 4, 6*BSIZE,       100);
-    ba_check_l    (ba, 5, 7*BSIZE,       100);
-    ba_check_none (ba, 6);
+    ba->Create(100 * 512, BSIZE * 512);
+    invariant(ba->AllocatedLimit() == 100 * 512);
+
+    ba_check_l(ba, 0, 0, 100);
+    ba_check_none(ba, 1);
+
+    ba_alloc(ba, 100, &b[0]);
+    ba_check_l(ba, 0, 0, 100);
+    ba_check_l(ba, 1, BSIZE, 100);
+    ba_check_none(ba, 2);
+
+    ba_alloc(ba, BSIZE + 100, &b[1]);
+    ba_check_l(ba, 0, 0, 100);
+    ba_check_l(ba, 1, BSIZE, 100);
+    ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+    ba_check_none(ba, 3);
+
+    ba_alloc(ba, 100, &b[2]);
+    ba_check_l(ba, 0, 0, 100);
+    ba_check_l(ba, 1, BSIZE, 100);
+    ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+    ba_check_l(ba, 3, 4 * BSIZE, 100);
+    ba_check_none(ba, 4);
+
+    ba_alloc(ba, 100, &b[3]);
+    ba_alloc(ba, 100, &b[4]);
+    ba_alloc(ba, 100, &b[5]);
+    ba_check_l(ba, 0, 0, 100);
+    ba_check_l(ba, 1, BSIZE, 100);
+    ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+    ba_check_l(ba, 3, 4 * BSIZE, 100);
+    ba_check_l(ba, 4, 5 * BSIZE, 100);
+    ba_check_l(ba, 5, 6 * BSIZE, 100);
+    ba_check_l(ba, 6, 7 * BSIZE, 100);
+    ba_check_none(ba, 7);
+
+    ba_free(ba, 4 * BSIZE, 100);
+    ba_check_l(ba, 0, 0, 100);
+    ba_check_l(ba, 1, BSIZE, 100);
+    ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+    ba_check_l(ba, 3, 5 * BSIZE, 100);
+    ba_check_l(ba, 4, 6 * BSIZE, 100);
+    ba_check_l(ba, 5, 7 * BSIZE, 100);
+    ba_check_none(ba, 6);
 
     uint64_t b2;
     ba_alloc(ba, 100, &b2);
-    assert(b2==4*BSIZE);
-    ba_check_l    (ba, 0, 0, 100);
-    ba_check_l    (ba, 1,   BSIZE,       100);
-    ba_check_l    (ba, 2, 2*BSIZE, BSIZE + 100);
-    ba_check_l    (ba, 3, 4*BSIZE,       100);
-    ba_check_l    (ba, 4, 5*BSIZE,       100);
-    ba_check_l    (ba, 5, 6*BSIZE,       100);
-    ba_check_l    (ba, 6, 7*BSIZE,       100);
-    ba_check_none (ba, 7);
-
-    ba_free (ba,   BSIZE);
-    ba_free (ba, 5*BSIZE);
-    ba_check_l    (ba, 0, 0, 100);
-    ba_check_l    (ba, 1, 2*BSIZE, BSIZE + 100);
-    ba_check_l    (ba, 2, 4*BSIZE,       100);
-    ba_check_l    (ba, 3, 6*BSIZE,       100);
-    ba_check_l    (ba, 4, 7*BSIZE,       100);
-    ba_check_none (ba, 5);
-
-    // This alloc will allocate the first block after the reserve space in the case of first fit.
+    invariant(b2 == 4 * BSIZE);
+    ba_check_l(ba, 0, 0, 100);
+    ba_check_l(ba, 1, BSIZE, 100);
+    ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+    ba_check_l(ba, 3, 4 * BSIZE, 100);
+    ba_check_l(ba, 4, 5 * BSIZE, 100);
+    ba_check_l(ba, 5, 6 * BSIZE, 100);
+    ba_check_l(ba, 6, 7 * BSIZE, 100);
+    ba_check_none(ba, 7);
+
+    ba_free(ba, BSIZE, 100);
+    ba_free(ba, 5 * BSIZE, 100);
+    ba_check_l(ba, 0, 0, 100);
+    ba_check_l(ba, 1, 2 * BSIZE, BSIZE + 100);
+    ba_check_l(ba, 2, 4 * BSIZE, 100);
+    ba_check_l(ba, 3, 6 * BSIZE, 100);
+    ba_check_l(ba, 4, 7 * BSIZE, 100);
+    ba_check_none(ba, 5);
+
+    // This alloc will allocate the first block after the reserve space in the
+    // case of first fit.
     uint64_t b3;
     ba_alloc(ba, 100, &b3);
-    assert(b3==  BSIZE);      // First fit.
+    invariant(b3 == BSIZE);  // First fit.
     // if (b3==5*BSIZE) then it is next fit.
 
     // Now 5*BSIZE is free
     uint64_t b5;
     ba_alloc(ba, 100, &b5);
-    assert(b5==5*BSIZE);
-    ba_check_l    (ba, 0, 0, 100);
-    ba_check_l    (ba, 1,   BSIZE,       100);
-    ba_check_l    (ba, 2, 2*BSIZE, BSIZE + 100);
-    ba_check_l    (ba, 3, 4*BSIZE,       100);
-    ba_check_l    (ba, 4, 5*BSIZE,       100);
-    ba_check_l    (ba, 5, 6*BSIZE,       100);
-    ba_check_l    (ba, 6, 7*BSIZE,       100);
-    ba_check_none (ba, 7);
+    invariant(b5 == 5 * BSIZE);
+    ba_check_l(ba, 0, 0, 100);
+    ba_check_l(ba, 1, BSIZE, 100);
+    ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+    ba_check_l(ba, 3, 4 * BSIZE, 100);
+    ba_check_l(ba, 4, 5 * BSIZE, 100);
+    ba_check_l(ba, 5, 6 * BSIZE, 100);
+    ba_check_l(ba, 6, 7 * BSIZE, 100);
+    ba_check_none(ba, 7);
 
     // Now all blocks are busy
     uint64_t b6, b7, b8;
     ba_alloc(ba, 100, &b6);
     ba_alloc(ba, 100, &b7);
     ba_alloc(ba, 100, &b8);
-    assert(b6==8*BSIZE);
-    assert(b7==9*BSIZE);
-    assert(b8==10*BSIZE);
-    ba_check_l    (ba, 0, 0, 100);
-    ba_check_l    (ba, 1,   BSIZE,       100);
-    ba_check_l    (ba, 2, 2*BSIZE, BSIZE + 100);
-    ba_check_l    (ba, 3, 4*BSIZE,       100);
-    ba_check_l    (ba, 4, 5*BSIZE,       100);
-    ba_check_l    (ba, 5, 6*BSIZE,       100);
-    ba_check_l    (ba, 6, 7*BSIZE,       100);
-    ba_check_l    (ba, 7, 8*BSIZE,       100);
-    ba_check_l    (ba, 8, 9*BSIZE,       100);
-    ba_check_l    (ba, 9, 10*BSIZE,       100);
-    ba_check_none (ba, 10);
-    
-    ba_free(ba, 9*BSIZE);
-    ba_free(ba, 7*BSIZE);
+    invariant(b6 == 8 * BSIZE);
+    invariant(b7 == 9 * BSIZE);
+    invariant(b8 == 10 * BSIZE);
+    ba_check_l(ba, 0, 0, 100);
+    ba_check_l(ba, 1, BSIZE, 100);
+    ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
+    ba_check_l(ba, 3, 4 * BSIZE, 100);
+    ba_check_l(ba, 4, 5 * BSIZE, 100);
+    ba_check_l(ba, 5, 6 * BSIZE, 100);
+    ba_check_l(ba, 6, 7 * BSIZE, 100);
+    ba_check_l(ba, 7, 8 * BSIZE, 100);
+    ba_check_l(ba, 8, 9 * BSIZE, 100);
+    ba_check_l(ba, 9, 10 * BSIZE, 100);
+    ba_check_none(ba, 10);
+
+    ba_free(ba, 9 * BSIZE, 100);
+    ba_free(ba, 7 * BSIZE, 100);
     uint64_t b9;
     ba_alloc(ba, 100, &b9);
-    assert(b9==7*BSIZE);
+    invariant(b9 == 7 * BSIZE);
 
-    ba_free(ba, 5*BSIZE);
-    ba_free(ba, 2*BSIZE);
+    ba_free(ba, 5 * BSIZE, 100);
+    ba_free(ba, 2 * BSIZE, BSIZE + 100);
     uint64_t b10, b11;
     ba_alloc(ba, 100, &b10);
-    assert(b10==2*BSIZE);
+    invariant(b10 == 2 * BSIZE);
     ba_alloc(ba, 100, &b11);
-    assert(b11==3*BSIZE);
+    invariant(b11 == 3 * BSIZE);
     ba_alloc(ba, 100, &b11);
-    assert(b11==5*BSIZE);
+    invariant(b11 == 5 * BSIZE);
 
-    ba->destroy();
+    ba->Destroy();
 }
 
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
-    enum block_allocator::allocation_strategy strategies[] = {
-        block_allocator::BA_STRATEGY_FIRST_FIT,
-        block_allocator::BA_STRATEGY_BEST_FIT,
-        block_allocator::BA_STRATEGY_PADDED_FIT,
-        block_allocator::BA_STRATEGY_HEAT_ZONE,
-    };
-    for (size_t i = 0; i < sizeof(strategies) / sizeof(strategies[0]); i++) {
-        test_ba0(strategies[i]);
-        test_ba1(strategies[i], 0);
-        test_ba1(strategies[i], 10);
-        test_ba1(strategies[i], 20);
-    }
+int test_main(int argc __attribute__((__unused__)),
+              const char *argv[] __attribute__((__unused__))) {
+    test_ba0();
+    test_ba1(0);
+    test_ba1(10);
+    test_ba1(20);
     test_ba2();
     return 0;
 }
diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc
index a7c48ef709af6..ee68ab3ef0bb9 100644
--- a/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-5978.cc
@@ -45,7 +45,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 // #5978 is fixed. Here is what we do. We have four pairs with
 // blocknums and fullhashes of 1,2,3,4. The cachetable has only
 // two bucket mutexes, so 1 and 3 share a pair mutex, as do 2 and 4.
-// We pin all four with expensive write locks. Then, on backgroud threads,
+// We pin all four with expensive write locks. Then, on background threads,
 // we call get_and_pin_nonblocking on 3, where the unlockers unpins 2, and
 // we call get_and_pin_nonblocking on 4, where the unlockers unpins 1. Run this
 // enough times, and we should see a deadlock before the fix, and no deadlock
diff --git a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc
index be4bae898bebb..51cf70c3e76a6 100644
--- a/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/cachetable-simple-clone2.cc
@@ -77,7 +77,7 @@ flush (
 
 //
 // test the following things for simple cloning:
-//  - verifies that after teh checkpoint ends, the PAIR is properly 
+//  - verifies that after the checkpoint ends, the PAIR is properly
 //     dirty or clean based on the second unpin
 //
 static void
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc
index cb03a23e0fc65..7abd2267a7ea6 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-bfe-query.cc
@@ -38,69 +38,72 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
 #include "test.h"
 
-static  int
-int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) {
-    int64_t x = *(int64_t *) a->data;
-    int64_t y = *(int64_t *) b->data;
-
-    if (x<y) return -1;
-    if (x>y) return 1;
+static int int64_key_cmp(DB *db UU(), const DBT *a, const DBT *b) {
+    int64_t x = *(int64_t *)a->data;
+    int64_t y = *(int64_t *)b->data;
+
+    if (x < y)
+        return -1;
+    if (x > y)
+        return 1;
     return 0;
 }
 
-static void
-test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
+static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
     int r;
     FT_CURSOR XMALLOC(cursor);
     FTNODE dn = NULL;
     PAIR_ATTR attr;
-    
+
     // first test that prefetching everything should work
-    memset(&cursor->range_lock_left_key, 0 , sizeof(DBT));
-    memset(&cursor->range_lock_right_key, 0 , sizeof(DBT));
+    memset(&cursor->range_lock_left_key, 0, sizeof(DBT));
+    memset(&cursor->range_lock_right_key, 0, sizeof(DBT));
     cursor->left_is_neg_infty = true;
     cursor->right_is_pos_infty = true;
     cursor->disable_prefetching = false;
-    
+
     ftnode_fetch_extra bfe;
 
     // quick test to see that we have the right behavior when we set
     // disable_prefetching to true
     cursor->disable_prefetching = true;
-    bfe.create_for_prefetch( ft_h, cursor);
+    bfe.create_for_prefetch(ft_h, cursor);
     FTNODE_DISK_DATA ndd = NULL;
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     bfe.destroy();
     toku_ftnode_free(&dn);
     toku_free(ndd);
 
     // now enable prefetching again
     cursor->disable_prefetching = false;
-    
-    bfe.create_for_prefetch( ft_h, cursor);
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_AVAIL);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_COMPRESSED);
-    assert(BP_STATE(dn,1) == PT_COMPRESSED);
-    assert(BP_STATE(dn,2) == PT_COMPRESSED);
+
+    bfe.create_for_prefetch(ft_h, cursor);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_AVAIL);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
+    invariant(BP_STATE(dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
     bfe.destroy();
     toku_ftnode_free(&dn);
     toku_free(ndd);
@@ -108,21 +111,23 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
     uint64_t left_key = 150;
     toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t));
     cursor->left_is_neg_infty = false;
-    bfe.create_for_prefetch( ft_h, cursor);
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_COMPRESSED);
-    assert(BP_STATE(dn,2) == PT_COMPRESSED);
+    bfe.create_for_prefetch(ft_h, cursor);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
     bfe.destroy();
     toku_ftnode_free(&dn);
     toku_free(ndd);
@@ -130,63 +135,69 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
     uint64_t right_key = 151;
     toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t));
     cursor->right_is_pos_infty = false;
-    bfe.create_for_prefetch( ft_h, cursor);
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_COMPRESSED);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    bfe.create_for_prefetch(ft_h, cursor);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     bfe.destroy();
     toku_ftnode_free(&dn);
     toku_free(ndd);
 
     left_key = 100000;
     right_key = 100000;
-    bfe.create_for_prefetch( ft_h, cursor);
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_COMPRESSED);
+    bfe.create_for_prefetch(ft_h, cursor);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
     bfe.destroy();
     toku_free(ndd);
     toku_ftnode_free(&dn);
 
     left_key = 100;
     right_key = 100;
-    bfe.create_for_prefetch( ft_h, cursor);
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_AVAIL);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_COMPRESSED);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    bfe.create_for_prefetch(ft_h, cursor);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_AVAIL);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     bfe.destroy();
     toku_ftnode_free(&dn);
     toku_free(ndd);
@@ -194,20 +205,19 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
     toku_free(cursor);
 }
 
-static void
-test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
+static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
     int r;
     FT_CURSOR XMALLOC(cursor);
     FTNODE dn = NULL;
     FTNODE_DISK_DATA ndd = NULL;
     PAIR_ATTR attr;
-    
+
     // first test that prefetching everything should work
-    memset(&cursor->range_lock_left_key, 0 , sizeof(DBT));
-    memset(&cursor->range_lock_right_key, 0 , sizeof(DBT));
+    memset(&cursor->range_lock_left_key, 0, sizeof(DBT));
+    memset(&cursor->range_lock_right_key, 0, sizeof(DBT));
     cursor->left_is_neg_infty = true;
     cursor->right_is_pos_infty = true;
-    
+
     uint64_t left_key = 150;
     uint64_t right_key = 151;
     DBT left, right;
@@ -216,101 +226,106 @@ test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
 
     ftnode_fetch_extra bfe;
     bfe.create_for_subset_read(
-        ft_h,
-        NULL, 
-        &left,
-        &right,
-        false,
-        false,
-        false,
-        false
-        );
-    
+        ft_h, NULL, &left, &right, false, false, false, false);
+
     // fake the childnum to read
     // set disable_prefetching ON
     bfe.child_to_read = 2;
     bfe.disable_prefetching = true;
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
-    // need to call this twice because we had a subset read before, that touched the clock
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_COMPRESSED);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
+    // need to call this twice because we had a subset read before, that touched
+    // the clock
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_ON_DISK);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
     toku_ftnode_free(&dn);
     toku_free(ndd);
 
     // fake the childnum to read
     bfe.child_to_read = 2;
     bfe.disable_prefetching = false;
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
-    // need to call this twice because we had a subset read before, that touched the clock
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_COMPRESSED);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_COMPRESSED);
-    assert(BP_STATE(dn,2) == PT_COMPRESSED);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
+    // need to call this twice because we had a subset read before, that touched
+    // the clock
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_ON_DISK);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_AVAIL);
+    invariant(BP_STATE(dn, 0) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_AVAIL);
     toku_ftnode_free(&dn);
     toku_free(ndd);
 
     // fake the childnum to read
     bfe.child_to_read = 0;
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
-    assert(r==0);
-    assert(dn->n_children == 3);
-    assert(BP_STATE(dn,0) == PT_AVAIL);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
-    // need to call this twice because we had a subset read before, that touched the clock
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_AVAIL);
-    assert(BP_STATE(dn,1) == PT_COMPRESSED);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
-    toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(dn,0) == PT_COMPRESSED);
-    assert(BP_STATE(dn,1) == PT_COMPRESSED);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
+    invariant(r == 0);
+    invariant(dn->n_children == 3);
+    invariant(BP_STATE(dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+    // need to call this twice because we had a subset read before, that touched
+    // the clock
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
+    toku_ftnode_pe_callback(
+        dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+    invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
-    assert(BP_STATE(dn,0) == PT_AVAIL);
-    assert(BP_STATE(dn,1) == PT_AVAIL);
-    assert(BP_STATE(dn,2) == PT_ON_DISK);
+    invariant(BP_STATE(dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(dn, 1) == PT_AVAIL);
+    invariant(BP_STATE(dn, 2) == PT_ON_DISK);
     toku_ftnode_free(&dn);
     toku_free(ndd);
 
     toku_free(cursor);
 }
 
-
-static void
-test_prefetching(void) {
+static void test_prefetching(void) {
     //    struct ft_handle source_ft;
     struct ftnode sn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
 
@@ -327,7 +342,7 @@ test_prefetching(void) {
 
     uint64_t key1 = 100;
     uint64_t key2 = 200;
-    
+
     MALLOC_N(sn.n_children, sn.bp);
     DBT pivotkeys[2];
     toku_fill_dbt(&pivotkeys[0], &key1, sizeof(key1));
@@ -336,13 +351,13 @@ test_prefetching(void) {
     BP_BLOCKNUM(&sn, 0).b = 30;
     BP_BLOCKNUM(&sn, 1).b = 35;
     BP_BLOCKNUM(&sn, 2).b = 40;
-    BP_STATE(&sn,0) = PT_AVAIL;
-    BP_STATE(&sn,1) = PT_AVAIL;
-    BP_STATE(&sn,2) = PT_AVAIL;
+    BP_STATE(&sn, 0) = PT_AVAIL;
+    BP_STATE(&sn, 1) = PT_AVAIL;
+    BP_STATE(&sn, 2) = PT_AVAIL;
     set_BNC(&sn, 0, toku_create_empty_nl());
     set_BNC(&sn, 1, toku_create_empty_nl());
     set_BNC(&sn, 2, toku_create_empty_nl());
-    //Create XIDS
+    // Create XIDS
     XIDS xids_0 = toku_xids_get_root_xids();
     XIDS xids_123;
     XIDS xids_234;
@@ -352,7 +367,7 @@ test_prefetching(void) {
     CKERR(r);
 
     // data in the buffers does not matter in this test
-    //Cleanup:
+    // Cleanup:
     toku_xids_destroy(&xids_0);
     toku_xids_destroy(&xids_123);
     toku_xids_destroy(&xids_234);
@@ -363,41 +378,48 @@ test_prefetching(void) {
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft_h->cmp.create(int64_key_cmp, nullptr);
     ft->ft = ft_h;
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
-    //Want to use block #20
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
     FTNODE_DISK_DATA ndd = NULL;
-    r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
-    assert(r==0);
+    r = toku_serialize_ftnode_to(
+        fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+    invariant(r == 0);
 
-    test_prefetch_read(fd, ft, ft_h);    
+    test_prefetch_read(fd, ft, ft_h);
     test_subset_read(fd, ft, ft_h);
 
     toku_destroy_ftnode_internals(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     ft_h->cmp.destroy();
     toku_free(ft_h->h);
@@ -405,11 +427,12 @@ test_prefetching(void) {
     toku_free(ft);
     toku_free(ndd);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+              const char *argv[] __attribute__((__unused__))) {
     test_prefetching();
 
     return 0;
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc
index ceef3772e2a8b..26a3dae673cd9 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-clock-test.cc
@@ -40,38 +40,28 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
 #include "ft/cursor.h"
 
-enum ftnode_verify_type {
-    read_all=1,
-    read_compressed,
-    read_none
-};
+enum ftnode_verify_type { read_all = 1, read_compressed, read_none };
 
 #ifndef MIN
 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
 #endif
 
-static int
-string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
-{
+static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
     char *CAST_FROM_VOIDP(s, a->data);
     char *CAST_FROM_VOIDP(t, b->data);
     return strcmp(s, t);
 }
 
-static void
-le_add_to_bn(bn_data* bn, uint32_t idx, const  char *key, int keylen, const char *val, int vallen)
-{
+static void le_add_to_bn(bn_data *bn,
+                         uint32_t idx,
+                         const char *key,
+                         int keylen,
+                         const char *val,
+                         int vallen) {
     LEAFENTRY r = NULL;
     uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen);
     void *maybe_free = nullptr;
-    bn->get_space_for_insert(
-        idx, 
-        key,
-        keylen,
-        size_needed,
-        &r,
-        &maybe_free
-        );
+    bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free);
     if (maybe_free) {
         toku_free(maybe_free);
     }
@@ -81,70 +71,67 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const  char *key, int keylen, const char
     memcpy(r->u.clean.val, val, vallen);
 }
 
-
-static void
-le_malloc(bn_data* bn, uint32_t idx, const char *key, const char *val)
-{
+static void le_malloc(bn_data *bn,
+                      uint32_t idx,
+                      const char *key,
+                      const char *val) {
     int keylen = strlen(key) + 1;
     int vallen = strlen(val) + 1;
     le_add_to_bn(bn, idx, key, keylen, val, vallen);
 }
 
-
-static void
-test1(int fd, FT ft_h, FTNODE *dn) {
+static void test1(int fd, FT ft_h, FTNODE *dn) {
     int r;
     ftnode_fetch_extra bfe_all;
     bfe_all.create_for_full_read(ft_h);
     FTNODE_DISK_DATA ndd = NULL;
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_all);
     bool is_leaf = ((*dn)->height == 0);
-    assert(r==0);
+    invariant(r == 0);
     for (int i = 0; i < (*dn)->n_children; i++) {
-        assert(BP_STATE(*dn,i) == PT_AVAIL);
+        invariant(BP_STATE(*dn, i) == PT_AVAIL);
     }
     // should sweep and NOT get rid of anything
     PAIR_ATTR attr;
-    memset(&attr,0,sizeof(attr));
+    memset(&attr, 0, sizeof(attr));
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
     for (int i = 0; i < (*dn)->n_children; i++) {
-        assert(BP_STATE(*dn,i) == PT_AVAIL);
+        invariant(BP_STATE(*dn, i) == PT_AVAIL);
     }
     // should sweep and get compress all
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
     for (int i = 0; i < (*dn)->n_children; i++) {
         if (!is_leaf) {
-            assert(BP_STATE(*dn,i) == PT_COMPRESSED);
-        }
-        else {
-            assert(BP_STATE(*dn,i) == PT_ON_DISK);
+            invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
+        } else {
+            invariant(BP_STATE(*dn, i) == PT_ON_DISK);
         }
     }
     PAIR_ATTR size;
     bool req = toku_ftnode_pf_req_callback(*dn, &bfe_all);
-    assert(req);
+    invariant(req);
     toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size);
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
     for (int i = 0; i < (*dn)->n_children; i++) {
-        assert(BP_STATE(*dn,i) == PT_AVAIL);
+        invariant(BP_STATE(*dn, i) == PT_AVAIL);
     }
     // should sweep and get compress all
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
     for (int i = 0; i < (*dn)->n_children; i++) {
         if (!is_leaf) {
-            assert(BP_STATE(*dn,i) == PT_COMPRESSED);
-        }
-        else {
-            assert(BP_STATE(*dn,i) == PT_ON_DISK);
+            invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
+        } else {
+            invariant(BP_STATE(*dn, i) == PT_ON_DISK);
         }
-    }    
+    }
 
     req = toku_ftnode_pf_req_callback(*dn, &bfe_all);
-    assert(req);
+    invariant(req);
     toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size);
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
     for (int i = 0; i < (*dn)->n_children; i++) {
-        assert(BP_STATE(*dn,i) == PT_AVAIL);
+        invariant(BP_STATE(*dn, i) == PT_AVAIL);
     }
     (*dn)->dirty = 1;
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
@@ -152,101 +139,102 @@ test1(int fd, FT ft_h, FTNODE *dn) {
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
     for (int i = 0; i < (*dn)->n_children; i++) {
-        assert(BP_STATE(*dn,i) == PT_AVAIL);
+        invariant(BP_STATE(*dn, i) == PT_AVAIL);
     }
     toku_free(ndd);
     toku_ftnode_free(dn);
 }
 
-
-static int search_cmp(const struct ft_search& UU(so), const DBT* UU(key)) {
+static int search_cmp(const struct ft_search &UU(so), const DBT *UU(key)) {
     return 0;
 }
 
-static void
-test2(int fd, FT ft_h, FTNODE *dn) {
+static void test2(int fd, FT ft_h, FTNODE *dn) {
     DBT left, right;
     DB dummy_db;
     memset(&dummy_db, 0, sizeof(dummy_db));
     memset(&left, 0, sizeof(left));
     memset(&right, 0, sizeof(right));
     ft_search search;
-    
+
     ftnode_fetch_extra bfe_subset;
     bfe_subset.create_for_subset_read(
         ft_h,
-        ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr),
+        ft_search_init(
+            &search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr),
         &left,
         &right,
         true,
         true,
         false,
-        false
-        );
+        false);
 
     FTNODE_DISK_DATA ndd = NULL;
-    int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset);
-    assert(r==0);
+    int r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_subset);
+    invariant(r == 0);
     bool is_leaf = ((*dn)->height == 0);
-    // at this point, although both partitions are available, only the 
+    // at this point, although both partitions are available, only the
     // second basement node should have had its clock
     // touched
-    assert(BP_STATE(*dn, 0) == PT_AVAIL);
-    assert(BP_STATE(*dn, 1) == PT_AVAIL);
-    assert(BP_SHOULD_EVICT(*dn, 0));
-    assert(!BP_SHOULD_EVICT(*dn, 1));
+    invariant(BP_STATE(*dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+    invariant(BP_SHOULD_EVICT(*dn, 0));
+    invariant(!BP_SHOULD_EVICT(*dn, 1));
     PAIR_ATTR attr;
-    memset(&attr,0,sizeof(attr));
+    memset(&attr, 0, sizeof(attr));
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
-    assert(BP_STATE(*dn, 1) == PT_AVAIL);
-    assert(BP_SHOULD_EVICT(*dn, 1));
+    invariant(BP_STATE(*dn, 0) == (is_leaf ? PT_ON_DISK : PT_COMPRESSED));
+    invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+    invariant(BP_SHOULD_EVICT(*dn, 1));
     toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
-    assert(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
+    invariant(BP_STATE(*dn, 1) == (is_leaf ? PT_ON_DISK : PT_COMPRESSED));
 
     bool req = toku_ftnode_pf_req_callback(*dn, &bfe_subset);
-    assert(req);
+    invariant(req);
     toku_ftnode_pf_callback(*dn, ndd, &bfe_subset, fd, &attr);
-    assert(BP_STATE(*dn, 0) == PT_AVAIL);
-    assert(BP_STATE(*dn, 1) == PT_AVAIL);
-    assert(BP_SHOULD_EVICT(*dn, 0));
-    assert(!BP_SHOULD_EVICT(*dn, 1));
+    invariant(BP_STATE(*dn, 0) == PT_AVAIL);
+    invariant(BP_STATE(*dn, 1) == PT_AVAIL);
+    invariant(BP_SHOULD_EVICT(*dn, 0));
+    invariant(!BP_SHOULD_EVICT(*dn, 1));
 
     toku_free(ndd);
     toku_ftnode_free(dn);
 }
 
-static void
-test3_leaf(int fd, FT ft_h, FTNODE *dn) {
+static void test3_leaf(int fd, FT ft_h, FTNODE *dn) {
     DBT left, right;
     DB dummy_db;
     memset(&dummy_db, 0, sizeof(dummy_db));
     memset(&left, 0, sizeof(left));
     memset(&right, 0, sizeof(right));
-    
+
     ftnode_fetch_extra bfe_min;
     bfe_min.create_for_min_read(ft_h);
 
     FTNODE_DISK_DATA ndd = NULL;
-    int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min);
-    assert(r==0);
+    int r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_min);
+    invariant(r == 0);
     //
     // make sure we have a leaf
     //
-    assert((*dn)->height == 0);
+    invariant((*dn)->height == 0);
     for (int i = 0; i < (*dn)->n_children; i++) {
-        assert(BP_STATE(*dn, i) == PT_ON_DISK);
+        invariant(BP_STATE(*dn, i) == PT_ON_DISK);
     }
     toku_ftnode_free(dn);
     toku_free(ndd);
 }
 
-static void
-test_serialize_nonleaf(void) {
+static void test_serialize_nonleaf(void) {
     //    struct ft_handle source_ft;
     struct ftnode sn, *dn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
 
@@ -265,11 +253,11 @@ test_serialize_nonleaf(void) {
     sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1);
     BP_BLOCKNUM(&sn, 0).b = 30;
     BP_BLOCKNUM(&sn, 1).b = 35;
-    BP_STATE(&sn,0) = PT_AVAIL;
-    BP_STATE(&sn,1) = PT_AVAIL;
+    BP_STATE(&sn, 0) = PT_AVAIL;
+    BP_STATE(&sn, 1) = PT_AVAIL;
     set_BNC(&sn, 0, toku_create_empty_nl());
     set_BNC(&sn, 1, toku_create_empty_nl());
-    //Create XIDS
+    // Create XIDS
     XIDS xids_0 = toku_xids_get_root_xids();
     XIDS xids_123;
     XIDS xids_234;
@@ -281,11 +269,38 @@ test_serialize_nonleaf(void) {
     toku::comparator cmp;
     cmp.create(string_key_cmp, nullptr);
 
-    toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp);
-    toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp);
-    toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp);
-
-    //Cleanup:
+    toku_bnc_insert_msg(BNC(&sn, 0),
+                        "a",
+                        2,
+                        "aval",
+                        5,
+                        FT_NONE,
+                        next_dummymsn(),
+                        xids_0,
+                        true,
+                        cmp);
+    toku_bnc_insert_msg(BNC(&sn, 0),
+                        "b",
+                        2,
+                        "bval",
+                        5,
+                        FT_NONE,
+                        next_dummymsn(),
+                        xids_123,
+                        false,
+                        cmp);
+    toku_bnc_insert_msg(BNC(&sn, 1),
+                        "x",
+                        2,
+                        "xval",
+                        5,
+                        FT_NONE,
+                        next_dummymsn(),
+                        xids_234,
+                        true,
+                        cmp);
+
+    // Cleanup:
     toku_xids_destroy(&xids_0);
     toku_xids_destroy(&xids_123);
     toku_xids_destroy(&xids_234);
@@ -297,35 +312,41 @@ test_serialize_nonleaf(void) {
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft_h->cmp.create(string_key_cmp, nullptr);
     ft->ft = ft_h;
-    
+
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
-    //Want to use block #20
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
     FTNODE_DISK_DATA ndd = NULL;
-    r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
-    assert(r==0);
+    r = toku_serialize_ftnode_to(
+        fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+    invariant(r == 0);
 
     test1(fd, ft_h, &dn);
     test2(fd, ft_h, &dn);
@@ -333,22 +354,26 @@ test_serialize_nonleaf(void) {
     toku_destroy_ftnode_internals(&sn);
     toku_free(ndd);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     toku_free(ft_h->h);
     ft_h->cmp.destroy();
     toku_free(ft_h);
     toku_free(ft);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-static void
-test_serialize_leaf(void) {
+static void test_serialize_leaf(void) {
     //    struct ft_handle source_ft;
     struct ftnode sn, *dn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
 
@@ -364,8 +389,8 @@ test_serialize_leaf(void) {
     MALLOC_N(sn.n_children, sn.bp);
     DBT pivotkey;
     sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1);
-    BP_STATE(&sn,0) = PT_AVAIL;
-    BP_STATE(&sn,1) = PT_AVAIL;
+    BP_STATE(&sn, 0) = PT_AVAIL;
+    BP_STATE(&sn, 1) = PT_AVAIL;
     set_BLB(&sn, 0, toku_create_empty_bn());
     set_BLB(&sn, 1, toku_create_empty_bn());
     le_malloc(BLB_DATA(&sn, 0), 0, "a", "aval");
@@ -378,51 +403,59 @@ test_serialize_leaf(void) {
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft->ft = ft_h;
-    
+
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
-    //Want to use block #20
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
     FTNODE_DISK_DATA ndd = NULL;
-    r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
-    assert(r==0);
+    r = toku_serialize_ftnode_to(
+        fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+    invariant(r == 0);
 
     test1(fd, ft_h, &dn);
-    test3_leaf(fd, ft_h,&dn);
+    test3_leaf(fd, ft_h, &dn);
 
     toku_destroy_ftnode_internals(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     toku_free(ft_h->h);
     toku_free(ft_h);
     toku_free(ft);
     toku_free(ndd);
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+              const char *argv[] __attribute__((__unused__))) {
     initialize_dummymsn();
     test_serialize_nonleaf();
     test_serialize_leaf();
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc
index 9828f49513c79..d50488ae19708 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-benchmark.cc
@@ -41,27 +41,21 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #include <sys/time.h>
 #include "test.h"
 
-
-
 #ifndef MIN
 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
 #endif
 const double USECS_PER_SEC = 1000000.0;
 
-static void
-le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int vallen)
-{
+static void le_add_to_bn(bn_data *bn,
+                         uint32_t idx,
+                         char *key,
+                         int keylen,
+                         char *val,
+                         int vallen) {
     LEAFENTRY r = NULL;
     uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen);
     void *maybe_free = nullptr;
-    bn->get_space_for_insert(
-        idx, 
-        key,
-        keylen,
-        size_needed,
-        &r,
-        &maybe_free
-        );
+    bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free);
     if (maybe_free) {
         toku_free(maybe_free);
     }
@@ -71,20 +65,24 @@ le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int va
     memcpy(r->u.clean.val, val, vallen);
 }
 
-static int
-long_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
-{
+static int long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
     const long *CAST_FROM_VOIDP(x, a->data);
     const long *CAST_FROM_VOIDP(y, b->data);
     return (*x > *y) - (*x < *y);
 }
 
-static void
-test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
+static void test_serialize_leaf(int valsize,
+                                int nelts,
+                                double entropy,
+                                int ser_runs,
+                                int deser_runs) {
     //    struct ft_handle source_ft;
     struct ftnode *sn, *dn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
 
@@ -102,7 +100,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
     MALLOC_N(sn->n_children, sn->bp);
     sn->pivotkeys.create_empty();
     for (int i = 0; i < sn->n_children; ++i) {
-        BP_STATE(sn,i) = PT_AVAIL;
+        BP_STATE(sn, i) = PT_AVAIL;
         set_BLB(sn, i, toku_create_empty_bn());
     }
     int nperbn = nelts / sn->n_children;
@@ -112,24 +110,19 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
             k = ck * nperbn + i;
             char buf[valsize];
             int c;
-            for (c = 0; c < valsize * entropy; ) {
-                int *p = (int *) &buf[c];
+            for (c = 0; c < valsize * entropy;) {
+                int *p = (int *)&buf[c];
                 *p = rand();
                 c += sizeof(*p);
             }
             memset(&buf[c], 0, valsize - c);
             le_add_to_bn(
-                BLB_DATA(sn,ck),
-                i,
-                (char *)&k, 
-                sizeof k, 
-                buf, 
-                sizeof buf
-                );
+                BLB_DATA(sn, ck), i, (char *)&k, sizeof k, buf, sizeof buf);
         }
         if (ck < 7) {
             DBT pivotkey;
-            sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck);
+            sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)),
+                                    ck);
         }
     }
 
@@ -139,31 +132,36 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft_h->cmp.create(long_key_cmp, nullptr);
     ft->ft = ft_h;
-    
+
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
-    //Want to use block #20
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
 
     struct timeval total_start;
@@ -176,8 +174,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
         gettimeofday(&t[0], NULL);
         ndd = NULL;
         sn->dirty = 1;
-        r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, ft->ft, false);
-        assert(r==0);
+        r = toku_serialize_ftnode_to(
+            fd, make_blocknum(20), sn, &ndd, true, ft->ft, false);
+        invariant(r == 0);
         gettimeofday(&t[1], NULL);
         total_start.tv_sec += t[0].tv_sec;
         total_start.tv_usec += t[0].tv_usec;
@@ -186,12 +185,14 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
         toku_free(ndd);
     }
     double dt;
-    dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
+    dt = (total_end.tv_sec - total_start.tv_sec) +
+         ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
     dt *= 1000;
     dt /= ser_runs;
-    printf("serialize leaf(ms):   %0.05lf (average of %d runs)\n", dt, ser_runs);
+    printf(
+        "serialize leaf(ms):   %0.05lf (average of %d runs)\n", dt, ser_runs);
 
-    //reset 
+    // reset
     total_start.tv_sec = total_start.tv_usec = 0;
     total_end.tv_sec = total_end.tv_usec = 0;
 
@@ -200,8 +201,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
         bfe.create_for_full_read(ft_h);
         gettimeofday(&t[0], NULL);
         FTNODE_DISK_DATA ndd2 = NULL;
-        r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
-        assert(r==0);
+        r = toku_deserialize_ftnode_from(
+            fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe);
+        invariant(r == 0);
         gettimeofday(&t[1], NULL);
 
         total_start.tv_sec += t[0].tv_sec;
@@ -212,35 +214,46 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
         toku_ftnode_free(&dn);
         toku_free(ndd2);
     }
-    dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
+    dt = (total_end.tv_sec - total_start.tv_sec) +
+         ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
     dt *= 1000;
     dt /= deser_runs;
-    printf("deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
-    printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (average of %d runs)\n",
-           tokutime_to_seconds(bfe.io_time)*1000,
-           tokutime_to_seconds(bfe.decompress_time)*1000,
-           tokutime_to_seconds(bfe.deserialize_time)*1000,
-           deser_runs
-           );
+    printf(
+        "deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
+    printf(
+        "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf "
+        "(average of %d runs)\n",
+        tokutime_to_seconds(bfe.io_time) * 1000,
+        tokutime_to_seconds(bfe.decompress_time) * 1000,
+        tokutime_to_seconds(bfe.deserialize_time) * 1000,
+        deser_runs);
 
     toku_ftnode_free(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     ft_h->cmp.destroy();
     toku_free(ft_h->h);
     toku_free(ft_h);
     toku_free(ft);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-static void
-test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
+static void test_serialize_nonleaf(int valsize,
+                                   int nelts,
+                                   double entropy,
+                                   int ser_runs,
+                                   int deser_runs) {
     //    struct ft_handle source_ft;
     struct ftnode sn, *dn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
 
@@ -257,11 +270,11 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
     MALLOC_N(sn.n_children, sn.bp);
     sn.pivotkeys.create_empty();
     for (int i = 0; i < sn.n_children; ++i) {
-        BP_BLOCKNUM(&sn, i).b = 30 + (i*5);
-        BP_STATE(&sn,i) = PT_AVAIL;
+        BP_BLOCKNUM(&sn, i).b = 30 + (i * 5);
+        BP_STATE(&sn, i) = PT_AVAIL;
         set_BNC(&sn, i, toku_create_empty_nl());
     }
-    //Create XIDS
+    // Create XIDS
     XIDS xids_0 = toku_xids_get_root_xids();
     XIDS xids_123;
     r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123);
@@ -276,14 +289,23 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
             k = ck * nperchild + i;
             char buf[valsize];
             int c;
-            for (c = 0; c < valsize * entropy; ) {
-                int *p = (int *) &buf[c];
+            for (c = 0; c < valsize * entropy;) {
+                int *p = (int *)&buf[c];
                 *p = rand();
                 c += sizeof(*p);
             }
             memset(&buf[c], 0, valsize - c);
 
-            toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, cmp);
+            toku_bnc_insert_msg(bnc,
+                                &k,
+                                sizeof k,
+                                buf,
+                                valsize,
+                                FT_NONE,
+                                next_dummymsn(),
+                                xids_123,
+                                true,
+                                cmp);
         }
         if (ck < 7) {
             DBT pivotkey;
@@ -291,7 +313,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
         }
     }
 
-    //Cleanup:
+    // Cleanup:
     toku_xids_destroy(&xids_0);
     toku_xids_destroy(&xids_123);
     cmp.destroy();
@@ -302,65 +324,78 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft_h->cmp.create(long_key_cmp, nullptr);
     ft->ft = ft_h;
-    
+
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
-    //Want to use block #20
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
 
     struct timeval t[2];
     gettimeofday(&t[0], NULL);
     FTNODE_DISK_DATA ndd = NULL;
-    r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
-    assert(r==0);
+    r = toku_serialize_ftnode_to(
+        fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
+    invariant(r == 0);
     gettimeofday(&t[1], NULL);
     double dt;
-    dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
+    dt = (t[1].tv_sec - t[0].tv_sec) +
+         ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
     dt *= 1000;
-    printf("serialize nonleaf(ms):   %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
+    printf(
+        "serialize nonleaf(ms):   %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
 
     ftnode_fetch_extra bfe;
     bfe.create_for_full_read(ft_h);
     gettimeofday(&t[0], NULL);
     FTNODE_DISK_DATA ndd2 = NULL;
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
-    assert(r==0);
+    r = toku_deserialize_ftnode_from(
+        fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe);
+    invariant(r == 0);
     gettimeofday(&t[1], NULL);
-    dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
+    dt = (t[1].tv_sec - t[0].tv_sec) +
+         ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
     dt *= 1000;
-    printf("deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
-    printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (IGNORED RUNS=%d)\n",
-           tokutime_to_seconds(bfe.io_time)*1000,
-           tokutime_to_seconds(bfe.decompress_time)*1000,
-           tokutime_to_seconds(bfe.deserialize_time)*1000,
-           deser_runs
-           );
+    printf(
+        "deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
+    printf(
+        "io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf "
+        "(IGNORED RUNS=%d)\n",
+        tokutime_to_seconds(bfe.io_time) * 1000,
+        tokutime_to_seconds(bfe.decompress_time) * 1000,
+        tokutime_to_seconds(bfe.deserialize_time) * 1000,
+        deser_runs);
 
     toku_ftnode_free(&dn);
     toku_destroy_ftnode_internals(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     toku_free(ft_h->h);
     ft_h->cmp.destroy();
@@ -369,17 +404,21 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
     toku_free(ndd);
     toku_free(ndd2);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+              const char *argv[] __attribute__((__unused__))) {
     const int DEFAULT_RUNS = 5;
     long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS;
     double entropy = 0.3;
 
     if (argc != 3 && argc != 5) {
-        fprintf(stderr, "Usage: %s <valsize> <nelts> [<serialize_runs> <deserialize_runs>]\n", argv[0]);
+        fprintf(stderr,
+                "Usage: %s <valsize> <nelts> [<serialize_runs> "
+                "<deserialize_runs>]\n",
+                argv[0]);
         fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS);
         return 2;
     }
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc
index 332aaa0c170b1..0cddaf1965141 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-serialize-test.cc
@@ -39,26 +39,20 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #include "test.h"
 #include "bndata.h"
 
-
-
 #ifndef MIN
 #define MIN(x, y) (((x) < (y)) ? (x) : (y))
 #endif
 
-static size_t
-le_add_to_bn(bn_data* bn, uint32_t idx, const  char *key, int keysize, const char *val, int valsize)
-{
+static size_t le_add_to_bn(bn_data *bn,
+                           uint32_t idx,
+                           const char *key,
+                           int keysize,
+                           const char *val,
+                           int valsize) {
     LEAFENTRY r = NULL;
     uint32_t size_needed = LE_CLEAN_MEMSIZE(valsize);
     void *maybe_free = nullptr;
-    bn->get_space_for_insert(
-        idx,
-        key,
-        keysize,
-        size_needed,
-        &r,
-        &maybe_free
-        );
+    bn->get_space_for_insert(idx, key, keysize, size_needed, &r, &maybe_free);
     if (maybe_free) {
         toku_free(maybe_free);
     }
@@ -70,16 +64,19 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const  char *key, int keysize, const cha
 }
 
 class test_key_le_pair {
-    public:
+   public:
     uint32_t keylen;
-    char* keyp;
+    char *keyp;
     LEAFENTRY le;
 
     test_key_le_pair() : keylen(), keyp(), le() {}
     void init(const char *_keyp, const char *_val) {
         init(_keyp, strlen(_keyp) + 1, _val, strlen(_val) + 1);
     }
-    void init(const char * _keyp, uint32_t _keylen, const char*_val, uint32_t _vallen) {
+    void init(const char *_keyp,
+              uint32_t _keylen,
+              const char *_val,
+              uint32_t _vallen) {
         keylen = _keylen;
 
         CAST_FROM_VOIDP(le, toku_malloc(LE_CLEAN_MEMSIZE(_vallen)));
@@ -95,126 +92,144 @@ class test_key_le_pair {
     }
 };
 
-enum ftnode_verify_type {
-    read_all=1,
-    read_compressed,
-    read_none
-};
+enum ftnode_verify_type { read_all = 1, read_compressed, read_none };
 
-static int
-string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
-{
+static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
     char *CAST_FROM_VOIDP(s, a->data);
     char *CAST_FROM_VOIDP(t, b->data);
     return strcmp(s, t);
 }
 
-static void
-setup_dn(enum ftnode_verify_type bft, int fd, FT ft_h, FTNODE *dn, FTNODE_DISK_DATA* ndd) {
+static void setup_dn(enum ftnode_verify_type bft,
+                     int fd,
+                     FT ft_h,
+                     FTNODE *dn,
+                     FTNODE_DISK_DATA *ndd) {
     int r;
     if (bft == read_all) {
         ftnode_fetch_extra bfe;
         bfe.create_for_full_read(ft_h);
-        r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe);
-        assert(r==0);
-    }
-    else if (bft == read_compressed || bft == read_none) {
+        r = toku_deserialize_ftnode_from(
+            fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe);
+        invariant(r == 0);
+    } else if (bft == read_compressed || bft == read_none) {
         ftnode_fetch_extra bfe;
         bfe.create_for_min_read(ft_h);
-        r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe);
-        assert(r==0);
-        // assert all bp's are compressed or on disk.
+        r = toku_deserialize_ftnode_from(
+            fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, ndd, &bfe);
+        invariant(r == 0);
+        // invariant: all bp's are compressed or on disk.
         for (int i = 0; i < (*dn)->n_children; i++) {
-            assert(BP_STATE(*dn,i) == PT_COMPRESSED || BP_STATE(*dn, i) == PT_ON_DISK);
+            invariant(BP_STATE(*dn, i) == PT_COMPRESSED ||
+                   BP_STATE(*dn, i) == PT_ON_DISK);
         }
         // if read_none, get rid of the compressed bp's
         if (bft == read_none) {
             if ((*dn)->height == 0) {
-                toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
-                // assert all bp's are on disk
+                toku_ftnode_pe_callback(*dn,
+                                        make_pair_attr(0xffffffff),
+                                        ft_h,
+                                        def_pe_finalize_impl,
+                                        nullptr);
+                // invariant: all bp's are on disk
                 for (int i = 0; i < (*dn)->n_children; i++) {
                     if ((*dn)->height == 0) {
-                        assert(BP_STATE(*dn,i) == PT_ON_DISK);
-                        assert(is_BNULL(*dn, i));
-                    }
-                    else {
-                        assert(BP_STATE(*dn,i) == PT_COMPRESSED);
+                        invariant(BP_STATE(*dn, i) == PT_ON_DISK);
+                        invariant(is_BNULL(*dn, i));
+                    } else {
+                        invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
                     }
                 }
-            }
-            else {
+            } else {
                 // first decompress everything, and make sure
                 // that it is available
                 // then run partial eviction to get it compressed
                 PAIR_ATTR attr;
                 bfe.create_for_full_read(ft_h);
-                assert(toku_ftnode_pf_req_callback(*dn, &bfe));
+                invariant(toku_ftnode_pf_req_callback(*dn, &bfe));
                 r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr);
-                assert(r==0);
-                // assert all bp's are available
+                invariant(r == 0);
+                // invariant: all bp's are available
                 for (int i = 0; i < (*dn)->n_children; i++) {
-                    assert(BP_STATE(*dn,i) == PT_AVAIL);
+                    invariant(BP_STATE(*dn, i) == PT_AVAIL);
                 }
-                toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+                toku_ftnode_pe_callback(*dn,
+                                        make_pair_attr(0xffffffff),
+                                        ft_h,
+                                        def_pe_finalize_impl,
+                                        nullptr);
                 for (int i = 0; i < (*dn)->n_children; i++) {
-                    // assert all bp's are still available, because we touched the clock
-                    assert(BP_STATE(*dn,i) == PT_AVAIL);
-                    // now assert all should be evicted
-                    assert(BP_SHOULD_EVICT(*dn, i));
+                    // invariant: all bp's are still available, because we
+                    // touched the clock
+                    invariant(BP_STATE(*dn, i) == PT_AVAIL);
+                    // invariant: now all should be evicted
+                    invariant(BP_SHOULD_EVICT(*dn, i));
                 }
-                toku_ftnode_pe_callback(*dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
+                toku_ftnode_pe_callback(*dn,
+                                        make_pair_attr(0xffffffff),
+                                        ft_h,
+                                        def_pe_finalize_impl,
+                                        nullptr);
                 for (int i = 0; i < (*dn)->n_children; i++) {
-                    assert(BP_STATE(*dn,i) == PT_COMPRESSED);
+                    invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
                 }
             }
         }
         // now decompress them
         bfe.create_for_full_read(ft_h);
-        assert(toku_ftnode_pf_req_callback(*dn, &bfe));
+        invariant(toku_ftnode_pf_req_callback(*dn, &bfe));
         PAIR_ATTR attr;
         r = toku_ftnode_pf_callback(*dn, *ndd, &bfe, fd, &attr);
-        assert(r==0);
-        // assert all bp's are available
+        invariant(r == 0);
+        // invariant: all bp's are available
         for (int i = 0; i < (*dn)->n_children; i++) {
-            assert(BP_STATE(*dn,i) == PT_AVAIL);
+            invariant(BP_STATE(*dn, i) == PT_AVAIL);
         }
         // continue on with test
-    }
-    else {
+    } else {
         // if we get here, this is a test bug, NOT a bug in development code
-        assert(false);
+        invariant(false);
     }
 }
 
-static void write_sn_to_disk(int fd, FT_HANDLE ft, FTNODE sn, FTNODE_DISK_DATA* src_ndd, bool do_clone) {
+static void write_sn_to_disk(int fd,
+                             FT_HANDLE ft,
+                             FTNODE sn,
+                             FTNODE_DISK_DATA *src_ndd,
+                             bool do_clone) {
     int r;
     if (do_clone) {
-        void* cloned_node_v = NULL;
+        void *cloned_node_v = NULL;
         PAIR_ATTR attr;
         long clone_size;
-        toku_ftnode_clone_callback(sn, &cloned_node_v, &clone_size, &attr, false, ft->ft);
+        toku_ftnode_clone_callback(
+            sn, &cloned_node_v, &clone_size, &attr, false, ft->ft);
         FTNODE CAST_FROM_VOIDP(cloned_node, cloned_node_v);
-        r = toku_serialize_ftnode_to(fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false);
-        assert(r==0);        
+        r = toku_serialize_ftnode_to(
+            fd, make_blocknum(20), cloned_node, src_ndd, false, ft->ft, false);
+        invariant(r == 0);
         toku_ftnode_free(&cloned_node);
-    }
-    else {
-        r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false);
-        assert(r==0);
+    } else {
+        r = toku_serialize_ftnode_to(
+            fd, make_blocknum(20), sn, src_ndd, true, ft->ft, false);
+        invariant(r == 0);
     }
 }
 
-static void
-test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_check_msn(enum ftnode_verify_type bft,
+                                          bool do_clone) {
     //    struct ft_handle source_ft;
     struct ftnode sn, *dn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
 
-#define PRESERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 42 })
-#define POSTSERIALIZE_MSN_ON_DISK ((MSN) { MIN_MSN.msn + 84 })
+#define PRESERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 42})
+#define POSTSERIALIZE_MSN_ON_DISK ((MSN){MIN_MSN.msn + 84})
 
     sn.max_msn_applied_to_node_on_disk = PRESERIALIZE_MSN_ON_DISK;
     sn.flags = 0x11223344;
@@ -228,14 +243,14 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
     MALLOC_N(sn.n_children, sn.bp);
     DBT pivotkey;
     sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1);
-    BP_STATE(&sn,0) = PT_AVAIL;
-    BP_STATE(&sn,1) = PT_AVAIL;
+    BP_STATE(&sn, 0) = PT_AVAIL;
+    BP_STATE(&sn, 1) = PT_AVAIL;
     set_BLB(&sn, 0, toku_create_empty_bn());
     set_BLB(&sn, 1, toku_create_empty_bn());
     le_add_to_bn(BLB_DATA(&sn, 0), 0, "a", 2, "aval", 5);
     le_add_to_bn(BLB_DATA(&sn, 0), 1, "b", 2, "bval", 5);
     le_add_to_bn(BLB_DATA(&sn, 1), 0, "x", 2, "xval", 5);
-    BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN) { MIN_MSN.msn + 73 });
+    BLB_MAX_MSN_APPLIED(&sn, 0) = ((MSN){MIN_MSN.msn + 73});
     BLB_MAX_MSN_APPLIED(&sn, 1) = POSTSERIALIZE_MSN_ON_DISK;
 
     FT_HANDLE XMALLOC(ft);
@@ -244,30 +259,35 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft->ft = ft_h;
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
 
-    //Want to use block #20
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
     FTNODE_DISK_DATA src_ndd = NULL;
     FTNODE_DISK_DATA dest_ndd = NULL;
@@ -276,16 +296,18 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
 
     setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
 
-    assert(dn->blocknum.b==20);
+    invariant(dn->blocknum.b == 20);
 
-    assert(dn->layout_version ==FT_LAYOUT_VERSION);
-    assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
-    assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
-    assert(dn->height == 0);
-    assert(dn->n_children>=1);
-    assert(dn->max_msn_applied_to_node_on_disk.msn == POSTSERIALIZE_MSN_ON_DISK.msn);
+    invariant(dn->layout_version == FT_LAYOUT_VERSION);
+    invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+    invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+    invariant(dn->height == 0);
+    invariant(dn->n_children >= 1);
+    invariant(dn->max_msn_applied_to_node_on_disk.msn ==
+           POSTSERIALIZE_MSN_ON_DISK.msn);
     {
-        // Man, this is way too ugly.  This entire test suite needs to be refactored.
+        // Man, this is way too ugly.  This entire test suite needs to be
+        // refactored.
         // Create a dummy mempool and put the leaves there.  Ugh.
         test_key_le_pair elts[3];
         elts[0].init("a", "aval");
@@ -294,34 +316,41 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
         const uint32_t npartitions = dn->n_children;
         uint32_t last_i = 0;
         for (uint32_t bn = 0; bn < npartitions; ++bn) {
-            assert(BLB_MAX_MSN_APPLIED(dn, bn).msn == POSTSERIALIZE_MSN_ON_DISK.msn);
-            assert(dest_ndd[bn].start > 0);
-            assert(dest_ndd[bn].size  > 0);
+            invariant(BLB_MAX_MSN_APPLIED(dn, bn).msn ==
+                   POSTSERIALIZE_MSN_ON_DISK.msn);
+            invariant(dest_ndd[bn].start > 0);
+            invariant(dest_ndd[bn].size > 0);
             if (bn > 0) {
-                assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+                invariant(dest_ndd[bn].start >=
+                       dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
             }
             for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
                 LEAFENTRY curr_le;
                 uint32_t curr_keylen;
-                void* curr_key;
-                BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
-                assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le));
-                assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0);
-                if (bn < npartitions-1) {
-                    assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, elts[last_i].keyp) <= 0);
+                void *curr_key;
+                BLB_DATA(dn, bn)
+                    ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+                invariant(leafentry_memsize(curr_le) ==
+                       leafentry_memsize(elts[last_i].le));
+                invariant(memcmp(curr_le,
+                              elts[last_i].le,
+                              leafentry_memsize(curr_le)) == 0);
+                if (bn < npartitions - 1) {
+                    invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+                                  elts[last_i].keyp) <= 0);
                 }
                 // TODO for later, get a key comparison here as well
                 last_i++;
             }
-
         }
-        assert(last_i == 3);
+        invariant(last_i == 3);
     }
 
     toku_ftnode_free(&dn);
     toku_destroy_ftnode_internals(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     toku_free(ft_h->h);
     toku_free(ft_h);
@@ -329,17 +358,21 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
     toku_free(src_ndd);
     toku_free(dest_ndd);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-static void
-test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft,
+                                                  bool do_clone) {
     int r;
     struct ftnode sn, *dn;
-    const int keylens = 256*1024, vallens = 0;
+    const int keylens = 256 * 1024, vallens = 0;
     const uint32_t nrows = 8;
-    // assert(val_size > BN_MAX_SIZE);  // BN_MAX_SIZE isn't visible
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    // invariant(val_size > BN_MAX_SIZE);  // BN_MAX_SIZE isn't visible
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     sn.max_msn_applied_to_node_on_disk.msn = 0;
     sn.flags = 0x11223344;
@@ -354,21 +387,27 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
     MALLOC_N(sn.n_children, sn.bp);
     sn.pivotkeys.create_empty();
     for (int i = 0; i < sn.n_children; ++i) {
-        BP_STATE(&sn,i) = PT_AVAIL;
+        BP_STATE(&sn, i) = PT_AVAIL;
         set_BLB(&sn, i, toku_create_empty_bn());
     }
     for (uint32_t i = 0; i < nrows; ++i) {  // one basement per row
         char key[keylens], val[vallens];
-        key[keylens-1] = '\0';
+        key[keylens - 1] = '\0';
         char c = 'a' + i;
-        memset(key, c, keylens-1);
-        le_add_to_bn(BLB_DATA(&sn, i), 0, (char *) &key, sizeof(key), (char *) &val, sizeof(val));
-        if (i < nrows-1) {
+        memset(key, c, keylens - 1);
+        le_add_to_bn(BLB_DATA(&sn, i),
+                     0,
+                     (char *)&key,
+                     sizeof(key),
+                     (char *)&val,
+                     sizeof(val));
+        if (i < nrows - 1) {
             uint32_t keylen;
-            void* curr_key;
+            void *curr_key;
             BLB_DATA(&sn, i)->fetch_key_and_len(0, &keylen, &curr_key);
             DBT pivotkey;
-            sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen), i);
+            sn.pivotkeys.insert_at(toku_fill_dbt(&pivotkey, curr_key, keylen),
+                                   i);
         }
     }
 
@@ -378,29 +417,34 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft->ft = ft_h;
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
-    //Want to use block #20
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
     FTNODE_DISK_DATA src_ndd = NULL;
     FTNODE_DISK_DATA dest_ndd = NULL;
@@ -408,55 +452,64 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
     write_sn_to_disk(fd, ft, &sn, &src_ndd, do_clone);
 
     setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
-    
-    assert(dn->blocknum.b==20);
 
-    assert(dn->layout_version ==FT_LAYOUT_VERSION);
-    assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
+    invariant(dn->blocknum.b == 20);
+
+    invariant(dn->layout_version == FT_LAYOUT_VERSION);
+    invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
     {
-        // Man, this is way too ugly.  This entire test suite needs to be refactored.
+        // Man, this is way too ugly.  This entire test suite needs to be
+        // refactored.
         // Create a dummy mempool and put the leaves there.  Ugh.
         test_key_le_pair *les = new test_key_le_pair[nrows];
         {
             char key[keylens], val[vallens];
-            key[keylens-1] = '\0';
+            key[keylens - 1] = '\0';
             for (uint32_t i = 0; i < nrows; ++i) {
                 char c = 'a' + i;
-                memset(key, c, keylens-1);
-                les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val));
+                memset(key, c, keylens - 1);
+                les[i].init(
+                    (char *)&key, sizeof(key), (char *)&val, sizeof(val));
             }
         }
         const uint32_t npartitions = dn->n_children;
         uint32_t last_i = 0;
         for (uint32_t bn = 0; bn < npartitions; ++bn) {
-            assert(dest_ndd[bn].start > 0);
-            assert(dest_ndd[bn].size  > 0);
+            invariant(dest_ndd[bn].start > 0);
+            invariant(dest_ndd[bn].size > 0);
             if (bn > 0) {
-                assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+                invariant(dest_ndd[bn].start >=
+                       dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
             }
-            assert(BLB_DATA(dn, bn)->num_klpairs() > 0);
+            invariant(BLB_DATA(dn, bn)->num_klpairs() > 0);
             for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
                 LEAFENTRY curr_le;
                 uint32_t curr_keylen;
-                void* curr_key;
-                BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
-                assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
-                assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
-                if (bn < npartitions-1) {
-                    assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, les[last_i].keyp) <= 0);
+                void *curr_key;
+                BLB_DATA(dn, bn)
+                    ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+                invariant(leafentry_memsize(curr_le) ==
+                       leafentry_memsize(les[last_i].le));
+                invariant(memcmp(curr_le,
+                              les[last_i].le,
+                              leafentry_memsize(curr_le)) == 0);
+                if (bn < npartitions - 1) {
+                    invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+                                  les[last_i].keyp) <= 0);
                 }
                 // TODO for later, get a key comparison here as well
                 last_i++;
             }
         }
-        assert(last_i == nrows);
+        invariant(last_i == nrows);
         delete[] les;
     }
 
     toku_ftnode_free(&dn);
     toku_destroy_ftnode_internals(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     toku_free(ft_h->h);
     toku_free(ft_h);
@@ -464,15 +517,19 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
     toku_free(src_ndd);
     toku_free(dest_ndd);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-static void
-test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft,
+                                               bool do_clone) {
     int r;
     struct ftnode sn, *dn;
-    const uint32_t nrows = 196*1024;
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    const uint32_t nrows = 196 * 1024;
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     sn.max_msn_applied_to_node_on_disk.msn = 0;
     sn.flags = 0x11223344;
@@ -487,14 +544,19 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
     XMALLOC_N(sn.n_children, sn.bp);
     sn.pivotkeys.create_empty();
     for (int i = 0; i < sn.n_children; ++i) {
-        BP_STATE(&sn,i) = PT_AVAIL;
-        set_BLB(&sn, i, toku_create_empty_bn()); 
+        BP_STATE(&sn, i) = PT_AVAIL;
+        set_BLB(&sn, i, toku_create_empty_bn());
     }
     size_t total_size = 0;
     for (uint32_t i = 0; i < nrows; ++i) {
         uint32_t key = i;
         uint32_t val = i;
-        total_size += le_add_to_bn(BLB_DATA(&sn, 0), i, (char *) &key, sizeof(key), (char *) &val, sizeof(val));
+        total_size += le_add_to_bn(BLB_DATA(&sn, 0),
+                                   i,
+                                   (char *)&key,
+                                   sizeof(key),
+                                   (char *)&val,
+                                   sizeof(val));
     }
 
     FT_HANDLE XMALLOC(ft);
@@ -503,30 +565,35 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft->ft = ft_h;
-    
+
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
-    //Want to use block #20
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
 
     FTNODE_DISK_DATA src_ndd = NULL;
@@ -535,56 +602,66 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
 
     setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
 
-    assert(dn->blocknum.b==20);
+    invariant(dn->blocknum.b == 20);
 
-    assert(dn->layout_version ==FT_LAYOUT_VERSION);
-    assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
+    invariant(dn->layout_version == FT_LAYOUT_VERSION);
+    invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
     {
-        // Man, this is way too ugly.  This entire test suite needs to be refactored.
+        // Man, this is way too ugly.  This entire test suite needs to be
+        // refactored.
         // Create a dummy mempool and put the leaves there.  Ugh.
         test_key_le_pair *les = new test_key_le_pair[nrows];
         {
             int key = 0, val = 0;
             for (uint32_t i = 0; i < nrows; ++i, key++, val++) {
-                les[i].init((char *) &key, sizeof(key), (char *) &val, sizeof(val));
+                les[i].init(
+                    (char *)&key, sizeof(key), (char *)&val, sizeof(val));
             }
         }
         const uint32_t npartitions = dn->n_children;
         uint32_t last_i = 0;
         for (uint32_t bn = 0; bn < npartitions; ++bn) {
-            assert(dest_ndd[bn].start > 0);
-            assert(dest_ndd[bn].size  > 0);
+            invariant(dest_ndd[bn].start > 0);
+            invariant(dest_ndd[bn].size > 0);
             if (bn > 0) {
-                assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+                invariant(dest_ndd[bn].start >=
+                       dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
             }
-            assert(BLB_DATA(dn, bn)->num_klpairs() > 0);
+            invariant(BLB_DATA(dn, bn)->num_klpairs() > 0);
             for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
                 LEAFENTRY curr_le;
                 uint32_t curr_keylen;
-                void* curr_key;
-                BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
-                assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
-                assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
-                if (bn < npartitions-1) {
-                    uint32_t *CAST_FROM_VOIDP(pivot, dn->pivotkeys.get_pivot(bn).data);
-                    void* tmp = les[last_i].keyp;
+                void *curr_key;
+                BLB_DATA(dn, bn)
+                    ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+                invariant(leafentry_memsize(curr_le) ==
+                       leafentry_memsize(les[last_i].le));
+                invariant(memcmp(curr_le,
+                              les[last_i].le,
+                              leafentry_memsize(curr_le)) == 0);
+                if (bn < npartitions - 1) {
+                    uint32_t *CAST_FROM_VOIDP(pivot,
+                                              dn->pivotkeys.get_pivot(bn).data);
+                    void *tmp = les[last_i].keyp;
                     uint32_t *CAST_FROM_VOIDP(item, tmp);
-                    assert(*pivot >= *item);
+                    invariant(*pivot >= *item);
                 }
                 // TODO for later, get a key comparison here as well
                 last_i++;
             }
             // don't check soft_copy_is_up_to_date or seqinsert
-            assert(BLB_DATA(dn, bn)->get_disk_size() < 128*1024);  // BN_MAX_SIZE, apt to change
+            invariant(BLB_DATA(dn, bn)->get_disk_size() <
+                   128 * 1024);  // BN_MAX_SIZE, apt to change
         }
-        assert(last_i == nrows);
+        invariant(last_i == nrows);
         delete[] les;
     }
 
     toku_ftnode_free(&dn);
     toku_destroy_ftnode_internals(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     toku_free(ft_h->h);
     toku_free(ft_h);
@@ -592,19 +669,22 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
     toku_free(src_ndd);
     toku_free(dest_ndd);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-
-static void
-test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft,
+                                                bool do_clone) {
     int r;
     struct ftnode sn, *dn;
     const uint32_t nrows = 7;
     const size_t key_size = 8;
-    const size_t val_size = 512*1024;
-    // assert(val_size > BN_MAX_SIZE);  // BN_MAX_SIZE isn't visible
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    const size_t val_size = 512 * 1024;
+    // invariant(val_size > BN_MAX_SIZE);  // BN_MAX_SIZE isn't visible
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     sn.max_msn_applied_to_node_on_disk.msn = 0;
     sn.flags = 0x11223344;
@@ -615,21 +695,21 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
     sn.n_children = 1;
     sn.dirty = 1;
     sn.oldest_referenced_xid_known = TXNID_NONE;
-    
+
     MALLOC_N(sn.n_children, sn.bp);
     sn.pivotkeys.create_empty();
     for (int i = 0; i < sn.n_children; ++i) {
-        BP_STATE(&sn,i) = PT_AVAIL;
+        BP_STATE(&sn, i) = PT_AVAIL;
         set_BLB(&sn, i, toku_create_empty_bn());
     }
     for (uint32_t i = 0; i < nrows; ++i) {
         char key[key_size], val[val_size];
-        key[key_size-1] = '\0';
-        val[val_size-1] = '\0';
+        key[key_size - 1] = '\0';
+        val[val_size - 1] = '\0';
         char c = 'a' + i;
-        memset(key, c, key_size-1);
-        memset(val, c, val_size-1);
-        le_add_to_bn(BLB_DATA(&sn, 0), i,key, 8, val, val_size);
+        memset(key, c, key_size - 1);
+        memset(val, c, val_size - 1);
+        le_add_to_bn(BLB_DATA(&sn, 0), i, key, 8, val, val_size);
     }
 
     FT_HANDLE XMALLOC(ft);
@@ -638,30 +718,35 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft->ft = ft_h;
-    
+
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
-    //Want to use block #20
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
 
     FTNODE_DISK_DATA src_ndd = NULL;
@@ -670,58 +755,66 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
 
     setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
 
-    assert(dn->blocknum.b==20);
+    invariant(dn->blocknum.b == 20);
 
-    assert(dn->layout_version ==FT_LAYOUT_VERSION);
-    assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
+    invariant(dn->layout_version == FT_LAYOUT_VERSION);
+    invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
     {
-        // Man, this is way too ugly.  This entire test suite needs to be refactored.
+        // Man, this is way too ugly.  This entire test suite needs to be
+        // refactored.
         // Create a dummy mempool and put the leaves there.  Ugh.
         test_key_le_pair *les = new test_key_le_pair[nrows];
         {
             char key[key_size], val[val_size];
-            key[key_size-1] = '\0';
-            val[val_size-1] = '\0';
+            key[key_size - 1] = '\0';
+            val[val_size - 1] = '\0';
             for (uint32_t i = 0; i < nrows; ++i) {
                 char c = 'a' + i;
-                memset(key, c, key_size-1);
-                memset(val, c, val_size-1);
+                memset(key, c, key_size - 1);
+                memset(val, c, val_size - 1);
                 les[i].init(key, key_size, val, val_size);
             }
         }
         const uint32_t npartitions = dn->n_children;
-        assert(npartitions == nrows);
+        invariant(npartitions == nrows);
         uint32_t last_i = 0;
         for (uint32_t bn = 0; bn < npartitions; ++bn) {
-            assert(dest_ndd[bn].start > 0);
-            assert(dest_ndd[bn].size  > 0);
+            invariant(dest_ndd[bn].start > 0);
+            invariant(dest_ndd[bn].size > 0);
             if (bn > 0) {
-                assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+                invariant(dest_ndd[bn].start >=
+                       dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
             }
-            assert(BLB_DATA(dn, bn)->num_klpairs() > 0);
+            invariant(BLB_DATA(dn, bn)->num_klpairs() > 0);
             for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
                 LEAFENTRY curr_le;
                 uint32_t curr_keylen;
-                void* curr_key;
-                BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
-                assert(leafentry_memsize(curr_le) == leafentry_memsize(les[last_i].le));
-                assert(memcmp(curr_le, les[last_i].le, leafentry_memsize(curr_le)) == 0);
-                if (bn < npartitions-1) {
-                    assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(les[last_i].keyp)) <= 0);
+                void *curr_key;
+                BLB_DATA(dn, bn)
+                    ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+                invariant(leafentry_memsize(curr_le) ==
+                       leafentry_memsize(les[last_i].le));
+                invariant(memcmp(curr_le,
+                              les[last_i].le,
+                              leafentry_memsize(curr_le)) == 0);
+                if (bn < npartitions - 1) {
+                    invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+                                  (char *)(les[last_i].keyp)) <= 0);
                 }
                 // TODO for later, get a key comparison here as well
                 last_i++;
             }
             // don't check soft_copy_is_up_to_date or seqinsert
         }
-        assert(last_i == 7);
+        invariant(last_i == 7);
         delete[] les;
     }
 
     toku_ftnode_free(&dn);
     toku_destroy_ftnode_internals(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     toku_free(ft_h->h);
     toku_free(ft_h);
@@ -729,15 +822,19 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
     toku_free(src_ndd);
     toku_free(dest_ndd);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-
-static void
-test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_empty_basement_nodes(
+    enum ftnode_verify_type bft,
+    bool do_clone) {
     struct ftnode sn, *dn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
 
@@ -760,7 +857,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
     toku_fill_dbt(&pivotkeys[5], "x", 2);
     sn.pivotkeys.create_from_dbts(pivotkeys, 6);
     for (int i = 0; i < sn.n_children; ++i) {
-        BP_STATE(&sn,i) = PT_AVAIL;
+        BP_STATE(&sn, i) = PT_AVAIL;
         set_BLB(&sn, i, toku_create_empty_bn());
         BLB_SEQINSERT(&sn, i) = 0;
     }
@@ -774,30 +871,35 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft->ft = ft_h;
-    
+
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
-    //Want to use block #20
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
     FTNODE_DISK_DATA src_ndd = NULL;
     FTNODE_DISK_DATA dest_ndd = NULL;
@@ -805,17 +907,18 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
 
     setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
 
-    assert(dn->blocknum.b==20);
+    invariant(dn->blocknum.b == 20);
 
-    assert(dn->layout_version ==FT_LAYOUT_VERSION);
-    assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
-    assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
-    assert(dn->height == 0);
-    assert(dn->n_children>0);
+    invariant(dn->layout_version == FT_LAYOUT_VERSION);
+    invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+    invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+    invariant(dn->height == 0);
+    invariant(dn->n_children > 0);
     {
         test_key_le_pair elts[3];
 
-        // Man, this is way too ugly.  This entire test suite needs to be refactored.
+        // Man, this is way too ugly.  This entire test suite needs to be
+        // refactored.
         // Create a dummy mempool and put the leaves there.  Ugh.
         elts[0].init("a", "aval");
         elts[1].init("b", "bval");
@@ -823,33 +926,39 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
         const uint32_t npartitions = dn->n_children;
         uint32_t last_i = 0;
         for (uint32_t bn = 0; bn < npartitions; ++bn) {
-            assert(dest_ndd[bn].start > 0);
-            assert(dest_ndd[bn].size  > 0);
+            invariant(dest_ndd[bn].start > 0);
+            invariant(dest_ndd[bn].size > 0);
             if (bn > 0) {
-                assert(dest_ndd[bn].start >= dest_ndd[bn-1].start + dest_ndd[bn-1].size);
+                invariant(dest_ndd[bn].start >=
+                       dest_ndd[bn - 1].start + dest_ndd[bn - 1].size);
             }
             for (uint32_t i = 0; i < BLB_DATA(dn, bn)->num_klpairs(); i++) {
                 LEAFENTRY curr_le;
                 uint32_t curr_keylen;
-                void* curr_key;
-                BLB_DATA(dn, bn)->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
-                assert(leafentry_memsize(curr_le) == leafentry_memsize(elts[last_i].le));
-                assert(memcmp(curr_le, elts[last_i].le, leafentry_memsize(curr_le)) == 0);
-                if (bn < npartitions-1) {
-                    assert(strcmp((char*)dn->pivotkeys.get_pivot(bn).data, (char*)(elts[last_i].keyp)) <= 0);
+                void *curr_key;
+                BLB_DATA(dn, bn)
+                    ->fetch_klpair(i, &curr_le, &curr_keylen, &curr_key);
+                invariant(leafentry_memsize(curr_le) ==
+                       leafentry_memsize(elts[last_i].le));
+                invariant(memcmp(curr_le,
+                              elts[last_i].le,
+                              leafentry_memsize(curr_le)) == 0);
+                if (bn < npartitions - 1) {
+                    invariant(strcmp((char *)dn->pivotkeys.get_pivot(bn).data,
+                                  (char *)(elts[last_i].keyp)) <= 0);
                 }
                 // TODO for later, get a key comparison here as well
                 last_i++;
             }
-
         }
-        assert(last_i == 3);
+        invariant(last_i == 3);
     }
 
     toku_ftnode_free(&dn);
     toku_destroy_ftnode_internals(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     toku_free(ft_h->h);
     toku_free(ft_h);
@@ -857,14 +966,19 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
     toku_free(src_ndd);
     toku_free(dest_ndd);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-static void
-test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_leaf_with_multiple_empty_basement_nodes(
+    enum ftnode_verify_type bft,
+    bool do_clone) {
     struct ftnode sn, *dn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
 
@@ -884,7 +998,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
     toku_fill_dbt(&pivotkeys[2], "A", 2);
     sn.pivotkeys.create_from_dbts(pivotkeys, 3);
     for (int i = 0; i < sn.n_children; ++i) {
-        BP_STATE(&sn,i) = PT_AVAIL;
+        BP_STATE(&sn, i) = PT_AVAIL;
         set_BLB(&sn, i, toku_create_empty_bn());
     }
 
@@ -894,30 +1008,35 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft->ft = ft_h;
-    
+
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
-    //Want to use block #20
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
 
     FTNODE_DISK_DATA src_ndd = NULL;
@@ -926,29 +1045,31 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
 
     setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
 
-    assert(dn->blocknum.b==20);
+    invariant(dn->blocknum.b == 20);
 
-    assert(dn->layout_version ==FT_LAYOUT_VERSION);
-    assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
-    assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
-    assert(dn->height == 0);
-    assert(dn->n_children == 1);
+    invariant(dn->layout_version == FT_LAYOUT_VERSION);
+    invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+    invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+    invariant(dn->height == 0);
+    invariant(dn->n_children == 1);
     {
         const uint32_t npartitions = dn->n_children;
         for (uint32_t i = 0; i < npartitions; ++i) {
-            assert(dest_ndd[i].start > 0);
-            assert(dest_ndd[i].size  > 0);
+            invariant(dest_ndd[i].start > 0);
+            invariant(dest_ndd[i].size > 0);
             if (i > 0) {
-                assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size);
+                invariant(dest_ndd[i].start >=
+                       dest_ndd[i - 1].start + dest_ndd[i - 1].size);
             }
-            assert(BLB_DATA(dn, i)->num_klpairs() == 0);
+            invariant(BLB_DATA(dn, i)->num_klpairs() == 0);
         }
     }
-    
+
     toku_ftnode_free(&dn);
     toku_destroy_ftnode_internals(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     toku_free(ft_h->h);
     toku_free(ft_h);
@@ -956,16 +1077,18 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
     toku_free(src_ndd);
     toku_free(dest_ndd);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-
-static void
-test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
+static void test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
     //    struct ft_handle source_ft;
     struct ftnode sn, *dn;
 
-    int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
+    int fd = open(TOKU_TEST_FILENAME,
+                  O_RDWR | O_CREAT | O_BINARY,
+                  S_IRWXU | S_IRWXG | S_IRWXO);
+    invariant(fd >= 0);
 
     int r;
 
@@ -984,11 +1107,11 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
     sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1);
     BP_BLOCKNUM(&sn, 0).b = 30;
     BP_BLOCKNUM(&sn, 1).b = 35;
-    BP_STATE(&sn,0) = PT_AVAIL;
-    BP_STATE(&sn,1) = PT_AVAIL;
+    BP_STATE(&sn, 0) = PT_AVAIL;
+    BP_STATE(&sn, 1) = PT_AVAIL;
     set_BNC(&sn, 0, toku_create_empty_nl());
     set_BNC(&sn, 1, toku_create_empty_nl());
-    //Create XIDS
+    // Create XIDS
     XIDS xids_0 = toku_xids_get_root_xids();
     XIDS xids_123;
     XIDS xids_234;
@@ -1000,11 +1123,38 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
     toku::comparator cmp;
     cmp.create(string_key_cmp, nullptr);
 
-    toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp);
-    toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp);
-    toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp);
-
-    //Cleanup:
+    toku_bnc_insert_msg(BNC(&sn, 0),
+                        "a",
+                        2,
+                        "aval",
+                        5,
+                        FT_NONE,
+                        next_dummymsn(),
+                        xids_0,
+                        true,
+                        cmp);
+    toku_bnc_insert_msg(BNC(&sn, 0),
+                        "b",
+                        2,
+                        "bval",
+                        5,
+                        FT_NONE,
+                        next_dummymsn(),
+                        xids_123,
+                        false,
+                        cmp);
+    toku_bnc_insert_msg(BNC(&sn, 1),
+                        "x",
+                        2,
+                        "xval",
+                        5,
+                        FT_NONE,
+                        next_dummymsn(),
+                        xids_234,
+                        true,
+                        cmp);
+
+    // Cleanup:
     toku_xids_destroy(&xids_0);
     toku_xids_destroy(&xids_123);
     toku_xids_destroy(&xids_234);
@@ -1016,31 +1166,36 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
                  make_blocknum(0),
                  ZERO_LSN,
                  TXNID_NONE,
-                 4*1024*1024,
-                 128*1024,
+                 4 * 1024 * 1024,
+                 128 * 1024,
                  TOKU_DEFAULT_COMPRESSION_METHOD,
                  16);
     ft_h->cmp.create(string_key_cmp, nullptr);
     ft->ft = ft_h;
-    
+
     ft_h->blocktable.create();
-    { int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
-    //Want to use block #20
+    {
+        int r_truncate = ftruncate(fd, 0);
+        CKERR(r_truncate);
+    }
+    // Want to use block #20
     BLOCKNUM b = make_blocknum(0);
     while (b.b < 20) {
         ft_h->blocktable.allocate_blocknum(&b, ft_h);
     }
-    assert(b.b == 20);
+    invariant(b.b == 20);
 
     {
         DISKOFF offset;
         DISKOFF size;
-        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
-        assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
 
         ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
-        assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
-        assert(size   == 100);
+        invariant(offset ==
+               (DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+        invariant(size == 100);
     }
     FTNODE_DISK_DATA src_ndd = NULL;
     FTNODE_DISK_DATA dest_ndd = NULL;
@@ -1048,30 +1203,31 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
 
     setup_dn(bft, fd, ft_h, &dn, &dest_ndd);
 
-    assert(dn->blocknum.b==20);
+    invariant(dn->blocknum.b == 20);
 
-    assert(dn->layout_version ==FT_LAYOUT_VERSION);
-    assert(dn->layout_version_original ==FT_LAYOUT_VERSION);
-    assert(dn->layout_version_read_from_disk ==FT_LAYOUT_VERSION);
-    assert(dn->height == 1);
-    assert(dn->n_children==2);
-    assert(strcmp((char*)dn->pivotkeys.get_pivot(0).data, "hello")==0);
-    assert(dn->pivotkeys.get_pivot(0).size==6);
-    assert(BP_BLOCKNUM(dn,0).b==30);
-    assert(BP_BLOCKNUM(dn,1).b==35);
+    invariant(dn->layout_version == FT_LAYOUT_VERSION);
+    invariant(dn->layout_version_original == FT_LAYOUT_VERSION);
+    invariant(dn->layout_version_read_from_disk == FT_LAYOUT_VERSION);
+    invariant(dn->height == 1);
+    invariant(dn->n_children == 2);
+    invariant(strcmp((char *)dn->pivotkeys.get_pivot(0).data, "hello") == 0);
+    invariant(dn->pivotkeys.get_pivot(0).size == 6);
+    invariant(BP_BLOCKNUM(dn, 0).b == 30);
+    invariant(BP_BLOCKNUM(dn, 1).b == 35);
 
     message_buffer *src_msg_buffer1 = &BNC(&sn, 0)->msg_buffer;
     message_buffer *src_msg_buffer2 = &BNC(&sn, 1)->msg_buffer;
     message_buffer *dest_msg_buffer1 = &BNC(dn, 0)->msg_buffer;
     message_buffer *dest_msg_buffer2 = &BNC(dn, 1)->msg_buffer;
 
-    assert(src_msg_buffer1->equals(dest_msg_buffer1));
-    assert(src_msg_buffer2->equals(dest_msg_buffer2));
+    invariant(src_msg_buffer1->equals(dest_msg_buffer1));
+    invariant(src_msg_buffer2->equals(dest_msg_buffer2));
 
     toku_ftnode_free(&dn);
     toku_destroy_ftnode_internals(&sn);
 
-    ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
+    ft_h->blocktable.block_free(
+        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
     ft_h->blocktable.destroy();
     ft_h->cmp.destroy();
     toku_free(ft_h->h);
@@ -1080,11 +1236,12 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
     toku_free(src_ndd);
     toku_free(dest_ndd);
 
-    r = close(fd); assert(r != -1);
+    r = close(fd);
+    invariant(r != -1);
 }
 
-int
-test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
+int test_main(int argc __attribute__((__unused__)),
+              const char *argv[] __attribute__((__unused__))) {
     initialize_dummymsn();
 
     test_serialize_nonleaf(read_none, false);
@@ -1103,10 +1260,12 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_
 
     test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, false);
     test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, false);
-    test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, false);
+    test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed,
+                                                           false);
     test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, true);
     test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, true);
-    test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, true);
+    test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed,
+                                                           true);
 
     test_serialize_leaf_with_empty_basement_nodes(read_none, false);
     test_serialize_leaf_with_empty_basement_nodes(read_all, false);
diff --git a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc
index 598a1cc7085c2..706bd94fbc3af 100644
--- a/storage/tokudb/PerconaFT/ft/tests/ft-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/ft-test.cc
@@ -164,17 +164,16 @@ static void  test_read_what_was_written (void) {
     int r;
     const int NVALS=10000;
 
-    if (verbose) printf("test_read_what_was_written(): "); fflush(stdout);
+    if (verbose) {
+        printf("test_read_what_was_written(): "); fflush(stdout);
+    }
 
     unlink(fname);
-    
 
     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
     r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun);  assert(r==0);
     r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
-    toku_cachetable_close(&ct);
-
-    
+    toku_cachetable_close(&ct);    
 
     /* Now see if we can read an empty tree in. */
     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
@@ -189,8 +188,6 @@ static void  test_read_what_was_written (void) {
     r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
     toku_cachetable_close(&ct);
 
-    
-
     /* Now see if we can read it in and get the value. */
     toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
     r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
diff --git a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc
index 53973794eae74..aeb5a897c488f 100644
--- a/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/pqueue-test.cc
@@ -109,7 +109,9 @@ static int run_test(void)
         r = pqueue_pop(pq, &node);   assert(r==0);
         if (verbose) printf("%d : %d\n", i, *(int*)(node->key->data));
         if ( *(int*)(node->key->data) != i ) { 
-            if (verbose) printf("FAIL\n"); return -1; 
+            if (verbose)
+                printf("FAIL\n");
+            return -1;
         }
     }
     pqueue_free(pq);
diff --git a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc
index a78f787cdf299..f200496486250 100644
--- a/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/test-leafentry-nested.cc
@@ -793,7 +793,7 @@ static void test_le_garbage_collection_birdie(void) {
     do_garbage_collect = ule_worth_running_garbage_collection(&ule, 200);
     invariant(do_garbage_collect);
 
-    // It is definately worth doing when the above case is true
+    // It is definitely worth doing when the above case is true
     // and there is more than one provisional entry.
     ule.num_cuxrs = 1;
     ule.num_puxrs = 2;
diff --git a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc
index 419af550545c4..71357a1e16ad2 100644
--- a/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc
+++ b/storage/tokudb/PerconaFT/ft/tests/test-oldest-referenced-xid-flush.cc
@@ -72,7 +72,7 @@ static void dummy_update_status(FTNODE UU(child), int UU(dirtied), void* UU(extr
 
 enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 };
 
-static void test_oldest_referenced_xid_gets_propogated(void) {
+static void test_oldest_referenced_xid_gets_propagated(void) {
     int r;
     CACHETABLE ct;
     FT_HANDLE t;
@@ -166,7 +166,7 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
     toku_ft_flush_some_child(t->ft, node, &fa);
 
     // pin the child, verify that oldest referenced xid was
-    // propogated from parent to child during the flush
+    // propagated from parent to child during the flush
     toku_pin_ftnode(
         t->ft, 
         child_nonleaf_blocknum,
@@ -185,6 +185,6 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
 
 int test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
     default_parse_args(argc, argv);
-    test_oldest_referenced_xid_gets_propogated();
+    test_oldest_referenced_xid_gets_propagated();
     return 0;
 }
diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc
similarity index 55%
rename from storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h
rename to storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc
index 8aded3898c1de..ea4f9374dc336 100644
--- a/storage/tokudb/PerconaFT/ft/serialize/block_allocator_strategy.h
+++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-with-mhs.cc
@@ -36,30 +36,62 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
 
-#pragma once
-
-#include <db.h>
-
-#include "ft/serialize/block_allocator.h"
-
-// Block allocation strategy implementations
-
-class block_allocator_strategy {
-public:
-    static struct block_allocator::blockpair *
-    first_fit(struct block_allocator::blockpair *blocks_array,
-              uint64_t n_blocks, uint64_t size, uint64_t alignment);
-
-    static struct block_allocator::blockpair *
-    best_fit(struct block_allocator::blockpair *blocks_array,
-             uint64_t n_blocks, uint64_t size, uint64_t alignment);
-
-    static struct block_allocator::blockpair *
-    padded_fit(struct block_allocator::blockpair *blocks_array,
-               uint64_t n_blocks, uint64_t size, uint64_t alignment);
-
-    static struct block_allocator::blockpair *
-    heat_zone(struct block_allocator::blockpair *blocks_array,
-              uint64_t n_blocks, uint64_t size, uint64_t alignment,
-              uint64_t heat);
-};
+#include "ft/serialize/rbtree_mhs.h"
+#include "test.h"
+#include <algorithm>
+#include <vector>
+#include <ctime>
+#include <cstdlib>
+
+// Exercise Insert/Remove on MhsRbTree::Tree, re-running ValidateBalance and
+// ValidateMhs after each phase and checking the offsets Remove returns.
+static void test_insert_remove(void) {
+    MhsRbTree::Tree *tree = new MhsRbTree::Tree();
+    verbose = 0;
+    // One {0, 100} pair, then ten Remove(3)/Remove(2) rounds.
+    tree->Insert({0, 100});
+    for (uint64_t round = 0; round < 10; ++round) {
+        tree->Remove(3);
+        tree->Remove(2);
+    }
+    tree->ValidateBalance();
+    tree->ValidateMhs();
+
+    // Insert {0, 3}, {5, 3}, ..., {45, 3}.
+    for (uint64_t k = 0; k < 10; ++k) {
+        tree->Insert({5 * k, 3});
+    }
+    tree->ValidateBalance();
+    tree->ValidateMhs();
+
+    // The next three removals must hand back offsets 0, 50 and 5.
+    uint64_t offset = tree->Remove(2);
+    invariant(offset == 0);
+    offset = tree->Remove(10);
+    invariant(offset == 50);
+    offset = tree->Remove(3);
+    invariant(offset == 5);
+    tree->ValidateBalance();
+    tree->ValidateMhs();
+    tree->Insert({48, 2});
+    tree->Insert({50, 10});
+    tree->ValidateBalance();
+    tree->ValidateMhs();
+    // After one more insert, Remove(10) must return offset 2.
+    tree->Insert({3, 7});
+    offset = tree->Remove(10);
+    invariant(offset == 2);
+    tree->ValidateBalance();
+    tree->ValidateMhs();
+    tree->Dump();
+    delete tree;
+}
+
+// Test driver entry point: parse the common test arguments, run the test,
+// and print "test ok" if verbose is still set afterwards.
+int test_main(int argc, const char *argv[]) {
+    default_parse_args(argc, argv);
+    test_insert_remove();
+    if (verbose) { printf("test ok\n"); }
+    return 0;
+}
diff --git a/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc
new file mode 100644
index 0000000000000..cefe66335a6cb
--- /dev/null
+++ b/storage/tokudb/PerconaFT/ft/tests/test-rbtree-insert-remove-without-mhs.cc
@@ -0,0 +1,103 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of PerconaFT.
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License, version 2,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+
+----------------------------------------
+
+    PerconaFT is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License, version 3,
+    as published by the Free Software Foundation.
+
+    PerconaFT is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#include "ft/serialize/rbtree_mhs.h"
+#include "test.h"
+#include <algorithm>
+#include <vector>
+#include <ctime>
+#include <cstdlib>
+
+#define N 1000000
+std::vector<MhsRbTree::Node::BlockPair> input_vector;
+MhsRbTree::Node::BlockPair old_vector[N];
+
+static int myrandom(int i) { return std::rand() % i; }  // RNG callback handed to std::random_shuffle; i must be nonzero
+
+// Fill old_vector with the pairs {1, 0} .. {N, 0} in ascending order and
+// input_vector with the same pairs in a time-seeded shuffled order.
+static void generate_random_input() {
+    std::srand(unsigned(std::time(0)));
+    for (uint64_t off = 1; off <= N; ++off) {
+        MhsRbTree::Node::BlockPair pair = {off, 0};
+        input_vector.push_back(pair);
+        old_vector[off - 1] = pair;
+    }
+    // shuffle using std::rand() through the myrandom callback
+    std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom);
+}
+
+// Insert the N shuffled pairs, run ValidateBalance and ValidateInOrder (the
+// latter given old_vector), print min/max offsets, then RawRemove them all.
+static void test_insert_remove(void) {
+    MhsRbTree::Tree *tree = new MhsRbTree::Tree();
+    // NOTE(review): verbose is cleared after test_main parsed the arguments,
+    // so the offset dump below cannot run -- confirm that is intended.
+    verbose = 0;
+    generate_random_input();
+    if (verbose) {
+        printf("\n we are going to insert the following block offsets\n");
+        for (int k = 0; k < N; k++)
+            printf("%" PRIu64 "\t", input_vector[k]._offset.ToInt());
+    }
+    for (int k = 0; k < N; k++) {
+        tree->Insert(input_vector[k]);
+    }
+    tree->ValidateBalance();
+    MhsRbTree::Node::BlockPair *expected = old_vector;
+    tree->ValidateInOrder(expected);
+    printf("min node of the tree:%" PRIu64 "\n",
+           rbn_offset(tree->MinNode()).ToInt());
+    printf("max node of the tree:%" PRIu64 "\n",
+           rbn_offset(tree->MaxNode()).ToInt());
+
+    for (int k = 0; k < N; k++) {
+        tree->RawRemove(input_vector[k]._offset.ToInt());
+    }
+    tree->Destroy();
+    delete tree;
+}
+
+// Test driver entry point: parse the common test arguments, run the test,
+// and print "test ok" if verbose is still set afterwards.
+int test_main(int argc, const char *argv[]) {
+    default_parse_args(argc, argv);
+    test_insert_remove();
+    if (verbose) { printf("test ok\n"); }
+    return 0;
+}
diff --git a/storage/tokudb/PerconaFT/ft/txn/roll.cc b/storage/tokudb/PerconaFT/ft/txn/roll.cc
index 407116b983c1a..9f3977743a049 100644
--- a/storage/tokudb/PerconaFT/ft/txn/roll.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/roll.cc
@@ -38,18 +38,18 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
 /* rollback and rollforward routines. */
 
-
-#include "ft/ft.h"
+#include <memory>
 #include "ft/ft-ops.h"
+#include "ft/ft.h"
 #include "ft/log_header.h"
 #include "ft/logger/log-internal.h"
-#include "ft/txn/xids.h"
 #include "ft/txn/rollback-apply.h"
+#include "ft/txn/xids.h"
 
 // functionality provided by roll.c is exposed by an autogenerated
 // header file, logheader.h
 //
-// this (poorly) explains the absense of "roll.h"
+// this (poorly) explains the absence of "roll.h"
 
 // these flags control whether or not we send commit messages for
 // various operations
@@ -162,10 +162,122 @@ toku_rollback_fcreate (FILENUM    filenum,
     // directory row lock for its dname) and we would not get this
     // far if there were other live handles.
     toku_cachefile_unlink_on_close(cf);
+    toku_cachefile_skip_log_recover_on_close(cf);
 done:
     return 0;
 }
 
+// Commit side of frename: there is nothing to do, so every argument is
+// ignored and 0 is returned.
+int toku_commit_frename(BYTESTRING UU(old_iname), BYTESTRING UU(new_iname),
+                        TOKUTXN UU(txn), LSN UU(oplsn)) {
+    return 0;
+}
+
+// Roll back an frename: move the file at new_iname back to old_iname on disk
+// when needed and rename any cachefile found for new_iname to old_iname.
+// Returns 1 if a filesystem call fails (a missing file is not a failure).
+int toku_rollback_frename(BYTESTRING old_iname,
+                          BYTESTRING new_iname,
+                          TOKUTXN txn,
+                          LSN UU(oplsn)) {
+    assert(txn);
+    assert(txn->logger);
+    assert(txn->logger->ct);
+
+    CACHETABLE cachetable = txn->logger->ct;
+
+    toku_struct_stat stat;
+    bool old_exist = true;
+    bool new_exist = true;
+
+    std::unique_ptr<char[], decltype(&toku_free)> old_iname_full(
+        toku_cachetable_get_fname_in_cwd(cachetable, old_iname.data),
+        &toku_free);
+    std::unique_ptr<char[], decltype(&toku_free)> new_iname_full(
+        toku_cachetable_get_fname_in_cwd(cachetable, new_iname.data),
+        &toku_free);
+
+    if (toku_stat(old_iname_full.get(), &stat) == -1) {
+        if (ENOENT == errno)
+            old_exist = false;
+        else
+            return 1;
+    }
+
+    if (toku_stat(new_iname_full.get(), &stat) == -1) {
+        if (ENOENT == errno)
+            new_exist = false;
+        else
+            return 1;
+    }
+
+    // Both old and new files can exist if:
+    // - rename() is not completed
+    // - fcreate was replayed during recovery
+    // The 'stale cachefiles' container cachefile_list::m_stale_fileid holds
+    // closed but not yet evicted cachefiles, keyed by the fs-dependent file id
+    // - a (device id, inode number) pair. To preserve the new file's id and
+    // keep its cachefile in that container, the old file is removed and the
+    // new file is renamed.
+    if (old_exist && new_exist &&
+        (toku_os_unlink(old_iname_full.get()) == -1 ||
+         toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 ||
+         toku_fsync_directory(new_iname_full.get()) == -1 ||
+         toku_fsync_directory(old_iname_full.get()) == -1))
+        return 1;
+
+    if (!old_exist && new_exist &&
+        (toku_os_rename(new_iname_full.get(), old_iname_full.get()) == -1 ||
+         toku_fsync_directory(new_iname_full.get()) == -1 ||
+         toku_fsync_directory(old_iname_full.get()) == -1))
+        return 1;
+
+    // it's ok if both files do not exist on recovery
+    if (!old_exist && !new_exist)
+        assert(txn->for_recovery);
+
+    CACHEFILE cf = nullptr;
+    int r = toku_cachefile_of_iname_in_env(cachetable, new_iname.data, &cf);
+    // Only touch cf when the lookup actually produced a cachefile.
+    if (r == 0) {
+        char *old_fname_in_cf = toku_cachefile_fname_in_env(cf);
+        toku_cachefile_set_fname_in_env(cf, toku_xstrdup(old_iname.data));
+        toku_free(old_fname_in_cf);
+        // There is at least one case where fclose logging causes an error:
+        // 1) start transaction
+        // 2) create ft 'a' (write "fcreate" in recovery log)
+        // 3) rename ft 'a' to 'b' (write "frename" in recovery log)
+        // 4) abort transaction:
+        //    a) rollback rename ft (renames 'b' to 'a')
+        //    b) rollback create ft (removes 'a'):
+        //       invokes toku_cachefile_unlink_on_close - lazy unlink on file
+        //       close, it just sets the corresponding flag in the cachefile
+        //       object
+        //    c) write "unlink" for 'a' in recovery log
+        //       (when the transaction is aborted all locks are released; when
+        //       the file lock is released the file is closed and unlinked if
+        //       the corresponding flag is set in the cachefile object)
+        // 5) crash
+        //
+        // After this we have the following records in recovery log:
+        // - create ft 'a',
+        // - rename 'a' to 'b',
+        // - unlink 'a'
+        //
+        // On recovery:
+        // - create 'a'
+        // - rename 'a' to 'b'
+        // - unlink 'a' - as file 'a' does not exist we crash on an assert here
+        //
+        // There is no need to write "unlink" in recovery log in (4a) because
+        // 'a' will be removed on transaction rollback on recovery.
+        toku_cachefile_skip_log_recover_on_close(cf);
+    }
+
+    return 0;
+}
+
 int find_ft_from_filenum (const FT &ft, const FILENUM &filenum);
 int find_ft_from_filenum (const FT &ft, const FILENUM &filenum) {
     FILENUM thisfnum = toku_cachefile_filenum(ft->cf);
diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc
index df830afd0df68..c9464c3ed60a3 100644
--- a/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/rollback-apply.cc
@@ -169,7 +169,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
             txn->roll_info.spilled_rollback_head      = ROLLBACK_NONE; 
             txn->roll_info.spilled_rollback_tail      = ROLLBACK_NONE; 
         }
-        // if we're commiting a child rollback, put its entries into the parent
+        // if we're committing a child rollback, put its entries into the parent
         // by pinning both child and parent and then linking the child log entry
         // list to the end of the parent log entry list.
         if (txn_has_current_rollback_log(txn)) {
diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc
index 68c94c2ad1192..08d7c8874e50e 100644
--- a/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc
+++ b/storage/tokudb/PerconaFT/ft/txn/rollback-ct-callbacks.cc
@@ -59,21 +59,18 @@ rollback_log_destroy(ROLLBACK_LOG_NODE log) {
 
 // flush an ununused log to disk, by allocating a size 0 blocknum in
 // the blocktable
-static void
-toku_rollback_flush_unused_log(
-    ROLLBACK_LOG_NODE log,
-    BLOCKNUM logname,
-    int fd,
-    FT ft,
-    bool write_me,
-    bool keep_me,
-    bool for_checkpoint,
-    bool is_clone
-    )
-{
+static void toku_rollback_flush_unused_log(ROLLBACK_LOG_NODE log,
+                                           BLOCKNUM logname,
+                                           int fd,
+                                           FT ft,
+                                           bool write_me,
+                                           bool keep_me,
+                                           bool for_checkpoint,
+                                           bool is_clone) {
     if (write_me) {
         DISKOFF offset;
-        ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX);
+        ft->blocktable.realloc_on_disk(
+            logname, 0, &offset, ft, fd, for_checkpoint);
     }
     if (!keep_me && !is_clone) {
         toku_free(log);
diff --git a/storage/tokudb/PerconaFT/ft/ule.cc b/storage/tokudb/PerconaFT/ft/ule.cc
index ac393fbf17991..e3dce6d27dd8d 100644
--- a/storage/tokudb/PerconaFT/ft/ule.cc
+++ b/storage/tokudb/PerconaFT/ft/ule.cc
@@ -587,8 +587,8 @@ bool toku_le_worth_running_garbage_collection(
 //                by new txns.
 //            2.) There is only one committed entry, but the outermost
 //                provisional entry is older than the oldest known referenced
-//                xid, so it must have commited. Therefor we can promote it to
-//                committed and get rid of the old commited entry.
+//                xid, so it must have committed. Therefore we can promote it to
+//                committed and get rid of the old committed entry.
     if (le->type != LE_MVCC) {
         return false;
     }
diff --git a/storage/tokudb/PerconaFT/portability/CMakeLists.txt b/storage/tokudb/PerconaFT/portability/CMakeLists.txt
index 9f84d9b03df2a..4793db63cc1e8 100644
--- a/storage/tokudb/PerconaFT/portability/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/portability/CMakeLists.txt
@@ -14,12 +14,11 @@ set(tokuportability_srcs
   )
 
 add_library(${LIBTOKUPORTABILITY} SHARED ${tokuportability_srcs})
-target_link_libraries(${LIBTOKUPORTABILITY} LINK_PRIVATE ${LIBJEMALLOC})
 target_link_libraries(${LIBTOKUPORTABILITY} LINK_PUBLIC ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
 
 add_library(tokuportability_static_conv STATIC ${tokuportability_srcs})
 set_target_properties(tokuportability_static_conv PROPERTIES POSITION_INDEPENDENT_CODE ON)
-set(tokuportability_source_libs tokuportability_static_conv ${LIBJEMALLOC} ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
+set(tokuportability_source_libs tokuportability_static_conv ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
 toku_merge_static_libs(${LIBTOKUPORTABILITY}_static ${LIBTOKUPORTABILITY}_static "${tokuportability_source_libs}")
 
 maybe_add_gcov_to_libraries(${LIBTOKUPORTABILITY} tokuportability_static_conv)
diff --git a/storage/tokudb/PerconaFT/portability/file.cc b/storage/tokudb/PerconaFT/portability/file.cc
index 5332a2dff5553..0e3efc1a12afc 100644
--- a/storage/tokudb/PerconaFT/portability/file.cc
+++ b/storage/tokudb/PerconaFT/portability/file.cc
@@ -356,6 +356,12 @@ toku_os_close(int fd) {  // if EINTR, retry until success
     return r;
 }
 
+int toku_os_rename(const char *old_name, const char *new_name) {
+    return rename(old_name, new_name);  // rename()'s result is returned unchanged
+}
+
+int toku_os_unlink(const char *path) { return unlink(path); }  // unlink()'s result is returned unchanged
+
 ssize_t 
 toku_os_read(int fd, void *buf, size_t count) {
     ssize_t r;
diff --git a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc
index bc48e93937da4..8e73c56a6c549 100644
--- a/storage/tokudb/PerconaFT/portability/huge_page_detection.cc
+++ b/storage/tokudb/PerconaFT/portability/huge_page_detection.cc
@@ -90,7 +90,13 @@ static bool check_huge_pages_in_practice(void)
 
     const long pagesize = 4096;
     const long n_pages = TWO_MB/pagesize;
+#ifdef __linux__
+    // On linux mincore is defined as mincore(void *, size_t, unsigned char *)
     unsigned char vec[n_pages];
+#else
+    // On BSD (OS X included) it is defined as mincore(void *, size_t, char *)
+    char vec[n_pages];
+#endif
     {
         int r = mincore(second, TWO_MB, vec);
         if (r!=0 && errno==ENOMEM) {
diff --git a/storage/tokudb/PerconaFT/portability/memory.cc b/storage/tokudb/PerconaFT/portability/memory.cc
index 2de12699c61f9..5430ff84b7059 100644
--- a/storage/tokudb/PerconaFT/portability/memory.cc
+++ b/storage/tokudb/PerconaFT/portability/memory.cc
@@ -313,6 +313,15 @@ toku_strdup(const char *s) {
     return (char *) toku_memdup(s, strlen(s)+1);
 }
 
+char *toku_strndup(const char *s, size_t n) {
+    // Scan at most n bytes, so s need not be NUL-terminated beyond that;
+    // a NULL from toku_malloc() is passed back instead of being written to.
+    size_t len = strnlen(s, n);
+    char *result = (char *)toku_malloc(len + 1);
+    if (result) { memcpy(result, s, len); result[len] = '\0'; }
+    return result;
+}
+
 void
 toku_free(void *p) {
     if (p) {
diff --git a/storage/tokudb/PerconaFT/portability/memory.h b/storage/tokudb/PerconaFT/portability/memory.h
index 7780536f279ec..5ae652d39fc52 100644
--- a/storage/tokudb/PerconaFT/portability/memory.h
+++ b/storage/tokudb/PerconaFT/portability/memory.h
@@ -125,7 +125,9 @@ size_t toku_malloc_usable_size(void *p) __attribute__((__visibility__("default")
 void *toku_memdup (const void *v, size_t len);
 /* Toku-version of strdup.  Use this so that it calls toku_malloc() */
 char *toku_strdup (const char *s)   __attribute__((__visibility__("default")));
-
+/* Toku-version of strndup.  Use this so that it calls toku_malloc() */
+char *toku_strndup(const char *s, size_t n)
+    __attribute__((__visibility__("default")));
 /* Copy memory.  Analogous to strdup() Crashes instead of returning NULL */
 void *toku_xmemdup (const void *v, size_t len) __attribute__((__visibility__("default")));
 /* Toku-version of strdup.  Use this so that it calls toku_xmalloc()  Crashes instead of returning NULL */
diff --git a/storage/tokudb/PerconaFT/portability/portability.cc b/storage/tokudb/PerconaFT/portability/portability.cc
index ba9f8d48ed5dc..19f445a85d7f4 100644
--- a/storage/tokudb/PerconaFT/portability/portability.cc
+++ b/storage/tokudb/PerconaFT/portability/portability.cc
@@ -63,6 +63,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #if defined(HAVE_SYS_SYSCTL_H)
 # include <sys/sysctl.h>
 #endif
+#if defined(HAVE_PTHREAD_H)
+# include <pthread.h>
+#endif
 #if defined(HAVE_PTHREAD_NP_H)
 # include <pthread_np.h>
 #endif
@@ -102,7 +105,11 @@ toku_os_getpid(void) {
 
 int
 toku_os_gettid(void) {
-#if defined(__NR_gettid)
+#if defined(HAVE_PTHREAD_THREADID_NP)
+    uint64_t result;
+    pthread_threadid_np(NULL, &result);
+    return (int) result; // Used for instrumentation so overflow is ok here.
+#elif defined(__NR_gettid)
     return syscall(__NR_gettid);
 #elif defined(SYS_gettid)
     return syscall(SYS_gettid);
diff --git a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc
index 880f9a3a9bbf7..dbbea974a49ff 100644
--- a/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc
+++ b/storage/tokudb/PerconaFT/portability/tests/test-max-data.cc
@@ -64,7 +64,7 @@ int main(int argc, char *const argv[]) {
     if (verbose) printf("maxdata=%" PRIu64 " 0x%" PRIx64 "\n", maxdata, maxdata);
 
     // check the data size
-#if __x86_64__
+#if defined(__x86_64__) || defined(__aarch64__)
     assert(maxdata > (1ULL << 32));
 #elif __i386__
     assert(maxdata < (1ULL << 32));
diff --git a/storage/tokudb/PerconaFT/portability/tests/test-xid.cc b/storage/tokudb/PerconaFT/portability/tests/test-xid.cc
index 9ee68906bb37a..71736f898ef87 100644
--- a/storage/tokudb/PerconaFT/portability/tests/test-xid.cc
+++ b/storage/tokudb/PerconaFT/portability/tests/test-xid.cc
@@ -51,11 +51,18 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #if defined(HAVE_PTHREAD_NP_H)
 # include <pthread_np.h>
 #endif
+#if defined(HAVE_PTHREAD_H)
+# include <pthread.h>
+#endif
 
 // since we implement the same thing here as in toku_os_gettid, this test
 // is pretty pointless
 static int gettid(void) {
-#if defined(__NR_gettid)
+#if defined(HAVE_PTHREAD_THREADID_NP)
+    uint64_t result;
+    pthread_threadid_np(NULL, &result);
+    return (int) result;
+#elif defined(__NR_gettid)
     return syscall(__NR_gettid);
 #elif defined(SYS_gettid)
     return syscall(SYS_gettid);
diff --git a/storage/tokudb/PerconaFT/portability/toku_config.h.in b/storage/tokudb/PerconaFT/portability/toku_config.h.in
index e1412cc9e14d5..18f6779796fe5 100644
--- a/storage/tokudb/PerconaFT/portability/toku_config.h.in
+++ b/storage/tokudb/PerconaFT/portability/toku_config.h.in
@@ -42,7 +42,6 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
 #cmakedefine TOKU_DEBUG_PARANOID 1
 #cmakedefine USE_VALGRIND 1
-
 #cmakedefine HAVE_ALLOCA_H 1
 #cmakedefine HAVE_ARPA_INET_H 1
 #cmakedefine HAVE_BYTESWAP_H 1
@@ -88,6 +87,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #cmakedefine HAVE_PTHREAD_RWLOCKATTR_SETKIND_NP 1
 #cmakedefine HAVE_PTHREAD_YIELD 1
 #cmakedefine HAVE_PTHREAD_YIELD_NP 1
+#cmakedefine HAVE_PTHREAD_THREADID_NP 1
 #cmakedefine HAVE_PTHREAD_GETTHREADID_NP 1
 
 #cmakedefine PTHREAD_YIELD_RETURNS_INT 1
diff --git a/storage/tokudb/PerconaFT/portability/toku_portability.h b/storage/tokudb/PerconaFT/portability/toku_portability.h
index 921d3a309f6c0..f127b0fe172dc 100644
--- a/storage/tokudb/PerconaFT/portability/toku_portability.h
+++ b/storage/tokudb/PerconaFT/portability/toku_portability.h
@@ -246,6 +246,8 @@ int toku_os_open(const char *path, int oflag, int mode);
 int toku_os_open_direct(const char *path, int oflag, int mode);
 int toku_os_close(int fd);
 int toku_os_fclose(FILE * stream);
+int toku_os_rename(const char *old_name, const char *new_name);
+int toku_os_unlink(const char *path);
 ssize_t toku_os_read(int fd, void *buf, size_t count);
 ssize_t toku_os_pread(int fd, void *buf, size_t count, off_t offset);
 void toku_os_recursive_delete(const char *path);
diff --git a/storage/tokudb/PerconaFT/portability/toku_time.h b/storage/tokudb/PerconaFT/portability/toku_time.h
index 11a3f3aa2b99c..a1278ef033731 100644
--- a/storage/tokudb/PerconaFT/portability/toku_time.h
+++ b/storage/tokudb/PerconaFT/portability/toku_time.h
@@ -98,9 +98,17 @@ double tokutime_to_seconds(tokutime_t)  __attribute__((__visibility__("default")
 
 // Get the value of tokutime for right now.  We want this to be fast, so we expose the implementation as RDTSC.
 static inline tokutime_t toku_time_now(void) {
+#if defined(__x86_64__) || defined(__i386__)
     uint32_t lo, hi;
     __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
     return (uint64_t)hi << 32 | lo;
+#elif defined (__aarch64__)
+    uint64_t result;
+    __asm __volatile__ ("mrs %[rt], cntvct_el0" : [rt] "=r" (result));
+    return result;
+#else
+#error No timer implementation for this platform
+#endif
 }
 
 static inline uint64_t toku_current_time_microsec(void) {
diff --git a/storage/tokudb/PerconaFT/src/indexer-internal.h b/storage/tokudb/PerconaFT/src/indexer-internal.h
index 48e62ee49b2d7..fdaa561e3d027 100644
--- a/storage/tokudb/PerconaFT/src/indexer-internal.h
+++ b/storage/tokudb/PerconaFT/src/indexer-internal.h
@@ -42,7 +42,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #include <toku_pthread.h>
 
 // the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array.
-// the array is a resizeable array with max size "max_keys" and current size "current_keys".
+// the array is a resizable array with max size "max_keys" and current size "current_keys".
 // the ordered set is used by the hotindex undo function to collect the commit keys.
 struct indexer_commit_keys {
     int max_keys;        // max number of keys
diff --git a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc
index 8d0b080b9fe88..4c7f5336161da 100644
--- a/storage/tokudb/PerconaFT/src/indexer-undo-do.cc
+++ b/storage/tokudb/PerconaFT/src/indexer-undo-do.cc
@@ -528,7 +528,7 @@ indexer_find_prev_xr(DB_INDEXER *UU(indexer), ULEHANDLE ule, uint64_t xrindex, u
 }
 
 // inject "delete" message into ft with logging in recovery and rollback logs,
-// and making assocation between txn and ft
+// and making association between txn and ft
 static int 
 indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids, TOKUTXN txn) {
     int result = 0;
@@ -577,7 +577,7 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi
 }
 
 // inject "insert" message into ft with logging in recovery and rollback logs,
-// and making assocation between txn and ft
+// and making association between txn and ft
 static int 
 indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids, TOKUTXN txn) {
     int result = 0;
diff --git a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt
index 47f6aa44a75e8..c01a8f0d62870 100644
--- a/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/src/tests/CMakeLists.txt
@@ -108,11 +108,11 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS)
   foreach(ov c d r)
 
     if (ov STREQUAL c)
-      set(gset 0)
       set(hset 0)
+      set(iset 0)
     else ()
-      set(gset 0 1 2 3 4 5)
-      set(hset 0 1)
+      set(hset 0 1 2 3 4 5)
+      set(iset 0 1)
     endif ()
 
     foreach(av 0 1)
@@ -130,25 +130,27 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS)
           foreach(dv ${dset})
             foreach(ev ${eset})
               foreach(fv 0 1)
-                foreach(gv ${gset})
+                foreach(gv 0 1)
                   foreach(hv ${hset})
-
-                    if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv)))
-                      set(iset 0 1)
-                    else ()
-                      set(iset 0)
-                    endif ()
-
                     foreach(iv ${iset})
-                      set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}")
-                      set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}")
-                      set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}.ctest-errors")
-                      add_test(NAME ${testname}
-                        COMMAND run_recovery_fileops_unit.sh $<TARGET_FILE:recovery_fileops_unit.tdb> ${errfile} 137
-                        -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv}
-                        )
-                      setup_toku_test_properties(${testname} ${envdir})
-                      set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}")
+
+                      if ((NOT ov STREQUAL c) AND (NOT cv) AND ((NOT bv) OR (NOT ev) OR (dv)))
+                        set(jset 0 1)
+                      else ()
+                        set(jset 0)
+                      endif ()
+
+                      foreach(jv ${jset})
+                        set(testname "ydb/recovery_fileops_unit.${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}")
+                        set(envdir "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}")
+                        set(errfile "recovery_fileops_unit_dir/${ov}${av}${bv}${cv}${dv}${ev}${fv}${gv}${hv}${iv}${jv}.ctest-errors")
+                        add_test(NAME ${testname}
+                          COMMAND run_recovery_fileops_unit.sh $<TARGET_FILE:recovery_fileops_unit.tdb> ${errfile} 137
+                          -O ${ov} -A ${av} -B ${bv} -C ${cv} -D ${dv} -E ${ev} -F ${fv} -G ${gv} -H ${hv} -I ${iv} -J ${jv}
+                          )
+                        setup_toku_test_properties(${testname} ${envdir})
+                        set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${errfile}")
+                      endforeach(jv)
                     endforeach(iv)
                   endforeach(hv)
                 endforeach(gv)
diff --git a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test
index 20df13923e6d5..7cce68e6ff8e5 100644
--- a/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test
+++ b/storage/tokudb/PerconaFT/src/tests/hotindexer-undo-do-tests/commit.i0.test
@@ -1,3 +1,3 @@
-# commited insert
+# committed insert
 key k1
 insert committed 0 v100
diff --git a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc
index 3f2f8d7455a98..aaf77c503cc42 100644
--- a/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc
+++ b/storage/tokudb/PerconaFT/src/tests/loader-dup-test.cc
@@ -51,7 +51,7 @@ int DISALLOW_PUTS=0;
 int COMPRESS=0;
 enum {MAGIC=311};
 
-bool dup_row_at_end = false; // false: duplicate at the begining.  true: duplicate at the end.   The duplicated row is row 0.
+bool dup_row_at_end = false; // false: duplicate at the beginning.  true: duplicate at the end.   The duplicated row is row 0.
 int  dup_row_id     = 0;     // 0 means to use row 1 if inserting at the end, row NUM_ROWS if inserting at the beginning.  Otherwise insert the row specified here.
 
 //
diff --git a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
index a4dc0ea9236b4..cc99ab560d85e 100644
--- a/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
+++ b/storage/tokudb/PerconaFT/src/tests/recovery_fileops_unit.cc
@@ -36,17 +36,17 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
 #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
 
-#include "test.h"
-#include "toku_pthread.h"
 #include <db.h>
-#include <sys/stat.h>
 #include <stdlib.h>
-
+#include <sys/stat.h>
+#include "ft/logger/logger.h"
+#include "test.h"
+#include "toku_pthread.h"
 
 static int do_recover;
 static int do_crash;
 static char fileop;
-static int choices['I'-'A'+1];
+static int choices['J' - 'A' + 1];
 const int num_choices = sizeof(choices)/sizeof(choices[0]);
 static DB_TXN *txn;
 const char *oldname = "oldfoo";
@@ -58,11 +58,14 @@ static char *cmd;
 
 static void
 usage(void) {
-    fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# -F# [-G# -H# -I#]\n"
-                    "  fileop = c/r/d (create/rename/delete)\n"
-                    "  Where # is a single digit number > 0.\n"
-                    "  A-F are required for fileop=create\n"
-                    "  A-I are required for fileop=delete, fileop=rename\n", cmd);
+    fprintf(stderr,
+            "Usage:\n%s [-v|-q]* [-h] (-c|-r) -O fileop -A# -B# -C# -D# -E# "
+            "-F# -G# [-H# -I# -J#]\n"
+            "  fileop = c/r/d (create/rename/delete)\n"
+            "  Where # is a single digit number >= 0.\n"
+            "  A-G are required for fileop=create\n"
+            "  A-J are required for fileop=delete, fileop=rename\n",
+            cmd);
     exit(1);
 }
 
@@ -129,19 +132,18 @@ get_choice_flush_log_before_crash(void) {
     return get_bool_choice('F');
 }
 
-static int
-get_choice_create_type(void) {
-    return get_x_choice('G', 6);
-}
+static int get_choice_dir_per_db(void) { return get_bool_choice('G'); }
+
+static int get_choice_create_type(void) { return get_x_choice('H', 6); }
 
 static int
 get_choice_txn_does_open_close_before_fileop(void) {
-    return get_bool_choice('H');
+    return get_bool_choice('I');
 }
 
 static int
 get_choice_lock_table_split_fcreate(void) {
-    int choice = get_bool_choice('I');
+    int choice = get_bool_choice('J');
     if (choice)
         assert(fileop_did_commit());
     return choice;
@@ -157,62 +159,64 @@ do_args(int argc, char * const argv[]) {
     }
 
     char c;
-    while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:X:")) != -1) {
-	switch(c) {
-        case 'v':
-	    verbose++;
-            break;
-        case 'q':
-            verbose--;
-	    if (verbose<0) verbose=0;
-            break;
-        case 'h':
-        case '?':
-            usage();
-            break;
-        case 'c':
-            do_crash = 1;
-            break;
-        case 'r':
-            do_recover = 1;
-            break;
-        case 'O':
-            if (fileop != '\0')
+    while ((c = getopt(argc, argv, "vqhcrO:A:B:C:D:E:F:G:H:I:J:X:")) != -1) {
+        switch (c) {
+            case 'v':
+                verbose++;
+                break;
+            case 'q':
+                verbose--;
+                if (verbose < 0)
+                    verbose = 0;
+                break;
+            case 'h':
+            case '?':
                 usage();
-            fileop = optarg[0];
-            switch (fileop) {
-                case 'c':
-                case 'r':
-                case 'd':
-                    break;
-                default:
+                break;
+            case 'c':
+                do_crash = 1;
+                break;
+            case 'r':
+                do_recover = 1;
+                break;
+            case 'O':
+                if (fileop != '\0')
                     usage();
-                    break;
-            }
-            break;
-        case 'A':
-        case 'B':
-        case 'C':
-        case 'D':
-        case 'E':
-        case 'F':
-        case 'G':
-        case 'H':
-        case 'I':
-            if (fileop == '\0')
-                usage();
-            int num;
-            num = atoi(optarg);
-            if (num < 0 || num > 9)
-                usage();
-            choices[c - 'A'] = num;
-            break;
-        case 'X':
-            if (strcmp(optarg, "novalgrind") == 0) {
-                // provide a way for the shell script runner to pass an
-                // arg that suppresses valgrind on this child process
+                fileop = optarg[0];
+                switch (fileop) {
+                    case 'c':
+                    case 'r':
+                    case 'd':
+                        break;
+                    default:
+                        usage();
+                        break;
+                }
+                break;
+            case 'A':
+            case 'B':
+            case 'C':
+            case 'D':
+            case 'E':
+            case 'F':
+            case 'G':
+            case 'H':
+            case 'I':
+            case 'J':
+                if (fileop == '\0')
+                    usage();
+                int num;
+                num = atoi(optarg);
+                if (num < 0 || num > 9)
+                    usage();
+                choices[c - 'A'] = num;
                 break;
-            }
+            case 'X':
+                if (strcmp(optarg, "novalgrind") == 0) {
+                    // provide a way for the shell script runner to pass an
+                    // arg that suppresses valgrind on this child process
+                    break;
+                }
             // otherwise, fall through to an error
 	default:
             usage();
@@ -222,7 +226,7 @@ do_args(int argc, char * const argv[]) {
     if (argc!=optind) { usage(); exit(1); }
 
     for (i = 0; i < num_choices; i++) {
-        if (i >= 'G' - 'A' && fileop == 'c')
+        if (i >= 'H' - 'A' && fileop == 'c')
             break;
         if (choices[i] == -1)
             usage();
@@ -261,6 +265,8 @@ static void env_startup(void) {
     int envflags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_PRIVATE | recover_flag;
     r = db_env_create(&env, 0);
     CKERR(r);
+    r = env->set_dir_per_db(env, get_choice_dir_per_db());
+    CKERR(r);
     env->set_errfile(env, stderr);
     r = env->open(env, TOKU_TEST_FILENAME, envflags, S_IRWXU+S_IRWXG+S_IRWXO);
     CKERR(r);
@@ -625,8 +631,11 @@ recover_and_verify(void) {
         else if (did_create_commit_early())
             expect_old_name = 1;
     }
-    verify_file_exists(oldname, expect_old_name);
-    verify_file_exists(newname, expect_new_name);
+    // Files can only be checked if the recovery log was flushed before the crash
+    if ((get_choice_flush_log_before_crash())) {
+        verify_file_exists(oldname, expect_old_name);
+        verify_file_exists(newname, expect_new_name);
+    }
     env_shutdown();
 }
 
diff --git a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc
index a2b48e443cdfd..48843a0bd3257 100644
--- a/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc
+++ b/storage/tokudb/PerconaFT/src/tests/stat64-root-changes.cc
@@ -166,7 +166,7 @@ run_test (void) {
 
         DB_BTREE_STAT64 s;
         r = db->stat64(db, NULL, &s); CKERR(r);
-        assert(s.bt_nkeys == 0);
+        assert(s.bt_nkeys == 1);
 
         r = db->close(db, 0);     CKERR(r);
 
@@ -176,7 +176,7 @@ run_test (void) {
         r = txn->commit(txn, 0);    CKERR(r);
 
         r = db->stat64(db, NULL, &s); CKERR(r);
-        assert(s.bt_nkeys == 0);
+        assert(s.bt_nkeys == 1);
     }
 
     // verify update callback overwrites the row
diff --git a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc
index 8e5109cd2a973..f6111d4b67c0b 100644
--- a/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc
+++ b/storage/tokudb/PerconaFT/src/tests/test_insert_many_gc.cc
@@ -78,7 +78,7 @@ static void test_insert_many_gc(void) {
     // from having an MVCC stack of size 'N'. At the time of this
     // writing, we run full GC on leaf-inject when the leaf is
     // 32mb or larger. A good invariant is that the max LE size
-    // never grew larger than 35mb and that the max commited xr stack
+    // never grew larger than 35mb and that the max committed xr stack
     // length never exceeded 35
     const uint64_t le_max_memsize = get_engine_status_val(env, "LE_MAX_MEMSIZE");
     const uint64_t le_max_committed_xr = get_engine_status_val(env, "LE_MAX_COMMITTED_XR");
diff --git a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc
index aaafe284906a0..88140dd173184 100644
--- a/storage/tokudb/PerconaFT/src/tests/test_stress0.cc
+++ b/storage/tokudb/PerconaFT/src/tests/test_stress0.cc
@@ -53,7 +53,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 // This test is a micro stress test that does multithreaded updates on a fixed size table.
 // There is also a thread that scans the table with bulk fetch, ensuring the sum is zero.
 //
-// This test is targetted at stressing the locktree, hence the small table and many update threads.
+// This test is targeted at stressing the locktree, hence the small table and many update threads.
 //
 
 static int UU() lock_escalation_op(DB_TXN *UU(txn), ARG arg, void* operation_extra, void *UU(stats_extra)) {
diff --git a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc
index fec454b80093b..301eed1560e13 100644
--- a/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc
+++ b/storage/tokudb/PerconaFT/src/tests/test_txn_abort5a.cc
@@ -123,7 +123,8 @@ test_main(int argc, char *const argv[]) {
             continue;
         }
     }
-    if (verbose>0) printf("%s", __FILE__); if (verbose>1) printf("\n");
+    if (verbose>0) printf("%s", __FILE__);
+    if (verbose>1) printf("\n");
     for (i=1; i<100; i++) 
         test_txn_abort(i);
     if (verbose>1) printf("%s OK\n", __FILE__);
diff --git a/storage/tokudb/PerconaFT/src/ydb-internal.h b/storage/tokudb/PerconaFT/src/ydb-internal.h
index 462a2a3d861e3..d40f7795b0b86 100644
--- a/storage/tokudb/PerconaFT/src/ydb-internal.h
+++ b/storage/tokudb/PerconaFT/src/ydb-internal.h
@@ -114,7 +114,7 @@ struct __toku_db_env_internal {
 
     char *real_data_dir;                                // data dir used when the env is opened (relative to cwd, or absolute with leading /)
     char *real_log_dir;                                 // log dir used when the env is opened  (relative to cwd, or absolute with leading /)
-    char *real_tmp_dir;                                 // tmp dir used for temporary files (relative to cwd, or absoulte with leading /)
+    char *real_tmp_dir;                                 // tmp dir used for temporary files (relative to cwd, or absolute with leading /)
 
     fs_redzone_state fs_state;
     uint64_t fs_seq;                                    // how many times has fs_poller run?
@@ -132,7 +132,8 @@ struct __toku_db_env_internal {
     int datadir_lockfd;
     int logdir_lockfd;
     int tmpdir_lockfd;
-    bool check_thp;                                     // if set check if transparent huge pages are disables
+    bool check_thp;  // if set check if transparent huge pages are disabled
+    bool dir_per_db;  // if set, inames keep the db dir and rename renames the ft file
     uint64_t (*get_loader_memory_size_callback)(void);
     uint64_t default_lock_timeout_msec;
     uint64_t (*get_lock_timeout_callback)(uint64_t default_lock_timeout_msec);
diff --git a/storage/tokudb/PerconaFT/src/ydb.cc b/storage/tokudb/PerconaFT/src/ydb.cc
index aed271bce4069..3341f6d76c624 100644
--- a/storage/tokudb/PerconaFT/src/ydb.cc
+++ b/storage/tokudb/PerconaFT/src/ydb.cc
@@ -1298,6 +1298,22 @@ env_get_check_thp(DB_ENV * env) {
     return env->i->check_thp;
 }
 
+static bool env_set_dir_per_db(DB_ENV *env, bool new_val) {
+    HANDLE_PANICKED_ENV(env);
+    bool r = env->i->dir_per_db;
+    env->i->dir_per_db = new_val;
+    return r;
+}
+
+static bool env_get_dir_per_db(DB_ENV *env) {
+    HANDLE_PANICKED_ENV(env);
+    return env->i->dir_per_db;
+}
+
+static const char *env_get_data_dir(DB_ENV *env) {
+    return env->i->real_data_dir;
+}
+
 static int env_dbremove(DB_ENV * env, DB_TXN *txn, const char *fname, const char *dbname, uint32_t flags);
 
 static int
@@ -2700,6 +2716,9 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) {
     USENV(do_backtrace);
     USENV(set_check_thp);
     USENV(get_check_thp);
+    USENV(set_dir_per_db);
+    USENV(get_dir_per_db);
+    USENV(get_data_dir);
 #undef USENV
     
     // unlocked methods
@@ -3045,7 +3064,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co
     if (env_is_db_with_dname_open(env, newname)) {
         return toku_ydb_do_error(env, EINVAL, "Cannot rename dictionary; Dictionary with target name has an open handle.\n");
     }
-    
+
     DBT old_dname_dbt;  
     DBT new_dname_dbt;  
     DBT iname_dbt;  
@@ -3065,10 +3084,35 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co
             r = EEXIST;
         }
         else if (r == DB_NOTFOUND) {
+            DBT new_iname_dbt;
+            // Do not rename ft file if 'dir_per_db' option is not set
+            auto new_iname =
+                env->get_dir_per_db(env)
+                    ? generate_iname_for_rename_or_open(
+                          env, txn, newname, false)
+                    : std::unique_ptr<char[], decltype(&toku_free)>(
+                          toku_strdup(iname), &toku_free);
+            toku_fill_dbt(
+                &new_iname_dbt, new_iname.get(), strlen(new_iname.get()) + 1);
+
             // remove old (dname,iname) and insert (newname,iname) in directory
             r = toku_db_del(env->i->directory, txn, &old_dname_dbt, DB_DELETE_ANY, true);
             if (r != 0) { goto exit; }
-            r = toku_db_put(env->i->directory, txn, &new_dname_dbt, &iname_dbt, 0, true);
+
+            // Only rename the ft file when 'dir_per_db' is set; bail out on
+            // failure so that toku_db_put() below cannot overwrite the error.
+            if (env->get_dir_per_db(env)) {
+                r = toku_ft_rename_iname(txn, env->get_data_dir(env), iname,
+                                         new_iname.get(), env->i->cachetable);
+                if (r != 0) { goto exit; }
+            }
+
+            r = toku_db_put(env->i->directory,
+                            txn,
+                            &new_dname_dbt,
+                            &new_iname_dbt,
+                            0,
+                            true);
             if (r != 0) { goto exit; }
 
             //Now that we have writelocks on both dnames, verify that there are still no handles open. (to prevent race conditions)
@@ -3091,7 +3135,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co
             // otherwise, we're okay in marking this ft as remove on
             // commit. no new handles can open for this dictionary
             // because the txn has directory write locks on the dname
-            if (txn && !can_acquire_table_lock(env, txn, iname)) {
+            if (txn && !can_acquire_table_lock(env, txn, new_iname.get())) {
                 r = DB_LOCK_NOTGRANTED;
             }
             // We don't do anything at the ft or cachetable layer for rename.
diff --git a/storage/tokudb/PerconaFT/src/ydb_db.cc b/storage/tokudb/PerconaFT/src/ydb_db.cc
index e5bd4e7d089d5..100d1bfa20b14 100644
--- a/storage/tokudb/PerconaFT/src/ydb_db.cc
+++ b/storage/tokudb/PerconaFT/src/ydb_db.cc
@@ -83,8 +83,7 @@ ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp) {
     *statp = ydb_db_layer_status;
 }
 
-static void
-create_iname_hint(const char *dname, char *hint) {
+void create_iname_hint(const char *dname, char *hint) {
     //Requires: size of hint array must be > strlen(dname)
     //Copy alphanumeric characters only.
     //Replace strings of non-alphanumeric characters with a single underscore.
@@ -105,11 +104,43 @@ create_iname_hint(const char *dname, char *hint) {
     *hint = '\0';
 }
 
+// Variant of create_iname_hint() for 'dir_per_db': the first '/' is copied
+// through so the hint keeps its "dbdir/table" split.  A leading '.' and/or
+// '/' is skipped; other non-alphanumeric runs collapse to a single '_'.
+// Requires: size of hint array must be > strlen(dname)
+void create_iname_hint_for_dbdir(const char *dname, char *hint) {
+    assert(dname);
+    if (*dname == '.')
+        ++dname;
+    if (*dname == '/')
+        ++dname;
+    bool underscored = false;
+    bool dbdir_is_parsed = false;
+    for (; *dname; ++dname) {
+        const char c = *dname;
+        // <ctype.h> needs an unsigned char value; a negative char is UB.
+        if (isalnum((unsigned char)c) || (c == '/' && !dbdir_is_parsed)) {
+            *hint++ = c;
+            if (c == '/')
+                dbdir_is_parsed = true;
+            underscored = false;
+        } else if (!underscored) {
+            *hint++ = '_';
+            underscored = true;
+        }
+    }
+    *hint = '\0';
+}
+
 // n < 0  means to ignore mark and ignore n
 // n >= 0 means to include mark ("_B_" or "_P_") with hex value of n in iname
 // (intended for use by loader, which will create many inames using one txnid).
-static char *
-create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *mark, int n) {
+char *create_iname(DB_ENV *env,
+                   uint64_t id1,
+                   uint64_t id2,
+                   char *hint,
+                   const char *mark,
+                   int n) {
     int bytes;
     char inamebase[strlen(hint) +
                    8 +  // hex file format version
@@ -138,6 +169,34 @@ create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *ma
     return rval;
 }
 
+static uint64_t nontransactional_open_id = 0;  // id1 source for opens without a txn
+// Builds the iname for dname; the unique_ptr frees it with toku_free.
+std::unique_ptr<char[], decltype(&toku_free)> generate_iname_for_rename_or_open(
+    DB_ENV *env,
+    DB_TXN *txn,
+    const char *dname,
+    bool is_open) {
+    std::unique_ptr<char[], decltype(&toku_free)> result(nullptr, &toku_free);
+    char hint[strlen(dname) + 1];  // hint array must be > strlen(dname)
+    uint64_t id1 = 0;  // both ids stay 0 when there is no txn and !is_open
+    uint64_t id2 = 0;
+
+    if (txn) {  // ids come from the txn's parent/child txnid pair
+        id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64;
+        id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64;
+    } else if (is_open)  // no txn on the open path: id1 comes from the counter
+        id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1);
+    // Relative dname and get_dir_per_db() set: use the hint keeping the first '/'.
+    if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname))
+        create_iname_hint_for_dbdir(dname, hint);
+    else
+        create_iname_hint(dname, hint);
+    // mark = NULL and n = -1: create_iname ignores mark and n when n < 0.
+    result.reset(create_iname(env, id1, id2, hint, NULL, -1));
+
+    return result;
+}
+
 static int toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYPE dbtype, uint32_t flags, int mode);
 
 // Effect: Do the work required of DB->close().
@@ -227,8 +286,6 @@ db_open_subdb(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTY
     return r;
 }
 
-static uint64_t nontransactional_open_id = 0;
-
 // inames are created here.
 // algorithm:
 //  begin txn
@@ -286,27 +343,15 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP
     toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1);
     toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC);
     r = toku_db_get(db->dbenv->i->directory, txn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE);  // allocates memory for iname
-    char *iname = (char *) iname_dbt.data;
+    std::unique_ptr<char[], decltype(&toku_free)> iname(
+        static_cast<char *>(iname_dbt.data), &toku_free);
     if (r == DB_NOTFOUND && !is_db_create) {
         r = ENOENT;
     } else if (r==0 && is_db_excl) {
         r = EEXIST;
     } else if (r == DB_NOTFOUND) {
-        char hint[strlen(dname) + 1];
-
-        // create iname and make entry in directory
-        uint64_t id1 = 0;
-        uint64_t id2 = 0;
-
-        if (txn) {
-            id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64;
-            id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64;
-        } else {
-            id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1);
-        }
-        create_iname_hint(dname, hint);
-        iname = create_iname(db->dbenv, id1, id2, hint, NULL, -1);  // allocated memory for iname
-        toku_fill_dbt(&iname_dbt, iname, strlen(iname) + 1);
+        iname = generate_iname_for_rename_or_open(db->dbenv, txn, dname, true);
+        toku_fill_dbt(&iname_dbt, iname.get(), strlen(iname.get()) + 1);
         //
         // put_flags will be 0 for performance only, avoid unnecessary query
         // if we are creating a hot index, per #3166, we do not want the write lock  in directory grabbed.
@@ -318,16 +363,13 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP
 
     // we now have an iname
     if (r == 0) {
-        r = toku_db_open_iname(db, txn, iname, flags, mode);
+        r = toku_db_open_iname(db, txn, iname.get(), flags, mode);
         if (r == 0) {
             db->i->dname = toku_xstrdup(dname);
             env_note_db_opened(db->dbenv, db);  // tell env that a new db handle is open (using dname)
         }
     }
 
-    if (iname) {
-        toku_free(iname);
-    }
     return r;
 }
 
@@ -1181,7 +1223,10 @@ load_inames(DB_ENV * env, DB_TXN * txn, int N, DB * dbs[/*N*/], const char * new
         toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1);
         // now create new iname
         char hint[strlen(dname) + 1];
-        create_iname_hint(dname, hint);
+        if (env->get_dir_per_db(env) && !toku_os_is_absolute_name(dname))
+            create_iname_hint_for_dbdir(dname, hint);
+        else
+            create_iname_hint(dname, hint);
         const char *new_iname = create_iname(env, xid.parent_id64, xid.child_id64, hint, mark, i);               // allocates memory for iname_in_env
         new_inames_in_env[i] = new_iname;
         toku_fill_dbt(&iname_dbt, new_iname, strlen(new_iname) + 1);      // iname_in_env goes in directory
diff --git a/storage/tokudb/PerconaFT/src/ydb_db.h b/storage/tokudb/PerconaFT/src/ydb_db.h
index 8b92dd1c3cb83..8be28857c142b 100644
--- a/storage/tokudb/PerconaFT/src/ydb_db.h
+++ b/storage/tokudb/PerconaFT/src/ydb_db.h
@@ -43,6 +43,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 #include "ydb-internal.h"
 #include "ydb_txn.h"
 
+#include <memory>
+
 typedef enum {
     YDB_LAYER_DIRECTORY_WRITE_LOCKS = 0,        /* total directory write locks taken */
     YDB_LAYER_DIRECTORY_WRITE_LOCKS_FAIL,   /* total directory write locks unable to be taken */
@@ -119,3 +121,17 @@ toku_db_destruct_autotxn(DB_TXN *txn, int r, bool changed) {
     }
     return r; 
 }
+
+void create_iname_hint_for_dbdir(const char *dname, char *hint);  // keeps the first '/'
+void create_iname_hint(const char *dname, char *hint);  // hint size must be > strlen(dname)
+char *create_iname(DB_ENV *env,
+                   uint64_t id1,
+                   uint64_t id2,
+                   char *hint,
+                   const char *mark,
+                   int n);  // n < 0: ignore mark and n
+std::unique_ptr<char[], decltype(&toku_free)> generate_iname_for_rename_or_open(
+    DB_ENV *env,
+    DB_TXN *txn,
+    const char *dname,
+    bool is_open);  // without a txn, is_open makes id1 come from the open counter
diff --git a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess
index da8331460888a..7501b1bee019d 100644
--- a/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess
+++ b/storage/tokudb/PerconaFT/third_party/xz-4.999.9beta/build-aux/config.guess
@@ -1,10 +1,10 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
-#   Free Software Foundation, Inc.
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2009-04-27'
+timestamp='2016-06-22'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -17,9 +17,7 @@ timestamp='2009-04-27'
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
@@ -27,16 +25,16 @@ timestamp='2009-04-27'
 # the same distribution terms that you use for the rest of that program.
 
 
-# Originally written by Per Bothner <per@bothner.com>.
-# Please send patches to <config-patches@gnu.org>.  Submit a context
-# diff and a properly formatted ChangeLog entry.
+# Originally written by Per Bothner.  Please send patches (context
+# diff format) to <config-patches@gnu.org> and include a ChangeLog
+# entry.
 #
 # This script attempts to guess a canonical system name similar to
 # config.sub.  If it succeeds, it prints the system name on stdout, and
 # exits with 0.  Otherwise, it exits with 1.
 #
-# The plan is that this can be called by configure scripts if you
-# don't specify an explicit build system type.
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
 
 me=`echo "$0" | sed -e 's,.*/,,'`
 
@@ -56,8 +54,9 @@ version="\
 GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -144,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
 case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     *:NetBSD:*:*)
 	# NetBSD (nbsd) targets should (where applicable) match one or
-	# more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
 	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
 	# switched to ELF, *-*-netbsd* would select the old
 	# object file format.  This provides both forward
@@ -170,7 +169,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
 		eval $set_cc_for_build
 		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
-			| grep __ELF__ >/dev/null
+			| grep -q __ELF__
 		then
 		    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
 		    # Return netbsd for either.  FIX?
@@ -180,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		fi
 		;;
 	    *)
-	        os=netbsd
+		os=netbsd
 		;;
 	esac
 	# The OS release
@@ -223,7 +222,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
 		;;
 	*5.*)
-	        UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
 		;;
 	esac
 	# According to Compaq, /usr/sbin/psrinfo has been available on
@@ -269,7 +268,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	# A Xn.n version is an unreleased experimental baselevel.
 	# 1.2 uses "1.2" for uname -r.
 	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-	exit ;;
+	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+	exitcode=$?
+	trap '' 0
+	exit $exitcode ;;
     Alpha\ *:Windows_NT*:*)
 	# How do we know it's Interix rather than the generic POSIX subsystem?
 	# Should we change UNAME_MACHINE based on the output of uname instead
@@ -295,7 +297,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	echo s390-ibm-zvmoe
 	exit ;;
     *:OS400:*:*)
-        echo powerpc-ibm-os400
+	echo powerpc-ibm-os400
 	exit ;;
     arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
 	echo arm-acorn-riscix${UNAME_RELEASE}
@@ -333,6 +335,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
 	echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
 	exit ;;
+    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
+	echo i386-pc-auroraux${UNAME_RELEASE}
+	exit ;;
     i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
 	eval $set_cc_for_build
 	SUN_ARCH="i386"
@@ -391,23 +396,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     # MiNT.  But MiNT is downward compatible to TOS, so this should
     # be no problem.
     atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
+	echo m68k-atari-mint${UNAME_RELEASE}
 	exit ;;
     atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
 	echo m68k-atari-mint${UNAME_RELEASE}
-        exit ;;
+	exit ;;
     *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
+	echo m68k-atari-mint${UNAME_RELEASE}
 	exit ;;
     milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
-        echo m68k-milan-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-milan-mint${UNAME_RELEASE}
+	exit ;;
     hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
-        echo m68k-hades-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-hades-mint${UNAME_RELEASE}
+	exit ;;
     *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
-        echo m68k-unknown-mint${UNAME_RELEASE}
-        exit ;;
+	echo m68k-unknown-mint${UNAME_RELEASE}
+	exit ;;
     m68k:machten:*:*)
 	echo m68k-apple-machten${UNAME_RELEASE}
 	exit ;;
@@ -477,8 +482,8 @@ EOF
 	echo m88k-motorola-sysv3
 	exit ;;
     AViiON:dgux:*:*)
-        # DG/UX returns AViiON for all architectures
-        UNAME_PROCESSOR=`/usr/bin/uname -p`
+	# DG/UX returns AViiON for all architectures
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
 	if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
 	then
 	    if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
@@ -491,7 +496,7 @@ EOF
 	else
 	    echo i586-dg-dgux${UNAME_RELEASE}
 	fi
- 	exit ;;
+	exit ;;
     M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
 	echo m88k-dolphin-sysv3
 	exit ;;
@@ -548,7 +553,7 @@ EOF
 		echo rs6000-ibm-aix3.2
 	fi
 	exit ;;
-    *:AIX:*:[456])
+    *:AIX:*:[4567])
 	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
 	if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
 		IBM_ARCH=rs6000
@@ -591,52 +596,52 @@ EOF
 	    9000/[678][0-9][0-9])
 		if [ -x /usr/bin/getconf ]; then
 		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
-                    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
-                    case "${sc_cpu_version}" in
-                      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
-                      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
-                      532)                      # CPU_PA_RISC2_0
-                        case "${sc_kernel_bits}" in
-                          32) HP_ARCH="hppa2.0n" ;;
-                          64) HP_ARCH="hppa2.0w" ;;
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case "${sc_cpu_version}" in
+		      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+		      532)                      # CPU_PA_RISC2_0
+			case "${sc_kernel_bits}" in
+			  32) HP_ARCH="hppa2.0n" ;;
+			  64) HP_ARCH="hppa2.0w" ;;
 			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
-                        esac ;;
-                    esac
+			esac ;;
+		    esac
 		fi
 		if [ "${HP_ARCH}" = "" ]; then
 		    eval $set_cc_for_build
-		    sed 's/^              //' << EOF >$dummy.c
+		    sed 's/^		//' << EOF >$dummy.c
 
-              #define _HPUX_SOURCE
-              #include <stdlib.h>
-              #include <unistd.h>
+		#define _HPUX_SOURCE
+		#include <stdlib.h>
+		#include <unistd.h>
 
-              int main ()
-              {
-              #if defined(_SC_KERNEL_BITS)
-                  long bits = sysconf(_SC_KERNEL_BITS);
-              #endif
-                  long cpu  = sysconf (_SC_CPU_VERSION);
+		int main ()
+		{
+		#if defined(_SC_KERNEL_BITS)
+		    long bits = sysconf(_SC_KERNEL_BITS);
+		#endif
+		    long cpu  = sysconf (_SC_CPU_VERSION);
 
-                  switch (cpu)
-              	{
-              	case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
-              	case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
-              	case CPU_PA_RISC2_0:
-              #if defined(_SC_KERNEL_BITS)
-              	    switch (bits)
-              		{
-              		case 64: puts ("hppa2.0w"); break;
-              		case 32: puts ("hppa2.0n"); break;
-              		default: puts ("hppa2.0"); break;
-              		} break;
-              #else  /* !defined(_SC_KERNEL_BITS) */
-              	    puts ("hppa2.0"); break;
-              #endif
-              	default: puts ("hppa1.0"); break;
-              	}
-                  exit (0);
-              }
+		    switch (cpu)
+			{
+			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+			case CPU_PA_RISC2_0:
+		#if defined(_SC_KERNEL_BITS)
+			    switch (bits)
+				{
+				case 64: puts ("hppa2.0w"); break;
+				case 32: puts ("hppa2.0n"); break;
+				default: puts ("hppa2.0"); break;
+				} break;
+		#else  /* !defined(_SC_KERNEL_BITS) */
+			    puts ("hppa2.0"); break;
+		#endif
+			default: puts ("hppa1.0"); break;
+			}
+		    exit (0);
+		}
 EOF
 		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
 		    test -z "$HP_ARCH" && HP_ARCH=hppa
@@ -656,7 +661,7 @@ EOF
 	    # => hppa64-hp-hpux11.23
 
 	    if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
-		grep __LP64__ >/dev/null
+		grep -q __LP64__
 	    then
 		HP_ARCH="hppa2.0w"
 	    else
@@ -727,22 +732,22 @@ EOF
 	exit ;;
     C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
 	echo c1-convex-bsd
-        exit ;;
+	exit ;;
     C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
 	if getsysinfo -f scalar_acc
 	then echo c32-convex-bsd
 	else echo c2-convex-bsd
 	fi
-        exit ;;
+	exit ;;
     C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
 	echo c34-convex-bsd
-        exit ;;
+	exit ;;
     C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
 	echo c38-convex-bsd
-        exit ;;
+	exit ;;
     C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
 	echo c4-convex-bsd
-        exit ;;
+	exit ;;
     CRAY*Y-MP:*:*:*)
 	echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
 	exit ;;
@@ -766,14 +771,14 @@ EOF
 	exit ;;
     F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
 	FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
-        echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-        exit ;;
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
     5000:UNIX_System_V:4.*:*)
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
-        echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
 	exit ;;
     i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
 	echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
@@ -785,13 +790,12 @@ EOF
 	echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
 	exit ;;
     *:FreeBSD:*:*)
-	case ${UNAME_MACHINE} in
-	    pc98)
-		echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	case ${UNAME_PROCESSOR} in
 	    amd64)
 		echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
 	    *)
-		echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+		echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
 	esac
 	exit ;;
     i*:CYGWIN*:*)
@@ -800,19 +804,22 @@ EOF
     *:MINGW*:*)
 	echo ${UNAME_MACHINE}-pc-mingw32
 	exit ;;
+    i*:MSYS*:*)
+	echo ${UNAME_MACHINE}-pc-msys
+	exit ;;
     i*:windows32*:*)
-    	# uname -m includes "-pc" on this system.
-    	echo ${UNAME_MACHINE}-mingw32
+	# uname -m includes "-pc" on this system.
+	echo ${UNAME_MACHINE}-mingw32
 	exit ;;
     i*:PW*:*)
 	echo ${UNAME_MACHINE}-pc-pw32
 	exit ;;
-    *:Interix*:[3456]*)
-    	case ${UNAME_MACHINE} in
+    *:Interix*:*)
+	case ${UNAME_MACHINE} in
 	    x86)
 		echo i586-pc-interix${UNAME_RELEASE}
 		exit ;;
-	    EM64T | authenticamd | genuineintel)
+	    authenticamd | genuineintel | EM64T)
 		echo x86_64-unknown-interix${UNAME_RELEASE}
 		exit ;;
 	    IA64)
@@ -822,6 +829,9 @@ EOF
     [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
 	echo i${UNAME_MACHINE}-pc-mks
 	exit ;;
+    8664:Windows_NT:*)
+	echo x86_64-pc-mks
+	exit ;;
     i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
 	# How do we know it's Interix rather than the generic POSIX subsystem?
 	# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
@@ -851,6 +861,27 @@ EOF
     i*86:Minix:*:*)
 	echo ${UNAME_MACHINE}-pc-minix
 	exit ;;
+    aarch64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    aarch64_be:Linux:*:*)
+	UNAME_MACHINE=aarch64_be
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    alpha:Linux:*:*)
+	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+	  EV5)   UNAME_MACHINE=alphaev5 ;;
+	  EV56)  UNAME_MACHINE=alphaev56 ;;
+	  PCA56) UNAME_MACHINE=alphapca56 ;;
+	  PCA57) UNAME_MACHINE=alphapca56 ;;
+	  EV6)   UNAME_MACHINE=alphaev6 ;;
+	  EV67)  UNAME_MACHINE=alphaev67 ;;
+	  EV68*) UNAME_MACHINE=alphaev68 ;;
+	esac
+	objdump --private-headers /bin/sh | grep -q ld.so.1
+	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
+	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+	exit ;;
     arm*:Linux:*:*)
 	eval $set_cc_for_build
 	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
@@ -858,20 +889,40 @@ EOF
 	then
 	    echo ${UNAME_MACHINE}-unknown-linux-gnu
 	else
-	    echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+		| grep -q __ARM_PCS_VFP
+	    then
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+	    else
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
+	    fi
 	fi
 	exit ;;
     avr32*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     cris:Linux:*:*)
-	echo cris-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-gnu
 	exit ;;
     crisv32:Linux:*:*)
-	echo crisv32-axis-linux-gnu
+	echo ${UNAME_MACHINE}-axis-linux-gnu
 	exit ;;
     frv:Linux:*:*)
-    	echo frv-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    hexagon:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    i*86:Linux:*:*)
+	LIBC=gnu
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+	#ifdef __dietlibc__
+	LIBC=dietlibc
+	#endif
+EOF
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
+	echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
 	exit ;;
     ia64:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
@@ -882,78 +933,34 @@ EOF
     m68*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
-    mips:Linux:*:*)
+    mips:Linux:*:* | mips64:Linux:*:*)
 	eval $set_cc_for_build
 	sed 's/^	//' << EOF >$dummy.c
 	#undef CPU
-	#undef mips
-	#undef mipsel
+	#undef ${UNAME_MACHINE}
+	#undef ${UNAME_MACHINE}el
 	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-	CPU=mipsel
+	CPU=${UNAME_MACHINE}el
 	#else
 	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
-	CPU=mips
+	CPU=${UNAME_MACHINE}
 	#else
 	CPU=
 	#endif
 	#endif
 EOF
-	eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
-	    /^CPU/{
-		s: ::g
-		p
-	    }'`"
-	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
-	;;
-    mips64:Linux:*:*)
-	eval $set_cc_for_build
-	sed 's/^	//' << EOF >$dummy.c
-	#undef CPU
-	#undef mips64
-	#undef mips64el
-	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-	CPU=mips64el
-	#else
-	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
-	CPU=mips64
-	#else
-	CPU=
-	#endif
-	#endif
-EOF
-	eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
-	    /^CPU/{
-		s: ::g
-		p
-	    }'`"
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
 	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
 	;;
     or32:Linux:*:*)
-	echo or32-unknown-linux-gnu
-	exit ;;
-    ppc:Linux:*:*)
-	echo powerpc-unknown-linux-gnu
-	exit ;;
-    ppc64:Linux:*:*)
-	echo powerpc64-unknown-linux-gnu
-	exit ;;
-    alpha:Linux:*:*)
-	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
-	  EV5)   UNAME_MACHINE=alphaev5 ;;
-	  EV56)  UNAME_MACHINE=alphaev56 ;;
-	  PCA56) UNAME_MACHINE=alphapca56 ;;
-	  PCA57) UNAME_MACHINE=alphapca56 ;;
-	  EV6)   UNAME_MACHINE=alphaev6 ;;
-	  EV67)  UNAME_MACHINE=alphaev67 ;;
-	  EV68*) UNAME_MACHINE=alphaev68 ;;
-        esac
-	objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
-	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
-	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     padre:Linux:*:*)
 	echo sparc-unknown-linux-gnu
 	exit ;;
+    parisc64:Linux:*:* | hppa64:Linux:*:*)
+	echo hppa64-unknown-linux-gnu
+	exit ;;
     parisc:Linux:*:* | hppa:Linux:*:*)
 	# Look for CPU level
 	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
@@ -962,14 +969,17 @@ EOF
 	  *)    echo hppa-unknown-linux-gnu ;;
 	esac
 	exit ;;
-    parisc64:Linux:*:* | hppa64:Linux:*:*)
-	echo hppa64-unknown-linux-gnu
+    ppc64:Linux:*:*)
+	echo powerpc64-unknown-linux-gnu
+	exit ;;
+    ppc:Linux:*:*)
+	echo powerpc-unknown-linux-gnu
 	exit ;;
     s390:Linux:*:* | s390x:Linux:*:*)
 	echo ${UNAME_MACHINE}-ibm-linux
 	exit ;;
     sh64*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     sh*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
@@ -977,75 +987,18 @@ EOF
     sparc:Linux:*:* | sparc64:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
+    tile*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
     vax:Linux:*:*)
 	echo ${UNAME_MACHINE}-dec-linux-gnu
 	exit ;;
     x86_64:Linux:*:*)
-	echo x86_64-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
     xtensa*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
 	exit ;;
-    i*86:Linux:*:*)
-	# The BFD linker knows what the default object file format is, so
-	# first see if it will tell us. cd to the root directory to prevent
-	# problems with other programs or directories called `ld' in the path.
-	# Set LC_ALL=C to ensure ld outputs messages in English.
-	ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
-			 | sed -ne '/supported targets:/!d
-				    s/[ 	][ 	]*/ /g
-				    s/.*supported targets: *//
-				    s/ .*//
-				    p'`
-        case "$ld_supported_targets" in
-	  elf32-i386)
-		TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu"
-		;;
-	  a.out-i386-linux)
-		echo "${UNAME_MACHINE}-pc-linux-gnuaout"
-		exit ;;
-	  "")
-		# Either a pre-BFD a.out linker (linux-gnuoldld) or
-		# one that does not give us useful --help.
-		echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
-		exit ;;
-	esac
-	# Determine whether the default compiler is a.out or elf
-	eval $set_cc_for_build
-	sed 's/^	//' << EOF >$dummy.c
-	#include <features.h>
-	#ifdef __ELF__
-	# ifdef __GLIBC__
-	#  if __GLIBC__ >= 2
-	LIBC=gnu
-	#  else
-	LIBC=gnulibc1
-	#  endif
-	# else
-	LIBC=gnulibc1
-	# endif
-	#else
-	#if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
-	LIBC=gnu
-	#else
-	LIBC=gnuaout
-	#endif
-	#endif
-	#ifdef __dietlibc__
-	LIBC=dietlibc
-	#endif
-EOF
-	eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
-	    /^LIBC/{
-		s: ::g
-		p
-	    }'`"
-	test x"${LIBC}" != x && {
-		echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
-		exit
-	}
-	test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; }
-	;;
     i*86:DYNIX/ptx:4*:*)
 	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
 	# earlier versions are messed up and put the nodename in both
@@ -1053,11 +1006,11 @@ EOF
 	echo i386-sequent-sysv4
 	exit ;;
     i*86:UNIX_SV:4.2MP:2.*)
-        # Unixware is an offshoot of SVR4, but it has its own version
-        # number series starting with 2...
-        # I am not positive that other SVR4 systems won't match this,
+	# Unixware is an offshoot of SVR4, but it has its own version
+	# number series starting with 2...
+	# I am not positive that other SVR4 systems won't match this,
 	# I just have to hope.  -- rms.
-        # Use sysv4.2uw... so that sysv4* matches it.
+	# Use sysv4.2uw... so that sysv4* matches it.
 	echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
 	exit ;;
     i*86:OS/2:*:*)
@@ -1074,7 +1027,7 @@ EOF
     i*86:syllable:*:*)
 	echo ${UNAME_MACHINE}-pc-syllable
 	exit ;;
-    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
 	echo i386-unknown-lynxos${UNAME_RELEASE}
 	exit ;;
     i*86:*DOS:*:*)
@@ -1089,7 +1042,7 @@ EOF
 	fi
 	exit ;;
     i*86:*:5:[678]*)
-    	# UnixWare 7.x, OpenUNIX and OpenServer 6.
+	# UnixWare 7.x, OpenUNIX and OpenServer 6.
 	case `/bin/uname -X | grep "^Machine"` in
 	    *486*)	     UNAME_MACHINE=i486 ;;
 	    *Pentium)	     UNAME_MACHINE=i586 ;;
@@ -1117,13 +1070,13 @@ EOF
 	exit ;;
     pc:*:*:*)
 	# Left here for compatibility:
-        # uname -m prints for DJGPP always 'pc', but it prints nothing about
-        # the processor, so we play safe by assuming i586.
+	# uname -m prints for DJGPP always 'pc', but it prints nothing about
+	# the processor, so we play safe by assuming i586.
 	# Note: whatever this is, it MUST be the same as what config.sub
 	# prints for the "djgpp" host, or else GDB configury will decide that
 	# this is a cross-build.
 	echo i586-pc-msdosdjgpp
-        exit ;;
+	exit ;;
     Intel:Mach:3*:*)
 	echo i386-pc-mach3
 	exit ;;
@@ -1158,8 +1111,8 @@ EOF
 	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
 	  && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
     3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
-        /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-          && { echo i486-ncr-sysv4; exit; } ;;
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4; exit; } ;;
     NCR*:*:4.2:* | MPRAS*:*:4.2:*)
 	OS_REL='.3'
 	test -r /etc/.relid \
@@ -1182,7 +1135,7 @@ EOF
     rs6000:LynxOS:2.*:*)
 	echo rs6000-unknown-lynxos${UNAME_RELEASE}
 	exit ;;
-    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
+    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
 	echo powerpc-unknown-lynxos${UNAME_RELEASE}
 	exit ;;
     SM[BE]S:UNIX_SV:*:*)
@@ -1202,10 +1155,10 @@ EOF
 		echo ns32k-sni-sysv
 	fi
 	exit ;;
-    PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
-                      # says <Richard.M.Bartel@ccMail.Census.GOV>
-        echo i586-unisys-sysv4
-        exit ;;
+    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+			# says <Richard.M.Bartel@ccMail.Census.GOV>
+	echo i586-unisys-sysv4
+	exit ;;
     *:UNIX_System_V:4*:FTX*)
 	# From Gerald Hewes <hewes@openmarket.com>.
 	# How about differentiating between stratus architectures? -djm
@@ -1231,11 +1184,11 @@ EOF
 	exit ;;
     R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
 	if [ -d /usr/nec ]; then
-	        echo mips-nec-sysv${UNAME_RELEASE}
+		echo mips-nec-sysv${UNAME_RELEASE}
 	else
-	        echo mips-unknown-sysv${UNAME_RELEASE}
+		echo mips-unknown-sysv${UNAME_RELEASE}
 	fi
-        exit ;;
+	exit ;;
     BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
 	echo powerpc-be-beos
 	exit ;;
@@ -1275,6 +1228,16 @@ EOF
     *:Darwin:*:*)
 	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
 	case $UNAME_PROCESSOR in
+	    i386)
+		eval $set_cc_for_build
+		if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+		  if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+		      (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+		      grep IS_64BIT_ARCH >/dev/null
+		  then
+		      UNAME_PROCESSOR="x86_64"
+		  fi
+		fi ;;
 	    unknown) UNAME_PROCESSOR=powerpc ;;
 	esac
 	echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
@@ -1290,6 +1253,9 @@ EOF
     *:QNX:*:4*)
 	echo i386-pc-qnx
 	exit ;;
+    NEO-?:NONSTOP_KERNEL:*:*)
+	echo neo-tandem-nsk${UNAME_RELEASE}
+	exit ;;
     NSE-?:NONSTOP_KERNEL:*:*)
 	echo nse-tandem-nsk${UNAME_RELEASE}
 	exit ;;
@@ -1335,13 +1301,13 @@ EOF
 	echo pdp10-unknown-its
 	exit ;;
     SEI:*:*:SEIUX)
-        echo mips-sei-seiux${UNAME_RELEASE}
+	echo mips-sei-seiux${UNAME_RELEASE}
 	exit ;;
     *:DragonFly:*:*)
 	echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
 	exit ;;
     *:*VMS:*:*)
-    	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
 	case "${UNAME_MACHINE}" in
 	    A*) echo alpha-dec-vms ; exit ;;
 	    I*) echo ia64-dec-vms ; exit ;;
@@ -1359,6 +1325,9 @@ EOF
     i*86:AROS:*:*)
 	echo ${UNAME_MACHINE}-pc-aros
 	exit ;;
+    x86_64:VMkernel:*:*)
+	echo ${UNAME_MACHINE}-unknown-esx
+	exit ;;
 esac
 
 #echo '(No uname command or uname output not recognized.)' 1>&2
@@ -1381,11 +1350,11 @@ main ()
 #include <sys/param.h>
   printf ("m68k-sony-newsos%s\n",
 #ifdef NEWSOS4
-          "4"
+	"4"
 #else
-	  ""
+	""
 #endif
-         ); exit (0);
+	); exit (0);
 #endif
 #endif
 
diff --git a/storage/tokudb/PerconaFT/tools/CMakeLists.txt b/storage/tokudb/PerconaFT/tools/CMakeLists.txt
index af82b4357d29e..f11b9f350d726 100644
--- a/storage/tokudb/PerconaFT/tools/CMakeLists.txt
+++ b/storage/tokudb/PerconaFT/tools/CMakeLists.txt
@@ -1,6 +1,6 @@
 set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE DONT_DEPRECATE_ERRNO)
 
-set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify ba_replay)
+set(tools tokudb_dump tokuftdump tokuft_logprint tdb-recover ftverify)
 foreach(tool ${tools})
   add_executable(${tool} ${tool}.cc)
   add_dependencies(${tool} install_tdb_h)
@@ -14,4 +14,3 @@ target_link_libraries(ftverify m)
 
 install(TARGETS tokuftdump      DESTINATION ${INSTALL_BINDIR} COMPONENT Server)
 install(TARGETS tokuft_logprint DESTINATION ${INSTALL_BINDIR} COMPONENT Server)
-
diff --git a/storage/tokudb/PerconaFT/tools/ba_replay.cc b/storage/tokudb/PerconaFT/tools/ba_replay.cc
deleted file mode 100644
index cade7e5dfafc6..0000000000000
--- a/storage/tokudb/PerconaFT/tools/ba_replay.cc
+++ /dev/null
@@ -1,629 +0,0 @@
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
-#ident "$Id$"
-/*======
-This file is part of PerconaFT.
-
-
-Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
-
-    PerconaFT is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License, version 2,
-    as published by the Free Software Foundation.
-
-    PerconaFT is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
-
-----------------------------------------
-
-    PerconaFT is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Affero General Public License, version 3,
-    as published by the Free Software Foundation.
-
-    PerconaFT is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Affero General Public License for more details.
-
-    You should have received a copy of the GNU Affero General Public License
-    along with PerconaFT.  If not, see <http://www.gnu.org/licenses/>.
-======= */
-
-#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-
-// Replay a block allocator trace against different strategies and compare
-// the results
-
-#include <db.h>
-
-#include <getopt.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <map>
-#include <set>
-#include <string>
-#include <sstream>
-#include <vector>
-
-#include <portability/memory.h>
-#include <portability/toku_assert.h>
-#include <portability/toku_stdlib.h>
-
-#include "ft/serialize/block_allocator.h"
-
-using std::map;
-using std::set;
-using std::string;
-using std::vector;
-
-static int verbose = false;
-
-static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) {
-    if (!pred) {
-        fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line);
-        abort();
-    }
-}
-
-static char *trim_whitespace(char *line) {
-    // skip leading whitespace
-    while (isspace(*line)) {
-        line++;
-    }
-    return line;
-}
-
-static int64_t parse_number(char **ptr, int line_num, int base) {
-    *ptr = trim_whitespace(*ptr);
-    char *line = *ptr;
-
-    char *new_ptr;
-    int64_t n = strtoll(line, &new_ptr, base);
-    ba_replay_assert(n >= 0, "malformed trace (bad numeric token)", line, line_num);
-    ba_replay_assert(new_ptr > *ptr, "malformed trace (missing numeric token)", line, line_num);
-    *ptr = new_ptr;
-    return n;
-}
-
-static uint64_t parse_uint64(char **ptr, int line_num) {
-    int64_t n = parse_number(ptr, line_num, 10);
-    // we happen to know that the uint64's we deal with will
-    // take less than 63 bits (they come from pointers)
-    return static_cast<uint64_t>(n);
-}
-
-static string parse_token(char **ptr, int line_num) {
-    *ptr = trim_whitespace(*ptr);
-    char *line = *ptr;
-
-    // parse the first token, which represents the traced function
-    char token[64];
-    int r = sscanf(*ptr, "%64s", token);
-    ba_replay_assert(r == 1, "malformed trace (missing string token)", line, line_num);
-    *ptr += strlen(token);
-    return string(token);
-}
-
-static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) {
-    *ptr = trim_whitespace(*ptr);
-    char *line = *ptr;
-
-    uint64_t offset, size;
-    int bytes_read;
-    int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read);
-    ba_replay_assert(r == 2, "malformed trace (bad offset/size pair)", line, line_num);
-    *ptr += bytes_read;
-    return block_allocator::blockpair(offset, size);
-}
-
-static char *strip_newline(char *line, bool *found) {
-    char *ptr = strchr(line, '\n');
-    if (ptr != nullptr) {
-        if (found != nullptr) {
-            *found = true;
-        }
-        *ptr = '\0';
-    }
-    return line;
-}
-
-static char *read_trace_line(FILE *file) {
-    const int buf_size = 4096;
-    char buf[buf_size];
-    std::stringstream ss;
-    while (true) {
-        if (fgets(buf, buf_size, file) == nullptr) {
-            break;
-        }
-        bool has_newline = false;
-        ss << strip_newline(buf, &has_newline);
-        if (has_newline) {
-            // end of the line, we're done out
-            break;
-        }
-    }
-    std::string s = ss.str();
-    return s.size() ? toku_strdup(s.c_str()) : nullptr;
-}
-
-static vector<string> canonicalize_trace_from(FILE *file) {
-    // new trace, canonicalized from a raw trace
-    vector<string> canonicalized_trace;
-
-    // raw allocator id -> canonical allocator id
-    //
-    // keeps track of allocators that were created as part of the trace,
-    // and therefore will be part of the canonicalized trace.
-    uint64_t allocator_id_seq_num = 0;
-    map<uint64_t, uint64_t> allocator_ids;
-
-    // allocated offset -> allocation seq num
-    //
-    uint64_t allocation_seq_num = 0;
-    static const uint64_t ASN_NONE = (uint64_t) -1;
-    typedef map<uint64_t, uint64_t> offset_seq_map;
-
-    // raw allocator id -> offset_seq_map that tracks its allocations
-    map<uint64_t, offset_seq_map> offset_to_seq_num_maps;
-
-    int line_num = 0;
-    char *line;
-    while ((line = read_trace_line(file)) != nullptr) {
-        line_num++;
-        char *ptr = line;
-
-        string fn = parse_token(&ptr, line_num);
-        int64_t allocator_id = parse_number(&ptr, line_num, 16);
-
-        std::stringstream ss;
-        if (fn.find("ba_trace_create") != string::npos) {
-            ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num);
-            ba_replay_assert(fn == "ba_trace_create" || fn == "ba_trace_create_from_blockpairs",
-                             "corrupted trace: bad fn", line, line_num);
-
-            // we only convert the allocator_id to an allocator_id_seq_num
-            // in the canonical trace and leave the rest of the line as-is.
-            allocator_ids[allocator_id] = allocator_id_seq_num;
-            ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl;
-            allocator_id_seq_num++;
-
-            // First, read passed the reserve / alignment values.
-            (void) parse_uint64(&ptr, line_num);
-            (void) parse_uint64(&ptr, line_num);
-            if (fn == "ba_trace_create_from_blockpairs") {
-                // For each blockpair created by this traceline, add its offset to the offset seq map
-                // with asn ASN_NONE so that later canonicalizations of `free' know whether to write
-                // down the asn or the raw offset.
-                offset_seq_map *map = &offset_to_seq_num_maps[allocator_id];
-                while (*trim_whitespace(ptr) != '\0') {
-                    const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num);
-                    (*map)[bp.offset] = ASN_NONE;
-                }
-            }
-        } else {
-            ba_replay_assert(allocator_ids.count(allocator_id) > 0, "corrupted trace: unknown allocator", line, line_num);
-            uint64_t canonical_allocator_id = allocator_ids[allocator_id];
-
-            // this is the map that tracks allocations for this allocator
-            offset_seq_map *map = &offset_to_seq_num_maps[allocator_id];
-
-            if (fn == "ba_trace_alloc") {
-                const uint64_t size = parse_uint64(&ptr, line_num);
-                const uint64_t heat = parse_uint64(&ptr, line_num);
-                const uint64_t offset = parse_uint64(&ptr, line_num);
-                ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num);
-
-                // remember that an allocation at `offset' has the current alloc seq num
-                (*map)[offset] = allocation_seq_num;
-
-                // translate `offset = alloc(size)' to `asn = alloc(size)'
-                ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl;
-                allocation_seq_num++;
-            } else if (fn == "ba_trace_free") {
-                const uint64_t offset = parse_uint64(&ptr, line_num);
-                ba_replay_assert(map->count(offset) != 0, "corrupted trace: invalid free", line, line_num);
-
-                // get the alloc seq num for an allcation that occurred at `offset'
-                const uint64_t asn = (*map)[offset];
-                map->erase(offset);
-
-                // if there's an asn, then a corresponding ba_trace_alloc occurred and we should
-                // write `free(asn)'. otherwise, the blockpair was initialized from create_from_blockpairs
-                // and we write the original offset.
-                if (asn != ASN_NONE) {
-                    ss << "ba_trace_free_asn" << ' ' << canonical_allocator_id << ' ' << asn << std::endl;
-                } else {
-                    ss << "ba_trace_free_offset" << ' ' << canonical_allocator_id << ' ' << offset << std::endl;
-                }
-            } else if (fn == "ba_trace_destroy") {
-                // Remove this allocator from both maps
-                allocator_ids.erase(allocator_id);
-                offset_to_seq_num_maps.erase(allocator_id);
-
-                // translate `destroy(ptr_id) to destroy(canonical_id)'
-                ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl;
-            } else {
-                ba_replay_assert(false, "corrupted trace: bad fn", line, line_num);
-            }
-        }
-        canonicalized_trace.push_back(ss.str());
-
-        toku_free(line);
-    }
-
-    if (allocator_ids.size() != 0) {
-        fprintf(stderr, "warning: leaked allocators. this might be ok if the tracing process is still running");
-    }
-
-    return canonicalized_trace;
-}
-
-struct streaming_variance_calculator {
-    int64_t n_samples;
-    int64_t mean;
-    int64_t variance;
-
-    // math credit: AoCP, Donald Knuth, '62
-    void add_sample(int64_t x) {
-        n_samples++;
-        if (n_samples == 1) {
-            mean = x;
-            variance = 0;
-        } else {
-            int64_t old_mean = mean;
-            mean = old_mean + ((x - old_mean) / n_samples);
-            variance = (((n_samples - 1) * variance) +
-                        ((x - old_mean) * (x - mean))) / n_samples;
-        }
-    }
-};
-
-struct canonical_trace_stats {
-    uint64_t n_lines_replayed;
-
-    uint64_t n_create;
-    uint64_t n_create_from_blockpairs;
-    uint64_t n_alloc_hot;
-    uint64_t n_alloc_cold;
-    uint64_t n_free;
-    uint64_t n_destroy;
-
-    struct streaming_variance_calculator alloc_hot_bytes;
-    struct streaming_variance_calculator alloc_cold_bytes;
-
-    canonical_trace_stats() {
-        memset(this, 0, sizeof(*this));
-    }
-};
-
-struct fragmentation_report {
-    TOKU_DB_FRAGMENTATION_S beginning;
-    TOKU_DB_FRAGMENTATION_S end;
-    fragmentation_report() {
-        memset(this, 0, sizeof(*this));
-    }
-    void merge(const struct fragmentation_report &src_report) {
-        for (int i = 0; i < 2; i++) {
-            TOKU_DB_FRAGMENTATION_S *dst = i == 0 ? &beginning : &end;
-            const TOKU_DB_FRAGMENTATION_S *src = i == 0 ? &src_report.beginning : &src_report.end;
-            dst->file_size_bytes += src->file_size_bytes;
-            dst->data_bytes += src->data_bytes;
-            dst->data_blocks += src->data_blocks;
-            dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional;
-            dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional;
-            dst->unused_bytes += src->unused_bytes;
-            dst->unused_blocks += src->unused_blocks;
-            dst->largest_unused_block += src->largest_unused_block;
-        }
-    }
-};
-
-static void replay_canonicalized_trace(const vector<string> &canonicalized_trace,
-                                       block_allocator::allocation_strategy strategy,
-                                       map<uint64_t, struct fragmentation_report> *reports,
-                                       struct canonical_trace_stats *stats) {
-    // maps an allocator id to its block allocator
-    map<uint64_t, block_allocator *> allocator_map;
-
-    // maps allocation seq num to allocated offset
-    map<uint64_t, uint64_t> seq_num_to_offset;
-
-    for (vector<string>::const_iterator it = canonicalized_trace.begin();
-         it != canonicalized_trace.end(); it++) {
-        const int line_num = stats->n_lines_replayed++;
-
-        char *line = toku_strdup(it->c_str());
-        line = strip_newline(line, nullptr);
-
-        char *ptr = trim_whitespace(line);
-
-        // canonical allocator id is in base 10, not 16
-        string fn = parse_token(&ptr, line_num);
-        int64_t allocator_id = parse_number(&ptr, line_num, 10);
-
-        if (fn.find("ba_trace_create") != string::npos) {
-            const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num);
-            const uint64_t alignment = parse_uint64(&ptr, line_num);
-            ba_replay_assert(allocator_map.count(allocator_id) == 0,
-                             "corrupted canonical trace: double create", line, line_num);
-
-            block_allocator *ba = new block_allocator();
-            if (fn == "ba_trace_create") {
-                ba->create(reserve_at_beginning, alignment);
-                stats->n_create++;
-            } else {
-                ba_replay_assert(fn == "ba_trace_create_from_blockpairs",
-                                 "corrupted canonical trace: bad create fn", line, line_num);
-                vector<block_allocator::blockpair> pairs;
-                while (*trim_whitespace(ptr) != '\0') {
-                    const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num);
-                    pairs.push_back(bp);
-                }
-                ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size());
-                stats->n_create_from_blockpairs++;
-            }
-            ba->set_strategy(strategy);
-
-            TOKU_DB_FRAGMENTATION_S report;
-            ba->get_statistics(&report);
-            (*reports)[allocator_id].beginning = report;
-            allocator_map[allocator_id] = ba;
-        } else {
-            ba_replay_assert(allocator_map.count(allocator_id) > 0,
-                             "corrupted canonical trace: no such allocator", line, line_num);
-
-            block_allocator *ba = allocator_map[allocator_id];
-            if (fn == "ba_trace_alloc") {
-                // replay an `alloc' whose result will be associated with a certain asn
-                const uint64_t size = parse_uint64(&ptr, line_num);
-                const uint64_t heat = parse_uint64(&ptr, line_num);
-                const uint64_t asn = parse_uint64(&ptr, line_num);
-                ba_replay_assert(seq_num_to_offset.count(asn) == 0,
-                                 "corrupted canonical trace: double alloc (asn in use)", line, line_num);
-
-                uint64_t offset;
-                ba->alloc_block(size, heat, &offset);
-                seq_num_to_offset[asn] = offset;
-                heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++;
-                heat ? stats->alloc_hot_bytes.add_sample(size) : stats->alloc_cold_bytes.add_sample(size);
-            } else if (fn == "ba_trace_free_asn") {
-                // replay a `free' on a block whose offset is the result of an alloc with an asn
-                const uint64_t asn = parse_uint64(&ptr, line_num);
-                ba_replay_assert(seq_num_to_offset.count(asn) == 1,
-                                 "corrupted canonical trace: double free (asn unused)", line, line_num);
-
-                const uint64_t offset = seq_num_to_offset[asn];
-                ba->free_block(offset);
-                seq_num_to_offset.erase(asn);
-                stats->n_free++;
-            } else if (fn == "ba_trace_free_offset") {
-                // replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs
-                const uint64_t offset = parse_uint64(&ptr, line_num);
-                ba->free_block(offset);
-                stats->n_free++;
-            } else if (fn == "ba_trace_destroy") {
-                TOKU_DB_FRAGMENTATION_S report;
-                ba->get_statistics(&report);
-                ba->destroy();
-                (*reports)[allocator_id].end = report;
-                allocator_map.erase(allocator_id);
-                stats->n_destroy++;
-            } else {
-                ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num);
-            }
-        }
-
-        toku_free(line);
-    }
-}
-
-static const char *strategy_to_cstring(block_allocator::allocation_strategy strategy) {
-    switch (strategy) {
-    case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT:
-        return "first-fit";
-    case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT:
-        return "best-fit";
-    case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE:
-        return "heat-zone";
-    case block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT:
-        return "padded-fit";
-    default:
-        abort();
-    }
-}
-
-static block_allocator::allocation_strategy cstring_to_strategy(const char *str) {
-    if (strcmp(str, "first-fit") == 0) {
-        return block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT;
-    }
-    if (strcmp(str, "best-fit") == 0) {
-        return block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT;
-    }
-    if (strcmp(str, "heat-zone") == 0) {
-        return block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE;
-    }
-    if (strcmp(str, "padded-fit") != 0) {
-        fprintf(stderr, "bad strategy string: %s\n", str);
-        abort();
-    }
-    return block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT;
-}
-
-static void print_result_verbose(uint64_t allocator_id,
-                                 block_allocator::allocation_strategy strategy,
-                                 const struct fragmentation_report &report) {
-    if (report.end.data_bytes + report.end.unused_bytes +
-        report.beginning.data_bytes + report.beginning.unused_bytes
-        < 32UL * 1024 * 1024) {
-        printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id);
-        return;
-    }
-
-    printf(" allocator_id:   %20" PRId64 "\n", allocator_id);
-    printf(" strategy:       %20s\n", strategy_to_cstring(strategy));
-
-    for (int i = 0; i < 2; i++) {
-        const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end;
-        printf("%s\n", i == 0 ? "BEFORE" : "AFTER");
-
-        uint64_t total_bytes = r->data_bytes + r->unused_bytes;
-        uint64_t total_blocks = r->data_blocks + r->unused_blocks;
-
-        // byte statistics
-        printf(" total bytes:    %20" PRId64 "\n", total_bytes);
-        printf(" used bytes:     %20" PRId64 " (%.3lf)\n", r->data_bytes,
-               static_cast<double>(r->data_bytes) / total_bytes);
-        printf(" unused bytes:   %20" PRId64 " (%.3lf)\n", r->unused_bytes,
-               static_cast<double>(r->unused_bytes) / total_bytes);
-
-        // block statistics
-        printf(" total blocks:   %20" PRId64 "\n", total_blocks);
-        printf(" used blocks:    %20" PRId64 " (%.3lf)\n", r->data_blocks,
-               static_cast<double>(r->data_blocks) / total_blocks);
-        printf(" unused blocks:  %20" PRId64 " (%.3lf)\n", r->unused_blocks,
-               static_cast<double>(r->unused_blocks) / total_blocks);
-
-        // misc
-        printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block);
-    }
-}
-
-static void print_result(uint64_t allocator_id,
-                         block_allocator::allocation_strategy strategy,
-                         const struct fragmentation_report &report) {
-    const TOKU_DB_FRAGMENTATION_S *beginning = &report.beginning;
-    const TOKU_DB_FRAGMENTATION_S *end = &report.end;
-
-    uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes;
-    uint64_t total_end_bytes = end->data_bytes + end->unused_bytes;
-    if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) {
-        if (verbose) {
-            printf("\n");
-            printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id);
-        }
-        return;
-    }
-    printf("\n");
-    if (verbose) {
-        print_result_verbose(allocator_id, strategy, report);
-    } else {
-        printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n",
-               strategy_to_cstring(strategy), allocator_id,
-               static_cast<double>(report.end.data_bytes) / total_end_bytes,
-               static_cast<double>(report.beginning.data_bytes) / total_beginning_bytes);
-    }
-}
-
-static int only_aggregate_reports;
-
-static struct option getopt_options[] = {
-    { "verbose", no_argument, &verbose, 1 },
-    { "only-aggregate-reports", no_argument, &only_aggregate_reports, 1 },
-    { "include-strategy", required_argument, nullptr, 'i' },
-    { "exclude-strategy", required_argument, nullptr, 'x' },
-    { nullptr, 0, nullptr, 0 },
-};
-
-int main(int argc, char *argv[]) {
-    int opt;
-    set<block_allocator::allocation_strategy> candidate_strategies, excluded_strategies;
-    while ((opt = getopt_long(argc, argv, "", getopt_options, nullptr)) != -1) {
-        switch (opt) {
-        case 0:
-            break;
-        case 'i':
-            candidate_strategies.insert(cstring_to_strategy(optarg));
-            break;
-        case 'x':
-            excluded_strategies.insert(cstring_to_strategy(optarg));
-            break;
-        case '?':
-        default:
-            abort();
-        };
-    }
-    // Default to everything if nothing was explicitly included.
-    if (candidate_strategies.empty()) {
-        candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT);
-        candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT);
-        candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT);
-        candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE);
-    }
-    // ..but remove anything that was explicitly excluded
-    for (set<block_allocator::allocation_strategy>::const_iterator it = excluded_strategies.begin();
-         it != excluded_strategies.end(); it++) {
-        candidate_strategies.erase(*it);
-    }
-
-    // Run the real trace
-    //
-    // First, read the raw trace from stdin
-    vector<string> canonicalized_trace = canonicalize_trace_from(stdin);
-
-    if (!only_aggregate_reports) {
-        printf("\n");
-        printf("Individual reports, by allocator:\n");
-    }
-
-    struct canonical_trace_stats stats;
-    map<block_allocator::allocation_strategy, struct fragmentation_report> reports_by_strategy; 
-    for (set<block_allocator::allocation_strategy>::const_iterator it = candidate_strategies.begin();
-         it != candidate_strategies.end(); it++) {
-        const block_allocator::allocation_strategy strategy(*it);
-
-        // replay the canonicalized trace against the current strategy.
-        //
-        // we provided the allocator map so we can gather statistics later
-        struct canonical_trace_stats dummy_stats;
-        map<uint64_t, struct fragmentation_report> reports;
-        replay_canonicalized_trace(canonicalized_trace, strategy, &reports,
-                                   // Only need to gather canonical trace stats once
-                                   it == candidate_strategies.begin() ? &stats : &dummy_stats);
-
-        struct fragmentation_report aggregate_report;
-        memset(&aggregate_report, 0, sizeof(aggregate_report));
-        for (map<uint64_t, struct fragmentation_report>::iterator rp = reports.begin();
-             rp != reports.end(); rp++) {
-            const struct fragmentation_report &report = rp->second;
-            aggregate_report.merge(report);
-            if (!only_aggregate_reports) {
-                print_result(rp->first, strategy, report);
-            }
-        }
-        reports_by_strategy[strategy] = aggregate_report;
-    }
-
-    printf("\n");
-    printf("Aggregate reports, by strategy:\n");
-
-    for (map<block_allocator::allocation_strategy, struct fragmentation_report>::iterator it = reports_by_strategy.begin();
-         it != reports_by_strategy.end(); it++) {
-        print_result(0, it->first, it->second);
-    }
-
-    printf("\n");
-    printf("Overall trace stats:\n");
-    printf("\n");
-    printf(" n_lines_played:            %15" PRIu64 "\n", stats.n_lines_replayed);
-    printf(" n_create:                  %15" PRIu64 "\n", stats.n_create);
-    printf(" n_create_from_blockpairs:  %15" PRIu64 "\n", stats.n_create_from_blockpairs);
-    printf(" n_alloc_hot:               %15" PRIu64 "\n", stats.n_alloc_hot);
-    printf(" n_alloc_cold:              %15" PRIu64 "\n", stats.n_alloc_cold);
-    printf(" n_free:                    %15" PRIu64 "\n", stats.n_free);
-    printf(" n_destroy:                 %15" PRIu64 "\n", stats.n_destroy);
-    printf("\n");
-    printf(" avg_alloc_hot:             %15" PRIu64 "\n", stats.alloc_hot_bytes.mean);
-    printf(" stddev_alloc_hot:          %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_hot_bytes.variance));
-    printf(" avg_alloc_cold:            %15" PRIu64 "\n", stats.alloc_cold_bytes.mean);
-    printf(" stddev_alloc_cold:         %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_cold_bytes.variance));
-    printf("\n");
-
-    return 0;
-}
diff --git a/storage/tokudb/PerconaFT/tools/ftverify.cc b/storage/tokudb/PerconaFT/tools/ftverify.cc
index 5920be8dedaa7..2324249ba00f2 100644
--- a/storage/tokudb/PerconaFT/tools/ftverify.cc
+++ b/storage/tokudb/PerconaFT/tools/ftverify.cc
@@ -148,7 +148,7 @@ deserialize_headers(int fd, struct ft **h1p, struct ft **h2p)
         }
     }
     {
-        toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
+        toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
         r1 = deserialize_ft_from_fd_into_rbuf(
             fd,
             header_1_off,
diff --git a/storage/tokudb/PerconaFT/tools/tokuftdump.cc b/storage/tokudb/PerconaFT/tools/tokuftdump.cc
index 23ef72218ac31..f6d777b416113 100644
--- a/storage/tokudb/PerconaFT/tools/tokuftdump.cc
+++ b/storage/tokudb/PerconaFT/tools/tokuftdump.cc
@@ -192,6 +192,7 @@ static void dump_header(FT ft) {
     dump_descriptor(&ft->descriptor);
     printf(" estimated numrows=%" PRId64 "\n", ft->in_memory_stats.numrows);
     printf(" estimated numbytes=%" PRId64 "\n", ft->in_memory_stats.numbytes);
+    printf(" logical row count=%" PRId64 "\n", ft->in_memory_logical_rows);
 }
 
 static int64_t getRootNode(FT ft) {
diff --git a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc
index 48ff28e89af15..76b1d9c713e48 100644
--- a/storage/tokudb/PerconaFT/util/tests/x1764-test.cc
+++ b/storage/tokudb/PerconaFT/util/tests/x1764-test.cc
@@ -110,7 +110,7 @@ test2 (void) {
 
 static void
 test3 (void)
-// Compare the simple version to the highly optimized verison.
+// Compare the simple version to the highly optimized version.
 {
     const int datalen = 1000;
     char data[datalen];
diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc
index 62a704676618a..0afe9958b856c 100644
--- a/storage/tokudb/ha_tokudb.cc
+++ b/storage/tokudb/ha_tokudb.cc
@@ -414,17 +414,17 @@ void TOKUDB_SHARE::update_row_count(
         pct_of_rows_changed_to_trigger = ((_rows * auto_threshold) / 100);
         if (_row_delta_activity >= pct_of_rows_changed_to_trigger) {
             char msg[200];
-            snprintf(
-                msg,
-                sizeof(msg),
-                "TokuDB: Auto %s background analysis for %s, delta_activity "
-                "%llu is greater than %llu percent of %llu rows.",
-                tokudb::sysvars::analyze_in_background(thd) > 0 ?
-                    "scheduling" : "running",
-                full_table_name(),
-                _row_delta_activity,
-                auto_threshold,
-                (ulonglong)(_rows));
+            snprintf(msg,
+                     sizeof(msg),
+                     "TokuDB: Auto %s analysis for %s, delta_activity %llu is "
+                     "greater than %llu percent of %llu rows.",
+                     tokudb::sysvars::analyze_in_background(thd) > 0
+                         ? "scheduling background"
+                         : "running foreground",
+                     full_table_name(),
+                     _row_delta_activity,
+                     auto_threshold,
+                     (ulonglong)(_rows));
 
             // analyze_standard will unlock _mutex regardless of success/failure
             int ret = analyze_standard(thd, NULL);
@@ -4129,7 +4129,7 @@ int ha_tokudb::write_row(uchar * record) {
             goto cleanup; 
         }
         if (curr_num_DBs == 1) {
-            error = insert_row_to_main_dictionary(record,&prim_key, &row, txn);
+            error = insert_row_to_main_dictionary(record, &prim_key, &row, txn);
             if (error) { goto cleanup; }
         } else {
             error = insert_rows_to_dictionaries_mult(&prim_key, &row, txn, thd);
@@ -6176,7 +6176,7 @@ int ha_tokudb::info(uint flag) {
             // we should always have a primary key
             assert_always(share->file != NULL);
 
-            error = estimate_num_rows(share->file,&num_rows, txn);
+            error = estimate_num_rows(share->file, &num_rows, txn);
             if (error == 0) {
                 share->set_row_count(num_rows, false);
                 stats.records = num_rows;
diff --git a/storage/tokudb/ha_tokudb_admin.cc b/storage/tokudb/ha_tokudb_admin.cc
index db3d6c112d499..6d8e7173c8d8b 100644
--- a/storage/tokudb/ha_tokudb_admin.cc
+++ b/storage/tokudb/ha_tokudb_admin.cc
@@ -7,7 +7,7 @@ This file is part of TokuDB
 
 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
-    TokuDBis is free software: you can redistribute it and/or modify
+    TokuDB is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License, version 2,
     as published by the Free Software Foundation.
 
@@ -43,13 +43,11 @@ class recount_rows_t : public tokudb::background::job_manager_t::job_t {
     virtual ~recount_rows_t();
 
     virtual const char* key();
-
-    virtual void status(
-        char* database,
-        char* table,
-        char* type,
-        char* params,
-        char* status);
+    virtual const char* database();
+    virtual const char* table();
+    virtual const char* type();
+    virtual const char* parameters();
+    virtual const char* status();
 
 protected:
     virtual void on_run();
@@ -64,6 +62,8 @@ class recount_rows_t : public tokudb::background::job_manager_t::job_t {
     ulonglong       _throttle;
 
     // for recount rows status reporting
+    char            _parameters[256];
+    char            _status[1024];
     int             _result;
     ulonglong       _recount_start; // in microseconds
     ulonglong       _total_elapsed_time; // in microseconds
@@ -78,7 +78,6 @@ class recount_rows_t : public tokudb::background::job_manager_t::job_t {
         uint64_t deleted,
         void* extra);
     int analyze_recount_rows_progress(uint64_t count, uint64_t deleted);
-    void get_analyze_status(char*);
 };
 
 void* recount_rows_t::operator new(size_t sz) {
@@ -114,10 +113,19 @@ recount_rows_t::recount_rows_t(
     }
 
     _throttle = tokudb::sysvars::analyze_throttle(thd);
+
+    snprintf(_parameters,
+             sizeof(_parameters),
+             "TOKUDB_ANALYZE_THROTTLE=%llu;",
+             _throttle);
+    _status[0] = '\0';
 }
 recount_rows_t::~recount_rows_t() {
 }
 void recount_rows_t::on_run() {
+    const char* orig_proc_info = NULL;
+    if (_thd)
+        orig_proc_info = tokudb_thd_get_proc_info(_thd);
     _recount_start = tokudb::time::microsec();
     _total_elapsed_time = 0;
 
@@ -171,6 +179,8 @@ void recount_rows_t::on_run() {
         _result,
         _share->row_count());
 error:
+    if(_thd)
+        tokudb_thd_set_proc_info(_thd, orig_proc_info);
     return;
 }
 void recount_rows_t::on_destroy() {
@@ -179,18 +189,21 @@ void recount_rows_t::on_destroy() {
 const char* recount_rows_t::key() {
     return _share->full_table_name();
 }
-void recount_rows_t::status(
-    char* database,
-    char* table,
-    char* type,
-    char* params,
-    char* status) {
-
-    strcpy(database, _share->database_name());
-    strcpy(table, _share->table_name());
-    strcpy(type, "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS");
-    sprintf(params, "TOKUDB_ANALYZE_THROTTLE=%llu;", _throttle);
-    get_analyze_status(status);
+const char* recount_rows_t::database() {
+    return _share->database_name();
+}
+const char* recount_rows_t::table() {
+    return _share->table_name();
+}
+const char* recount_rows_t::type() {
+    static const char* type = "TOKUDB_ANALYZE_MODE_RECOUNT_ROWS";
+    return type;
+}
+const char* recount_rows_t::parameters() {
+    return _parameters;
+}
+const char* recount_rows_t::status() {
+    return _status;
 }
 int recount_rows_t::analyze_recount_rows_progress(
     uint64_t count,
@@ -217,12 +230,32 @@ int recount_rows_t::analyze_recount_rows_progress(
             return ER_ABORTING_CONNECTION;
         }
 
+        // rebuild status
+        // There is a potential race condition here:
+        // _status is used here for tokudb_thd_set_proc_info and it is also used
+        // for the status column in i_s.background_job_status.
+        // If someone happens to be querying/building the i_s table
+        // at the exact same time that the status is being rebuilt here,
+        // the i_s table could get some garbage status.
+        // Locking the job manager is a little heavy-handed but it works: it
+        // prevents us from changing the status while someone might be observing
+        // us, and it prevents someone from observing us while we change the
+        // status.
+        tokudb::background::_job_manager->lock();
+        snprintf(_status,
+                 sizeof(_status),
+                 "recount_rows %s.%s counted %llu rows and %llu deleted "
+                 "in %llu seconds.",
+                 _share->database_name(),
+                 _share->table_name(),
+                 _rows,
+                 _deleted_rows,
+                 _total_elapsed_time / tokudb::time::MICROSECONDS);
+        tokudb::background::_job_manager->unlock();
+
         // report
-        if (_thd) {
-            char status[256];
-            get_analyze_status(status);
-            thd_proc_info(_thd, status);
-        }
+        if (_thd)
+            tokudb_thd_set_proc_info(_thd, _status);
 
         // throttle
         // given the throttle value, lets calculate the maximum number of rows
@@ -238,18 +271,6 @@ int recount_rows_t::analyze_recount_rows_progress(
     }
     return 0;
 }
-void recount_rows_t::get_analyze_status(char* msg) {
-    sprintf(
-        msg,
-        "recount_rows %s.%s counted %llu rows and %llu deleted in %llu "
-        "seconds.",
-        _share->database_name(),
-        _share->table_name(),
-        _rows,
-        _deleted_rows,
-        _total_elapsed_time / tokudb::time::MICROSECONDS);
-}
-
 
 class standard_t : public tokudb::background::job_manager_t::job_t {
 public:
@@ -261,13 +282,11 @@ class standard_t : public tokudb::background::job_manager_t::job_t {
     virtual ~standard_t();
 
     virtual const char* key(void);
-
-    virtual void status(
-        char* database,
-        char* table,
-        char* type,
-        char* params,
-        char* status);
+    virtual const char* database();
+    virtual const char* table();
+    virtual const char* type();
+    virtual const char* parameters();
+    virtual const char* status();
 
 protected:
     virtual void on_run();
@@ -284,6 +303,8 @@ class standard_t : public tokudb::background::job_manager_t::job_t {
     double          _delete_fraction;
 
     // for analyze status reporting, may also use other state
+    char            _parameters[256];
+    char            _status[1024];
     int             _result;
     ulonglong       _analyze_start; // in microseconds
     ulonglong       _total_elapsed_time; // in microseconds
@@ -305,7 +326,6 @@ class standard_t : public tokudb::background::job_manager_t::job_t {
         uint64_t deleted_rows);
     bool analyze_standard_cursor_callback(uint64_t deleted_rows);
 
-    void get_analyze_status(char*);
     int analyze_key_progress();
     int analyze_key(uint64_t* rec_per_key_part);
 };
@@ -351,6 +371,16 @@ standard_t::standard_t(
     _time_limit =
         tokudb::sysvars::analyze_time(thd) * tokudb::time::MICROSECONDS;
     _delete_fraction = tokudb::sysvars::analyze_delete_fraction(thd);
+
+    snprintf(_parameters,
+             sizeof(_parameters),
+             "TOKUDB_ANALYZE_DELETE_FRACTION=%f; "
+             "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;",
+             _delete_fraction,
+             _time_limit / tokudb::time::MICROSECONDS,
+             _throttle);
+
+    _status[0] = '\0';
 }
 standard_t::~standard_t() {
 }
@@ -358,6 +388,10 @@ void standard_t::on_run() {
     DB_BTREE_STAT64 stat64;
     uint64_t rec_per_key_part[_share->_max_key_parts];
     uint64_t total_key_parts = 0;
+    const char* orig_proc_info = NULL;
+    if (_thd)
+        orig_proc_info = tokudb_thd_get_proc_info(_thd);
+
     _analyze_start = tokudb::time::microsec();
     _half_time = _time_limit > 0 ? _time_limit/2 : 0;
 
@@ -395,7 +429,7 @@ void standard_t::on_run() {
             _result = HA_ADMIN_FAILED;
         }
         if (_thd && (_result == HA_ADMIN_FAILED ||
-            (double)_deleted_rows >
+            static_cast<double>(_deleted_rows) >
                 _delete_fraction * (_rows + _deleted_rows))) {
 
             char name[256]; int namelen;
@@ -460,8 +494,9 @@ void standard_t::on_run() {
     }
 
 error:
+    if (_thd)
+        tokudb_thd_set_proc_info(_thd, orig_proc_info);
     return;
-
 }
 void standard_t::on_destroy() {
     _share->lock();
@@ -472,24 +507,21 @@ void standard_t::on_destroy() {
 const char* standard_t::key() {
     return _share->full_table_name();
 }
-void standard_t::status(
-    char* database,
-    char* table,
-    char* type,
-    char* params,
-    char* status) {
-
-    strcpy(database, _share->database_name());
-    strcpy(table, _share->table_name());
-    strcpy(type, "TOKUDB_ANALYZE_MODE_STANDARD");
-    sprintf(
-        params,
-        "TOKUDB_ANALYZE_DELETE_FRACTION=%f; "
-        "TOKUDB_ANALYZE_TIME=%llu; TOKUDB_ANALYZE_THROTTLE=%llu;",
-        _delete_fraction,
-        _time_limit / tokudb::time::MICROSECONDS,
-        _throttle);
-    get_analyze_status(status);
+const char* standard_t::database() {
+    return _share->database_name();
+}
+const char* standard_t::table() {
+    return _share->table_name();
+}
+const char* standard_t::type() {
+    static const char* type = "TOKUDB_ANALYZE_MODE_STANDARD";
+    return type;
+}
+const char* standard_t::parameters() {
+    return _parameters;
+}
+const char* standard_t::status() {
+    return _status;
 }
 bool standard_t::analyze_standard_cursor_callback(
     void* extra,
@@ -502,41 +534,6 @@ bool standard_t::analyze_standard_cursor_callback(uint64_t deleted_rows) {
     _ticks += deleted_rows;
     return analyze_key_progress() != 0;
 }
-void standard_t::get_analyze_status(char* msg) {
-    static const char* scan_direction_str[] = {
-        "not scanning",
-        "scanning forward",
-        "scanning backward",
-        "scan unknown"
-    };
-
-    const char* scan_direction = NULL;
-    switch (_scan_direction) {
-        case 0: scan_direction = scan_direction_str[0]; break;
-        case DB_NEXT: scan_direction = scan_direction_str[1]; break;
-        case DB_PREV: scan_direction = scan_direction_str[2]; break;
-        default: scan_direction = scan_direction_str[3]; break;
-    }
-
-    float progress_rows = 0.0;
-    if (_share->row_count() > 0)
-        progress_rows = (float) _rows / (float) _share->row_count();
-    float progress_time = 0.0;
-    if (_time_limit > 0)
-        progress_time = (float) _key_elapsed_time / (float) _time_limit;
-    sprintf(
-        msg,
-        "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% time, "
-        "%s",
-        _share->database_name(),
-        _share->table_name(),
-        _share->_key_descriptors[_current_key]._name,
-        _current_key,
-        _share->_keys,
-        progress_rows * 100.0,
-        progress_time * 100.0,
-        scan_direction);
-}
 int standard_t::analyze_key_progress(void) {
     if (_ticks > 1000) {
         _ticks = 0;
@@ -546,19 +543,72 @@ int standard_t::analyze_key_progress(void) {
         if ((_thd && thd_killed(_thd)) || cancelled()) {
             // client killed
             return ER_ABORTING_CONNECTION;
-        } else if(_time_limit > 0 &&
-                  (uint64_t)_key_elapsed_time > _time_limit) {
+        } else if (_time_limit > 0 &&
+                   static_cast<uint64_t>(_key_elapsed_time) > _time_limit) {
             // time limit reached
             return ETIME;
         }
 
-        // report
-        if (_thd) {
-            char status[256];
-            get_analyze_status(status);
-            thd_proc_info(_thd, status);
+        // rebuild status
+        // There is a potential race condition here:
+        // _status is used here for tokudb_thd_set_proc_info and it is also used
+        // for the status column in i_s.background_job_status.
+        // If someone happens to be querying/building the i_s table
+        // at the exact same time that the status is being rebuilt here,
+        // the i_s table could get some garbage status.
+        // Locking the job manager is a little heavy-handed but it works: it
+        // prevents us from changing the status while someone might be observing
+        // us, and it prevents someone from observing us while we change the
+        // status.
+        static const char* scan_direction_str[] = {"not scanning",
+                                                   "scanning forward",
+                                                   "scanning backward",
+                                                   "scan unknown"};
+
+        const char* scan_direction = NULL;
+        switch (_scan_direction) {
+            case 0:
+                scan_direction = scan_direction_str[0];
+                break;
+            case DB_NEXT:
+                scan_direction = scan_direction_str[1];
+                break;
+            case DB_PREV:
+                scan_direction = scan_direction_str[2];
+                break;
+            default:
+                scan_direction = scan_direction_str[3];
+                break;
         }
 
+        float progress_rows = 0.0;
+        if (_share->row_count() > 0)
+            progress_rows = static_cast<float>(_rows) /
+                            static_cast<float>(_share->row_count());
+        float progress_time = 0.0;
+        if (_time_limit > 0)
+            progress_time = static_cast<float>(_key_elapsed_time) /
+                            static_cast<float>(_time_limit);
+        tokudb::background::_job_manager->lock();
+        snprintf(
+            _status,
+            sizeof(_status),
+            "analyze table standard %s.%s.%s %llu of %u %.lf%% rows %.lf%% "
+            "time, %s",
+            _share->database_name(),
+            _share->table_name(),
+            _share->_key_descriptors[_current_key]._name,
+            _current_key,
+            _share->_keys,
+            progress_rows * 100.0,
+            progress_time * 100.0,
+            scan_direction);
+        tokudb::background::_job_manager->unlock();
+
+        // report
+        if (_thd)
+            tokudb_thd_set_proc_info(_thd, _status);
+
         // throttle
         // given the throttle value, lets calculate the maximum number of rows
         // we should have seen so far in a .1 sec resolution
@@ -694,6 +744,11 @@ int standard_t::analyze_key(uint64_t* rec_per_key_part) {
     assert_always(close_error == 0);
 
 done:
+    // in case we timed out (bunch of deleted records) without hitting a
+    // single row
+    if (_rows == 0)
+        _rows = 1;
+
     // return cardinality
     for (uint64_t i = 0; i < num_key_parts; i++) {
         rec_per_key_part[i] = _rows / unique_rows[i];
@@ -733,7 +788,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* thd,DB_TXN* txn) {
 
     assert_always(thd != NULL);
 
-    const char *orig_proc_info = tokudb_thd_get_proc_info(thd);
     int result = HA_ADMIN_OK;
 
     tokudb::analyze::recount_rows_t* job
@@ -753,8 +807,6 @@ int TOKUDB_SHARE::analyze_recount_rows(THD* thd,DB_TXN* txn) {
         result = HA_ADMIN_FAILED;
     }
 
-    thd_proc_info(thd, orig_proc_info);
-
     TOKUDB_HANDLER_DBUG_RETURN(result);
 }
 
@@ -778,8 +830,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) {
         TOKUDB_HANDLER_DBUG_RETURN(result);
     }
 
-    const char *orig_proc_info = tokudb_thd_get_proc_info(thd);
-
     tokudb::analyze::standard_t* job
         = new tokudb::analyze::standard_t(txn == NULL ? false : true, thd,
                                           this, txn);
@@ -808,8 +858,6 @@ int TOKUDB_SHARE::analyze_standard(THD* thd, DB_TXN* txn) {
 
     lock();
 
-    thd_proc_info(thd, orig_proc_info);
-
     TOKUDB_HANDLER_DBUG_RETURN(result);
 }
 
diff --git a/storage/tokudb/hatoku_defines.h b/storage/tokudb/hatoku_defines.h
index 3ff3e5377781c..a8d4a38e1c36d 100644
--- a/storage/tokudb/hatoku_defines.h
+++ b/storage/tokudb/hatoku_defines.h
@@ -7,7 +7,7 @@ This file is part of TokuDB
 
 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
-    TokuDBis is free software: you can redistribute it and/or modify
+    TokuDB is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License, version 2,
     as published by the Free Software Foundation.
 
@@ -233,9 +233,12 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 // mysql 5.6.15 removed the test macro, so we define our own
 #define tokudb_test(e) ((e) ? 1 : 0)
 
-inline const char* tokudb_thd_get_proc_info(const THD *thd) {
+inline const char* tokudb_thd_get_proc_info(const THD* thd) {
     return thd->proc_info;
 }
+inline void tokudb_thd_set_proc_info(THD* thd, const char* proc_info) {
+    thd_proc_info(thd, proc_info);
+}
 
 // uint3korr reads 4 bytes and valgrind reports an error, so we use this function instead
 inline uint tokudb_uint3korr(const uchar *a) {
diff --git a/storage/tokudb/hatoku_hton.cc b/storage/tokudb/hatoku_hton.cc
index e7dfbd810c213..1581a7b76df15 100644
--- a/storage/tokudb/hatoku_hton.cc
+++ b/storage/tokudb/hatoku_hton.cc
@@ -531,6 +531,7 @@ static int tokudb_init_func(void *p) {
     db_env->change_fsync_log_period(db_env, tokudb::sysvars::fsync_log_period);
 
     db_env->set_lock_timeout_callback(db_env, tokudb_lock_timeout_callback);
+    db_env->set_dir_per_db(db_env, tokudb::sysvars::dir_per_db);
 
     db_env->set_loader_memory_size(
         db_env,
diff --git a/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result b/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result
deleted file mode 100644
index c2a598632caad..0000000000000
--- a/storage/tokudb/mysql-test/rpl/r/rpl_foreign_key_tokudb.result
+++ /dev/null
@@ -1,51 +0,0 @@
-include/master-slave.inc
-[connection master]
-CREATE TABLE t1 (a INT AUTO_INCREMENT KEY) ENGINE=TokuDB;
-CREATE TABLE t2 (b INT AUTO_INCREMENT KEY, c INT, FOREIGN KEY(b) REFERENCES t1(a)) ENGINE=TokuDB;
-SET FOREIGN_KEY_CHECKS=0;
-INSERT INTO t1 VALUES (10);
-INSERT INTO t1 VALUES (NULL),(NULL),(NULL);
-INSERT INTO t2 VALUES (5,0);
-INSERT INTO t2 VALUES (NULL,LAST_INSERT_ID());
-SET FOREIGN_KEY_CHECKS=1;
-SELECT * FROM t1 ORDER BY a;
-a
-10
-11
-12
-13
-SELECT * FROM t2 ORDER BY b;
-b	c
-5	0
-6	11
-SELECT * FROM t1 ORDER BY a;
-a
-10
-11
-12
-13
-SELECT * FROM t2 ORDER BY b;
-b	c
-5	0
-6	11
-SET TIMESTAMP=1000000000;
-CREATE TABLE t3 ( a INT UNIQUE );
-SET FOREIGN_KEY_CHECKS=0;
-INSERT INTO t3 VALUES (1),(1);
-Got one of the listed errors
-SET FOREIGN_KEY_CHECKS=0;
-DROP TABLE IF EXISTS t1,t2,t3;
-SET FOREIGN_KEY_CHECKS=1;
-create table t1 (b int primary key) engine = TokuDB;
-create table t2 (a int primary key, b int, foreign key (b) references t1(b))
-engine = TokuDB;
-insert into t1 set b=1;
-insert into t2 set a=1, b=1;
-set foreign_key_checks=0;
-delete from t1;
-must sync w/o a problem (could not with the buggy code)
-select count(*) from t1 /* must be zero */;
-count(*)
-0
-drop table t2,t1;
-include/rpl_end.inc
diff --git a/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test b/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test
deleted file mode 100644
index d798cfd4a6232..0000000000000
--- a/storage/tokudb/mysql-test/rpl/t/rpl_foreign_key_tokudb.test
+++ /dev/null
@@ -1,4 +0,0 @@
--- source include/not_ndb_default.inc
--- source include/have_tokudb.inc
-let $engine_type=TokuDB;
--- source extra/rpl_tests/rpl_foreign_key.test
diff --git a/storage/tokudb/mysql-test/tokudb/disabled.def b/storage/tokudb/mysql-test/tokudb/disabled.def
index c98a8aa622a93..ddefceb432ed7 100644
--- a/storage/tokudb/mysql-test/tokudb/disabled.def
+++ b/storage/tokudb/mysql-test/tokudb/disabled.def
@@ -28,3 +28,4 @@ type_timestamp_explicit:
 cluster_key_part: engine options on partitioned tables
 i_s_tokudb_lock_waits_released: unstable, race conditions
 i_s_tokudb_locks_released: unstable, race conditions
+row_format: n/a
diff --git a/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc b/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc
new file mode 100644
index 0000000000000..b10ad21dd95ce
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/include/table_files_replace_pattern.inc
@@ -0,0 +1 @@
+--replace_regex  /[a-z0-9]+_[a-z0-9]+_[a-z0-9]+(_[BP]_[a-z0-9]+){0,1}\./id./ /sqlx_[a-z0-9]+_[a-z0-9]+_/sqlx_nnnn_nnnn_/ /sqlx-[a-z0-9]+_[a-z0-9]+/sqlx-nnnn_nnnn/ /#p#/#P#/ /#sp#/#SP#/ /#tmp#/#TMP#/
diff --git a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
index 5769ee7407134..8b53f89efa337 100644
--- a/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
+++ b/storage/tokudb/mysql-test/tokudb/r/background_job_manager.result
@@ -25,7 +25,7 @@ TokuDB_background_job_status	CREATE TEMPORARY TABLE `TokuDB_background_job_statu
   `scheduler` varchar(32) NOT NULL DEFAULT '',
   `scheduled_time` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
   `started_time` datetime DEFAULT NULL,
-  `status` varchar(256) DEFAULT NULL
+  `status` varchar(1024) DEFAULT NULL
 ) ENGINE=MEMORY DEFAULT CHARSET=utf8
 create table t1 (a int not null auto_increment, b int, c int, primary key(a), key kb(b), key kc(c), key kabc(a,b,c), key kab(a,b), key kbc(b,c));
 insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3);
diff --git a/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result b/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result
new file mode 100644
index 0000000000000..a36dbcb28c042
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/r/dir-per-db-with-custom-data-dir.result
@@ -0,0 +1,10 @@
+SELECT @@tokudb_dir_per_db;
+@@tokudb_dir_per_db
+1
+TOKUDB_DATA_DIR_CHANGED
+1
+CREATE DATABASE tokudb_test;
+USE tokudb_test;
+CREATE TABLE t (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY) ENGINE=tokudb;
+DROP TABLE t;
+DROP DATABASE tokudb_test;
diff --git a/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result b/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result
new file mode 100644
index 0000000000000..371f97406c8eb
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/r/dir_per_db.result
@@ -0,0 +1,180 @@
+########
+#  tokudb_dir_per_db = 1
+########
+SET GLOBAL tokudb_dir_per_db= 1;
+########
+#  CREATE
+########
+CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+INSERT INTO t1 SET b = 10;
+INSERT INTO t1 SET b = 20;
+SELECT b FROM t1 ORDER BY a;
+b
+10
+20
+CREATE INDEX b ON t1 (b);
+CREATE INDEX ab ON t1 (a,b);
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+t1_key_ab_id.tokudb
+t1_key_b_id.tokudb
+t1_main_id.tokudb
+t1_status_id.tokudb
+########
+#  RENAME
+########
+RENAME TABLE t1 TO t2;
+SELECT b FROM t2 ORDER BY a;
+b
+10
+20
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+t2_key_ab_id.tokudb
+t2_key_b_id.tokudb
+t2_main_id.tokudb
+t2_status_id.tokudb
+########
+#  DROP
+########
+DROP TABLE t2;
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+########
+#  tokudb_dir_per_db = 0
+########
+SET GLOBAL tokudb_dir_per_db= 0;
+########
+#  CREATE
+########
+CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+INSERT INTO t1 SET b = 10;
+INSERT INTO t1 SET b = 20;
+SELECT b FROM t1 ORDER BY a;
+b
+10
+20
+CREATE INDEX b ON t1 (b);
+CREATE INDEX ab ON t1 (a,b);
+## Looking for *.tokudb files in data_dir
+_test_t1_key_ab_id.tokudb
+_test_t1_key_b_id.tokudb
+_test_t1_main_id.tokudb
+_test_t1_status_id.tokudb
+## Looking for *.tokudb files in data_dir/test
+########
+#  RENAME
+########
+RENAME TABLE t1 TO t2;
+SELECT b FROM t2 ORDER BY a;
+b
+10
+20
+## Looking for *.tokudb files in data_dir
+_test_t1_key_ab_id.tokudb
+_test_t1_key_b_id.tokudb
+_test_t1_main_id.tokudb
+_test_t1_status_id.tokudb
+## Looking for *.tokudb files in data_dir/test
+########
+#  DROP
+########
+DROP TABLE t2;
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+########
+#  CREATE on tokudb_dir_per_db = 0 and RENAME on tokudb_dir_per_db = 1 and vice versa
+########
+########
+#  tokudb_dir_per_db = (1 - 1);
+########
+SET GLOBAL tokudb_dir_per_db= (1 - 1);;
+########
+#  CREATE
+########
+CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+INSERT INTO t1 SET b = 10;
+INSERT INTO t1 SET b = 20;
+SELECT b FROM t1 ORDER BY a;
+b
+10
+20
+CREATE INDEX b ON t1 (b);
+CREATE INDEX ab ON t1 (a,b);
+## Looking for *.tokudb files in data_dir
+_test_t1_key_ab_id.tokudb
+_test_t1_key_b_id.tokudb
+_test_t1_main_id.tokudb
+_test_t1_status_id.tokudb
+## Looking for *.tokudb files in data_dir/test
+########
+#  tokudb_dir_per_db = 1
+########
+SET GLOBAL tokudb_dir_per_db= 1;
+########
+#  RENAME
+########
+RENAME TABLE t1 TO t2;
+SELECT b FROM t2 ORDER BY a;
+b
+10
+20
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+t2_key_ab_id.tokudb
+t2_key_b_id.tokudb
+t2_main_id.tokudb
+t2_status_id.tokudb
+########
+#  DROP
+########
+DROP TABLE t2;
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+########
+#  tokudb_dir_per_db = (1 - 0);
+########
+SET GLOBAL tokudb_dir_per_db= (1 - 0);;
+########
+#  CREATE
+########
+CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+INSERT INTO t1 SET b = 10;
+INSERT INTO t1 SET b = 20;
+SELECT b FROM t1 ORDER BY a;
+b
+10
+20
+CREATE INDEX b ON t1 (b);
+CREATE INDEX ab ON t1 (a,b);
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+t1_key_ab_id.tokudb
+t1_key_b_id.tokudb
+t1_main_id.tokudb
+t1_status_id.tokudb
+########
+#  tokudb_dir_per_db = 0
+########
+SET GLOBAL tokudb_dir_per_db= 0;
+########
+#  RENAME
+########
+RENAME TABLE t1 TO t2;
+SELECT b FROM t2 ORDER BY a;
+b
+10
+20
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+t1_key_ab_id.tokudb
+t1_key_b_id.tokudb
+t1_main_id.tokudb
+t1_status_id.tokudb
+########
+#  DROP
+########
+DROP TABLE t2;
+## Looking for *.tokudb files in data_dir
+## Looking for *.tokudb files in data_dir/test
+SET GLOBAL tokudb_dir_per_db=default;
diff --git a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result
index 6f9592ddc1fc0..ecd4d07720611 100644
--- a/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result
+++ b/storage/tokudb/mysql-test/tokudb/r/i_s_tokudb_lock_waits_released.result
@@ -2,6 +2,7 @@ set default_storage_engine='tokudb';
 set tokudb_prelock_empty=false;
 drop table if exists t;
 create table t (id int primary key);
+t should be empty
 select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
 trx_id	trx_mysql_thread_id
 select * from information_schema.tokudb_locks;
@@ -15,17 +16,21 @@ insert into t values (1);
 set autocommit=0;
 set tokudb_lock_timeout=600000;
 insert into t values (1);
+should find the presence of a lock on 1st transaction
 select * from information_schema.tokudb_locks;
 locks_trx_id	locks_mysql_thread_id	locks_dname	locks_key_left	locks_key_right	locks_table_schema	locks_table_name	locks_table_dictionary_name
 TRX_ID	MYSQL_ID	./test/t-main	0001000000	0001000000	test	t	main
+should find the presence of a lock_wait on the 2nd transaction
 select * from information_schema.tokudb_lock_waits;
 requesting_trx_id	blocking_trx_id	lock_waits_dname	lock_waits_key_left	lock_waits_key_right	lock_waits_start_time	lock_waits_table_schema	lock_waits_table_name	lock_waits_table_dictionary_name
 REQUEST_TRX_ID	BLOCK_TRX_ID	./test/t-main	0001000000	0001000000	LOCK_WAITS_START_TIME	test	t	main
+should find the presence of two transactions
 select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
 trx_id	trx_mysql_thread_id
 TRX_ID	MYSQL_ID
 TRX_ID	MYSQL_ID
 commit;
+verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
 select * from information_schema.tokudb_locks;
 locks_trx_id	locks_mysql_thread_id	locks_dname	locks_key_left	locks_key_right	locks_table_schema	locks_table_name	locks_table_dictionary_name
 TRX_ID	MYSQL_ID	./test/t-main	0001000000	0001000000	test	t	main
@@ -33,6 +38,8 @@ select * from information_schema.tokudb_lock_waits;
 requesting_trx_id	blocking_trx_id	lock_waits_dname	lock_waits_key_left	lock_waits_key_right	lock_waits_start_time	lock_waits_table_schema	lock_waits_table_name	lock_waits_table_dictionary_name
 ERROR 23000: Duplicate entry '1' for key 'PRIMARY'
 commit;
+verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes
+verify that the lock on the 2nd transaction has been released, should be be empty
 select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
 trx_id	trx_mysql_thread_id
 select * from information_schema.tokudb_locks;
@@ -46,23 +53,28 @@ replace into t values (1);
 set autocommit=0;
 set tokudb_lock_timeout=600000;
 replace into t values (1);
+should find the presence of a lock on 1st transaction
 select * from information_schema.tokudb_locks;
 locks_trx_id	locks_mysql_thread_id	locks_dname	locks_key_left	locks_key_right	locks_table_schema	locks_table_name	locks_table_dictionary_name
 TRX_ID	MYSQL_ID	./test/t-main	0001000000	0001000000	test	t	main
+should find the presence of a lock_wait on the 2nd transaction
 select * from information_schema.tokudb_lock_waits;
 requesting_trx_id	blocking_trx_id	lock_waits_dname	lock_waits_key_left	lock_waits_key_right	lock_waits_start_time	lock_waits_table_schema	lock_waits_table_name	lock_waits_table_dictionary_name
 REQUEST_TRX_ID	BLOCK_TRX_ID	./test/t-main	0001000000	0001000000	LOCK_WAITS_START_TIME	test	t	main
+should find the presence of two transactions
 select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
 trx_id	trx_mysql_thread_id
 TRX_ID	MYSQL_ID
 TRX_ID	MYSQL_ID
 commit;
+verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
 select * from information_schema.tokudb_locks;
 locks_trx_id	locks_mysql_thread_id	locks_dname	locks_key_left	locks_key_right	locks_table_schema	locks_table_name	locks_table_dictionary_name
 TRX_ID	MYSQL_ID	./test/t-main	0001000000	0001000000	test	t	main
 select * from information_schema.tokudb_lock_waits;
 requesting_trx_id	blocking_trx_id	lock_waits_dname	lock_waits_key_left	lock_waits_key_right	lock_waits_start_time	lock_waits_table_schema	lock_waits_table_name	lock_waits_table_dictionary_name
 commit;
+verify that the lock on the 2nd transaction has been released, should be be empty
 select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
 trx_id	trx_mysql_thread_id
 select * from information_schema.tokudb_locks;
diff --git a/storage/tokudb/mysql-test/tokudb/r/row_format.result b/storage/tokudb/mysql-test/tokudb/r/row_format.result
new file mode 100644
index 0000000000000..cb66914844562
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/r/row_format.result
@@ -0,0 +1,51 @@
+CREATE TABLE tokudb_row_format_test_1 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT;
+CREATE TABLE tokudb_row_format_test_2 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST;
+CREATE TABLE tokudb_row_format_test_3 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL;
+CREATE TABLE tokudb_row_format_test_4 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED;
+CREATE TABLE tokudb_row_format_test_5 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB;
+CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA;
+CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ;
+CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name;
+table_name	row_format	engine
+tokudb_row_format_test_1	tokudb_zlib	TokuDB
+tokudb_row_format_test_2	tokudb_quicklz	TokuDB
+tokudb_row_format_test_3	tokudb_lzma	TokuDB
+tokudb_row_format_test_4	tokudb_uncompressed	TokuDB
+tokudb_row_format_test_5	tokudb_zlib	TokuDB
+tokudb_row_format_test_6	tokudb_lzma	TokuDB
+tokudb_row_format_test_7	tokudb_quicklz	TokuDB
+tokudb_row_format_test_8	tokudb_snappy	TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name	row_format	engine
+tokudb_row_format_test_1	tokudb_quicklz	TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name	row_format	engine
+tokudb_row_format_test_1	tokudb_lzma	TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name	row_format	engine
+tokudb_row_format_test_1	tokudb_uncompressed	TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name	row_format	engine
+tokudb_row_format_test_1	tokudb_zlib	TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name	row_format	engine
+tokudb_row_format_test_1	tokudb_snappy	TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name	row_format	engine
+tokudb_row_format_test_1	tokudb_quicklz	TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name	row_format	engine
+tokudb_row_format_test_1	tokudb_lzma	TokuDB
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+table_name	row_format	engine
+tokudb_row_format_test_1	tokudb_zlib	TokuDB
+DROP TABLE tokudb_row_format_test_1, tokudb_row_format_test_2, tokudb_row_format_test_3, tokudb_row_format_test_4, tokudb_row_format_test_5, tokudb_row_format_test_6, tokudb_row_format_test_7, tokudb_row_format_test_8;
diff --git a/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result b/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result
index 5c1c53946a464..b287c70469e08 100644
--- a/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result
+++ b/storage/tokudb/mysql-test/tokudb/r/rows-32m-rand-insert.result
@@ -1009,6 +1009,7 @@ Table	Op	Msg_type	Msg_text
 test.t	check	status	OK
 optimize table t;
 Table	Op	Msg_type	Msg_text
+test.t	optimize	note	Table does not support optimize, doing recreate + analyze instead
 test.t	optimize	status	OK
 check table t;
 Table	Op	Msg_type	Msg_text
diff --git a/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt
new file mode 100644
index 0000000000000..a9090f4d1157e
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir-master.opt
@@ -0,0 +1 @@
+--loose-tokudb_data_dir="$MYSQL_TMP_DIR" --loose-tokudb-dir-per-db=1
diff --git a/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test
new file mode 100644
index 0000000000000..7f415a7251595
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/t/dir-per-db-with-custom-data-dir.test
@@ -0,0 +1,16 @@
+--source include/have_tokudb.inc
+
+SELECT @@tokudb_dir_per_db;
+
+--disable_query_log
+--eval SELECT STRCMP(@@tokudb_data_dir, '$MYSQL_TMP_DIR') = 0 AS TOKUDB_DATA_DIR_CHANGED
+--enable_query_log
+
+CREATE DATABASE tokudb_test;
+USE tokudb_test;
+CREATE TABLE t (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY) ENGINE=tokudb;
+
+--file_exists $MYSQL_TMP_DIR/tokudb_test
+
+DROP TABLE t;
+DROP DATABASE tokudb_test;
diff --git a/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test b/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test
new file mode 100644
index 0000000000000..b638b706d8719
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/t/dir_per_db.test
@@ -0,0 +1,76 @@
+source include/have_tokudb.inc;
+
+--let $DB= test
+--let $DATADIR= `select @@datadir`
+--let $i= 2
+
+while ($i) {
+  --dec $i
+  --echo ########
+  --echo #  tokudb_dir_per_db = $i
+  --echo ########
+  --eval SET GLOBAL tokudb_dir_per_db= $i
+  --echo ########
+  --echo #  CREATE
+  --echo ########
+  CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+  INSERT INTO t1 SET b = 10;
+  INSERT INTO t1 SET b = 20;
+  SELECT b FROM t1 ORDER BY a;
+  CREATE INDEX b ON t1 (b);
+  CREATE INDEX ab ON t1 (a,b);
+  --source dir_per_db_show_table_files.inc
+  --echo ########
+  --echo #  RENAME
+  --echo ########
+  RENAME TABLE t1 TO t2;
+  SELECT b FROM t2 ORDER BY a;
+  --source dir_per_db_show_table_files.inc
+  --echo ########
+  --echo #  DROP
+  --echo ########
+  DROP TABLE t2;
+  --source dir_per_db_show_table_files.inc
+}
+
+--echo ########
+--echo #  CREATE on tokudb_dir_per_db = 0 and RENAME on tokudb_dir_per_db = 1 and vice versa
+--echo ########
+
+--let $i= 2
+
+while ($i) {
+  --dec $i
+  --let $inv_i= (1 - $i);
+  --echo ########
+  --echo #  tokudb_dir_per_db = $inv_i
+  --echo ########
+  --eval SET GLOBAL tokudb_dir_per_db= $inv_i
+  --echo ########
+  --echo #  CREATE
+  --echo ########
+  CREATE TABLE t1 (a INT UNSIGNED AUTO_INCREMENT PRIMARY KEY, b INT(10) UNSIGNED NOT NULL) ENGINE=tokudb;
+  INSERT INTO t1 SET b = 10;
+  INSERT INTO t1 SET b = 20;
+  SELECT b FROM t1 ORDER BY a;
+  CREATE INDEX b ON t1 (b);
+  CREATE INDEX ab ON t1 (a,b);
+  --source dir_per_db_show_table_files.inc
+  --echo ########
+  --echo #  tokudb_dir_per_db = $i
+  --echo ########
+  --eval SET GLOBAL tokudb_dir_per_db= $i
+  --echo ########
+  --echo #  RENAME
+  --echo ########
+  RENAME TABLE t1 TO t2;
+  SELECT b FROM t2 ORDER BY a;
+  --source dir_per_db_show_table_files.inc
+  --echo ########
+  --echo #  DROP
+  --echo ########
+  DROP TABLE t2;
+  --source dir_per_db_show_table_files.inc
+}
+
+SET GLOBAL tokudb_dir_per_db=default;
diff --git a/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc b/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc
new file mode 100644
index 0000000000000..bdf7d5b235ff9
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/t/dir_per_db_show_table_files.inc
@@ -0,0 +1,9 @@
+--sorted_result
+
+--echo ## Looking for *.tokudb files in data_dir
+--source include/table_files_replace_pattern.inc
+--list_files $DATADIR *.tokudb
+
+--echo ## Looking for *.tokudb files in data_dir/$DB
+--source include/table_files_replace_pattern.inc
+--list_files $DATADIR/$DB/ *.tokudb
diff --git a/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test b/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test
index d8ce18b3aa7f4..6534175d61902 100644
--- a/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test
+++ b/storage/tokudb/mysql-test/tokudb/t/i_s_tokudb_lock_waits_released.test
@@ -17,7 +17,7 @@ create table t (id int primary key);
 
 # verify that txn_a insert (1) blocks txn_b insert (1) and txn_b gets a duplicate key error
 
-# should be empty
+--echo t should be empty
 select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
 select * from information_schema.tokudb_locks;
 select * from information_schema.tokudb_lock_waits;
@@ -33,7 +33,7 @@ set autocommit=0;
 set tokudb_lock_timeout=600000; # set lock wait timeout to 10 minutes
 send insert into t values (1);
 
-# should find the presence of a lock on 1st transaction
+--echo should find the presence of a lock on 1st transaction
 connection default;
 let $wait_condition= select count(*)=1 from information_schema.processlist where info='insert into t values (1)' and state='update';
 source include/wait_condition.inc;
@@ -42,17 +42,17 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait r
 replace_column 1 TRX_ID 2 MYSQL_ID; 
 select * from information_schema.tokudb_locks;
 
-# should find the presence of a lock_wait on the 2nd transaction
+--echo should find the presence of a lock_wait on the 2nd transaction
 replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME;
 select * from information_schema.tokudb_lock_waits;
 
-# should find the presence of two transactions
+--echo should find the presence of two transactions
 replace_column 1 TRX_ID 2 MYSQL_ID;
 select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
 
 connection conn_a;
 commit;
-# verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
+--echo verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
 let $wait_condition= select count(*)=1 from information_schema.tokudb_locks where locks_dname='./test/t-main';
 source include/wait_condition.inc;
 
@@ -69,10 +69,8 @@ connection default;
 disconnect conn_a;
 disconnect conn_b;
 
-# verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes
-
-# verify that the lock on the 2nd transaction has been released
-# should be be empty
+--echo verify that txn_a replace (1) blocks txn_b replace (1) and txn_b eventually gets the lock on (1) and completes
+--echo verify that the lock on the 2nd transaction has been released, should be be empty
 select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
 select * from information_schema.tokudb_locks;
 select * from information_schema.tokudb_lock_waits;
@@ -88,7 +86,7 @@ set autocommit=0;
 set tokudb_lock_timeout=600000; # set lock wait timeout to 10 minutes
 send replace into t values (1);
 
-# should find the presence of a lock on 1st transaction
+--echo should find the presence of a lock on 1st transaction
 connection default;
 let $wait_condition= select count(*)=1 from information_schema.processlist where info='replace into t values (1)' and state='update';
 source include/wait_condition.inc;
@@ -97,17 +95,19 @@ real_sleep 1; # delay a little to shorten the update -> write row -> lock wait r
 replace_column 1 TRX_ID 2 MYSQL_ID; 
 select * from information_schema.tokudb_locks;
 
-# should find the presence of a lock_wait on the 2nd transaction
+--echo should find the presence of a lock_wait on the 2nd transaction
 replace_column 1 REQUEST_TRX_ID 2 BLOCK_TRX_ID 6 LOCK_WAITS_START_TIME;
 select * from information_schema.tokudb_lock_waits;
 
-# should find the presence of two transactions
+--echo should find the presence of two transactions
 replace_column 1 TRX_ID 2 MYSQL_ID;
 select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
 
 connection conn_a;
 commit;
-# verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
+--echo verify that the lock on the 1st transaction is released and replaced by the lock for the 2nd transaction
+let $wait_condition= select count(*)=1 from information_schema.tokudb_locks where locks_dname='./test/t-main';
+source include/wait_condition.inc;
 replace_column 1 TRX_ID 2 MYSQL_ID;
 select * from information_schema.tokudb_locks;
 select * from information_schema.tokudb_lock_waits;
@@ -120,8 +120,7 @@ connection default;
 disconnect conn_a;
 disconnect conn_b;
 
-# verify that the lock on the 2nd transaction has been released
-# should be be empty
+--echo verify that the lock on the 2nd transaction has been released, should be be empty
 select trx_id,trx_mysql_thread_id from information_schema.tokudb_trx;
 select * from information_schema.tokudb_locks;
 select * from information_schema.tokudb_lock_waits;
diff --git a/storage/tokudb/mysql-test/tokudb/t/row_format.test b/storage/tokudb/mysql-test/tokudb/t/row_format.test
new file mode 100644
index 0000000000000..6533f8c06be95
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb/t/row_format.test
@@ -0,0 +1,41 @@
+#
+# Test TokuDB compression option additions to row_format
+#
+--source include/have_tokudb.inc
+
+CREATE TABLE tokudb_row_format_test_1 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT;
+CREATE TABLE tokudb_row_format_test_2 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST;
+CREATE TABLE tokudb_row_format_test_3 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL;
+CREATE TABLE tokudb_row_format_test_4 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED;
+CREATE TABLE tokudb_row_format_test_5 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB;
+CREATE TABLE tokudb_row_format_test_6 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA;
+CREATE TABLE tokudb_row_format_test_7 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ;
+CREATE TABLE tokudb_row_format_test_8 (a INT) ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY;
+
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name like 'tokudb_row_format_test%' ORDER BY table_name;
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_FAST;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SMALL;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_UNCOMPRESSED;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_ZLIB;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_SNAPPY;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_QUICKLZ;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_LZMA;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+ALTER TABLE tokudb_row_format_test_1 ENGINE=TokuDB ROW_FORMAT=TOKUDB_DEFAULT;
+SELECT table_name, row_format, engine FROM information_schema.tables WHERE table_name = 'tokudb_row_format_test_1';
+
+DROP TABLE tokudb_row_format_test_1, tokudb_row_format_test_2, tokudb_row_format_test_3, tokudb_row_format_test_4, tokudb_row_format_test_5, tokudb_row_format_test_6, tokudb_row_format_test_7, tokudb_row_format_test_8;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result
index 779d458221bd1..30e0bdbebd78b 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result
+++ b/storage/tokudb/mysql-test/tokudb_bugs/r/db938.result
@@ -23,6 +23,7 @@ set DEBUG_SYNC = 'tokudb_after_truncate_all_dictionarys SIGNAL closed WAIT_FOR d
 TRUNCATE TABLE t1;
 set global tokudb_debug_pause_background_job_manager = FALSE;
 set DEBUG_SYNC = 'now SIGNAL done';
+set DEBUG_SYNC = 'RESET';
 drop table t1;
 set session tokudb_auto_analyze = @orig_auto_analyze;
 set session tokudb_analyze_in_background = @orig_in_background;
@@ -32,4 +33,3 @@ set session tokudb_analyze_time = @orig_time;
 set global tokudb_cardinality_scale_percent = @orig_scale_percent;
 set session default_storage_engine = @orig_default_storage_engine;
 set global tokudb_debug_pause_background_job_manager = @orig_pause_background_job_manager;
-set DEBUG_SYNC='reset';
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test b/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test
index f56f93d1492c8..50434a79a0017 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/db938.test
@@ -40,6 +40,7 @@ insert into t1(b,c) values(0,0), (1,1), (2,2), (3,3);
 select database_name, table_name, job_type, job_params, scheduler from information_schema.tokudb_background_job_status;
 
 # lets flip to another connection
+--source include/count_sessions.inc
 connect(conn1, localhost, root);
 
 # set up the DEBUG_SYNC point
@@ -64,6 +65,7 @@ connection conn1;
 reap;
 connection default;
 disconnect conn1;
+set DEBUG_SYNC = 'RESET';
 drop table t1;
 
 set session tokudb_auto_analyze = @orig_auto_analyze;
@@ -74,4 +76,4 @@ set session tokudb_analyze_time = @orig_time;
 set global tokudb_cardinality_scale_percent = @orig_scale_percent;
 set session default_storage_engine = @orig_default_storage_engine;
 set global tokudb_debug_pause_background_job_manager = @orig_pause_background_job_manager;
-set DEBUG_SYNC='reset';
+--source include/wait_until_count_sessions.inc
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test
index 6100d9aeec230..8b6df4966f498 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store.test
@@ -12,33 +12,11 @@ let $MYSQLD_DATADIR= `SELECT @@datadir`;
 create table foo (a int, b int);
 create table bar (a int, key(a));
 
-# Write file to make mysql-test-run.pl expect the "crash", but don't start
-# it until it's told to
---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-wait
-EOF
-
-# Send shutdown to the connected server and give
-# it 10 seconds to die before zapping it
-shutdown_server 10;
-
+--source include/shutdown_mysqld.inc
 remove_file $MYSQLD_DATADIR/test/foo.frm;
 copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm;
 remove_file $MYSQLD_DATADIR/test/bar.frm;
-
-# Write file to make mysql-test-run.pl start up the server again
---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-restart
-EOF
-
-# Turn on reconnect
---enable_reconnect
-
-# Call script that will poll the server waiting for it to be back online again
---source include/wait_until_connected_again.inc
-
-# Turn off reconnect again
---disable_reconnect
+--source include/start_mysqld.inc
 
 show create table foo;
 show create table bar;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test
index e1acea13ed703..53c1037b051d8 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store2.test
@@ -15,33 +15,11 @@ create table bar (a int);
 alter table foo drop column a;
 alter table bar add column b int, add column c int;
 
-# Write file to make mysql-test-run.pl expect the "crash", but don't start
-# it until it's told to
---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-wait
-EOF
-
-# Send shutdown to the connected server and give
-# it 10 seconds to die before zapping it
-shutdown_server 10;
-
+--source include/shutdown_mysqld.inc
 remove_file $MYSQLD_DATADIR/test/foo.frm;
 copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm;
 remove_file $MYSQLD_DATADIR/test/bar.frm;
-
-# Write file to make mysql-test-run.pl start up the server again
---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-restart
-EOF
-
-# Turn on reconnect
---enable_reconnect
-
-# Call script that will poll the server waiting for it to be back online again
---source include/wait_until_connected_again.inc
-
-# Turn off reconnect again
---disable_reconnect
+--source include/start_mysqld.inc
 
 show create table foo;
 show create table bar;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test
index 17a124249da26..0421b8e9d269f 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/frm_store3.test
@@ -14,33 +14,11 @@ create table bar (a bigint)engine=TokuDB;
 alter table foo drop index b;
 alter table bar add index (a);
 
-# Write file to make mysql-test-run.pl expect the "crash", but don't start
-# it until it's told to
---write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-wait
-EOF
-
-# Send shutdown to the connected server and give
-# it 10 seconds to die before zapping it
-shutdown_server 10;
-
+--source include/shutdown_mysqld.inc
 remove_file $MYSQLD_DATADIR/test/foo.frm;
 copy_file $MYSQLD_DATADIR/test/bar.frm $MYSQLD_DATADIR/test/foo.frm;
 remove_file $MYSQLD_DATADIR/test/bar.frm;
-
-# Write file to make mysql-test-run.pl start up the server again
---append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-restart
-EOF
-
-# Turn on reconnect
---enable_reconnect
-
-# Call script that will poll the server waiting for it to be back online again
---source include/wait_until_connected_again.inc
-
-# Turn off reconnect again
---disable_reconnect
+--source include/start_mysqld.inc
 
 show create table foo;
 show create table bar;
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test
index 42dbb30058a74..4c40339be5a2f 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_part_table_668.test
@@ -7,17 +7,7 @@ set default_storage_engine='tokudb';
 # capture the datadir
 let $MYSQLD_DATADIR= `SELECT @@datadir`;
 
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
 # remove all tokudb file in the datadir
 system mkdir $MYSQLD_DATADIR/save;
 system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save;
@@ -25,13 +15,7 @@ system mkdir $MYSQLD_DATADIR/test;
 
 # install 6.6.8 tokudb test files
 system cp -r std_data/tokudb_drop_part_table_668/data/* $MYSQLD_DATADIR;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
+--source include/start_mysqld.inc
 
 create table tc (a int, b int, c int, primary key(a), key(b)) engine=tokudb partition by hash(a) partitions 2;
 
@@ -45,26 +29,9 @@ select dictionary_name from information_schema.tokudb_file_map;
 # check that the test dir is empty
 list_files $MYSQLD_DATADIR/test *.frm;
 
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
 # restore saved datadir
 system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test;
 system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR;
 system rmdir $MYSQLD_DATADIR/save;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
-
+--source include/start_mysqld.inc
diff --git a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test
index 3903c2cef9f28..0340b960fa5ab 100644
--- a/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test
+++ b/storage/tokudb/mysql-test/tokudb_bugs/t/tokudb_drop_simple_table_668.test
@@ -6,17 +6,7 @@ set default_storage_engine='tokudb';
 # capture the datadir
 let $MYSQLD_DATADIR= `SELECT @@datadir`;
 
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
 # remove all tokudb file in the datadir
 system mkdir $MYSQLD_DATADIR/save;
 system mv $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test $MYSQLD_DATADIR/save;
@@ -24,13 +14,7 @@ system mkdir $MYSQLD_DATADIR/test;
 
 # install 6.6.8 tokudb test files
 system cp -r std_data/tokudb_drop_simple_table_668/data/* $MYSQLD_DATADIR;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
+--source include/start_mysqld.inc
 
 create table tc (id int, x int, primary key(id), key(x));
 
@@ -46,26 +30,9 @@ select dictionary_name from information_schema.tokudb_file_map;
 # check that the test dir is empty
 list_files $MYSQLD_DATADIR/test *.frm;
 
-# shutdown mysqld (code stolen from mysql_plugin.test)
-let $expect_file= $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
-# MTR will remove this file later, but this might be too late.
---error 0,1
---remove_file $expect_file
---write_file $expect_file
-wait
-EOF
---shutdown_server 10
---source include/wait_until_disconnected.inc
-
+--source include/shutdown_mysqld.inc
 # restore saved datadir
 system rm -rf $MYSQLD_DATADIR/*toku* $MYSQLD_DATADIR/test;
 system mv $MYSQLD_DATADIR/save/* $MYSQLD_DATADIR;
 system rmdir $MYSQLD_DATADIR/save;
-
-# restart mysqld
---append_file $expect_file
-restart
-EOF
---enable_reconnect
---source include/wait_until_connected_again.inc
-
+--source include/start_mysqld.inc
diff --git a/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc b/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc
new file mode 100644
index 0000000000000..b10ad21dd95ce
--- /dev/null
+++ b/storage/tokudb/mysql-test/tokudb_parts/include/table_files_replace_pattern.inc
@@ -0,0 +1 @@
+--replace_regex  /[a-z0-9]+_[a-z0-9]+_[a-z0-9]+(_[BP]_[a-z0-9]+){0,1}\./id./ /sqlx_[a-z0-9]+_[a-z0-9]+_/sqlx_nnnn_nnnn_/ /sqlx-[a-z0-9]+_[a-z0-9]+/sqlx-nnnn_nnnn/ /#p#/#P#/ /#sp#/#SP#/ /#tmp#/#TMP#/
diff --git a/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test b/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test
index be14d8814f04e..f97235a0a2d3a 100644
--- a/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test
+++ b/storage/tokudb/mysql-test/tokudb_parts/t/partition_debug_sync_tokudb.test
@@ -56,7 +56,7 @@ partition by range (a)
 insert into t1 values (1), (11), (21), (33);
 SELECT * FROM t1;
 SHOW CREATE TABLE t1;
---replace_result #p# #P# #sp# #SP#
+--source include/table_files_replace_pattern.inc
 --list_files $MYSQLD_DATADIR/test
 
 SET DEBUG_SYNC='before_open_in_get_all_tables SIGNAL parked WAIT_FOR open';
@@ -82,7 +82,7 @@ ALTER TABLE t1 REORGANIZE PARTITION p0 INTO
 disconnect con1;
 connection default;
 --reap
---replace_result #p# #P# #sp# #SP#
+--source include/table_files_replace_pattern.inc
 --list_files $MYSQLD_DATADIR/test
 SHOW CREATE TABLE t1;
 SELECT * FROM t1;
diff --git a/storage/tokudb/tokudb_background.cc b/storage/tokudb/tokudb_background.cc
index d8ef54a59729e..e019e41c78824 100644
--- a/storage/tokudb/tokudb_background.cc
+++ b/storage/tokudb/tokudb_background.cc
@@ -8,7 +8,7 @@ This file is part of TokuDB
 
 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
-    TokuDBis is free software: you can redistribute it and/or modify
+    TokuDB is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License, version 2,
     as published by the Free Software Foundation.
 
@@ -68,7 +68,8 @@ void job_manager_t::destroy() {
     while (_background_jobs.size()) {
         _mutex.lock();
         job_t* job = _background_jobs.front();
-        cancel(job);
+        if (!job->cancelled())
+            cancel(job);
         _background_jobs.pop_front();
         delete job;
         _mutex.unlock();
@@ -148,11 +149,8 @@ bool job_manager_t::cancel_job(const char* key) {
          it != _background_jobs.end(); it++) {
         job_t* job = *it;
 
-        if (!job->cancelled() &&
-            strcmp(job->key(), key) == 0) {
-
+        if (!job->cancelled() && strcmp(job->key(), key) == 0) {
             cancel(job);
-
             ret = true;
         }
     }
@@ -162,8 +160,6 @@ bool job_manager_t::cancel_job(const char* key) {
 }
 void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const {
 
-    char database[256], table[256], type[256], params[256], status[256];
-
     _mutex.lock();
 
     for (jobs_t::const_iterator it = _background_jobs.begin();
@@ -171,19 +167,7 @@ void job_manager_t::iterate_jobs(pfn_iterate_t callback, void* extra) const {
          it++) {
         job_t* job = *it;
         if (!job->cancelled()) {
-            database[0] = table[0] = type[0] = params[0] = status[0] = '\0';
-            job->status(database, table, type, params, status);
-            callback(
-                job->id(),
-                database,
-                table,
-                type,
-                params,
-                status,
-                job->user_scheduled(),
-                job->scheduled_time(),
-                job->started_time(),
-                extra);
+            callback(job, extra);
         }
     }
 
@@ -233,6 +217,7 @@ void job_manager_t::run(job_t* job) {
 }
 void job_manager_t::cancel(job_t* job) {
     assert_debug(_mutex.is_owned_by_me());
+    assert_always(!job->cancelled());
     job->cancel();
 }
 job_manager_t* _job_manager = NULL;
diff --git a/storage/tokudb/tokudb_background.h b/storage/tokudb/tokudb_background.h
index 3786701fd0fa0..29991ab325d54 100644
--- a/storage/tokudb/tokudb_background.h
+++ b/storage/tokudb/tokudb_background.h
@@ -7,7 +7,7 @@ This file is part of TokuDB
 
 Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
 
-    TokuDBis is free software: you can redistribute it and/or modify
+    TokuDB is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License, version 2,
     as published by the Free Software Foundation.
 
@@ -58,13 +58,20 @@ class job_manager_t {
         // (or jobs) usually used to find jobs to cancel
         virtual const char* key() = 0;
 
-        // method to get info for information schema, 255 chars per buffer
-        virtual void status(
-            char* database,
-            char* table,
-            char* type,
-            char* params,
-            char* status) = 0;
+        // method to obtain the database name the job is scheduled on
+        virtual const char* database() = 0;
+
+        // method to obtain the table name the job is scheduled on
+        virtual const char* table() = 0;
+
+        // method to obtain the type of job
+        virtual const char* type() = 0;
+
+        // method to obtain a stringized list of job parameters
+        virtual const char* parameters() = 0;
+
+        // method to obtain a string identifying the current status of the job
+        virtual const char* status() = 0;
 
         inline bool running() const;
 
@@ -99,17 +106,7 @@ class job_manager_t {
     };
 
     // pfn for iterate callback
-    typedef void (*pfn_iterate_t)(
-        uint64_t,
-        const char*,
-        const char*,
-        const char*,
-        const char*,
-        const char*,
-        bool,
-        time_t,
-        time_t,
-        void*);
+    typedef void (*pfn_iterate_t)(class job_t*, void*);
 
 public:
     void* operator new(size_t sz);
@@ -144,6 +141,11 @@ class job_manager_t {
     // data passed when the job was scheduled
     void iterate_jobs(pfn_iterate_t callback, void* extra) const;
 
+    // lock the bjm, this prevents anyone from running, cancelling or iterating
+    // jobs in the bjm.
+    inline void lock();
+    inline void unlock();
+
 private:
     static void* thread_func(void* v);
 
@@ -170,6 +172,15 @@ extern job_manager_t*    _job_manager;
 bool initialize();
 bool destroy();
 
+inline void job_manager_t::lock() {
+    assert_debug(!_mutex.is_owned_by_me());
+    _mutex.lock();
+}
+inline void job_manager_t::unlock() {
+    assert_debug(_mutex.is_owned_by_me());
+    _mutex.unlock();
+}
+
 inline void job_manager_t::job_t::run() {
     if (!_cancelled) {
         _running = true;
diff --git a/storage/tokudb/tokudb_information_schema.cc b/storage/tokudb/tokudb_information_schema.cc
index e69a7899b4597..b3d77eef2d968 100644
--- a/storage/tokudb/tokudb_information_schema.cc
+++ b/storage/tokudb/tokudb_information_schema.cc
@@ -1085,7 +1085,7 @@ ST_FIELD_INFO background_job_status_field_info[] = {
     {"scheduler", 32, MYSQL_TYPE_STRING, 0, 0, NULL, SKIP_OPEN_TABLE },
     {"scheduled_time", 0, MYSQL_TYPE_DATETIME, 0, 0, NULL, SKIP_OPEN_TABLE },
     {"started_time", 0, MYSQL_TYPE_DATETIME, 0, MY_I_S_MAYBE_NULL, NULL, SKIP_OPEN_TABLE },
-    {"status", 256, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE },
+    {"status", 1024, MYSQL_TYPE_STRING, 0, MY_I_S_MAYBE_NULL, SKIP_OPEN_TABLE },
     {NULL, 0, MYSQL_TYPE_NULL, 0, 0, NULL, SKIP_OPEN_TABLE}
 };
 
@@ -1095,15 +1095,7 @@ struct background_job_status_extra {
 };
 
 void background_job_status_callback(
-    uint64_t id,
-    const char* database_name,
-    const char* table_name,
-    const char* type,
-    const char* params,
-    const char* status,
-    bool user_scheduled,
-    time_t scheduled_time,
-    time_t started_time,
+    tokudb::background::job_manager_t::job_t* job,
     void* extra) {
 
     background_job_status_extra* e =
@@ -1111,24 +1103,33 @@ void background_job_status_callback(
 
     THD* thd = e->thd;
     TABLE* table = e->table;
+    const char* tmp = NULL;
 
-    table->field[0]->store(id, false);
-    table->field[1]->store(
-        database_name,
-        strlen(database_name),
-        system_charset_info);
-    table->field[2]->store(table_name, strlen(table_name), system_charset_info);
-    table->field[3]->store(type, strlen(type), system_charset_info);
-    table->field[4]->store(params, strlen(params), system_charset_info);
-    if (user_scheduled)
+    table->field[0]->store(job->id(), false);
+
+    tmp = job->database();
+    table->field[1]->store(tmp, strlen(tmp),  system_charset_info);
+
+    tmp = job->table();
+    table->field[2]->store(tmp, strlen(tmp),  system_charset_info);
+
+    tmp = job->type();
+    table->field[3]->store(tmp, strlen(tmp),  system_charset_info);
+
+    tmp = job->parameters();
+    table->field[4]->store(tmp, strlen(tmp),  system_charset_info);
+
+    if (job->user_scheduled())
         table->field[5]->store("USER", strlen("USER"), system_charset_info);
     else
         table->field[5]->store("AUTO", strlen("AUTO"), system_charset_info);
 
-    field_store_time_t(table->field[6], scheduled_time);
-    field_store_time_t(table->field[7], started_time);
-    if (status[0] != '\0') {
-        table->field[8]->store(status, strlen(status), system_charset_info);
+    field_store_time_t(table->field[6], job->scheduled_time());
+    field_store_time_t(table->field[7], job->started_time());
+
+    tmp = job->status();
+    if (tmp && tmp[0] != '\0') {
+        table->field[8]->store(tmp, strlen(tmp), system_charset_info);
         table->field[8]->set_notnull();
     } else {
         table->field[8]->store(NULL, 0, system_charset_info);
diff --git a/storage/tokudb/tokudb_sysvars.cc b/storage/tokudb/tokudb_sysvars.cc
index 7cea749b4fbed..b758929c10e66 100644
--- a/storage/tokudb/tokudb_sysvars.cc
+++ b/storage/tokudb/tokudb_sysvars.cc
@@ -66,6 +66,7 @@ uint        read_status_frequency = 0;
 my_bool     strip_frm_data = FALSE;
 char*       tmp_dir = NULL;
 uint        write_status_frequency = 0;
+my_bool     dir_per_db = FALSE;
 char*       version = (char*) TOKUDB_VERSION_STR;
 
 // file system reserve as a percentage of total disk space
@@ -394,6 +395,18 @@ static MYSQL_SYSVAR_UINT(
     ~0U,
     0);
 
+static void tokudb_dir_per_db_update(THD* thd,
+                                     struct st_mysql_sys_var* sys_var,
+                                     void* var, const void* save) {
+    my_bool *value = (my_bool *) var;
+    *value = *(const my_bool *) save;
+    db_env->set_dir_per_db(db_env, *value);
+}
+
+static MYSQL_SYSVAR_BOOL(dir_per_db, dir_per_db,
+    0, "TokuDB store ft files in db directories",
+    NULL, tokudb_dir_per_db_update, FALSE);
+
 #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL
 static MYSQL_SYSVAR_STR(
     gdb_path,
@@ -935,6 +948,7 @@ st_mysql_sys_var* system_variables[] = {
     MYSQL_SYSVAR(tmp_dir),
     MYSQL_SYSVAR(version),
     MYSQL_SYSVAR(write_status_frequency),
+    MYSQL_SYSVAR(dir_per_db),
 
 #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL
     MYSQL_SYSVAR(gdb_path),
diff --git a/storage/tokudb/tokudb_sysvars.h b/storage/tokudb/tokudb_sysvars.h
index 3bd96f7c68da9..7701f211729c7 100644
--- a/storage/tokudb/tokudb_sysvars.h
+++ b/storage/tokudb/tokudb_sysvars.h
@@ -101,6 +101,7 @@ extern uint         read_status_frequency;
 extern my_bool      strip_frm_data;
 extern char*        tmp_dir;
 extern uint         write_status_frequency;
+extern my_bool      dir_per_db;
 extern char*        version;
 
 #if TOKU_INCLUDE_HANDLERTON_HANDLE_FATAL_SIGNAL
diff --git a/storage/xtradb/btr/btr0btr.cc b/storage/xtradb/btr/btr0btr.cc
index da4a6d3cdb1a6..ecea98fccfebb 100644
--- a/storage/xtradb/btr/btr0btr.cc
+++ b/storage/xtradb/btr/btr0btr.cc
@@ -78,7 +78,7 @@ btr_corruption_report(
 			       buf_block_get_zip_size(block),
 			       BUF_PAGE_PRINT_NO_CRASH);
 	}
-	buf_page_print(buf_block_get_frame_fast(block), 0, 0);
+	buf_page_print(buf_nonnull_block_get_frame(block), 0, 0);
 }
 
 #ifndef UNIV_HOTBACKUP
@@ -804,8 +804,10 @@ btr_height_get(
 
         /* S latches the page */
         root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
+	ut_ad(root_block); // The index must not be corrupted
 
-        height = btr_page_get_level(buf_block_get_frame_fast(root_block), mtr);
+	height = btr_page_get_level(buf_nonnull_block_get_frame(root_block),
+				    mtr);
 
         /* Release the S latch on the root page. */
         mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
@@ -1231,7 +1233,7 @@ btr_get_size(
 	SRV_CORRUPT_TABLE_CHECK(root,
 	{
 		mtr_commit(mtr);
-		return(0);
+		return(ULINT_UNDEFINED);
 	});
 
 	if (flag == BTR_N_LEAF_PAGES) {
@@ -2756,7 +2758,7 @@ btr_attach_half_pages(
 	}
 
 	/* Get the level of the split pages */
-	level = btr_page_get_level(buf_block_get_frame_fast(block), mtr);
+	level = btr_page_get_level(buf_nonnull_block_get_frame(block), mtr);
 	ut_ad(level
 	      == btr_page_get_level(buf_block_get_frame(new_block), mtr));
 
@@ -4133,8 +4135,10 @@ btr_discard_page(
 
 	/* Decide the page which will inherit the locks */
 
-	left_page_no = btr_page_get_prev(buf_block_get_frame_fast(block), mtr);
-	right_page_no = btr_page_get_next(buf_block_get_frame_fast(block), mtr);
+	left_page_no = btr_page_get_prev(buf_nonnull_block_get_frame(block),
+					 mtr);
+	right_page_no = btr_page_get_next(buf_nonnull_block_get_frame(block),
+					  mtr);
 
 	if (left_page_no != FIL_NULL) {
 		merge_block = btr_block_get(space, zip_size, left_page_no,
diff --git a/storage/xtradb/btr/btr0cur.cc b/storage/xtradb/btr/btr0cur.cc
index 05af024a88226..214d050d56219 100644
--- a/storage/xtradb/btr/btr0cur.cc
+++ b/storage/xtradb/btr/btr0cur.cc
@@ -1751,7 +1751,7 @@ btr_cur_pessimistic_insert(
 		}
 		if (!page_rec_is_infimum(btr_cur_get_rec(cursor))
 		    || btr_page_get_prev(
-			buf_block_get_frame(
+			buf_nonnull_block_get_frame(
 				btr_cur_get_block(cursor)), mtr)
 		       == FIL_NULL) {
 			/* split and inserted need to call
@@ -2220,7 +2220,7 @@ btr_cur_update_in_place(
 	if (page_zip
 	    && !(flags & BTR_KEEP_IBUF_BITMAP)
 	    && !dict_index_is_clust(index)
-	    && page_is_leaf(buf_block_get_frame(block))) {
+	    && page_is_leaf(buf_nonnull_block_get_frame(block))) {
 		/* Update the free bits in the insert buffer. */
 		ibuf_update_free_bits_zip(block, mtr);
 	}
diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc
index 0d5a478ca679c..21b10196a2572 100644
--- a/storage/xtradb/buf/buf0buf.cc
+++ b/storage/xtradb/buf/buf0buf.cc
@@ -4539,7 +4539,9 @@ buf_page_io_complete(
 			recv_recover_page(TRUE, (buf_block_t*) bpage);
 		}
 
-		if (uncompressed && !recv_no_ibuf_operations) {
+		if (uncompressed && !recv_no_ibuf_operations
+		    && fil_page_get_type(frame) == FIL_PAGE_INDEX
+		    && page_is_leaf(frame)) {
 
 			buf_block_t*	block;
 			ibool		update_ibuf_bitmap;
diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc
index f4d1c637e3e6f..3c12d6da73f3f 100644
--- a/storage/xtradb/buf/buf0dblwr.cc
+++ b/storage/xtradb/buf/buf0dblwr.cc
@@ -521,7 +521,7 @@ buf_dblwr_process()
 			if (buf_page_is_corrupted(true, read_buf, zip_size)) {
 
 				fprintf(stderr,
-					"InnoDB: Warning: database page"
+					"InnoDB: Database page"
 					" corruption or a failed\n"
 					"InnoDB: file read of"
 					" space %lu page %lu.\n"
diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc
index 3554405e0b908..601e79d892335 100644
--- a/storage/xtradb/buf/buf0flu.cc
+++ b/storage/xtradb/buf/buf0flu.cc
@@ -309,6 +309,8 @@ buf_flush_init_flush_rbt(void)
 
 		buf_flush_list_mutex_enter(buf_pool);
 
+		ut_ad(buf_pool->flush_rbt == NULL);
+
 		/* Create red black tree for speedy insertions in flush list. */
 		buf_pool->flush_rbt = rbt_create(
 			sizeof(buf_page_t*), buf_flush_block_cmp);
@@ -2565,6 +2567,11 @@ page_cleaner_sleep_if_needed(
 	ulint	next_loop_time)	/*!< in: time when next loop iteration
 				should start */
 {
+	/* No sleep if we are cleaning the buffer pool during the shutdown
+	with everything else finished */
+	if (srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE)
+		return;
+
 	ulint	cur_time = ut_time_ms();
 
 	if (next_loop_time > cur_time) {
diff --git a/storage/xtradb/dict/dict0boot.cc b/storage/xtradb/dict/dict0boot.cc
index 94a3af2852b39..c0bb0298bea72 100644
--- a/storage/xtradb/dict/dict0boot.cc
+++ b/storage/xtradb/dict/dict0boot.cc
@@ -272,6 +272,10 @@ dict_boot(void)
 	ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME == 2);
 	ut_ad(DICT_NUM_COLS__SYS_FOREIGN_COLS == 4);
 	ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 6);
+	ut_ad(DICT_NUM_COLS__SYS_ZIP_DICT == 3);
+	ut_ad(DICT_NUM_FIELDS__SYS_ZIP_DICT == 5);
+	ut_ad(DICT_NUM_COLS__SYS_ZIP_DICT_COLS == 3);
+	ut_ad(DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS == 5);
 
 	mtr_start(&mtr);
 
diff --git a/storage/xtradb/dict/dict0crea.cc b/storage/xtradb/dict/dict0crea.cc
index b34fb7e262660..db0ca638de4da 100644
--- a/storage/xtradb/dict/dict0crea.cc
+++ b/storage/xtradb/dict/dict0crea.cc
@@ -38,6 +38,7 @@ Created 1/8/1996 Heikki Tuuri
 #include "que0que.h"
 #include "row0ins.h"
 #include "row0mysql.h"
+#include "row0sel.h"
 #include "pars0pars.h"
 #include "trx0roll.h"
 #include "usr0sess.h"
@@ -1931,6 +1932,135 @@ dict_create_or_check_sys_tablespace(void)
 	return(err);
 }
 
+/** Creates the SYS_ZIP_DICT and SYS_ZIP_DICT_COLS system tables inside
+InnoDB at server bootstrap or server start if either is not found or is
+not of the right form; a DB_CORRUPTION (half-created) table is dropped first.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_create_or_check_sys_zip_dict(void)
+{
+	trx_t*		trx;
+	my_bool		srv_file_per_table_backup;
+	dberr_t		err;
+	dberr_t		sys_zip_dict_err;
+	dberr_t		sys_zip_dict_cols_err;
+
+	ut_a(srv_get_active_thread_type() == SRV_NONE);
+
+	/* Note: The master thread has not been started at this point. */
+
+	sys_zip_dict_err = dict_check_if_system_table_exists(
+		"SYS_ZIP_DICT", DICT_NUM_FIELDS__SYS_ZIP_DICT + 1, 2);
+	sys_zip_dict_cols_err = dict_check_if_system_table_exists(
+		"SYS_ZIP_DICT_COLS", DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS + 1,
+		1);
+
+	if (sys_zip_dict_err == DB_SUCCESS &&
+		sys_zip_dict_cols_err == DB_SUCCESS)
+		return (DB_SUCCESS);
+
+	trx = trx_allocate_for_mysql();
+
+	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+	trx->op_info = "creating zip_dict and zip_dict_cols sys tables";
+
+	row_mysql_lock_data_dictionary(trx);
+
+	/* Check which incomplete table definition to drop. */
+
+	if (sys_zip_dict_err == DB_CORRUPTION) {
+		ib_logf(IB_LOG_LEVEL_WARN,
+			"Dropping incompletely created "
+			"SYS_ZIP_DICT table.");
+		row_drop_table_for_mysql("SYS_ZIP_DICT", trx, TRUE, TRUE);
+	}
+	if (sys_zip_dict_cols_err == DB_CORRUPTION) {
+		ib_logf(IB_LOG_LEVEL_WARN,
+			"Dropping incompletely created "
+			"SYS_ZIP_DICT_COLS table.");
+		row_drop_table_for_mysql("SYS_ZIP_DICT_COLS", trx, TRUE, TRUE);
+	}
+
+	ib_logf(IB_LOG_LEVEL_INFO,
+		"Creating zip_dict and zip_dict_cols system tables.");
+
+	/* We always want SYSTEM tables to be created inside the system
+	tablespace. */
+	srv_file_per_table_backup = srv_file_per_table;
+	srv_file_per_table = 0;
+
+	err = que_eval_sql(
+		NULL,
+		"PROCEDURE CREATE_SYS_ZIP_DICT_PROC () IS\n"
+		"BEGIN\n"
+		"CREATE TABLE SYS_ZIP_DICT(\n"
+		"  ID INT UNSIGNED NOT NULL,\n"
+		"  NAME CHAR("
+		  STRINGIFY_ARG(ZIP_DICT_MAX_NAME_LENGTH)
+		") NOT NULL,\n"
+		"  DATA BLOB NOT NULL\n"
+		");\n"
+		"CREATE UNIQUE CLUSTERED INDEX SYS_ZIP_DICT_ID"
+		" ON SYS_ZIP_DICT (ID);\n"
+		"CREATE UNIQUE INDEX SYS_ZIP_DICT_NAME"
+		" ON SYS_ZIP_DICT (NAME);\n"
+		"CREATE TABLE SYS_ZIP_DICT_COLS(\n"
+		"  TABLE_ID INT UNSIGNED NOT NULL,\n"
+		"  COLUMN_POS INT UNSIGNED NOT NULL,\n"
+		"  DICT_ID INT UNSIGNED NOT NULL\n"
+		");\n"
+		"CREATE UNIQUE CLUSTERED INDEX SYS_ZIP_DICT_COLS_COMPOSITE"
+		" ON SYS_ZIP_DICT_COLS (TABLE_ID, COLUMN_POS);\n"
+		"END;\n",
+		FALSE, trx);
+
+	if (err != DB_SUCCESS) {
+		ib_logf(IB_LOG_LEVEL_ERROR,
+			"Creation of SYS_ZIP_DICT and SYS_ZIP_DICT_COLS "
+			"has failed with error %lu. Tablespace is full. "
+			"Dropping incompletely created tables.",
+			(ulong) err);
+
+		ut_a(err == DB_OUT_OF_FILE_SPACE
+			|| err == DB_TOO_MANY_CONCURRENT_TRXS);
+
+		row_drop_table_for_mysql("SYS_ZIP_DICT", trx, TRUE, TRUE);
+		row_drop_table_for_mysql("SYS_ZIP_DICT_COLS", trx, TRUE, TRUE);
+
+		if (err == DB_OUT_OF_FILE_SPACE) {
+			err = DB_MUST_GET_MORE_FILE_SPACE;
+		}
+	}
+
+	trx_commit_for_mysql(trx);
+
+	row_mysql_unlock_data_dictionary(trx);
+
+	trx_free_for_mysql(trx);
+
+	srv_file_per_table = srv_file_per_table_backup;
+
+	if (err == DB_SUCCESS) {
+		ib_logf(IB_LOG_LEVEL_INFO,
+			"zip_dict and zip_dict_cols system tables created.");
+	}
+
+	/* Note: The master thread has not been started at this point. */
+	/* Confirm and move to the non-LRU part of the table LRU list. */
+
+	sys_zip_dict_err = dict_check_if_system_table_exists(
+		"SYS_ZIP_DICT", DICT_NUM_FIELDS__SYS_ZIP_DICT + 1, 2);
+	ut_a(sys_zip_dict_err == DB_SUCCESS);
+	sys_zip_dict_cols_err = dict_check_if_system_table_exists(
+		"SYS_ZIP_DICT_COLS",
+		DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS + 1, 1);
+	ut_a(sys_zip_dict_cols_err == DB_SUCCESS);
+
+	return(err);
+}
+
 /********************************************************************//**
 Add a single tablespace definition to the data dictionary tables in the
 database.
@@ -1984,3 +2114,456 @@ dict_create_add_tablespace_to_dictionary(
 
 	return(error);
 }
+
+/** Add a single compression dictionary definition to the SYS_ZIP_DICT
+InnoDB system table.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_zip_dict(
+	const char*	name,		/*!< in: dict name */
+	ulint		name_len,	/*!< in: dict name length */
+	const char*	data,		/*!< in: dict data */
+	ulint		data_len,	/*!< in: dict data length */
+	trx_t*		trx)		/*!< in/out: transaction */
+{
+	ut_ad(name);
+	ut_ad(data);
+
+	pars_info_t* info = pars_info_create();
+
+	pars_info_add_literal(info, "name", name, name_len,
+		DATA_VARCHAR, DATA_ENGLISH);
+	pars_info_add_literal(info, "data", data, data_len,
+		DATA_BLOB, DATA_BINARY_TYPE | DATA_NOT_NULL);
+
+	dberr_t error = que_eval_sql(info,
+		"PROCEDURE P () IS\n"
+		"  max_id INT;\n"
+		"DECLARE CURSOR cur IS\n"
+		"  SELECT ID FROM SYS_ZIP_DICT\n"
+		"  ORDER BY ID DESC;\n"
+		"BEGIN\n"
+		"  max_id := 0;\n"
+		"  OPEN cur;\n"
+		"  FETCH cur INTO max_id;\n"
+		"  IF (cur % NOTFOUND) THEN\n"
+		"    max_id := 0;\n"
+		"  END IF;\n"
+		"  CLOSE cur;\n"
+		"  INSERT INTO SYS_ZIP_DICT VALUES"
+		"    (max_id + 1, :name, :data);\n"
+		"END;\n",
+		FALSE, trx);
+
+	return error;
+}
+
+/** Fetch callback, just stores extracted zip_dict id in the external
+variable.
+@return TRUE if all OK */
+static
+ibool
+dict_create_extract_int_aux(
+	void*	row,		/*!< in: sel_node_t* */
+	void*	user_arg)	/*!< out: 4-byte buffer receiving the id */
+{
+	sel_node_t*	node = static_cast<sel_node_t*>(row);
+	dfield_t*	dfield = que_node_get_val(node->select_list);
+	dtype_t*	type = dfield_get_type(dfield);
+	ulint		len = dfield_get_len(dfield);
+
+	ut_a(dtype_get_mtype(type) == DATA_INT);
+	ut_a(len == sizeof(ib_uint32_t));
+
+	memcpy(user_arg, dfield_get_data(dfield), sizeof(ib_uint32_t));
+
+	return(TRUE);
+}
+
+/** Add a single compression dictionary reference to the SYS_ZIP_DICT_COLS
+InnoDB system table.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_zip_dict_reference(
+	ulint		table_id,	/*!< in: table id */
+	ulint		column_pos,	/*!< in: column position */
+	ulint		dict_id,	/*!< in: dict id */
+	trx_t*		trx)		/*!< in/out: transaction */
+{
+	pars_info_t* info = pars_info_create();
+
+	pars_info_add_int4_literal(info, "table_id", table_id);
+	pars_info_add_int4_literal(info, "column_pos", column_pos);
+	pars_info_add_int4_literal(info, "dict_id", dict_id);
+
+	dberr_t error = que_eval_sql(info,
+		"PROCEDURE P () IS\n"
+		"BEGIN\n"
+		"  INSERT INTO SYS_ZIP_DICT_COLS VALUES"
+		"    (:table_id, :column_pos, :dict_id);\n"
+		"END;\n",
+		FALSE, trx);
+	return error;
+}
+
+/** Get a single compression dictionary id for the given
+(table id, column pos) pair.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_id_by_reference(
+	ulint	table_id,	/*!< in: table id */
+	ulint	column_pos,	/*!< in: column position */
+	ulint*	dict_id,	/*!< out: dict id */
+	trx_t*	trx)		/*!< in/out: transaction */
+{
+	ut_ad(dict_id);
+
+	pars_info_t* info = pars_info_create();
+
+	ib_uint32_t dict_id_buf;
+	mach_write_to_4(reinterpret_cast<byte*>(&dict_id_buf ),
+		ULINT32_UNDEFINED);
+
+	pars_info_add_int4_literal(info, "table_id", table_id);
+	pars_info_add_int4_literal(info, "column_pos", column_pos);
+	pars_info_bind_function(
+		info, "my_func", dict_create_extract_int_aux, &dict_id_buf);
+
+	dberr_t error = que_eval_sql(info,
+		"PROCEDURE P () IS\n"
+		"DECLARE FUNCTION my_func;\n"
+		"DECLARE CURSOR cur IS\n"
+		"  SELECT DICT_ID FROM SYS_ZIP_DICT_COLS\n"
+		"    WHERE TABLE_ID = :table_id AND\n"
+		"          COLUMN_POS = :column_pos;\n"
+		"BEGIN\n"
+		"  OPEN cur;\n"
+		"  FETCH cur INTO my_func();\n"
+		"  CLOSE cur;\n"
+		"END;\n",
+		FALSE, trx);
+	if (error == DB_SUCCESS) {
+		ib_uint32_t local_dict_id = mach_read_from_4(
+			reinterpret_cast<const byte*>(&dict_id_buf));
+		if (local_dict_id == ULINT32_UNDEFINED)
+			error = DB_RECORD_NOT_FOUND;
+		else
+			*dict_id = local_dict_id;
+	}
+	return error;
+}
+
+/** Get compression dictionary id for the given name.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_id_by_name(
+	const char*	dict_name,	/*!< in: dict name */
+	ulint		dict_name_len,	/*!< in: dict name length */
+	ulint*		dict_id,	/*!< out: dict id */
+	trx_t*		trx)		/*!< in/out: transaction */
+{
+	ut_ad(dict_name);
+	ut_ad(dict_name_len);
+	ut_ad(dict_id);
+
+	pars_info_t* info = pars_info_create();
+
+	pars_info_add_literal(info, "dict_name", dict_name, dict_name_len,
+		DATA_VARCHAR, DATA_ENGLISH);
+
+	ib_uint32_t dict_id_buf;
+	mach_write_to_4(reinterpret_cast<byte*>(&dict_id_buf),
+		ULINT32_UNDEFINED);
+	pars_info_bind_function(
+		info, "my_func", dict_create_extract_int_aux, &dict_id_buf);
+
+	dberr_t error = que_eval_sql(info,
+		"PROCEDURE P () IS\n"
+		"DECLARE FUNCTION my_func;\n"
+		"DECLARE CURSOR cur IS\n"
+		"  SELECT ID FROM SYS_ZIP_DICT\n"
+		"    WHERE NAME = :dict_name;\n"
+		"BEGIN\n"
+		"  OPEN cur;\n"
+		"  FETCH cur INTO my_func();\n"
+		"  CLOSE cur;\n"
+		"END;\n",
+		FALSE, trx);
+	if (error == DB_SUCCESS) {
+		ib_uint32_t local_dict_id = mach_read_from_4(
+			reinterpret_cast<const byte*>(&dict_id_buf));
+		if (local_dict_id == ULINT32_UNDEFINED)
+			error = DB_RECORD_NOT_FOUND;
+		else
+			*dict_id = local_dict_id;
+	}
+	return error;
+}
+
+/** Auxiliary enum used to indicate zip dict data extraction result code */
+enum zip_dict_info_aux_code {
+	zip_dict_info_success,		/*!< success */
+	zip_dict_info_not_found,	/*!< zip dict record not found */
+	zip_dict_info_oom,		/*!< out of memory */
+	zip_dict_info_corrupted_name,	/*!< corrupted zip dict name */
+	zip_dict_info_corrupted_data	/*!< corrupted zip dict data */
+};
+
+/** Auxiliary struct used to return zip dict info along with result code */
+struct zip_dict_info_aux {
+	LEX_STRING	name;	/*!< zip dict name */
+	LEX_STRING	data;	/*!< zip dict data */
+	int		code;	/*!< result code (0 - success) */
+};
+
+/** Fetch callback: copies NAME and DATA of the fetched SYS_ZIP_DICT row into
+my_malloc()ed buffers in *user_arg; on failure frees them and sets the code.
+@return always returns TRUE */
+static
+ibool
+dict_create_get_zip_dict_info_by_id_aux(
+	void*	row,		/*!< in: sel_node_t* */
+	void*	user_arg)	/*!< in: pointer to zip_dict_info_aux* */
+{
+	sel_node_t*		node = static_cast<sel_node_t*>(row);
+	zip_dict_info_aux*	result =
+		static_cast<zip_dict_info_aux*>(user_arg);
+
+	result->code = zip_dict_info_success;
+	result->name.str = 0;
+	result->name.length = 0;
+	result->data.str = 0;
+	result->data.length = 0;
+
+	/* NAME field */
+	que_node_t*	exp = node->select_list;
+	ut_a(exp != 0);
+
+	dfield_t*	dfield = que_node_get_val(exp);
+	dtype_t*	type = dfield_get_type(dfield);
+	ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
+
+	ulint	len = dfield_get_len(dfield);
+	void*	data = dfield_get_data(dfield);
+
+	if (len == UNIV_SQL_NULL) {
+		result->code = zip_dict_info_corrupted_name;
+	}
+	else {
+		result->name.str =
+			static_cast<char*>(my_malloc(len + 1, MYF(0)));
+		if (result->name.str == 0) {
+			result->code = zip_dict_info_oom;
+		}
+		else {
+			memcpy(result->name.str, data, len);
+			result->name.str[len] = '\0';
+			result->name.length = len;
+		}
+	}
+
+	/* DATA field */
+	exp = que_node_get_next(exp);
+	ut_a(exp != 0);
+
+	dfield = que_node_get_val(exp);
+	type = dfield_get_type(dfield);
+	ut_a(dtype_get_mtype(type) == DATA_BLOB);
+
+	len = dfield_get_len(dfield);
+	data = dfield_get_data(dfield);
+
+	if (len == UNIV_SQL_NULL) {
+		result->code = zip_dict_info_corrupted_data;
+	}
+	else {
+		result->data.str =
+			static_cast<char*>(my_malloc(
+				len == 0 ? 1 : len, MYF(0)));
+		if (result->data.str == 0) {
+			result->code = zip_dict_info_oom;
+		}
+		else {
+			memcpy(result->data.str, data, len);
+			result->data.length = len;
+		}
+	}
+
+	ut_ad(que_node_get_next(exp) == 0);
+
+	if (result->code != zip_dict_info_success) {
+		/* Buffers come from my_malloc(): release with my_free(). */
+		if (result->name.str != 0) {
+			my_free(result->name.str);
+			result->name.str = 0;
+			result->name.length = 0;
+		}
+		if (result->data.str != 0) {
+			my_free(result->data.str);
+			result->data.str = 0;
+			result->data.length = 0;
+		}
+	}
+
+	return TRUE;
+}
+
+/** Get compression dictionary info (name and data) for the given id.
+Allocates memory for name and data on success.
+Must be freed with my_free() (the buffers are allocated with my_malloc()).
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_info_by_id(
+	ulint	dict_id,	/*!< in: dict id */
+	char**	name,		/*!< out: dict name */
+	ulint*	name_len,	/*!< out: dict name length*/
+	char**	data,		/*!< out: dict data */
+	ulint*	data_len,	/*!< out: dict data length*/
+	trx_t*	trx)		/*!< in/out: transaction */
+{
+	ut_ad(name);
+	ut_ad(data);
+
+	zip_dict_info_aux rec;
+	rec.code = zip_dict_info_not_found;
+	pars_info_t* info = pars_info_create();
+
+	pars_info_add_int4_literal(info, "id", dict_id);
+	pars_info_bind_function(
+		info, "my_func", dict_create_get_zip_dict_info_by_id_aux,
+		&rec);
+
+	dberr_t error = que_eval_sql(info,
+		"PROCEDURE P () IS\n"
+		"DECLARE FUNCTION my_func;\n"
+		"DECLARE CURSOR cur IS\n"
+		"  SELECT NAME, DATA FROM SYS_ZIP_DICT\n"
+		"    WHERE ID = :id;\n"
+		"BEGIN\n"
+		"  OPEN cur;\n"
+		"  FETCH cur INTO my_func();\n"
+		"  CLOSE cur;\n"
+		"END;\n",
+		FALSE, trx);
+	if (error == DB_SUCCESS) {
+		switch (rec.code) {
+			case zip_dict_info_success:
+				*name = rec.name.str;
+				*name_len = rec.name.length;
+				*data = rec.data.str;
+				*data_len = rec.data.length;
+				break;
+			case zip_dict_info_not_found:
+				error = DB_RECORD_NOT_FOUND;
+				break;
+			case zip_dict_info_oom:
+				error = DB_OUT_OF_MEMORY;
+				break;
+			case zip_dict_info_corrupted_name:
+			case zip_dict_info_corrupted_data:
+				error = DB_INVALID_NULL;
+				break;
+			default:
+				ut_error;
+		}
+	}
+	return error;
+}
+
+/** Remove a single compression dictionary from the data dictionary
+tables in the database.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_remove_zip_dict(
+	const char*	name,		/*!< in: dict name */
+	ulint		name_len,	/*!< in: dict name length */
+	trx_t*		trx)		/*!< in/out: transaction */
+{
+	ut_ad(name);
+
+	pars_info_t* info = pars_info_create();
+
+	ib_uint32_t dict_id_buf;
+	mach_write_to_4(reinterpret_cast<byte*>(&dict_id_buf),
+		ULINT32_UNDEFINED);
+	ib_uint32_t counter_buf;
+	mach_write_to_4(reinterpret_cast<byte*>(&counter_buf),
+		ULINT32_UNDEFINED);
+
+	pars_info_add_literal(info, "name", name, name_len,
+		DATA_VARCHAR, DATA_ENGLISH);
+	pars_info_bind_int4_literal(info, "dict_id", &dict_id_buf);
+	pars_info_bind_function(info, "find_dict_func",
+		dict_create_extract_int_aux, &dict_id_buf);
+	pars_info_bind_function(info, "count_func",
+		dict_create_extract_int_aux, &counter_buf);
+
+	dberr_t error = que_eval_sql(info,
+		"PROCEDURE P () IS\n"
+		"DECLARE FUNCTION find_dict_func;\n"
+		"DECLARE FUNCTION count_func;\n"
+		"DECLARE CURSOR dict_cur IS\n"
+		"  SELECT ID FROM SYS_ZIP_DICT\n"
+		"    WHERE NAME = :name\n"
+		"  FOR UPDATE;\n"
+		"DECLARE CURSOR ref_cur IS\n"
+		"  SELECT 1 FROM SYS_ZIP_DICT_COLS\n"
+		"    WHERE DICT_ID = :dict_id;\n"
+		"BEGIN\n"
+		"  OPEN dict_cur;\n"
+		"  FETCH dict_cur INTO find_dict_func();\n"
+		"  IF NOT (SQL % NOTFOUND) THEN\n"
+		"    OPEN ref_cur;\n"
+		"    FETCH ref_cur INTO count_func();\n"
+		"    IF SQL % NOTFOUND THEN\n"
+		"      DELETE FROM SYS_ZIP_DICT WHERE CURRENT OF dict_cur;\n"
+		"    END IF;\n"
+		"    CLOSE ref_cur;\n"
+		"  END IF;\n"
+		"  CLOSE dict_cur;\n"
+		"END;\n",
+		FALSE, trx);
+	if (error == DB_SUCCESS) {
+		ib_uint32_t local_dict_id = mach_read_from_4(
+			reinterpret_cast<const byte*>(&dict_id_buf));
+		if (local_dict_id == ULINT32_UNDEFINED) {
+			error = DB_RECORD_NOT_FOUND;
+		}
+		else {
+			ib_uint32_t local_counter = mach_read_from_4(
+				reinterpret_cast<const byte*>(&counter_buf));
+			if (local_counter != ULINT32_UNDEFINED)
+				error = DB_ROW_IS_REFERENCED;
+		}
+	}
+	return error;
+}
+
+/** Remove all compression dictionary references for the given table ID from
+the data dictionary tables in the database.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_remove_zip_dict_references_for_table(
+	ulint	table_id,	/*!< in: table id */
+	trx_t*	trx)		/*!< in/out: transaction */
+{
+	pars_info_t* info = pars_info_create();
+
+	pars_info_add_int4_literal(info, "table_id", table_id);
+
+	dberr_t error = que_eval_sql(info,
+		"PROCEDURE P () IS\n"
+		"BEGIN\n"
+		"  DELETE FROM SYS_ZIP_DICT_COLS\n"
+		"    WHERE TABLE_ID = :table_id;\n"
+		"END;\n",
+		FALSE, trx);
+	return error;
+}
diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc
index 1b4fa916973db..9e2aa8328d1c0 100644
--- a/storage/xtradb/dict/dict0dict.cc
+++ b/storage/xtradb/dict/dict0dict.cc
@@ -7310,3 +7310,161 @@ dict_tf_to_row_format_string(
 	return(0);
 }
 #endif /* !UNIV_HOTBACKUP */
+
+/** Insert a record into SYS_ZIP_DICT.
+@retval	DB_SUCCESS	if OK
+@retval	dberr_t		if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_create_zip_dict(
+	const char*	name,		/*!< in: zip_dict name */
+	ulint		name_len,	/*!< in: zip_dict name length*/
+	const char*	data,		/*!< in: zip_dict data */
+	ulint		data_len)	/*!< in: zip_dict data length */
+{
+	dberr_t		err = DB_SUCCESS;
+	trx_t*		trx;
+
+	ut_ad(name);
+	ut_ad(data);
+
+	rw_lock_x_lock(&dict_operation_lock);
+	dict_mutex_enter_for_mysql();
+
+	trx = trx_allocate_for_background();
+	trx->op_info = "insert zip_dict";
+	trx->dict_operation_lock_mode = RW_X_LATCH;
+	trx_start_if_not_started(trx);
+
+	err = dict_create_add_zip_dict(name, name_len, data, data_len, trx);
+
+	if (err == DB_SUCCESS) {
+		trx_commit_for_mysql(trx);
+	}
+	else {
+		trx->op_info = "rollback of internal trx on zip_dict table";
+		trx_rollback_to_savepoint(trx, NULL);
+		ut_a(trx->error_state == DB_SUCCESS);
+	}
+	trx->op_info = "";
+	trx->dict_operation_lock_mode = 0;
+	trx_free_for_background(trx);
+
+	dict_mutex_exit_for_mysql();
+	rw_lock_x_unlock(&dict_operation_lock);
+
+	return err;
+}
+/** Get single compression dictionary id for the given
+(table id, column pos) pair.
+@retval	DB_SUCCESS		if OK
+@retval	DB_RECORD_NOT_FOUND	if not found */
+UNIV_INTERN
+dberr_t
+dict_get_dictionary_id_by_key(
+	ulint	table_id,	/*!< in: table id */
+	ulint	column_pos,	/*!< in: column position */
+	ulint*	dict_id)	/*!< out: zip_dict id */
+{
+	dberr_t		err = DB_SUCCESS;
+	trx_t*		trx;
+
+	rw_lock_s_lock(&dict_operation_lock);
+	dict_mutex_enter_for_mysql();
+
+	trx = trx_allocate_for_background();
+	trx->op_info = "get zip dict id by composite key";
+	trx->dict_operation_lock_mode = RW_S_LATCH;
+	trx_start_if_not_started(trx);
+
+	err = dict_create_get_zip_dict_id_by_reference(table_id, column_pos,
+		dict_id, trx);
+
+	trx_commit_for_mysql(trx);
+	trx->dict_operation_lock_mode = 0;
+	trx_free_for_background(trx);
+
+	dict_mutex_exit_for_mysql();
+	rw_lock_s_unlock(&dict_operation_lock);
+
+	return err;
+}
+/** Get compression dictionary info (name and data) for the given id.
+Allocates memory in name->str and data->str on success.
+Must be freed with my_free().
+@retval	DB_SUCCESS		if OK
+@retval	DB_RECORD_NOT_FOUND	if not found */
+UNIV_INTERN
+dberr_t
+dict_get_dictionary_info_by_id(
+	ulint	dict_id,	/*!< in: dictionary id */
+	char**	name,		/*!< out: dictionary name */
+	ulint*	name_len,	/*!< out: dictionary name length*/
+	char**	data,		/*!< out: dictionary data */
+	ulint*	data_len)	/*!< out: dictionary data length*/
+{
+	dberr_t		err = DB_SUCCESS;
+	trx_t*		trx;
+
+	rw_lock_s_lock(&dict_operation_lock);
+	dict_mutex_enter_for_mysql();
+
+	trx = trx_allocate_for_background();
+	trx->op_info = "get zip dict name and data by id";
+	trx->dict_operation_lock_mode = RW_S_LATCH;
+	trx_start_if_not_started(trx);
+
+	err = dict_create_get_zip_dict_info_by_id(dict_id, name, name_len,
+		data, data_len, trx);
+
+	trx_commit_for_mysql(trx);
+	trx->dict_operation_lock_mode = 0;
+	trx_free_for_background(trx);
+
+	dict_mutex_exit_for_mysql();
+	rw_lock_s_unlock(&dict_operation_lock);
+
+	return err;
+}
+/** Delete a record in SYS_ZIP_DICT with the given name.
+@retval	DB_SUCCESS		if OK
+@retval	DB_RECORD_NOT_FOUND	if not found
+@retval	DB_ROW_IS_REFERENCED	if in use */
+UNIV_INTERN
+dberr_t
+dict_drop_zip_dict(
+	const char*	name,		/*!< in: zip_dict name */
+	ulint		name_len)	/*!< in: zip_dict name length*/
+{
+	dberr_t		err = DB_SUCCESS;
+	trx_t*		trx;
+
+	ut_ad(name);
+
+	rw_lock_x_lock(&dict_operation_lock);
+	dict_mutex_enter_for_mysql();
+
+	trx = trx_allocate_for_background();
+	trx->op_info = "delete zip_dict";
+	trx->dict_operation_lock_mode = RW_X_LATCH;
+	trx_start_if_not_started(trx);
+
+	err = dict_create_remove_zip_dict(name, name_len, trx);
+
+	if (err == DB_SUCCESS) {
+		trx_commit_for_mysql(trx);
+	}
+	else {
+		trx->op_info = "rollback of internal trx on zip_dict table";
+		trx_rollback_to_savepoint(trx, NULL);
+		ut_a(trx->error_state == DB_SUCCESS);
+	}
+	trx->op_info = "";
+	trx->dict_operation_lock_mode = 0;
+	trx_free_for_background(trx);
+
+	dict_mutex_exit_for_mysql();
+	rw_lock_x_unlock(&dict_operation_lock);
+
+	return err;
+}
diff --git a/storage/xtradb/dict/dict0load.cc b/storage/xtradb/dict/dict0load.cc
index 988351dbca55a..ca7de72c9b907 100644
--- a/storage/xtradb/dict/dict0load.cc
+++ b/storage/xtradb/dict/dict0load.cc
@@ -56,7 +56,9 @@ static const char* SYSTEM_TABLE_NAME[] = {
 	"SYS_FOREIGN",
 	"SYS_FOREIGN_COLS",
 	"SYS_TABLESPACES",
-	"SYS_DATAFILES"
+	"SYS_DATAFILES",
+	"SYS_ZIP_DICT",
+	"SYS_ZIP_DICT_COLS"
 };
 
 /* If this flag is TRUE, then we will load the cluster index's (and tables')
@@ -728,6 +730,161 @@ dict_process_sys_datafiles(
 	return(NULL);
 }
 
+/** Parses a SYS_ZIP_DICT record: reads the dict id and copies the name and
+the data (via btr_copy_externally_stored_field() if flagged extern) to heap.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_zip_dict(
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	ulint		zip_size,	/*!< in: nonzero=compressed BLOB page size */
+	const rec_t*	rec,		/*!< in: current SYS_ZIP_DICT rec */
+	ulint*		id,		/*!< out: dict id */
+	const char**	name,		/*!< out: dict name */
+	const char**	data,		/*!< out: dict data */
+	ulint*		data_len)	/*!< out: dict data length */
+{
+	ulint		len;
+	const byte*	field;
+
+	/* Reset every output so an early error return leaves defined values */
+	*id = ULINT_UNDEFINED;
+	*name = NULL;
+	*data = NULL;
+	*data_len = 0;
+
+	if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) {
+		return("delete-marked record in SYS_ZIP_DICT");
+	}
+
+	if (UNIV_UNLIKELY(
+		rec_get_n_fields_old(rec)!= DICT_NUM_FIELDS__SYS_ZIP_DICT)) {
+		return("wrong number of columns in SYS_ZIP_DICT record");
+	}
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_ZIP_DICT__ID, &len);
+	if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) {
+		goto err_len;
+	}
+	*id = mach_read_from_4(field);
+
+	rec_get_nth_field_offs_old(	/* only the length is checked */
+		rec, DICT_FLD__SYS_ZIP_DICT__DB_TRX_ID, &len);
+	if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) {
+		goto err_len;
+	}
+
+	rec_get_nth_field_offs_old(	/* only the length is checked */
+		rec, DICT_FLD__SYS_ZIP_DICT__DB_ROLL_PTR, &len);
+	if (UNIV_UNLIKELY(len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) {
+		goto err_len;
+	}
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_ZIP_DICT__NAME, &len);
+	if (UNIV_UNLIKELY(len == 0 || len == UNIV_SQL_NULL)) {
+		goto err_len;
+	}
+	*name = mem_heap_strdupl(heap, (char*) field, len);
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_ZIP_DICT__DATA, &len);
+	if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) {
+		goto err_len;
+	}
+
+	if (rec_get_1byte_offs_flag(rec) == 0 &&
+		rec_2_is_field_extern(rec, DICT_FLD__SYS_ZIP_DICT__DATA)) {
+		ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+		if (UNIV_UNLIKELY
+			(!memcmp(field + len - BTR_EXTERN_FIELD_REF_SIZE,
+				field_ref_zero,
+				BTR_EXTERN_FIELD_REF_SIZE))) {
+			goto err_len;	/* trailing reference equals field_ref_zero */
+		}
+		*data = reinterpret_cast<char*>(
+			btr_copy_externally_stored_field(data_len, field,
+							zip_size, len, heap, 0));
+	}
+	else {	/* not flagged extern: duplicate the in-record bytes */
+		*data_len = len;
+		*data = static_cast<char*>(mem_heap_dup(heap, field, len));
+	}
+
+	return(NULL);
+
+err_len:
+	return("incorrect column length in SYS_ZIP_DICT");
+}
+
+/** Parses a SYS_ZIP_DICT_COLS record and reads its table id, column
+position and dict id; each output stays ULINT_UNDEFINED until it is read.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_zip_dict_cols(
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_ZIP_DICT_COLS rec */
+	ulint*		table_id,	/*!< out: table id */
+	ulint*		column_pos,	/*!< out: column position */
+	ulint*		dict_id)	/*!< out: dict id */
+{
+	ulint		len;
+	const byte*	field;
+
+	/* Initialize the output values */
+	*table_id = ULINT_UNDEFINED;
+	*column_pos = ULINT_UNDEFINED;
+	*dict_id = ULINT_UNDEFINED;
+
+	if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) {
+		return("delete-marked record in SYS_ZIP_DICT_COLS");
+	}
+
+	if (UNIV_UNLIKELY(rec_get_n_fields_old(rec) !=
+		DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS)) {
+		return("wrong number of columns in SYS_ZIP_DICT_COLS"
+			" record");
+	}
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_ZIP_DICT_COLS__TABLE_ID, &len);
+	if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) {
+err_len:	/* shared error exit; the gotos below jump here as well */
+		return("incorrect column length in SYS_ZIP_DICT_COLS");
+	}
+	*table_id = mach_read_from_4(field);
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_ZIP_DICT_COLS__COLUMN_POS, &len);
+	if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) {
+		goto err_len;
+	}
+	*column_pos = mach_read_from_4(field);
+
+	rec_get_nth_field_offs_old(	/* only the length is checked */
+		rec, DICT_FLD__SYS_ZIP_DICT_COLS__DB_TRX_ID, &len);
+	if (UNIV_UNLIKELY(len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL)) {
+		goto err_len;
+	}
+
+	rec_get_nth_field_offs_old(	/* only the length is checked */
+		rec, DICT_FLD__SYS_ZIP_DICT_COLS__DB_ROLL_PTR, &len);
+	if (UNIV_UNLIKELY(len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL)) {
+		goto err_len;
+	}
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_ZIP_DICT_COLS__DICT_ID, &len);
+	if (UNIV_UNLIKELY(len != DICT_FLD_LEN_SPACE)) {
+		goto err_len;
+	}
+	*dict_id = mach_read_from_4(field);
+
+	return(NULL);
+}
 /********************************************************************//**
 Determine the flags of a table as stored in SYS_TABLES.TYPE and N_COLS.
 @return  ULINT_UNDEFINED if error, else a valid dict_table_t::flags. */
diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc
index b073398f8ecf7..a4aa43651f8a5 100644
--- a/storage/xtradb/dict/dict0stats.cc
+++ b/storage/xtradb/dict/dict0stats.cc
@@ -673,7 +673,10 @@ void
 dict_stats_copy(
 /*============*/
 	dict_table_t*		dst,	/*!< in/out: destination table */
-	const dict_table_t*	src)	/*!< in: source table */
+	const dict_table_t*	src,	/*!< in: source table */
+	bool reset_ignored_indexes)	/*!< in: if true, set ignored indexes
+                                             to have the same statistics as if 
+                                             the table was empty */
 {
 	dst->stats_last_recalc = src->stats_last_recalc;
 	dst->stat_n_rows = src->stat_n_rows;
@@ -692,7 +695,16 @@ dict_stats_copy(
 	      && (src_idx = dict_table_get_next_index(src_idx)))) {
 
 		if (dict_stats_should_ignore_index(dst_idx)) {
-			continue;
+			if (reset_ignored_indexes) {
+				/* Reset index statistics for all ignored indexes,
+				unless they are FT indexes (these have no statistics)*/
+				if (dst_idx->type & DICT_FTS) {
+					continue;
+				}
+				dict_stats_empty_index(dst_idx);
+			} else {
+				continue;
+			}
 		}
 
 		ut_ad(!dict_index_is_univ(dst_idx));
@@ -782,7 +794,7 @@ dict_stats_snapshot_create(
 
 	t = dict_stats_table_clone_create(table);
 
-	dict_stats_copy(t, table);
+	dict_stats_copy(t, table, false);
 
 	t->stat_persistent = table->stat_persistent;
 	t->stats_auto_recalc = table->stats_auto_recalc;
@@ -3240,13 +3252,10 @@ dict_stats_update(
 
 			dict_table_stats_lock(table, RW_X_LATCH);
 
-			/* Initialize all stats to dummy values before
-			copying because dict_stats_table_clone_create() does
-			skip corrupted indexes so our dummy object 't' may
-			have less indexes than the real object 'table'. */
-			dict_stats_empty_table(table);
-
-			dict_stats_copy(table, t);
+			/* Pass reset_ignored_indexes=true as parameter
+			to dict_stats_copy. This will cause statistics
+			for corrupted indexes to be set to empty values */
+			dict_stats_copy(table, t, true);
 
 			dict_stats_assert_initialized(table);
 
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index 21a3cd75a4436..a7b0377d2a469 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -325,6 +325,8 @@ fil_space_get_by_id(
 		    ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
 		    space->id == id);
 
+	/* The system tablespace must always be found */
+	ut_ad(space || id != 0 || srv_is_being_started);
 	return(space);
 }
 
@@ -1680,6 +1682,9 @@ fil_close_all_files(void)
 {
 	fil_space_t*	space;
 
+	// Must check both flags as it's possible for this to be called during
+	// server startup with srv_track_changed_pages == true but
+	// srv_redo_log_thread_started == false
 	if (srv_track_changed_pages && srv_redo_log_thread_started)
 		os_event_wait(srv_redo_log_tracked_event);
 
@@ -1719,6 +1724,9 @@ fil_close_log_files(
 {
 	fil_space_t*	space;
 
+	// Must check both flags as it's possible for this to be called during
+	// server startup with srv_track_changed_pages == true but
+	// srv_redo_log_thread_started == false
 	if (srv_track_changed_pages && srv_redo_log_thread_started)
 		os_event_wait(srv_redo_log_tracked_event);
 
diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc
index 80c48b616a91c..4c54afae8cda8 100644
--- a/storage/xtradb/fts/fts0fts.cc
+++ b/storage/xtradb/fts/fts0fts.cc
@@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t	fts_pll_tokenize_mutex_key;
 /** variable to record innodb_fts_internal_tbl_name for information
 schema table INNODB_FTS_INSERTED etc. */
 UNIV_INTERN char* fts_internal_tbl_name		= NULL;
+UNIV_INTERN char* fts_internal_tbl_name2	= NULL;
 
 /** InnoDB default stopword list:
 There are different versions of stopwords, the stop words listed
@@ -265,13 +266,15 @@ FTS auxiliary INDEX table and clear the cache at the end.
 @param[in,out]	sync		sync state
 @param[in]	unlock_cache	whether unlock cache lock when write node
 @param[in]	wait		whether wait when a sync is in progress
+@param[in]      has_dict        whether the dict operation lock is held
 @return DB_SUCCESS if all OK */
 static
 dberr_t
 fts_sync(
 	fts_sync_t*	sync,
 	bool		unlock_cache,
-	bool		wait);
+	bool		wait,
+	bool		has_dict);
 
 /****************************************************************//**
 Release all resources help by the words rb tree e.g., the node ilist. */
@@ -3567,7 +3570,7 @@ fts_add_doc_by_id(
 
 				DBUG_EXECUTE_IF(
 					"fts_instrument_sync_debug",
-					fts_sync(cache->sync, true, true);
+					fts_sync(cache->sync, true, true, false);
 				);
 
 				DEBUG_SYNC_C("fts_instrument_sync_request");
@@ -4379,13 +4382,11 @@ fts_sync_index(
 }
 
 /** Check if index cache has been synced completely
-@param[in,out]	sync		sync state
 @param[in,out]	index_cache	index cache
 @return true if index is synced, otherwise false. */
 static
 bool
 fts_sync_index_check(
-	fts_sync_t*		sync,
 	fts_index_cache_t*	index_cache)
 {
 	const ib_rbt_node_t*	rbt_node;
@@ -4408,14 +4409,36 @@ fts_sync_index_check(
 	return(true);
 }
 
-/*********************************************************************//**
-Commit the SYNC, change state of processed doc ids etc.
+/** Reset synced flag in index cache when rollback: the last node of each
+cached word is marked as not synced.
+@param[in,out]	index_cache	index cache */
+static
+void
+fts_sync_index_reset(
+	fts_index_cache_t*	index_cache)
+{
+	const ib_rbt_node_t*	cursor = rbt_first(index_cache->words);
+
+	while (cursor != NULL) {
+		/* Only the last node in the word's node vector is touched. */
+		fts_tokenizer_word_t*	entry
+			= rbt_value(fts_tokenizer_word_t, cursor);
+		fts_node_t*	tail = static_cast<fts_node_t*>(
+			ib_vector_last(entry->nodes));
+
+		tail->synced = false;
+
+		cursor = rbt_next(index_cache->words, cursor);
+	}
+}
+
+/** Commit the SYNC, change state of processed doc ids etc.
+@param[in,out]	sync	sync state
 @return DB_SUCCESS if all OK */
 static  MY_ATTRIBUTE((nonnull, warn_unused_result))
 dberr_t
 fts_sync_commit(
-/*============*/
-	fts_sync_t*	sync)			/*!< in: sync state */
+	fts_sync_t*	sync)
 {
 	dberr_t		error;
 	trx_t*		trx = sync->trx;
@@ -4468,6 +4491,8 @@ fts_sync_commit(
 			(double) n_nodes/ (double) elapsed_time);
 	}
 
+	/* Avoid assertion in trx_free(). */
+	trx->dict_operation_lock_mode = 0;
 	trx_free_for_background(trx);
 
 	return(error);
@@ -4490,6 +4515,10 @@ fts_sync_rollback(
 		index_cache = static_cast<fts_index_cache_t*>(
 			ib_vector_get(cache->indexes, i));
 
+		/* Reset synced flag so nodes will not be skipped
+		in the next sync, see fts_sync_write_words(). */
+		fts_sync_index_reset(index_cache);
+
 		for (j = 0; fts_index_selector[j].value; ++j) {
 
 			if (index_cache->ins_graph[j] != NULL) {
@@ -4515,6 +4544,9 @@ fts_sync_rollback(
 	rw_lock_x_unlock(&cache->lock);
 
 	fts_sql_rollback(trx);
+
+	/* Avoid assertion in trx_free(). */
+	trx->dict_operation_lock_mode = 0;
 	trx_free_for_background(trx);
 }
 
@@ -4523,13 +4555,15 @@ FTS auxiliary INDEX table and clear the cache at the end.
 @param[in,out]	sync		sync state
 @param[in]	unlock_cache	whether unlock cache lock when write node
 @param[in]	wait		whether wait when a sync is in progress
+@param[in]      has_dict        whether the dict operation lock is held
 @return DB_SUCCESS if all OK */
 static
 dberr_t
 fts_sync(
 	fts_sync_t*	sync,
 	bool		unlock_cache,
-	bool		wait)
+	bool		wait,
+	bool		has_dict)
 {
 	ulint		i;
 	dberr_t		error = DB_SUCCESS;
@@ -4558,6 +4592,12 @@ fts_sync(
 	DEBUG_SYNC_C("fts_sync_begin");
 	fts_sync_begin(sync);
 
+	/* When sync in background, we hold dict operation lock
+	to prevent DDL like DROP INDEX, etc. */
+	if (has_dict) {
+		sync->trx->dict_operation_lock_mode = RW_S_LATCH;
+	}
+
 begin_sync:
 	if (cache->total_size > fts_max_cache_size) {
 		/* Avoid the case: sync never finish when
@@ -4598,7 +4638,7 @@ fts_sync(
 			ib_vector_get(cache->indexes, i));
 
 		if (index_cache->index->to_be_dropped
-		    || fts_sync_index_check(sync, index_cache)) {
+		    || fts_sync_index_check(index_cache)) {
 			continue;
 		}
 
@@ -4613,6 +4653,7 @@ fts_sync(
 	}
 
 	rw_lock_x_lock(&cache->lock);
+	sync->interrupted = false;
 	sync->in_progress = false;
 	os_event_set(sync->event);
 	rw_lock_x_unlock(&cache->lock);
@@ -4636,20 +4677,23 @@ FTS auxiliary INDEX table and clear the cache at the end.
 @param[in,out]	table		fts table
 @param[in]	unlock_cache	whether unlock cache when write node
 @param[in]	wait		whether wait for existing sync to finish
+@param[in]	has_dict	whether the dict operation lock is held
 @return DB_SUCCESS on success, error code on failure. */
 UNIV_INTERN
 dberr_t
 fts_sync_table(
 	dict_table_t*	table,
 	bool		unlock_cache,
-	bool		wait)
+	bool		wait,
+	bool		has_dict)
 {
 	dberr_t	err = DB_SUCCESS;
 
 	ut_ad(table->fts);
 
 	if (!dict_table_is_discarded(table) && table->fts->cache) {
-		err = fts_sync(table->fts->cache->sync, unlock_cache, wait);
+		err = fts_sync(table->fts->cache->sync,
+			       unlock_cache, wait, has_dict);
 	}
 
 	return(err);
@@ -6527,6 +6571,36 @@ fts_check_corrupt_index(
 	return(0);
 }
 
+/** Look up the name of the parent table of an FTS auxiliary table.
+@param[in]	aux_table_name	aux table name
+@param[in]	aux_table_len	aux table length
+@return copy of the parent table name (made with mem_strdupl), or NULL */
+char*
+fts_get_parent_table_name(
+	const char*	aux_table_name,
+	ulint		aux_table_len)
+{
+	fts_aux_table_t	aux_info;
+
+	if (!fts_is_aux_table_name(&aux_info, aux_table_name, aux_table_len)) {
+		/* The name is not recognized as an FTS aux table name. */
+		return(NULL);
+	}
+
+	dict_table_t*	parent = dict_table_open_on_id(
+		aux_info.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
+
+	if (parent == NULL) {
+		return(NULL);
+	}
+
+	char*	name_copy = mem_strdupl(parent->name, strlen(parent->name));
+	/* Close the table opened above before handing back the copy. */
+	dict_table_close(parent, TRUE, FALSE);
+
+	return(name_copy);
+}
+
 /** Check the validity of the parent table.
 @param[in]	aux_table	auxiliary table
 @return true if it is a valid table or false if it is not */
diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc
index d9f2532578e5c..ea937c2075217 100644
--- a/storage/xtradb/fts/fts0opt.cc
+++ b/storage/xtradb/fts/fts0opt.cc
@@ -2986,7 +2986,7 @@ fts_optimize_sync_table(
 
 	if (table) {
 		if (dict_table_has_fts_index(table) && table->fts->cache) {
-			fts_sync_table(table, true, false);
+			fts_sync_table(table, true, false, true);
 		}
 
 		dict_table_close(table, FALSE, FALSE);
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index ceb489c8f57d4..66d2ae4f9de15 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -107,6 +107,14 @@ this program; if not, write to the Free Software Foundation, Inc.,
 
 #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X))
 
+#ifndef HAVE_PERCONA_COMPRESSED_COLUMNS /* NOTE(review): presumably stand-in values so this file builds without compressed-column support - confirm */
+#define COLUMN_FORMAT_TYPE_COMPRESSED                   0xBADF00D
+#define SQLCOM_CREATE_COMPRESSION_DICTIONARY            0xDECAF
+#define SQLCOM_DROP_COMPRESSION_DICTIONARY              0xC0FFEE
+#define ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST        0xDEADFACE
+const static LEX_CSTRING null_lex_cstr={0,0};
+#endif /* !HAVE_PERCONA_COMPRESSED_COLUMNS */
+
 #ifdef MYSQL_DYNAMIC_PLUGIN
 #define tc_size 400
 #define tdc_size 400
@@ -818,6 +826,19 @@ innobase_is_fake_change(
 	THD*		thd) __attribute__((unused));	/*!< in: MySQL thread handle of the user for
 				  whom the transaction is being committed */
 
+/** Get the list of foreign keys referencing a specified table
+(forward declaration; the definition appears further down).
+@param thd		The thread handle
+@param path		Path to the table
+@param f_key_list[out]	The list of foreign keys
+
+@return error code or zero for success */
+static
+int
+innobase_get_parent_fk_list(
+	THD*			thd,
+	const char*		path,
+	List<FOREIGN_KEY_INFO>*	f_key_list) __attribute__((unused));
 
 /******************************************************************//**
 Maps a MySQL trx isolation level code to the InnoDB isolation level code
@@ -1493,6 +1514,30 @@ normalize_table_name_low(
 	ibool           set_lower_case); /* in: TRUE if we want to set
 					 name to lower case */
 
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+/** Creates a new compression dictionary (forward declaration). */
+static
+handler_create_zip_dict_result
+innobase_create_zip_dict(
+	handlerton*	hton,	/*!< in: innobase handlerton */
+	THD*		thd,	/*!< in: handle to the MySQL thread */
+	const char*	name,	/*!< in: zip dictionary name */
+	ulint*		name_len,
+				/*!< in/out: zip dictionary name length */
+	const char*	data,	/*!< in: zip dictionary data */
+	ulint*		data_len);
+				/*!< in/out: zip dictionary data length */
+
+/** Drops an existing compression dictionary (forward declaration). */
+static
+handler_drop_zip_dict_result
+innobase_drop_zip_dict(
+	handlerton*	hton,	/*!< in: innobase handlerton */
+	THD*		thd,	/*!< in: handle to the MySQL thread */
+	const char*	name,	/*!< in: zip dictionary name */
+	ulint*		name_len);
+				/*!< in/out: zip dictionary name length */
+#endif
 /*************************************************************//**
 Checks if buffer pool is big enough to enable backoff algorithm.
 InnoDB empty free list algorithm backoff requires free pages
@@ -3607,6 +3652,10 @@ innobase_init(
         innobase_hton->wsrep_fake_trx_id=wsrep_fake_trx_id;
 #endif /* WITH_WSREP */
 
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+	innobase_hton->create_zip_dict = innobase_create_zip_dict;
+	innobase_hton->drop_zip_dict = innobase_drop_zip_dict;
+#endif
 	ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
 
 #ifndef DBUG_OFF
@@ -4300,6 +4349,90 @@ innobase_purge_changed_page_bitmaps(
 	return (my_bool)log_online_purge_changed_page_bitmaps(lsn);
 }
 
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+/** Creates a new compression dictionary; refused in read-only mode or when the name/data is too long. */
+static
+handler_create_zip_dict_result
+innobase_create_zip_dict(
+	handlerton*	hton,	/*!< in: innobase handlerton */
+	THD*		thd,	/*!< in: handle to the MySQL thread */
+	const char*	name,	/*!< in: zip dictionary name */
+	ulint*		name_len,
+				/*!< in/out: name length; set to ZIP_DICT_MAX_NAME_LENGTH if too long */
+	const char*	data,	/*!< in: zip dictionary data */
+	ulint*		data_len)
+				/*!< in/out: data length; set to ZIP_DICT_MAX_DATA_LENGTH if too long */
+{
+	handler_create_zip_dict_result result =
+		HA_CREATE_ZIP_DICT_UNKNOWN_ERROR;
+
+	DBUG_ENTER("innobase_create_zip_dict");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
+
+	if (UNIV_UNLIKELY(high_level_read_only)) {
+		DBUG_RETURN(HA_CREATE_ZIP_DICT_READ_ONLY);
+	}
+
+	if (UNIV_UNLIKELY(*name_len > ZIP_DICT_MAX_NAME_LENGTH)) {
+		*name_len = ZIP_DICT_MAX_NAME_LENGTH;	/* report the limit back */
+		DBUG_RETURN(HA_CREATE_ZIP_DICT_NAME_TOO_LONG);
+	}
+
+	if (UNIV_UNLIKELY(*data_len > ZIP_DICT_MAX_DATA_LENGTH)) {
+		*data_len = ZIP_DICT_MAX_DATA_LENGTH;	/* report the limit back */
+		DBUG_RETURN(HA_CREATE_ZIP_DICT_DATA_TOO_LONG);
+	}
+
+	switch (dict_create_zip_dict(name, *name_len, data, *data_len)) {
+		case DB_SUCCESS:
+			result = HA_CREATE_ZIP_DICT_OK;
+			break;
+		case DB_DUPLICATE_KEY:
+			result = HA_CREATE_ZIP_DICT_ALREADY_EXISTS;
+			break;
+		default:
+			ut_ad(0);	/* any other error code trips this assertion */
+			result = HA_CREATE_ZIP_DICT_UNKNOWN_ERROR;
+	}
+	DBUG_RETURN(result);
+}
+
+/** Drops an existing compression dictionary. */
+static
+handler_drop_zip_dict_result
+innobase_drop_zip_dict(
+	handlerton*	hton,	/*!< in: innobase handlerton */
+	THD*		thd,	/*!< in: handle to the MySQL thread */
+	const char*	name,	/*!< in: zip dictionary name */
+	ulint*		name_len)
+				/*!< in/out: zip dictionary name length */
+{
+	DBUG_ENTER("innobase_drop_zip_dict");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
+
+	/* Nothing is dropped when high_level_read_only is set. */
+	if (UNIV_UNLIKELY(high_level_read_only)) {
+		DBUG_RETURN(HA_DROP_ZIP_DICT_READ_ONLY);
+	}
+
+	const dberr_t	err = dict_drop_zip_dict(name, *name_len);
+
+	/* Translate the InnoDB error code into the handler result. */
+	if (err == DB_SUCCESS) {
+		DBUG_RETURN(HA_DROP_ZIP_DICT_OK);
+	}
+	if (err == DB_RECORD_NOT_FOUND) {
+		DBUG_RETURN(HA_DROP_ZIP_DICT_DOES_NOT_EXIST);
+	}
+	if (err == DB_ROW_IS_REFERENCED) {
+		DBUG_RETURN(HA_DROP_ZIP_DICT_IS_REFERENCED);
+	}
+
+	/* Any other code is unexpected. */
+	ut_ad(0);
+	DBUG_RETURN(HA_DROP_ZIP_DICT_UNKNOWN_ERROR);
+}
+#endif
 /*****************************************************************//**
 Check whether this is a fake change transaction.
 @return TRUE if a fake change transaction */
@@ -5933,6 +6066,88 @@ innobase_build_index_translation(
 	DBUG_RETURN(ret);
 }
 
+/** This function checks if all the compression dictionaries referenced
+in table->fields exist in SYS_ZIP_DICT InnoDB system table.
+@return true if all referenced dictionaries exist */
+UNIV_INTERN
+bool
+innobase_check_zip_dicts(
+	const TABLE*	table,		/*!< in: table in MySQL data
+					dictionary */
+	ulint*		dict_ids,	/*!< out: zip dict id per field or
+					ULINT_UNDEFINED (n_fields long) */
+	trx_t*		trx,		/*!< in: transaction */
+	const char**	err_dict_name)	/*!< out: the name of the
+					zip_dict which does not exist. */
+{
+	DBUG_ENTER("innobase_check_zip_dicts");
+
+	bool res = true;
+	const size_t n_fields = table->s->fields;
+
+	/* Define every output slot in all build configurations: the array
+	is later read entry by entry when the dictionary references are
+	created, even if compressed-column support is compiled out. */
+	for (size_t field_idx = 0; field_idx < n_fields; ++field_idx) {
+		dict_ids[field_idx] = ULINT_UNDEFINED;
+	}
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+	dberr_t err = DB_SUCCESS;
+	Field* field_ptr = NULL;
+	for (size_t field_idx = 0; err == DB_SUCCESS && field_idx < n_fields;
+		++field_idx)
+	{
+		field_ptr = table->field[field_idx];
+		if (field_ptr->has_associated_compression_dictionary()) {
+			err = dict_create_get_zip_dict_id_by_name(
+				field_ptr->zip_dict_name.str,
+				field_ptr->zip_dict_name.length,
+				&dict_ids[field_idx],
+				trx);
+			ut_a(err == DB_SUCCESS || err == DB_RECORD_NOT_FOUND);
+		}
+	}
+	if (err != DB_SUCCESS) {
+		res = false;
+		*err_dict_name = field_ptr->zip_dict_name.str;
+	}
+#endif
+	DBUG_RETURN(res);
+}
+
+/** Adds a compression dictionary reference for table_id for every field
+whose entry in zip_dict_ids is not ULINT_UNDEFINED; each
+dict_create_add_zip_dict_reference() call must succeed (ut_a). */
+UNIV_INTERN
+void
+innobase_create_zip_dict_references(
+	const TABLE*	table,		/*!< in: table in MySQL data
+					dictionary */
+	table_id_t	ib_table_id,	/*!< in: table ID in Innodb data
+					dictionary */
+	ulint*		zip_dict_ids,	/*!< in: zip dict ids
+					(at least n_fields long) */
+	trx_t*		trx)		/*!< in: transaction */
+{
+	DBUG_ENTER("innobase_create_zip_dict_references");
+	dberr_t		status = DB_SUCCESS;
+	const size_t	column_count = table->s->fields;
+	size_t		col = 0;
+
+	while (status == DB_SUCCESS && col < column_count) {
+		const ulint	dict_id = zip_dict_ids[col];
+
+		if (dict_id != ULINT_UNDEFINED) {
+			status = dict_create_add_zip_dict_reference(
+				ib_table_id, table->field[col]->field_index,
+				dict_id, trx);
+			ut_a(status == DB_SUCCESS);
+		}
+		++col;
+	}
+	DBUG_VOID_RETURN;
+}
+
 /*******************************************************************//**
 This function uses index translation table to quickly locate the
 requested index structure.
@@ -7164,6 +7379,7 @@ wsrep_store_key_val_for_row(
 				format) */
 	uint		buff_len,/*!< in: buffer length */
 	const uchar*	record,
+	row_prebuilt_t*	prebuilt,	/*!< in: InnoDB prebuilt struct */
 	ibool*          key_is_null)/*!< out: full key was null */
 {
 	KEY*		key_info	= table->key_info + keynr;
@@ -7320,8 +7536,17 @@ wsrep_store_key_val_for_row(
 
 			blob_data = row_mysql_read_blob_ref(&blob_len,
 				(byte*) (record
-				+ (ulint)get_field_offset(table, field)),
-					(ulint) field->pack_length());
+				+ (ulint) get_field_offset(table, field)),
+				(ulint) field->pack_length(),
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+				field->column_format() ==
+					COLUMN_FORMAT_TYPE_COMPRESSED,
+				reinterpret_cast<const byte*>(
+					field->zip_dict_data.str),
+				field->zip_dict_data.length, prebuilt);
+#else
+                                0, 0, 0, prebuilt);
+#endif
 
 			true_len = blob_len;
 
@@ -7616,7 +7841,16 @@ ha_innobase::store_key_val_for_row(
 			blob_data = row_mysql_read_blob_ref(&blob_len,
 				(byte*) (record
 				+ (ulint) get_field_offset(table, field)),
-					(ulint) field->pack_length());
+				(ulint) field->pack_length(),
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+				field->column_format() ==
+					COLUMN_FORMAT_TYPE_COMPRESSED,
+				reinterpret_cast<const byte*>(
+					field->zip_dict_data.str),
+				field->zip_dict_data.length, prebuilt);
+#else
+                                0, 0, 0, prebuilt);
+#endif
 
 			true_len = blob_len;
 
@@ -7872,6 +8106,14 @@ build_template_field(
 	templ->mbminlen = dict_col_get_mbminlen(col);
 	templ->mbmaxlen = dict_col_get_mbmaxlen(col);
 	templ->is_unsigned = col->prtype & DATA_UNSIGNED;
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+	templ->compressed = (field->column_format()
+				== COLUMN_FORMAT_TYPE_COMPRESSED);
+	templ->zip_dict_data = field->zip_dict_data;
+#else
+	templ->compressed = 0;
+	templ->zip_dict_data = null_lex_cstr;
+#endif
 
 	if (!dict_index_is_clust(index)
 	    && templ->rec_field_no == ULINT_UNDEFINED) {
@@ -8190,6 +8432,7 @@ dberr_t
 ha_innobase::innobase_lock_autoinc(void)
 /*====================================*/
 {
+	DBUG_ENTER("ha_innobase::innobase_lock_autoinc");
 	dberr_t		error = DB_SUCCESS;
 
 	ut_ad(!srv_read_only_mode);
@@ -8229,6 +8472,8 @@ ha_innobase::innobase_lock_autoinc(void)
 		/* Fall through to old style locking. */
 
 	case AUTOINC_OLD_STYLE_LOCKING:
+		DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used",
+				ut_ad(0););
 		error = row_lock_table_autoinc_for_mysql(prebuilt);
 
 		if (error == DB_SUCCESS) {
@@ -8242,7 +8487,7 @@ ha_innobase::innobase_lock_autoinc(void)
 		ut_error;
 	}
 
-	return(error);
+	DBUG_RETURN(error);
 }
 
 /********************************************************************//**
@@ -8776,8 +9021,11 @@ calc_row_difference(
 		switch (col_type) {
 
 		case DATA_BLOB:
-			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
-			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
+			/* Do not compress blob column while comparing*/
+			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len,
+				false, 0, 0, prebuilt);
+			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len,
+				false, 0, 0, prebuilt);
 
 			break;
 
@@ -8847,7 +9095,17 @@ calc_row_difference(
 					TRUE,
 					new_mysql_row_col,
 					col_pack_len,
-					dict_table_is_comp(prebuilt->table));
+					dict_table_is_comp(prebuilt->table),
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+					field->column_format() ==
+						COLUMN_FORMAT_TYPE_COMPRESSED,
+					reinterpret_cast<const byte*>(
+						field->zip_dict_data.str),
+					field->zip_dict_data.length,
+#else
+                                        0, 0, 0,
+#endif
+					prebuilt);
 				dfield_copy(&ufield->new_val, &dfield);
 			} else {
 				dfield_set_null(&ufield->new_val);
@@ -9018,7 +9276,8 @@ wsrep_calc_row_hash(
 		switch (col_type) {
 
 		case DATA_BLOB:
-			ptr = row_mysql_read_blob_ref(&len, ptr, len);
+			ptr = row_mysql_read_blob_ref(&len, ptr, len,
+				false, 0, 0, prebuilt);
 
 			break;
 
@@ -10831,7 +11090,7 @@ ha_innobase::wsrep_append_keys(
 
 		len = wsrep_store_key_val_for_row(
 			thd, table, 0, key, WSREP_MAX_SUPPORTED_KEY_LENGTH,
-			record0, &is_null);
+			record0, prebuilt, &is_null);
 
 		if (!is_null) {
 			rcode = wsrep_append_key(
@@ -10885,7 +11144,7 @@ ha_innobase::wsrep_append_keys(
 				len = wsrep_store_key_val_for_row(
 					thd, table, i, key0, 
 					WSREP_MAX_SUPPORTED_KEY_LENGTH, 
-					record0, &is_null);
+					record0, prebuilt, &is_null);
 				if (!is_null) {
 					rcode = wsrep_append_key(
 						thd, trx, table_share, table, 
@@ -10904,7 +11163,7 @@ ha_innobase::wsrep_append_keys(
 					len = wsrep_store_key_val_for_row(
 						thd, table, i, key1, 
 						WSREP_MAX_SUPPORTED_KEY_LENGTH,
-						record1, &is_null);
+						record1, prebuilt, &is_null);
 					if (!is_null && memcmp(key0, key1, len)) {
 						rcode = wsrep_append_key(
 							thd, trx, table_share, 
@@ -11079,6 +11338,7 @@ create_table_def(
 	ulint		unsigned_type;
 	ulint		binary_type;
 	ulint		long_true_varchar;
+	ulint		compressed;
 	ulint		charset_no;
 	ulint		i;
 	ulint		doc_id_col = 0;
@@ -11228,6 +11488,13 @@ create_table_def(
 			}
 		}
 
+		/* Check if the the field has COMPRESSED attribute */
+		compressed = 0;
+		if (field->column_format() ==
+			COLUMN_FORMAT_TYPE_COMPRESSED) {
+			compressed = DATA_COMPRESSED;
+		}
+
 		/* First check whether the column to be added has a
 		system reserved name. */
 		if (dict_col_name_is_reserved(field->field_name)){
@@ -11248,7 +11515,8 @@ create_table_def(
 			dtype_form_prtype(
 				(ulint) field->type()
 				| nulls_allowed | unsigned_type
-				| binary_type | long_true_varchar,
+				| binary_type | long_true_varchar
+				| compressed,
 				charset_no),
 			col_len);
 	}
@@ -12078,6 +12346,9 @@ ha_innobase::create(
 	const char*	stmt;
 	size_t		stmt_len;
 
+	mem_heap_t*	heap = 0;
+	ulint*		zip_dict_ids = 0;
+
 	DBUG_ENTER("ha_innobase::create");
 
 	DBUG_ASSERT(thd != NULL);
@@ -12168,6 +12439,19 @@ ha_innobase::create(
 
 	row_mysql_lock_data_dictionary(trx);
 
+	heap = mem_heap_create(form->s->fields * sizeof(ulint));
+	zip_dict_ids = static_cast<ulint*>(
+		mem_heap_alloc(heap, form->s->fields * sizeof(ulint)));
+
+	const char*	err_zip_dict_name = 0;
+	if (!innobase_check_zip_dicts(form, zip_dict_ids,
+		trx, &err_zip_dict_name)) {
+		error = -1;
+		my_error(ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST,
+			MYF(0), err_zip_dict_name);
+		goto cleanup;
+	}
+
 	error = create_table_def(trx, form, norm_name, temp_path,
 				 remote_path, flags, flags2);
 	if (error) {
@@ -12275,6 +12559,22 @@ ha_innobase::create(
 		dict_table_get_all_fts_indexes(innobase_table, fts->indexes);
 	}
 
+	/*
+	Adding compression dictionary <-> compressed table column links
+	to the SYS_ZIP_DICT_COLS table.
+	*/
+	ut_a(zip_dict_ids != 0);
+	{
+		dict_table_t*	local_table = dict_table_open_on_name(
+			norm_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
+
+		ut_a(local_table);
+		table_id_t table_id = local_table->id;
+		dict_table_close(local_table, TRUE, FALSE);
+		innobase_create_zip_dict_references(form,
+			table_id, zip_dict_ids, trx);
+	}
+
 	stmt = innobase_get_stmt(thd, &stmt_len);
 
 	if (stmt) {
@@ -12391,6 +12691,9 @@ ha_innobase::create(
 
 	trx_free_for_mysql(trx);
 
+	if (heap != 0)
+		mem_heap_free(heap);
+
 	DBUG_RETURN(0);
 
 cleanup:
@@ -12400,6 +12703,9 @@ ha_innobase::create(
 
 	trx_free_for_mysql(trx);
 
+	if (heap != 0)
+		mem_heap_free(heap);
+
 	DBUG_RETURN(error);
 }
 
@@ -13486,6 +13792,14 @@ ha_innobase::info_low(
 			if (dict_stats_is_persistent_enabled(ib_table)) {
 
 				if (is_analyze) {
+
+					/* If this table is already queued for
+					background analyze, remove it from the
+					queue as we are about to do the same */
+					dict_mutex_enter_for_mysql();
+					dict_stats_recalc_pool_del(ib_table);
+					dict_mutex_exit_for_mysql();
+
 					opt = DICT_STATS_RECALC_PERSISTENT;
 				} else {
 					/* This is e.g. 'SHOW INDEXES', fetch
@@ -13915,7 +14229,7 @@ ha_innobase::optimize(
 	if (innodb_optimize_fulltext_only) {
 		if (prebuilt->table->fts && prebuilt->table->fts->cache
 		    && !dict_table_is_discarded(prebuilt->table)) {
-			fts_sync_table(prebuilt->table, false, true);
+			fts_sync_table(prebuilt->table, false, true, false);
 			fts_optimize_table(prebuilt->table);
 		}
 		return(HA_ADMIN_OK);
@@ -14119,7 +14433,14 @@ ha_innobase::check(
 
 		prebuilt->select_lock_type = LOCK_NONE;
 
-		if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) {
+		bool check_result
+			= row_check_index_for_mysql(prebuilt, index, &n_rows);
+		DBUG_EXECUTE_IF(
+				"dict_set_index_corrupted",
+				if (!(index->type & DICT_CLUSTERED)) {
+					check_result = false;
+				});
+		if (!check_result) {
 			innobase_format_name(
 				index_name, sizeof index_name,
 				index->name, TRUE);
@@ -14446,6 +14767,75 @@ get_foreign_key_info(
 	return(pf_key_info);
 }
 
+/** Append to f_key_list an entry for each foreign key constraint found
+in table->referenced_set. Asserts that dict_sys->mutex is owned.
+@param thd		The thread handle
+@param table		Table whose referenced_set is scanned
+@param f_key_list[out]	The list of foreign keys */
+static
+void
+fill_foreign_key_list(THD* thd,
+		      const dict_table_t* table,
+		      List<FOREIGN_KEY_INFO>* f_key_list)
+{
+	ut_ad(mutex_own(&dict_sys->mutex));
+
+	dict_foreign_set::iterator cur = table->referenced_set.begin();
+
+	while (cur != table->referenced_set.end()) {
+		/* A NULL result from get_foreign_key_info() is skipped */
+		FOREIGN_KEY_INFO* info = get_foreign_key_info(thd, *cur);
+
+		if (info != NULL) {
+			f_key_list->push_back(info);
+		}
+		++cur;
+	}
+}
+
+/** Get the list of foreign keys referencing a specified table.
+@param thd		The thread handle
+@param path		Path to the table
+@param f_key_list[out]	The list of foreign keys
+@return HA_ERR_NO_SUCH_TABLE if the table cannot be opened,
+zero for success */
+static
+int
+innobase_get_parent_fk_list(
+	THD*			thd,
+	const char*		path,
+	List<FOREIGN_KEY_INFO>*	f_key_list)
+{
+	ut_a(strlen(path) <= FN_REFLEN);
+	char	norm_name[FN_REFLEN + 1];
+	normalize_table_name(norm_name, path);
+
+	trx_t*	parent_trx = check_trx_exists(thd);
+	parent_trx->op_info = "getting list of referencing foreign keys";
+	trx_search_latch_release_if_reserved(parent_trx);
+
+	mutex_enter(&dict_sys->mutex);
+
+	dict_table_t*	table
+		= dict_table_open_on_name(norm_name, TRUE, FALSE,
+					  static_cast<dict_err_ignore_t>(
+						  DICT_ERR_IGNORE_INDEX_ROOT
+						  | DICT_ERR_IGNORE_CORRUPT));
+	if (!table) {
+		mutex_exit(&dict_sys->mutex);
+		parent_trx->op_info = "";	/* do not leave it stale */
+		return(HA_ERR_NO_SUCH_TABLE);
+	}
+
+	fill_foreign_key_list(thd, table, f_key_list);
+
+	dict_table_close(table, TRUE, FALSE);
+
+	mutex_exit(&dict_sys->mutex);
+	parent_trx->op_info = "";
+	return(0);
+}
+
 /*******************************************************************//**
 Gets the list of foreign keys in this table.
 @return always 0, that is, always succeeds */
@@ -14498,9 +14888,6 @@ ha_innobase::get_parent_foreign_key_list(
 	THD*			thd,		/*!< in: user thread handle */
 	List<FOREIGN_KEY_INFO>*	f_key_list)	/*!< out: foreign key list */
 {
-	FOREIGN_KEY_INFO*	pf_key_info;
-	dict_foreign_t*		foreign;
-
 	ut_a(prebuilt != NULL);
 	update_thd(ha_thd());
 
@@ -14509,20 +14896,7 @@ ha_innobase::get_parent_foreign_key_list(
 	trx_search_latch_release_if_reserved(prebuilt->trx);
 
 	mutex_enter(&(dict_sys->mutex));
-
-	for (dict_foreign_set::iterator it
-		= prebuilt->table->referenced_set.begin();
-	     it != prebuilt->table->referenced_set.end();
-	     ++it) {
-
-		foreign = *it;
-
-		pf_key_info = get_foreign_key_info(thd, foreign);
-		if (pf_key_info) {
-			f_key_list->push_back(pf_key_info);
-		}
-	}
-
+	fill_foreign_key_list(thd, prebuilt->table, f_key_list);
 	mutex_exit(&(dict_sys->mutex));
 
 	prebuilt->trx->op_info = "";
@@ -14612,6 +14986,11 @@ ha_innobase::extra(
 		if (prebuilt->blob_heap) {
 			row_mysql_prebuilt_free_blob_heap(prebuilt);
 		}
+
+		if (prebuilt->compress_heap) {
+			row_mysql_prebuilt_free_compress_heap(prebuilt);
+		}
+
 		break;
 	case HA_EXTRA_RESET_STATE:
 		reset_template();
@@ -14663,6 +15042,10 @@ ha_innobase::reset()
 		row_mysql_prebuilt_free_blob_heap(prebuilt);
 	}
 
+	if (prebuilt->compress_heap) {
+		row_mysql_prebuilt_free_compress_heap(prebuilt);
+	}
+
 	reset_template();
 	ds_mrr.dsmrr_close();
 
@@ -14869,7 +15252,11 @@ ha_innobase::external_lock(
 		    && lock_type == F_WRLCK)
 		|| thd_sql_command(thd) == SQLCOM_CREATE_INDEX
 		|| thd_sql_command(thd) == SQLCOM_DROP_INDEX
-		|| thd_sql_command(thd) == SQLCOM_DELETE)) {
+		|| thd_sql_command(thd) == SQLCOM_DELETE
+		|| thd_sql_command(thd) ==
+			SQLCOM_CREATE_COMPRESSION_DICTIONARY
+		|| thd_sql_command(thd) ==
+			SQLCOM_DROP_COMPRESSION_DICTIONARY)) {
 
 		if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE)
 		{
@@ -15637,7 +16024,9 @@ ha_innobase::store_lock(
 			 && lock_type <= TL_WRITE))
 		|| sql_command == SQLCOM_CREATE_INDEX
 		|| sql_command == SQLCOM_DROP_INDEX
-		|| sql_command == SQLCOM_DELETE)) {
+		|| sql_command == SQLCOM_DELETE
+		|| sql_command == SQLCOM_CREATE_COMPRESSION_DICTIONARY
+		|| sql_command == SQLCOM_DROP_COMPRESSION_DICTIONARY)) {
 
 		ib_senderrf(trx->mysql_thd,
 			    IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
@@ -16594,6 +16983,84 @@ ha_innobase::check_if_incompatible_data(
 	return(COMPATIBLE_DATA_YES);
 }
 
+/** For every column of table_share marked COLUMN_FORMAT_TYPE_COMPRESSED,
+look up its compression dictionary via SYS_ZIP_DICT_COLS / SYS_ZIP_DICT and
+set zip_dict_name / zip_dict_data on the Field; columns without a dictionary
+reference get null_lex_cstr. The lookups are compiled only when
+HAVE_PERCONA_COMPRESSED_COLUMNS is defined. */
+UNIV_INTERN
+void
+ha_innobase::update_field_defs_with_zip_dict_info()
+{
+	DBUG_ENTER("update_field_defs_with_zip_dict_info");
+	ut_ad(!mutex_own(&dict_sys->mutex));
+
+	char norm_name[FN_REFLEN];
+	normalize_table_name(norm_name, table_share->normalized_path.str);
+
+	dict_table_t* ib_table = dict_table_open_on_name(
+		norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
+
+	/* if dict_table_open_on_name() returns NULL, then it means that
+	TABLE_SHARE is populated for a table being created and we can
+	skip filling zip dict info here */
+	if (ib_table == 0)
+		DBUG_VOID_RETURN;
+
+	/* Only the table id is used below. Close the table outside the
+	#ifdef so the handle opened above is released in every build. */
+	const table_id_t ib_table_id = ib_table->id;
+	dict_table_close(ib_table, FALSE, FALSE);
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+	for (uint i = 0; i < table_share->fields; ++i) {
+		Field* field = table_share->field[i];
+		if (field->column_format() ==
+		    COLUMN_FORMAT_TYPE_COMPRESSED) {
+			bool reference_found = false;
+			ulint dict_id = 0;
+			switch (dict_get_dictionary_id_by_key(ib_table_id, i,
+				&dict_id)) {
+				case DB_SUCCESS:
+					reference_found = true;
+					break;
+				case DB_RECORD_NOT_FOUND:
+					reference_found = false;
+					break;
+				default:
+					ut_error;
+			}
+			if (reference_found) {
+				char* local_name = 0;
+				ulint local_name_len = 0;
+				char* local_data = 0;
+				ulint local_data_len = 0;
+				if (dict_get_dictionary_info_by_id(dict_id,
+					&local_name, &local_name_len,
+					&local_data, &local_data_len) !=
+					DB_SUCCESS) {
+					ut_error;
+				} else {
+					field->zip_dict_name.str =
+						local_name;
+					field->zip_dict_name.length =
+						local_name_len;
+					field->zip_dict_data.str =
+						local_data;
+					field->zip_dict_data.length =
+						local_data_len;
+				}
+			} else {
+				field->zip_dict_name = null_lex_cstr;
+				field->zip_dict_data = null_lex_cstr;
+			}
+		}
+	}
+#else
+	(void) ib_table_id;	/* unused when the feature is compiled out */
+#endif
+	DBUG_VOID_RETURN;
+}
+
 /****************************************************************//**
 Update the system variable innodb_io_capacity_max using the "saved"
 value. This function is registered as a callback with MySQL. */
@@ -17155,7 +17622,12 @@ innodb_internal_table_update(
 		my_free(old);
 	}
 
-	fts_internal_tbl_name = *(char**) var_ptr;
+	fts_internal_tbl_name2 = *(char**) var_ptr;
+	if (fts_internal_tbl_name2 == NULL) {
+		fts_internal_tbl_name = const_cast<char*>("default");
+	} else {
+		fts_internal_tbl_name = fts_internal_tbl_name2;
+	}
 }
 
 /****************************************************************//**
@@ -18287,7 +18759,6 @@ innodb_track_changed_pages_validate(
 						for update function */
 	struct st_mysql_value*		value)	/*!< in: incoming bool */
 {
-	static bool     enabled_on_startup = false;
 	long long	intbuf = 0;
 
 	if (value->val_int(value, &intbuf)) {
@@ -18295,8 +18766,7 @@ innodb_track_changed_pages_validate(
 		return 1;
 	}
 
-	if (srv_track_changed_pages || enabled_on_startup) {
-		enabled_on_startup = true;
+	if (srv_redo_log_thread_started) {
 		*reinterpret_cast<ulong*>(save)
 			= static_cast<ulong>(intbuf);
 		return 0;
@@ -19834,7 +20304,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
   "Whether to disable OS system file cache for sort I/O",
   NULL, NULL, FALSE);
 
-static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name,
+static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2,
   PLUGIN_VAR_NOCMDARG,
   "FTS internal auxiliary table to be checked",
   innodb_internal_table_validate,
@@ -20314,7 +20784,7 @@ static	MYSQL_SYSVAR_ENUM(corrupt_table_action, srv_pass_corrupt_table,
   "Warn corruptions of user tables as 'corrupt table' instead of not crashing itself, "
   "when used with file_per_table. "
   "All file io for the datafile after detected as corrupt are disabled, "
-  "except for the deletion. Possible options are 'assert', 'warn' & 'salvage'",
+  "except for the deletion.",
   NULL, NULL, 0, &corrupt_table_action_typelib);
 
 static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks,
@@ -20329,6 +20799,21 @@ static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace,
   "Print stacktrace on long semaphore wait (off by default supported only on linux)",
   NULL, NULL, FALSE);
 
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS	/* compressed-columns system variables */
+static MYSQL_SYSVAR_UINT(compressed_columns_zip_level,
+  srv_compressed_columns_zip_level,
+  PLUGIN_VAR_RQCMDARG,
+  "Compression level used for compressed columns.  0 is no compression"
+  ", 1 is fastest and 9 is best compression. Default is 6.",
+  NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);	/* default, then the 0..9 range from the help text */
+
+static MYSQL_SYSVAR_ULONG(compressed_columns_threshold,
+  srv_compressed_columns_threshold,
+  PLUGIN_VAR_RQCMDARG,
+  "Compress column data if its length exceeds this value. Default is 96",
+  NULL, NULL, 96, 1, ~0UL, 0);	/* default 96, as stated in the help text */
+#endif
+
 static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(log_block_size),
   MYSQL_SYSVAR(additional_mem_pool_size),
@@ -20535,6 +21020,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(tmpdir),
   MYSQL_SYSVAR(use_stacktrace),
   MYSQL_SYSVAR(simulate_comp_failures),
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+  MYSQL_SYSVAR(compressed_columns_zip_level),
+  MYSQL_SYSVAR(compressed_columns_threshold),
+#endif
   NULL
 };
 
@@ -20557,6 +21046,10 @@ maria_declare_plugin(xtradb)
 i_s_xtradb_read_view,
 i_s_xtradb_internal_hash_tables,
 i_s_xtradb_rseg,
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+i_s_xtradb_zip_dict,
+i_s_xtradb_zip_dict_cols,
+#endif
 i_s_innodb_trx,
 i_s_innodb_locks,
 i_s_innodb_lock_waits,
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index 37d787ad14d62..cb7dd6b9cf4c8 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -291,8 +291,17 @@ class ha_innobase: public handler
 	/** @} */
 	bool check_if_incompatible_data(HA_CREATE_INFO *info,
 					uint table_changes);
+
 	bool check_if_supported_virtual_columns(void) { return TRUE; }
 
+	/** This function reads zip dict-related info from SYS_ZIP_DICT
+	and SYS_ZIP_DICT_COLS for all columns marked with
+	COLUMN_FORMAT_TYPE_COMPRESSED flag and updates
+	zip_dict_name / zip_dict_data for those which have associated
+	compression dictionaries.
+	*/
+	virtual void update_field_defs_with_zip_dict_info();
+
 private:
 	/** Builds a 'template' to the prebuilt struct.
 
@@ -723,3 +732,31 @@ ib_push_frm_error(
 	TABLE*		table,		/*!< in: MySQL table */
 	ulint		n_keys,		/*!< in: InnoDB #keys */
 	bool		push_warning);	/*!< in: print warning ? */
+
+/** This function checks if all the compression dictionaries referenced
+in table->fields exist in SYS_ZIP_DICT InnoDB system table.
+@return true if all referenced dictionaries exist */
+UNIV_INTERN
+bool
+innobase_check_zip_dicts(
+	const TABLE*	table,		/*!< in: table in MySQL data
+					dictionary */
+	ulint*		dict_ids,	/*!< out: identified zip dict ids
+					(at least n_fields long) */
+	trx_t*		trx,		/*!< in: transaction */
+	const char**	err_dict_name);	/*!< out: the name of the
+					zip_dict which does not exist. */
+
+/** This function creates compression dictionary references in
+SYS_ZIP_DICT_COLS InnoDB system table for table_id based on info
+in table->fields and provided zip dict ids. */
+UNIV_INTERN
+void
+innobase_create_zip_dict_references(
+	const TABLE*	table,		/*!< in: table in MySQL data
+					dictionary */
+	table_id_t	ib_table_id,	/*!< in: table ID in Innodb data
+					dictionary */
+	ulint*		zip_dict_ids,	/*!< in: zip dict ids
+					(at least n_fields long) */
+	trx_t*		trx);		/*!< in: transaction */
diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc
index 646ba986dcca8..8299b4dafe1b0 100644
--- a/storage/xtradb/handler/handler0alter.cc
+++ b/storage/xtradb/handler/handler0alter.cc
@@ -21,6 +21,11 @@ this program; if not, write to the Free Software Foundation, Inc.,
 Smart ALTER TABLE
 *******************************************************/
 
+#ifndef HAVE_PERCONA_COMPRESSED_COLUMNS
+#define COLUMN_FORMAT_TYPE_COMPRESSED                   0xBADF00D
+#define ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST        0xDEADFACE
+#endif
+
 #include <my_global.h>
 #include <unireg.h>
 #include <mysqld_error.h>
@@ -213,7 +218,10 @@ innobase_need_rebuild(
 	const Alter_inplace_info*	ha_alter_info,
 	const TABLE*			altered_table)
 {
-	if (ha_alter_info->handler_flags
+	Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags =
+		ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE);
+
+	if (alter_inplace_flags
 	    == Alter_inplace_info::CHANGE_CREATE_OPTION
 	    && !(ha_alter_info->create_info->used_fields
 		 & (HA_CREATE_USED_ROW_FORMAT
@@ -1181,6 +1189,15 @@ innobase_col_to_mysql(
 		field->reset();
 
 		if (field->type() == MYSQL_TYPE_VARCHAR) {
+			if (field->column_format() ==
+				COLUMN_FORMAT_TYPE_COMPRESSED) {
+				/* Skip compressed varchar column when
+				reporting an erroneous row
+				during index creation or table rebuild. */
+				field->set_null();
+				break;
+			}
+
 			/* This is a >= 5.0.3 type true VARCHAR. Store the
 			length of the data to the first byte or the first
 			two bytes of dest. */
@@ -2466,7 +2483,8 @@ innobase_build_col_map_add(
 	mem_heap_t*	heap,
 	dfield_t*	dfield,
 	const Field*	field,
-	ulint		comp)
+	ulint		comp,
+	row_prebuilt_t*	prebuilt)
 {
 	if (field->is_real_null()) {
 		dfield_set_null(dfield);
@@ -2478,7 +2496,14 @@ innobase_build_col_map_add(
 	byte*	buf	= static_cast<byte*>(mem_heap_alloc(heap, size));
 
 	row_mysql_store_col_in_innobase_format(
-		dfield, buf, TRUE, field->ptr, size, comp);
+		dfield, buf, TRUE, field->ptr, size, comp,
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+		field->column_format() == COLUMN_FORMAT_TYPE_COMPRESSED,
+		reinterpret_cast<const byte*>(field->zip_dict_data.str),
+		field->zip_dict_data.length, prebuilt);
+#else
+		0,0,0, prebuilt);
+#endif
 }
 
 /** Construct the translation table for reordering, dropping or
@@ -2503,7 +2528,8 @@ innobase_build_col_map(
 	const dict_table_t*	new_table,
 	const dict_table_t*	old_table,
 	dtuple_t*		add_cols,
-	mem_heap_t*		heap)
+	mem_heap_t*		heap,
+	row_prebuilt_t*	prebuilt)
 {
         uint old_i, old_innobase_i;
 	DBUG_ENTER("innobase_build_col_map");
@@ -2554,7 +2580,7 @@ innobase_build_col_map(
 		innobase_build_col_map_add(
 			heap, dtuple_get_nth_field(add_cols, i),
 			altered_table->field[sql_idx],
-			dict_table_is_comp(new_table));
+			dict_table_is_comp(new_table), prebuilt);
 found_col:
 		i++;
                 sql_idx++;
@@ -2718,7 +2744,8 @@ prepare_inplace_alter_table_dict(
 	ulint			flags2,
 	ulint			fts_doc_id_col,
 	bool			add_fts_doc_id,
-	bool			add_fts_doc_id_idx)
+	bool			add_fts_doc_id_idx,
+	row_prebuilt_t* 	prebuilt)
 {
 	bool			dict_locked	= false;
 	ulint*			add_key_nums;	/* MySQL key numbers */
@@ -2730,6 +2757,7 @@ prepare_inplace_alter_table_dict(
 	ulint			num_fts_index;
 	ha_innobase_inplace_ctx*ctx;
         uint                    sql_idx;
+	ulint*			zip_dict_ids = 0;
 
 	DBUG_ENTER("prepare_inplace_alter_table_dict");
 
@@ -2866,6 +2894,18 @@ prepare_inplace_alter_table_dict(
 		ulint		n_cols;
 		dtuple_t*	add_cols;
 
+		zip_dict_ids = static_cast<ulint*>(
+			mem_heap_alloc(ctx->heap,
+				altered_table->s->fields * sizeof(ulint)));
+
+		const char*	err_zip_dict_name = 0;
+		if (!innobase_check_zip_dicts(altered_table, zip_dict_ids,
+			ctx->trx, &err_zip_dict_name)) {
+			my_error(ER_COMPRESSION_DICTIONARY_DOES_NOT_EXIST,
+				MYF(0), err_zip_dict_name);
+			goto new_clustered_failed;
+		}
+
 		if (innobase_check_foreigns(
 			    ha_alter_info, altered_table, old_table,
 			    user_table, ctx->drop_fk, ctx->num_to_drop_fk)) {
@@ -2972,6 +3012,12 @@ prepare_inplace_alter_table_dict(
 				}
 			}
 
+			if (field->column_format() ==
+				COLUMN_FORMAT_TYPE_COMPRESSED) {
+				field_type |= DATA_COMPRESSED;
+			}
+
+
 			if (dict_col_name_is_reserved(field->field_name)) {
 				dict_mem_table_free(ctx->new_table);
 				my_error(ER_WRONG_COLUMN_NAME, MYF(0),
@@ -3051,7 +3097,7 @@ prepare_inplace_alter_table_dict(
 		ctx->col_map = innobase_build_col_map(
 			ha_alter_info, altered_table, old_table,
 			ctx->new_table, user_table,
-			add_cols, ctx->heap);
+			add_cols, ctx->heap, prebuilt);
 		ctx->add_cols = add_cols;
 	} else {
 		DBUG_ASSERT(!innobase_need_rebuild(ha_alter_info, old_table));
@@ -3229,6 +3275,15 @@ prepare_inplace_alter_table_dict(
 
 	DBUG_ASSERT(error == DB_SUCCESS);
 
+	/*
+	Adding compression dictionary <-> compressed table column links
+	to the SYS_ZIP_DICT_COLS table.
+	*/
+	if (zip_dict_ids != 0) {
+		innobase_create_zip_dict_references(altered_table,
+			ctx->trx->table_id, zip_dict_ids, ctx->trx);
+	}
+
 	/* Commit the data dictionary transaction in order to release
 	the table locks on the system tables.  This means that if
 	MySQL crashes while creating a new primary key inside
@@ -3947,7 +4002,7 @@ ha_innobase::prepare_inplace_alter_table(
 	}
 
 	if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)
-	    || (ha_alter_info->handler_flags
+	    || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
 		== Alter_inplace_info::CHANGE_CREATE_OPTION
 		&& !innobase_need_rebuild(ha_alter_info, table))) {
 
@@ -4081,7 +4136,7 @@ ha_innobase::prepare_inplace_alter_table(
 			    table_share->table_name.str,
 			    flags, flags2,
 			    fts_doc_col_no, add_fts_doc_id,
-			    add_fts_doc_id_idx));
+			    add_fts_doc_id_idx, prebuilt));
 }
 
 /** Alter the table structure in-place with operations
@@ -4121,7 +4176,7 @@ ha_innobase::inplace_alter_table(
 		DBUG_RETURN(false);
 	}
 
-	if (ha_alter_info->handler_flags
+	if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
 	    == Alter_inplace_info::CHANGE_CREATE_OPTION
 	    && !innobase_need_rebuild(ha_alter_info, table)) {
 		goto ok_exit;
diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc
index 02d2a2100a403..59cad1c2e7a4d 100644
--- a/storage/xtradb/handler/i_s.cc
+++ b/storage/xtradb/handler/i_s.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -2958,15 +2958,26 @@ i_s_fts_deleted_generic_fill(
 		DBUG_RETURN(0);
 	}
 
-	deleted = fts_doc_ids_create();
+	/* Prevent DDL to drop fts aux tables. */
+	rw_lock_s_lock(&dict_operation_lock);
 
 	user_table = dict_table_open_on_name(
 		fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
 
 	if (!user_table) {
+		rw_lock_s_unlock(&dict_operation_lock);
+
+		DBUG_RETURN(0);
+	} else if (!dict_table_has_fts_index(user_table)) {
+		dict_table_close(user_table, FALSE, FALSE);
+
+		rw_lock_s_unlock(&dict_operation_lock);
+
 		DBUG_RETURN(0);
 	}
 
+	deleted = fts_doc_ids_create();
+
 	trx = trx_allocate_for_background();
 	trx->op_info = "Select for FTS DELETE TABLE";
 
@@ -2994,6 +3005,8 @@ i_s_fts_deleted_generic_fill(
 
 	dict_table_close(user_table, FALSE, FALSE);
 
+	rw_lock_s_unlock(&dict_operation_lock);
+
 	DBUG_RETURN(0);
 }
 
@@ -3365,6 +3378,12 @@ i_s_fts_index_cache_fill(
 		DBUG_RETURN(0);
 	}
 
+	if (user_table->fts == NULL || user_table->fts->cache == NULL) {
+		dict_table_close(user_table, FALSE, FALSE);
+
+		DBUG_RETURN(0);
+	}
+
 	cache = user_table->fts->cache;
 
 	ut_a(cache);
@@ -3798,10 +3817,15 @@ i_s_fts_index_table_fill(
 		DBUG_RETURN(0);
 	}
 
+	/* Prevent DDL to drop fts aux tables. */
+	rw_lock_s_lock(&dict_operation_lock);
+
 	user_table = dict_table_open_on_name(
 		fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
 
 	if (!user_table) {
+		rw_lock_s_unlock(&dict_operation_lock);
+
 		DBUG_RETURN(0);
 	}
 
@@ -3814,6 +3838,8 @@ i_s_fts_index_table_fill(
 
 	dict_table_close(user_table, FALSE, FALSE);
 
+	rw_lock_s_unlock(&dict_operation_lock);
+
 	DBUG_RETURN(0);
 }
 
@@ -3946,16 +3972,25 @@ i_s_fts_config_fill(
 		DBUG_RETURN(0);
 	}
 
+	DEBUG_SYNC_C("i_s_fts_config_fille_check");
+
 	fields = table->field;
 
+	/* Prevent DDL to drop fts aux tables. */
+	rw_lock_s_lock(&dict_operation_lock);
+
 	user_table = dict_table_open_on_name(
 		fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
 
 	if (!user_table) {
+		rw_lock_s_unlock(&dict_operation_lock);
+
 		DBUG_RETURN(0);
 	} else if (!dict_table_has_fts_index(user_table)) {
 		dict_table_close(user_table, FALSE, FALSE);
 
+		rw_lock_s_unlock(&dict_operation_lock);
+
 		DBUG_RETURN(0);
 	}
 
@@ -4011,6 +4046,8 @@ i_s_fts_config_fill(
 
 	dict_table_close(user_table, FALSE, FALSE);
 
+	rw_lock_s_unlock(&dict_operation_lock);
+
 	DBUG_RETURN(0);
 }
 
diff --git a/storage/xtradb/handler/xtradb_i_s.cc b/storage/xtradb/handler/xtradb_i_s.cc
index 7078ab752c2e4..eb6637dad0397 100644
--- a/storage/xtradb/handler/xtradb_i_s.cc
+++ b/storage/xtradb/handler/xtradb_i_s.cc
@@ -32,9 +32,11 @@ this program; if not, write to the Free Software Foundation, Inc.,
 #include <read0i_s.h>
 #include <trx0i_s.h>
 #include "srv0start.h"	/* for srv_was_started */
+#include <btr0pcur.h> /* btr_pcur_t */
 #include <btr0sea.h> /* btr_search_sys */
 #include <log0recv.h> /* recv_sys */
 #include <fil0fil.h>
+#include <dict0crea.h> /* for ZIP_DICT_MAX_* constants */
 
 /* for XTRADB_RSEG table */
 #include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */
@@ -131,6 +133,30 @@ field_store_string(
 	return(ret);
 }
 
+static int field_store_blob(Field*, const char*, uint) __attribute__((unused));
+/** Store a (pointer, length) byte range into a MYSQL_TYPE_BLOB field,
+or set the field to SQL NULL when no data is given.
+@return	0 when data is NULL, otherwise the result of Field::store() */
+static
+int
+field_store_blob(
+	Field*		field,		/*!< in/out: target field for storage */
+	const char*	data,		/*!< in: pointer to data, or NULL */
+	uint		data_len)	/*!< in: data length */
+{
+	if (data == NULL) {
+		/* nothing to store: mark the field NULL and report success */
+		field->set_null();
+		return(0);
+	}
+
+	const int	status = field->store(data, data_len,
+					      system_charset_info);
+	field->set_notnull();
+
+	return(status);
+}
+
 static
 int
 i_s_common_deinit(
@@ -603,3 +629,331 @@ UNIV_INTERN struct st_mysql_plugin	i_s_xtradb_rseg =
 	STRUCT_FLD(version_info, INNODB_VERSION_STR),
         STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
 };
+
+
+#ifdef HAVE_PERCONA_COMPRESSED_COLUMNS
+/** Column positions within an INFORMATION_SCHEMA.XTRADB_ZIP_DICT row */
+enum zip_dict_field_type
+{
+	zip_dict_field_id,	/*!< "id" column */
+	zip_dict_field_name,	/*!< "name" column */
+	zip_dict_field_zip_dict	/*!< "zip_dict" column */
+};
+
+static ST_FIELD_INFO xtradb_sys_zip_dict_fields_info[] =
+{	/* one entry per column, in enum zip_dict_field_type order */
+	{ STRUCT_FLD(field_name, "id"),	/* dictionary id */
+	STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+	STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+	STRUCT_FLD(value, 0),
+	STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+	STRUCT_FLD(old_name, ""),
+	STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+	{ STRUCT_FLD(field_name, "name"),	/* dictionary name */
+	STRUCT_FLD(field_length, ZIP_DICT_MAX_NAME_LENGTH),
+	STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+	STRUCT_FLD(value, 0),
+	STRUCT_FLD(field_flags, 0),
+	STRUCT_FLD(old_name, ""),
+	STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+	{ STRUCT_FLD(field_name, "zip_dict"),	/* dictionary data, stored as a blob */
+	STRUCT_FLD(field_length, ZIP_DICT_MAX_DATA_LENGTH),
+	STRUCT_FLD(field_type, MYSQL_TYPE_BLOB),
+	STRUCT_FLD(value, 0),
+	STRUCT_FLD(field_flags, 0),
+	STRUCT_FLD(old_name, ""),
+	STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+	END_OF_ST_FIELD_INFO
+};
+
+/** Store one compression dictionary (id, name, data) as a row of
+INFORMATION_SCHEMA.XTRADB_ZIP_DICT.
+@return 0 on success */
+static
+int
+xtradb_i_s_dict_fill_sys_zip_dict(
+	THD*		thd,		/*!< in: thread */
+	ulint		id,		/*!< in: dict ID */
+	const char*	name,		/*!< in: dict name */
+	const char*	data,		/*!< in: dict data */
+	ulint		data_len,	/*!< in: dict data length */
+	TABLE*		table_to_fill)	/*!< in/out: fill this table */
+{
+	DBUG_ENTER("xtradb_i_s_dict_fill_sys_zip_dict");
+
+	OK(field_store_ulint(
+		table_to_fill->field[zip_dict_field_id], id));
+	OK(field_store_string(
+		table_to_fill->field[zip_dict_field_name], name));
+	OK(field_store_blob(
+		table_to_fill->field[zip_dict_field_zip_dict],
+		data, data_len));
+	OK(schema_table_store_record(thd, table_to_fill));
+
+	DBUG_RETURN(0);
+}
+
+/** Populate INFORMATION_SCHEMA.XTRADB_ZIP_DICT: scan SYS_ZIP_DICT and
+store one row per record that dict_process_sys_zip_dict() accepts; a
+record it rejects produces a warning carrying its error message instead.
+@return 0 on success */
+static
+int
+xtradb_i_s_sys_zip_dict_fill_table(
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	Item*		)	/*!< in: condition (not used) */
+{
+	btr_pcur_t	pcur;
+	const rec_t*	rec;
+	mem_heap_t*	heap;
+	mtr_t		mtr;
+
+	DBUG_ENTER("xtradb_i_s_sys_zip_dict_fill_table");
+	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
+	/* users without SUPER_ACL get no rows: return 0 before any scan */
+	if (check_global_access(thd, SUPER_ACL)) {
+		DBUG_RETURN(0);
+	}
+
+	heap = mem_heap_create(1000);
+	mutex_enter(&dict_sys->mutex);
+	mtr_start(&mtr);
+
+	rec = dict_startscan_system(&pcur, &mtr, SYS_ZIP_DICT);
+	ulint zip_size = dict_table_zip_size(pcur.btr_cur.index->table);
+
+	while (rec) {
+		const char*	err_msg;
+		ulint		id;
+		const char*	name;
+		const char*	data;
+		ulint		data_len;
+
+		/* Parse the SYS_ZIP_DICT record into id / name / data;
+		err_msg is non-NULL when parsing failed */
+		err_msg = dict_process_sys_zip_dict(
+			heap, zip_size, rec, &id, &name, &data, &data_len);
+		mtr_commit(&mtr);
+		mutex_exit(&dict_sys->mutex);	/* both released before the row is stored */
+
+		if (!err_msg) {
+			xtradb_i_s_dict_fill_sys_zip_dict(	/* NOTE(review): result is not checked */
+				thd, id, name, data, data_len,
+				tables->table);
+		} else {
+			push_warning_printf(thd,
+				Sql_condition::WARN_LEVEL_WARN,
+				ER_CANT_FIND_SYSTEM_REC, "%s", err_msg);
+		}
+
+		mem_heap_empty(heap);	/* the heap is reused for the next record */
+
+		/* Re-acquire the mutex and a new mini-transaction, then
+		advance the cursor */
+		mutex_enter(&dict_sys->mutex);
+		mtr_start(&mtr);
+		rec = dict_getnext_system(&pcur, &mtr);
+	}
+	mtr_commit(&mtr);	/* the loop exits with mtr started and the mutex held */
+	mutex_exit(&dict_sys->mutex);
+	mem_heap_free(heap);
+
+	DBUG_RETURN(0);
+}
+
+static int i_s_xtradb_zip_dict_init(void* p)
+{
+	DBUG_ENTER("i_s_xtradb_zip_dict_init");
+
+	/* p is the ST_SCHEMA_TABLE to bind the columns and filler to */
+	ST_SCHEMA_TABLE* const	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
+	i_s_table->fill_table = xtradb_i_s_sys_zip_dict_fill_table;
+	i_s_table->fields_info = xtradb_sys_zip_dict_fields_info;
+
+	DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin	i_s_xtradb_zip_dict =
+{	/* plugin descriptor for INFORMATION_SCHEMA.XTRADB_ZIP_DICT */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+	STRUCT_FLD(info, &i_s_info),
+	STRUCT_FLD(name, "XTRADB_ZIP_DICT"),
+	STRUCT_FLD(author, PLUGIN_AUTHOR),
+	STRUCT_FLD(descr, "InnoDB compression dictionaries information"),
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+	STRUCT_FLD(init, i_s_xtradb_zip_dict_init),	/* sets fields_info and fill_table */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+	STRUCT_FLD(status_vars, NULL),
+	STRUCT_FLD(system_vars, NULL),
+	STRUCT_FLD(__reserved1, NULL),	/* NOTE(review): i_s_xtradb_rseg ends with version_info/maturity; confirm layout */
+	STRUCT_FLD(flags, 0UL),
+};
+
+enum zip_dict_cols_field_type	/* column positions in an XTRADB_ZIP_DICT_COLS row */
+{
+	zip_dict_cols_field_table_id,	/*!< "table_id" column */
+	zip_dict_cols_field_column_pos,	/*!< "column_pos" column */
+	zip_dict_cols_field_dict_id	/*!< "dict_id" column */
+};
+
+static ST_FIELD_INFO xtradb_sys_zip_dict_cols_fields_info[] =
+{	/* one entry per column, in enum zip_dict_cols_field_type order */
+	{ STRUCT_FLD(field_name, "table_id"),	/* table ID */
+	STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+	STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+	STRUCT_FLD(value, 0),
+	STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+	STRUCT_FLD(old_name, ""),
+	STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+	{ STRUCT_FLD(field_name, "column_pos"),	/* column position */
+	STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+	STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+	STRUCT_FLD(value, 0),
+	STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+	STRUCT_FLD(old_name, ""),
+	STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+	{ STRUCT_FLD(field_name, "dict_id"),	/* dictionary ID */
+	STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+	STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+	STRUCT_FLD(value, 0),
+	STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+	STRUCT_FLD(old_name, ""),
+	STRUCT_FLD(open_method, SKIP_OPEN_TABLE) },
+
+	END_OF_ST_FIELD_INFO
+};
+
+/** Store one (table id, column position, dictionary id) link as a row
+of INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS.
+@return 0 on success */
+static
+int
+xtradb_i_s_dict_fill_sys_zip_dict_cols(
+	THD*		thd,		/*!< in: thread */
+	ulint		table_id,	/*!< in: table ID */
+	ulint		column_pos,	/*!< in: column position */
+	ulint		dict_id,	/*!< in: dict ID */
+	TABLE*		table_to_fill)	/*!< in/out: fill this table */
+{
+	DBUG_ENTER("xtradb_i_s_dict_fill_sys_zip_dict_cols");
+
+	OK(field_store_ulint(
+		table_to_fill->field[zip_dict_cols_field_table_id],
+		table_id));
+	OK(field_store_ulint(
+		table_to_fill->field[zip_dict_cols_field_column_pos],
+		column_pos));
+	OK(field_store_ulint(
+		table_to_fill->field[zip_dict_cols_field_dict_id],
+		dict_id));
+	OK(schema_table_store_record(thd, table_to_fill));
+
+	DBUG_RETURN(0);
+}
+
+/** Populate INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS: scan SYS_ZIP_DICT_COLS
+and store one row per record that dict_process_sys_zip_dict_cols() accepts;
+a record it rejects produces a warning carrying its error message instead.
+@return 0 on success */
+static
+int
+xtradb_i_s_sys_zip_dict_cols_fill_table(
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	Item*		)	/*!< in: condition (not used) */
+{
+	btr_pcur_t	pcur;
+	const rec_t*	rec;
+	mem_heap_t*	heap;
+	mtr_t		mtr;
+
+	DBUG_ENTER("xtradb_i_s_sys_zip_dict_cols_fill_table");
+	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
+	/* users without SUPER_ACL get no rows: return 0 before any scan */
+	if (check_global_access(thd, SUPER_ACL)) {
+		DBUG_RETURN(0);
+	}
+
+	heap = mem_heap_create(1000);
+	mutex_enter(&dict_sys->mutex);
+	mtr_start(&mtr);
+
+	rec = dict_startscan_system(&pcur, &mtr, SYS_ZIP_DICT_COLS);
+
+	while (rec) {
+		const char*	err_msg;
+		ulint table_id;
+		ulint column_pos;
+		ulint dict_id;
+
+		/* Parse the SYS_ZIP_DICT_COLS record into its three ids;
+		err_msg is non-NULL when parsing failed */
+		err_msg = dict_process_sys_zip_dict_cols(
+			heap, rec, &table_id, &column_pos, &dict_id);
+
+		mtr_commit(&mtr);
+		mutex_exit(&dict_sys->mutex);	/* both released before the row is stored */
+
+		if (!err_msg) {
+			xtradb_i_s_dict_fill_sys_zip_dict_cols(	/* NOTE(review): result is not checked */
+				thd, table_id, column_pos, dict_id,
+				tables->table);
+		} else {
+			push_warning_printf(thd,
+				Sql_condition::WARN_LEVEL_WARN,
+				ER_CANT_FIND_SYSTEM_REC, "%s", err_msg);
+		}
+
+		mem_heap_empty(heap);	/* the heap is reused for the next record */
+
+		/* Re-acquire the mutex and a new mini-transaction, then advance the cursor */
+		mutex_enter(&dict_sys->mutex);
+		mtr_start(&mtr);
+		rec = dict_getnext_system(&pcur, &mtr);
+	}
+
+	mtr_commit(&mtr);	/* the loop exits with mtr started and the mutex held */
+	mutex_exit(&dict_sys->mutex);
+	mem_heap_free(heap);
+
+	DBUG_RETURN(0);
+}
+
+static int i_s_xtradb_zip_dict_cols_init(void* p)
+{
+	DBUG_ENTER("i_s_xtradb_zip_dict_cols_init");
+
+	/* p is the ST_SCHEMA_TABLE to bind the columns and filler to */
+	ST_SCHEMA_TABLE* const	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
+	i_s_table->fill_table = xtradb_i_s_sys_zip_dict_cols_fill_table;
+	i_s_table->fields_info = xtradb_sys_zip_dict_cols_fields_info;
+
+	DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin	i_s_xtradb_zip_dict_cols =
+{	/* plugin descriptor for INFORMATION_SCHEMA.XTRADB_ZIP_DICT_COLS */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+	STRUCT_FLD(info, &i_s_info),
+	STRUCT_FLD(name, "XTRADB_ZIP_DICT_COLS"),
+	STRUCT_FLD(author, PLUGIN_AUTHOR),
+	STRUCT_FLD(descr, "InnoDB compressed columns information"),
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+	STRUCT_FLD(init, i_s_xtradb_zip_dict_cols_init),	/* sets fields_info and fill_table */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+	STRUCT_FLD(status_vars, NULL),
+	STRUCT_FLD(system_vars, NULL),
+	STRUCT_FLD(__reserved1, NULL),	/* NOTE(review): i_s_xtradb_rseg ends with version_info/maturity; confirm layout */
+	STRUCT_FLD(flags, 0UL),
+};
+#endif
diff --git a/storage/xtradb/handler/xtradb_i_s.h b/storage/xtradb/handler/xtradb_i_s.h
index 2f7552c565adc..905d84587affd 100644
--- a/storage/xtradb/handler/xtradb_i_s.h
+++ b/storage/xtradb/handler/xtradb_i_s.h
@@ -22,5 +22,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
 extern struct st_mysql_plugin	i_s_xtradb_read_view;
 extern struct st_mysql_plugin	i_s_xtradb_internal_hash_tables;
 extern struct st_mysql_plugin	i_s_xtradb_rseg;
+extern struct st_mysql_plugin	i_s_xtradb_zip_dict;
+extern struct st_mysql_plugin	i_s_xtradb_zip_dict_cols;
 
 #endif /* XTRADB_I_S_H */
diff --git a/storage/xtradb/ibuf/ibuf0ibuf.cc b/storage/xtradb/ibuf/ibuf0ibuf.cc
index d62b869df4fd9..a9f039d3f0ff6 100644
--- a/storage/xtradb/ibuf/ibuf0ibuf.cc
+++ b/storage/xtradb/ibuf/ibuf0ibuf.cc
@@ -938,7 +938,7 @@ ibuf_set_free_bits_low(
 	ulint	space;
 	ulint	page_no;
 
-	if (!page_is_leaf(buf_block_get_frame(block))) {
+	if (!page_is_leaf(buf_nonnull_block_get_frame(block))) {
 
 		return;
 	}
@@ -1113,7 +1113,7 @@ ibuf_update_free_bits_zip(
 	page_no = buf_block_get_page_no(block);
 	zip_size = buf_block_get_zip_size(block);
 
-	ut_a(page_is_leaf(buf_block_get_frame(block)));
+	ut_a(page_is_leaf(buf_nonnull_block_get_frame(block)));
 	ut_a(zip_size);
 
 	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h
index 7e1083a103c55..8110fbc48086c 100644
--- a/storage/xtradb/include/buf0buf.h
+++ b/storage/xtradb/include/buf0buf.h
@@ -1069,10 +1069,20 @@ buf_block_get_frame(
 /*================*/
 	const buf_block_t*	block)	/*!< in: pointer to the control block */
 	MY_ATTRIBUTE((pure));
-# define buf_block_get_frame_fast(block) buf_block_get_frame(block)
+
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block, where block is known not to be
+NULL.
+@return	pointer to the frame */
+UNIV_INLINE
+buf_frame_t*
+buf_nonnull_block_get_frame(
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+	MY_ATTRIBUTE((pure));
+
 #else /* UNIV_DEBUG */
 # define buf_block_get_frame(block) (block ? (block)->frame : 0)
-# define buf_block_get_frame_fast(block) (block)->frame
+# define buf_nonnull_block_get_frame(block) ((block)->frame)
 #endif /* UNIV_DEBUG */
 /*********************************************************************//**
 Gets the space id of a block.
diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic
index b40285ae3f008..8a21f44a2eec4 100644
--- a/storage/xtradb/include/buf0buf.ic
+++ b/storage/xtradb/include/buf0buf.ic
@@ -690,6 +690,19 @@ buf_block_get_frame(
 {
 	SRV_CORRUPT_TABLE_CHECK(block, return(0););
 
+	return(buf_nonnull_block_get_frame(block));
+}
+
+/*********************************************************************//**
+Gets a pointer to the memory frame of a block, where block is known not to be
+NULL.
+@return	pointer to the frame */
+UNIV_INLINE
+buf_frame_t*
+buf_nonnull_block_get_frame(
+/*========================*/
+	const buf_block_t*	block)	/*!< in: pointer to the control block */
+{
 	switch (buf_block_get_state(block)) {
 	case BUF_BLOCK_POOL_WATCH:
 	case BUF_BLOCK_ZIP_PAGE:
@@ -711,6 +724,7 @@ buf_block_get_frame(
 ok:
 	return((buf_frame_t*) block->frame);
 }
+
 #endif /* UNIV_DEBUG */
 
 /*********************************************************************//**
diff --git a/storage/xtradb/include/data0type.h b/storage/xtradb/include/data0type.h
index 111664b0b527f..f269c266efb9b 100644
--- a/storage/xtradb/include/data0type.h
+++ b/storage/xtradb/include/data0type.h
@@ -170,6 +170,9 @@ be less than 256 */
 				type when the column is true VARCHAR where
 				MySQL uses 2 bytes to store the data len;
 				for shorter VARCHARs MySQL uses only 1 byte */
+#define	DATA_COMPRESSED	16384	/* this is ORed to the precise data
+				type when the column has COLUMN_FORMAT =
+				COMPRESSED attribute*/
 /*-------------------------------------------*/
 
 /* This many bytes we need to store the type information affecting the
@@ -500,6 +503,17 @@ dtype_print(
 /*========*/
 	const dtype_t*	type);	/*!< in: type */
 
+/**
+Calculates the number of extra bytes needed for compression header
+depending on precise column type.
+@retval 0 if prtype does not include DATA_COMPRESSED flag
+@retval ZIP_COLUMN_HEADER_LENGTH if prtype includes DATA_COMPRESSED flag
+*/
+UNIV_INLINE
+ulint
+prtype_get_compression_extra(
+	ulint		prtype);	/*!< in: precise type */
+
 /* Structure for an SQL data type.
 If you add fields to this structure, be sure to initialize them everywhere.
 This structure is initialized in the following functions:
diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic
index d489bef89a8bb..29dc480a19c3e 100644
--- a/storage/xtradb/include/data0type.ic
+++ b/storage/xtradb/include/data0type.ic
@@ -26,6 +26,7 @@ Created 1/16/1996 Heikki Tuuri
 #include <string.h> /* strlen() */
 
 #include "mach0data.h"
+#include "rem0types.h" /* ZIP_COLUMN_HEADER_LENGTH */
 #ifndef UNIV_HOTBACKUP
 # include "ha_prototypes.h"
 
@@ -709,3 +710,18 @@ dtype_get_sql_null_size(
 					0, 0));
 #endif /* !UNIV_HOTBACKUP */
 }
+
+/**
+Calculates the number of extra bytes needed for compression header
+depending on precise column type.
+@retval 0 if prtype does not include DATA_COMPRESSED flag
+@retval ZIP_COLUMN_HEADER_LENGTH if prtype includes DATA_COMPRESSED flag
+*/
+UNIV_INLINE
+ulint
+prtype_get_compression_extra(
+	ulint		prtype)	/*!< in: precise type */
+{
+	return (prtype & DATA_COMPRESSED) != 0 ?
+		ZIP_COLUMN_HEADER_LENGTH : 0;
+}
diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h
index 477e1150f437b..d5bee886cbf43 100644
--- a/storage/xtradb/include/dict0boot.h
+++ b/storage/xtradb/include/dict0boot.h
@@ -324,6 +324,38 @@ enum dict_fld_sys_datafiles_enum {
 	DICT_FLD__SYS_DATAFILES__PATH			= 3,
 	DICT_NUM_FIELDS__SYS_DATAFILES			= 4
 };
+/* The columns in SYS_ZIP_DICT */
+enum dict_col_sys_zip_dict_enum {
+	DICT_COL__SYS_ZIP_DICT__ID			= 0,
+	DICT_COL__SYS_ZIP_DICT__NAME			= 1,
+	DICT_COL__SYS_ZIP_DICT__DATA			= 2,
+	DICT_NUM_COLS__SYS_ZIP_DICT			= 3
+};
+/* The field numbers in the SYS_ZIP_DICT clustered index */
+enum dict_fld_sys_zip_dict_enum {
+	DICT_FLD__SYS_ZIP_DICT__ID			= 0,
+	DICT_FLD__SYS_ZIP_DICT__DB_TRX_ID		= 1,
+	DICT_FLD__SYS_ZIP_DICT__DB_ROLL_PTR		= 2,
+	DICT_FLD__SYS_ZIP_DICT__NAME			= 3,
+	DICT_FLD__SYS_ZIP_DICT__DATA			= 4,
+	DICT_NUM_FIELDS__SYS_ZIP_DICT			= 5
+};
+/* The columns in SYS_ZIP_DICT_COLS */
+enum dict_col_sys_zip_dict_cols_enum {
+	DICT_COL__SYS_ZIP_DICT_COLS__TABLE_ID		= 0,
+	DICT_COL__SYS_ZIP_DICT_COLS__COLUMN_POS		= 1,
+	DICT_COL__SYS_ZIP_DICT_COLS__DICT_ID		= 2,
+	DICT_NUM_COLS__SYS_ZIP_DICT_COLS		= 3
+};
+/* The field numbers in the SYS_ZIP_DICT_COLS clustered index */
+enum dict_fld_sys_zip_dict_cols_enum {
+	DICT_FLD__SYS_ZIP_DICT_COLS__TABLE_ID		= 0,
+	DICT_FLD__SYS_ZIP_DICT_COLS__COLUMN_POS		= 1,
+	DICT_FLD__SYS_ZIP_DICT_COLS__DB_TRX_ID		= 2,
+	DICT_FLD__SYS_ZIP_DICT_COLS__DB_ROLL_PTR	= 3,
+	DICT_FLD__SYS_ZIP_DICT_COLS__DICT_ID		= 4,
+	DICT_NUM_FIELDS__SYS_ZIP_DICT_COLS		= 5
+};
 
 /* A number of the columns above occur in multiple tables.  These are the
 length of thos fields. */
diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h
index 6fd37b90799ad..686f56ad58c61 100644
--- a/storage/xtradb/include/dict0crea.h
+++ b/storage/xtradb/include/dict0crea.h
@@ -163,6 +163,19 @@ UNIV_INTERN
 dberr_t
 dict_create_or_check_sys_tablespace(void);
 /*=====================================*/
+
+#define ZIP_DICT_MAX_NAME_LENGTH 64
+/* Max window size (2^15) minus 262 */
+#define ZIP_DICT_MAX_DATA_LENGTH 32506
+
+/** Creates the zip_dict system table inside InnoDB
+at server bootstrap or server start if it is not found or is
+not of the right form.
+@return	DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_create_or_check_sys_zip_dict(void);
+
 /********************************************************************//**
 Add a single tablespace definition to the data dictionary tables in the
 database.
@@ -178,6 +191,84 @@ dict_create_add_tablespace_to_dictionary(
 	trx_t*		trx,		/*!< in: transaction */
 	bool		commit);	/*!< in: if true then commit the
 					transaction */
+
+/** Add a single compression dictionary definition to the SYS_ZIP_DICT
+InnoDB system table.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_zip_dict(
+	const char*	name,		/*!< in: dict name */
+	ulint		name_len,	/*!< in: dict name length */
+	const char*	data,		/*!< in: dict data */
+	ulint		data_len,	/*!< in: dict data length */
+	trx_t*		trx);		/*!< in/out: transaction */
+
+/** Add a single compression dictionary reference to the SYS_ZIP_DICT_COLS
+InnoDB system table.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_zip_dict_reference(
+	ulint		table_id,	/*!< in: table id */
+	ulint		column_pos,	/*!< in: column position */
+	ulint		dict_id,	/*!< in: dict id */
+	trx_t*		trx);		/*!< in/out: transaction */
+
+/** Get a single compression dictionary id for the given
+(table id, column pos) pair.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_id_by_reference(
+	ulint	table_id,	/*!< in: table id */
+	ulint	column_pos,	/*!< in: column position */
+	ulint*	dict_id,	/*!< out: dict id */
+	trx_t*	trx);		/*!< in/out: transaction */
+
+/** Get compression dictionary id for the given name.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_id_by_name(
+	const char*	dict_name,	/*!< in: dict name */
+	ulint		dict_name_len,	/*!< in: dict name length */
+	ulint*		dict_id,	/*!< out: dict id */
+	trx_t*		trx);		/*!< in/out: transaction */
+
+/** Get compression dictionary info (name and data) for the given id.
+Allocates memory for name and data on success.
+Must be freed with mem_free().
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_get_zip_dict_info_by_id(
+	ulint	dict_id,	/*!< in: dict id */
+	char**	name,		/*!< out: dict name */
+	ulint*	name_len,	/*!< out: dict name length */
+	char**	data,		/*!< out: dict data */
+	ulint*	data_len,	/*!< out: dict data length */
+	trx_t*	trx);		/*!< in/out: transaction */
+
+/** Remove a single compression dictionary from the data dictionary
+tables in the database.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_remove_zip_dict(
+	const char*	name,		/*!< in: dict name */
+	ulint		name_len,	/*!< in: dict name length */
+	trx_t*		trx);		/*!< in/out: transaction */
+
+/** Remove all compression dictionary references for the given table ID from
+the data dictionary tables in the database.
+@return	error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_remove_zip_dict_references_for_table(
+	ulint	table_id,	/*!< in: table id */
+	trx_t*	trx);		/*!< in/out: transaction */
+
 /********************************************************************//**
 Add a foreign key definition to the data dictionary tables.
 @return	error code or DB_SUCCESS */
diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h
index d8a6bc927b6bd..1b6110dd0102c 100644
--- a/storage/xtradb/include/dict0dict.h
+++ b/storage/xtradb/include/dict0dict.h
@@ -1871,6 +1871,52 @@ dict_table_set_corrupt_by_space(
 	ulint	space_id,
 	ibool	need_mutex);
 
+/** Insert a record into SYS_ZIP_DICT.
+@retval	DB_SUCCESS	if OK
+@retval	dberr_t		if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_create_zip_dict(
+	const char*	name,		/*!< in: zip_dict name */
+	ulint		name_len,	/*!< in: zip_dict name length*/
+	const char*	data,		/*!< in: zip_dict data */
+	ulint		data_len);	/*!< in: zip_dict data length */
+
+/** Get single compression dictionary id for the given
+(table id, column pos) pair.
+@retval	DB_SUCCESS		if OK
+@retval	DB_RECORD_NOT_FOUND	if not found */
+UNIV_INTERN
+dberr_t
+dict_get_dictionary_id_by_key(
+	ulint	table_id,	/*!< in: table id */
+	ulint	column_pos,	/*!< in: column position */
+	ulint*	dict_id);	/*!< out: zip_dict id */
+
+/** Get compression dictionary info (name and data) for the given id.
+Allocates memory for name and data on success.
+Must be freed with mem_free().
+@retval	DB_SUCCESS		if OK
+@retval	DB_RECORD_NOT_FOUND	if not found */
+UNIV_INTERN
+dberr_t
+dict_get_dictionary_info_by_id(
+	ulint	dict_id,	/*!< in: dictionary id */
+	char**	name,		/*!< out: dictionary name */
+	ulint*	name_len,	/*!< out: dictionary name length*/
+	char**	data,		/*!< out: dictionary data */
+	ulint*	data_len);	/*!< out: dictionary data length*/
+
+/** Delete a record in SYS_ZIP_DICT with the given name.
+@retval	DB_SUCCESS		if OK
+@retval	DB_RECORD_NOT_FOUND	if not found
+@retval	DB_ROW_IS_REFERENCED	if in use */
+UNIV_INTERN
+dberr_t
+dict_drop_zip_dict(
+	const char*	name,		/*!< in: zip_dict name */
+	ulint		name_len);	/*!< in: zip_dict name length*/
+
 #ifndef UNIV_NONINL
 #include "dict0dict.ic"
 #endif
diff --git a/storage/xtradb/include/dict0load.h b/storage/xtradb/include/dict0load.h
index dcbc3de8e942f..85e3e5656371b 100644
--- a/storage/xtradb/include/dict0load.h
+++ b/storage/xtradb/include/dict0load.h
@@ -44,6 +44,8 @@ enum dict_system_id_t {
 	SYS_FOREIGN_COLS,
 	SYS_TABLESPACES,
 	SYS_DATAFILES,
+	SYS_ZIP_DICT,
+	SYS_ZIP_DICT_COLS,
 
 	/* This must be last item. Defines the number of system tables. */
 	SYS_NUM_SYSTEM_TABLES
@@ -386,6 +388,33 @@ dict_process_sys_datafiles(
 	const rec_t*	rec,		/*!< in: current SYS_DATAFILES rec */
 	ulint*		space,		/*!< out: pace id */
 	const char**	path);		/*!< out: datafile path */
+
+/** This function parses a SYS_ZIP_DICT record, extracts necessary
+information from the record and returns to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_zip_dict(
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	ulint		zip_size,	/*!< in: nonzero=compressed BLOB page size */
+	const rec_t*	rec,		/*!< in: current SYS_ZIP_DICT rec */
+	ulint*		id,		/*!< out: dict id */
+	const char**	name,		/*!< out: dict name */
+	const char**	data,		/*!< out: dict data */
+	ulint*		data_len);	/*!< out: dict data length */
+
+/** This function parses a SYS_ZIP_DICT_COLS record, extracts necessary
+information from the record and returns to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_zip_dict_cols(
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_ZIP_DICT_COLS rec */
+	ulint*		table_id,	/*!< out: table id */
+	ulint*		column_pos,	/*!< out: column position */
+	ulint*		dict_id);	/*!< out: dict id */
+
 /********************************************************************//**
 Get the filepath for a spaceid from SYS_DATAFILES. This function provides
 a temporary heap which is used for the table lookup, but not for the path.
diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h
index 68d4d333245d9..3e2f359bbebef 100644
--- a/storage/xtradb/include/fts0fts.h
+++ b/storage/xtradb/include/fts0fts.h
@@ -375,6 +375,7 @@ extern bool		fts_need_sync;
 /** Variable specifying the table that has Fulltext index to display its
 content through information schema table */
 extern char*		fts_internal_tbl_name;
+extern char*		fts_internal_tbl_name2;
 
 #define	fts_que_graph_free(graph)			\
 do {							\
@@ -823,6 +824,15 @@ void
 fts_drop_orphaned_tables(void);
 /*==========================*/
 
+/* Get parent table name if it's a fts aux table
+@param[in]	aux_table_name	aux table name
+@param[in]	aux_table_len	aux table length
+@return parent table name, or NULL */
+char*
+fts_get_parent_table_name(
+	const char*	aux_table_name,
+	ulint		aux_table_len);
+
 /******************************************************************//**
 Since we do a horizontal split on the index table, we need to drop
 all the split tables.
@@ -840,13 +850,15 @@ FTS auxiliary INDEX table and clear the cache at the end.
 @param[in,out]	table		fts table
 @param[in]	unlock_cache	whether unlock cache when write node
 @param[in]	wait		whether wait for existing sync to finish
+@param[in]      has_dict        whether has dict operation lock
 @return DB_SUCCESS on success, error code on failure. */
 UNIV_INTERN
 dberr_t
 fts_sync_table(
 	dict_table_t*	table,
 	bool		unlock_cache,
-	bool		wait);
+	bool		wait,
+	bool		has_dict);
 
 /****************************************************************//**
 Free the query graph but check whether dict_sys->mutex is already
diff --git a/storage/xtradb/include/os0thread.h b/storage/xtradb/include/os0thread.h
index 815faf97319c0..671b9b7dc3fb8 100644
--- a/storage/xtradb/include/os0thread.h
+++ b/storage/xtradb/include/os0thread.h
@@ -131,14 +131,27 @@ os_thread_create_func(
 	os_thread_id_t*		thread_id);	/*!< out: id of the created
 						thread, or NULL */
 
+/**
+Waits until the specified thread completes and joins it. Its return value is
+ignored.
+
+@param	thread	thread to join */
+UNIV_INTERN
+void
+os_thread_join(
+	os_thread_t	thread);
+
 /*****************************************************************//**
 Exits the current thread. */
 UNIV_INTERN
 void
 os_thread_exit(
 /*===========*/
-	void*	exit_value)	/*!< in: exit value; in Windows this void*
+	void*	exit_value,	/*!< in: exit value; in Windows this void*
 				is cast as a DWORD */
+	bool	detach = true)	/*!< in: if true, the thread will be detached
+				right before exiting. If false, another thread
+				is responsible for joining this thread. */
 	UNIV_COLD MY_ATTRIBUTE((noreturn));
 /*****************************************************************//**
 Returns the thread identifier of current thread.
diff --git a/storage/xtradb/include/rem0types.h b/storage/xtradb/include/rem0types.h
index f8133f77466d9..5da96066f8851 100644
--- a/storage/xtradb/include/rem0types.h
+++ b/storage/xtradb/include/rem0types.h
@@ -71,4 +71,7 @@ enum rec_format_enum {
 };
 typedef enum rec_format_enum rec_format_t;
 
+/** Compressed field header size in bytes */
+#define ZIP_COLUMN_HEADER_LENGTH	2
+
 #endif
diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h
index 8e219d3e856d1..70da84640e533 100644
--- a/storage/xtradb/include/row0mysql.h
+++ b/storage/xtradb/include/row0mysql.h
@@ -41,6 +41,9 @@ struct SysIndexCallback;
 
 extern ibool row_rollback_on_timeout;
 
+extern uint	srv_compressed_columns_zip_level;
+extern ulong	srv_compressed_columns_threshold;
+
 struct row_prebuilt_t;
 
 /*******************************************************************//**
@@ -51,6 +54,49 @@ row_mysql_prebuilt_free_blob_heap(
 /*==============================*/
 	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct of a
 					ha_innobase:: table handle */
+
+/** Frees the compress heap in prebuilt when no longer needed. */
+UNIV_INTERN
+void
+row_mysql_prebuilt_free_compress_heap(
+	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct of a
+					ha_innobase:: table handle */
+
+/** Uncompress blob/text/varchar column using zlib
+@return pointer to the uncompressed data */
+const byte*
+row_decompress_column(
+	const byte*	data,	/*!< in: data in innodb(compressed) format */
+	ulint		*len,	/*!< in: data length; out: length of
+				decompressed data*/
+	const byte*	dict_data,
+				/*!< in: optional dictionary data used for
+				decompression */
+	ulint		dict_data_len,
+				/*!< in: optional dictionary data length */
+	row_prebuilt_t*	prebuilt);
+				/*!< in: use prebuilt->compress_heap only
+				here*/
+
+/** Compress blob/text/varchar column using zlib
+@return pointer to the compressed data */
+byte*
+row_compress_column(
+	const byte*	data,	/*!< in: data in mysql(uncompressed)
+				format */
+	ulint		*len,	/*!< in: data length; out: length of
+				compressed data*/
+	ulint		lenlen,	/*!< in: bytes used to store the length of
+				data */
+	const byte*	dict_data,
+				/*!< in: optional dictionary data used for
+				compression */
+	ulint		dict_data_len,
+				/*!< in: optional dictionary data length */
+	row_prebuilt_t*	prebuilt);
+				/*!< in: use prebuilt->compress_heap only
+				here*/
+
 /*******************************************************************//**
 Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
 format.
@@ -89,10 +135,21 @@ row_mysql_store_blob_ref(
 				to 4 bytes */
 	const void*	data,	/*!< in: BLOB data; if the value to store
 				is SQL NULL this should be NULL pointer */
-	ulint		len);	/*!< in: BLOB length; if the value to store
+	ulint		len,	/*!< in: BLOB length; if the value to store
 				is SQL NULL this should be 0; remember
 				also to set the NULL bit in the MySQL record
 				header! */
+	bool		need_decompression,
+				/*!< in: if the data need to be decompressed*/
+	const byte*	dict_data,
+				/*!< in: optional compression dictionary
+				data */
+	ulint		dict_data_len,
+				/*!< in: optional compression dictionary data
+				length */
+	row_prebuilt_t*	prebuilt);
+				/*!< in: use prebuilt->compress_heap only
+				here */
 /*******************************************************************//**
 Reads a reference to a BLOB in the MySQL format.
 @return	pointer to BLOB data */
@@ -103,8 +160,17 @@ row_mysql_read_blob_ref(
 	ulint*		len,		/*!< out: BLOB length */
 	const byte*	ref,		/*!< in: BLOB reference in the
 					MySQL format */
-	ulint		col_len);	/*!< in: BLOB reference length
+	ulint		col_len,	/*!< in: BLOB reference length
 					(not BLOB length) */
+	bool		need_compression,
+					/*!< in: if the data need to be
+					compressed*/
+	const byte*	dict_data,	/*!< in: optional compression
+					dictionary data */
+	ulint		dict_data_len,	/*!< in: optional compression
+					dictionary data length */
+	row_prebuilt_t*	prebuilt);	/*!< in: use prebuilt->compress_heap
+					only here */
 /**************************************************************//**
 Pad a column with spaces. */
 UNIV_INTERN
@@ -152,7 +218,16 @@ row_mysql_store_col_in_innobase_format(
 					necessarily the length of the actual
 					payload data; if the column is a true
 					VARCHAR then this is irrelevant */
-	ulint		comp);		/*!< in: nonzero=compact format */
+	ulint		comp,		/*!< in: nonzero=compact format */
+	bool		need_compression,
+					/*!< in: if the data need to be
+					compressed */
+	const byte*	dict_data,	/*!< in: optional compression
+					dictionary data */
+	ulint		dict_data_len,	/*!< in: optional compression
+					dictionary data length */
+	row_prebuilt_t*	prebuilt);	/*!< in: use prebuilt->compress_heap
+					only here */
 /****************************************************************//**
 Handles user errors and lock waits detected by the database engine.
 @return true if it was a lock wait and we should continue running the
@@ -646,6 +721,8 @@ struct mysql_row_templ_t {
 	ulint	is_unsigned;		/*!< if a column type is an integer
 					type and this field is != 0, then
 					it is an unsigned integer type */
+	bool		compressed;	/*!< if column format is compressed */
+	LEX_CSTRING	zip_dict_data;	/*!< associated compression dictionary */
 };
 
 #define MYSQL_FETCH_CACHE_SIZE		8
@@ -843,6 +920,8 @@ struct row_prebuilt_t {
 					in fetch_cache */
 	mem_heap_t*	blob_heap;	/*!< in SELECTS BLOB fields are copied
 					to this heap */
+	mem_heap_t*	compress_heap;  /*!< memory heap used to compress
+					/decompress blob column*/
 	mem_heap_t*	old_vers_heap;	/*!< memory heap where a previous
 					version is built in consistent read */
 	bool		in_fts_query;	/*!< Whether we are in a FTS query */
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index b6507b136bc03..8ea92f693687a 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -181,8 +181,10 @@ extern os_event_t	srv_checkpoint_completed_event;
 log tracking iteration */
 extern os_event_t	srv_redo_log_tracked_event;
 
-/** srv_redo_log_follow_thread spawn flag */
-extern bool srv_redo_log_thread_started;
+/** Whether the redo log tracker thread has been started. Does not take into
+account whether the tracking is currently enabled (see srv_track_changed_pages
+for that) */
+extern bool		srv_redo_log_thread_started;
 
 /* If the last data file is auto-extended, we add this many pages to it
 at a time */
@@ -278,6 +280,10 @@ extern char**	srv_data_file_names;
 extern ulint*	srv_data_file_sizes;
 extern ulint*	srv_data_file_is_raw_partition;
 
+
+/** Whether the redo log tracking is currently enabled. Note that it is
+possible for the log tracker thread to be running and the tracking to be
+disabled */
 extern my_bool		srv_track_changed_pages;
 extern ulonglong	srv_max_bitmap_file_size;
 
@@ -507,6 +513,9 @@ extern ibool	srv_priority_boost;
 extern ulint	srv_truncated_status_writes;
 extern ulint	srv_available_undo_logs;
 
+extern ulint	srv_column_compressed;
+extern ulint	srv_column_decompressed;
+
 extern	ulint	srv_mem_pool_size;
 extern	ulint	srv_lock_table_size;
 
@@ -1105,6 +1114,8 @@ struct export_var_t{
 	ulint innodb_purge_view_trx_id_age;	/*!< rw_max_trx_id
 						- purged view's min trx_id */
 #endif /* UNIV_DEBUG */
+	ulint innodb_column_compressed;		/*!< srv_column_compressed */
+	ulint innodb_column_decompressed;	/*!< srv_column_decompressed */
 };
 
 /** Thread slot in the thread table.  */
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 3646978ac152d..fb103e2c9b8a1 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -44,10 +44,10 @@ Created 1/20/1994 Heikki Tuuri
 
 #define INNODB_VERSION_MAJOR	5
 #define INNODB_VERSION_MINOR	6
-#define INNODB_VERSION_BUGFIX	31
+#define INNODB_VERSION_BUGFIX	32
 
 #ifndef PERCONA_INNODB_VERSION
-#define PERCONA_INNODB_VERSION 77.0
+#define PERCONA_INNODB_VERSION 79.0
 #endif
 
 /* Enable UNIV_LOG_ARCHIVE in XtraDB */
diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc
index 9be93c3f5acbc..15183b9c1cb86 100644
--- a/storage/xtradb/log/log0log.cc
+++ b/storage/xtradb/log/log0log.cc
@@ -1005,6 +1005,7 @@ log_init(void)
 
 	log_sys->next_checkpoint_no = 0;
 	log_sys->last_checkpoint_lsn = log_sys->lsn;
+	log_sys->next_checkpoint_lsn = log_sys->lsn;
 	log_sys->n_pending_checkpoint_writes = 0;
 
 
@@ -1928,6 +1929,7 @@ log_complete_checkpoint(void)
 
 	log_sys->next_checkpoint_no++;
 
+	ut_ad(log_sys->next_checkpoint_lsn >= log_sys->last_checkpoint_lsn);
 	log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
 	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
 		    log_sys->lsn - log_sys->last_checkpoint_lsn);
@@ -2015,11 +2017,17 @@ log_group_checkpoint(
 	ulint		i;
 
 	ut_ad(!srv_read_only_mode);
+	ut_ad(srv_shutdown_state != SRV_SHUTDOWN_LAST_PHASE);
 	ut_ad(mutex_own(&(log_sys->mutex)));
 	ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
 
 	buf = group->checkpoint_buf;
 
+#ifdef UNIV_DEBUG
+	lsn_t		old_next_checkpoint_lsn
+		= mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
+	ut_ad(old_next_checkpoint_lsn <= log_sys->next_checkpoint_lsn);
+#endif /* UNIV_DEBUG */
 	mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
 	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
 
@@ -2294,6 +2302,7 @@ log_checkpoint(
 		return(FALSE);
 	}
 
+	ut_ad(oldest_lsn >= log_sys->next_checkpoint_lsn);
 	log_sys->next_checkpoint_lsn = oldest_lsn;
 
 #ifdef UNIV_DEBUG
@@ -3612,13 +3621,15 @@ logs_empty_and_mark_files_at_shutdown(void)
 	before proceeding further. */
 	srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE;
 	count = 0;
-	while (buf_page_cleaner_is_active) {
-		++count;
-		os_thread_sleep(100000);
-		if (srv_print_verbose_log && count > 600) {
+	while (buf_page_cleaner_is_active || buf_lru_manager_is_active) {
+		if (srv_print_verbose_log && count == 0) {
 			ib_logf(IB_LOG_LEVEL_INFO,
 				"Waiting for page_cleaner to "
 				"finish flushing of buffer pool");
+		}
+		++count;
+		os_thread_sleep(100000);
+		if (count > 600) {
 			count = 0;
 		}
 	}
@@ -3694,7 +3705,7 @@ logs_empty_and_mark_files_at_shutdown(void)
 
 		/* Wake the log tracking thread which will then immediatelly
 		quit because of srv_shutdown_state value */
-		if (srv_track_changed_pages) {
+		if (srv_redo_log_thread_started) {
 			os_event_reset(srv_redo_log_tracked_event);
 			os_event_set(srv_checkpoint_completed_event);
 		}
@@ -3773,7 +3784,7 @@ logs_empty_and_mark_files_at_shutdown(void)
 	srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
 
 	/* Signal the log following thread to quit */
-	if (srv_track_changed_pages) {
+	if (srv_redo_log_thread_started) {
 		os_event_reset(srv_redo_log_tracked_event);
 		os_event_set(srv_checkpoint_completed_event);
 	}
@@ -3786,6 +3797,7 @@ logs_empty_and_mark_files_at_shutdown(void)
 	ut_a(freed);
 
 	ut_a(lsn == log_sys->lsn);
+	ut_ad(lsn == log_sys->last_checkpoint_lsn);
 
 	if (lsn < srv_start_lsn) {
 		ib_logf(IB_LOG_LEVEL_ERROR,
diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc
index 6517654853293..209ca74362899 100644
--- a/storage/xtradb/log/log0online.cc
+++ b/storage/xtradb/log/log0online.cc
@@ -433,6 +433,7 @@ log_online_track_missing_on_startup(
 					current server startup */
 {
 	ut_ad(last_tracked_lsn != tracking_start_lsn);
+	ut_ad(srv_track_changed_pages);
 
 	ib_logf(IB_LOG_LEVEL_WARN, "last tracked LSN in \'%s\' is " LSN_PF
 		", but the last checkpoint LSN is " LSN_PF ".  This might be "
@@ -615,6 +616,8 @@ log_online_read_init(void)
 	compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP % 8 == 0);
 	compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP_LEN % 8 == 0);
 
+	ut_ad(srv_track_changed_pages);
+
 	log_bmp_sys = static_cast<log_bitmap_struct *>
 		(ut_malloc(sizeof(*log_bmp_sys)));
 	log_bmp_sys->read_buf_ptr = static_cast<byte *>
@@ -1089,10 +1092,15 @@ log_online_write_bitmap_page(
 {
 	ibool	success;
 
+	ut_ad(srv_track_changed_pages);
 	ut_ad(mutex_own(&log_bmp_sys->mutex));
 
 	/* Simulate a write error */
-	DBUG_EXECUTE_IF("bitmap_page_write_error", return FALSE;);
+	DBUG_EXECUTE_IF("bitmap_page_write_error",
+			ib_logf(IB_LOG_LEVEL_ERROR,
+				"simulating bitmap write error in "
+				"log_online_write_bitmap_page");
+			return FALSE;);
 
 	success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file,
 				block, log_bmp_sys->out.offset,
@@ -1182,7 +1190,9 @@ log_online_write_bitmap(void)
 			rbt_next(log_bmp_sys->modified_pages, bmp_tree_node);
 
 		DBUG_EXECUTE_IF("bitmap_page_2_write_error",
-				DBUG_SET("+d,bitmap_page_write_error"););
+				ut_ad(bmp_tree_node); /* 2nd page must exist */
+				DBUG_SET("+d,bitmap_page_write_error");
+				DBUG_SET("-d,bitmap_page_2_write_error"););
 	}
 
 	rbt_reset(log_bmp_sys->modified_pages);
@@ -1203,15 +1213,11 @@ log_online_follow_redo_log(void)
 	log_group_t*	group;
 	ibool		result;
 
-	mutex_enter(&log_bmp_sys->mutex);
-
-	if (!srv_track_changed_pages) {
-		mutex_exit(&log_bmp_sys->mutex);
-		return FALSE;
-	}
-
+	ut_ad(srv_track_changed_pages);
 	ut_ad(!srv_read_only_mode);
 
+	mutex_enter(&log_bmp_sys->mutex);
+
 	/* Grab the LSN of the last checkpoint, we will parse up to it */
 	mutex_enter(&(log_sys->mutex));
 	log_bmp_sys->end_lsn = log_sys->last_checkpoint_lsn;
@@ -1554,9 +1560,12 @@ log_online_diagnose_bitmap_eof(
 			/* It's a "Warning" here because it's not a fatal error
 			for the whole server */
 			ib_logf(IB_LOG_LEVEL_WARN,
-				"changed page bitmap file \'%s\' does not "
-				"contain a complete run at the end.",
-				bitmap_file->name);
+				"changed page bitmap file \'%s\', size "
+				UINT64PF " bytes, does not "
+				"contain a complete run at the next read "
+				"offset " UINT64PF,
+				bitmap_file->name, bitmap_file->size,
+				bitmap_file->offset);
 			return FALSE;
 		}
 	}
@@ -1788,20 +1797,20 @@ log_online_purge_changed_page_bitmaps(
 		lsn = LSN_MAX;
 	}
 
-	if (srv_track_changed_pages) {
+	if (srv_redo_log_thread_started) {
 		/* User requests might happen with both enabled and disabled
 		tracking */
 		mutex_enter(&log_bmp_sys->mutex);
 	}
 
 	if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, LSN_MAX)) {
-		if (srv_track_changed_pages) {
+		if (srv_redo_log_thread_started) {
 			mutex_exit(&log_bmp_sys->mutex);
 		}
 		return TRUE;
 	}
 
-	if (srv_track_changed_pages && lsn > log_bmp_sys->end_lsn) {
+	if (srv_redo_log_thread_started && lsn > log_bmp_sys->end_lsn) {
 		/* If we have to delete the current output file, close it
 		first. */
 		os_file_close(log_bmp_sys->out.file);
@@ -1834,7 +1843,7 @@ log_online_purge_changed_page_bitmaps(
 		}
 	}
 
-	if (srv_track_changed_pages) {
+	if (srv_redo_log_thread_started) {
 		if (lsn > log_bmp_sys->end_lsn) {
 			lsn_t	new_file_lsn;
 			if (lsn == LSN_MAX) {
@@ -1845,9 +1854,7 @@ log_online_purge_changed_page_bitmaps(
 				new_file_lsn = log_bmp_sys->end_lsn;
 			}
 			if (!log_online_rotate_bitmap_file(new_file_lsn)) {
-				/* If file create failed, signal the log
-				tracking thread to quit next time it wakes
-				up.  */
+				/* If file create failed, stop log tracking */
 				srv_track_changed_pages = FALSE;
 			}
 		}
diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc
index 2e9e8b6e75ce5..9e42fb5cc1ccd 100644
--- a/storage/xtradb/log/log0recv.cc
+++ b/storage/xtradb/log/log0recv.cc
@@ -386,12 +386,6 @@ recv_sys_init(
 	}
 
 #ifndef UNIV_HOTBACKUP
-	/* Initialize red-black tree for fast insertions into the
-	flush_list during recovery process.
-	As this initialization is done while holding the buffer pool
-	mutex we perform it before acquiring recv_sys->mutex. */
-	buf_flush_init_flush_rbt();
-
 	mutex_enter(&(recv_sys->mutex));
 
 	recv_sys->heap = mem_heap_create_typed(256,
@@ -481,9 +475,6 @@ recv_sys_debug_free(void)
 	recv_sys->last_block_buf_start = NULL;
 
 	mutex_exit(&(recv_sys->mutex));
-
-	/* Free up the flush_rbt. */
-	buf_flush_free_flush_rbt();
 }
 # endif /* UNIV_LOG_DEBUG */
 
@@ -3125,6 +3116,11 @@ recv_recovery_from_checkpoint_start_func(
 	byte*		log_hdr_buf_base = reinterpret_cast<byte *>
 		(alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
 	dberr_t		err;
+
+	/* Initialize red-black tree for fast insertions into the
+	flush_list during recovery process. */
+	buf_flush_init_flush_rbt();
+
 	ut_when_dtor<recv_dblwr_t> tmp(recv_sys->dblwr);
 
 	log_hdr_buf = static_cast<byte *>
@@ -3544,6 +3540,9 @@ recv_recovery_from_checkpoint_finish(void)
 #ifndef UNIV_LOG_DEBUG
 	recv_sys_debug_free();
 #endif
+	/* Free up the flush_rbt. */
+	buf_flush_free_flush_rbt();
+
 	/* Roll back any recovered data dictionary transactions, so
 	that the data dictionary tables will be free of any locks.
 	The data dictionary latch should guarantee that there is at
diff --git a/storage/xtradb/mach/mach0data.cc b/storage/xtradb/mach/mach0data.cc
index df68aab8a1874..206434dc5ab37 100644
--- a/storage/xtradb/mach/mach0data.cc
+++ b/storage/xtradb/mach/mach0data.cc
@@ -56,7 +56,18 @@ mach_parse_compressed(
 		*val = flag;
 		return(ptr + 1);
 
-	} else if (flag < 0xC0UL) {
+	}
+
+	/* Workaround GCC bug
+	https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77673:
+	the compiler moves mach_read_from_4 right to the beginning of the
+	function, causing an out-of-bounds read if we are reading a short
+	integer close to the end of buffer. */
+#if defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__clang__)
+	asm volatile("": : :"memory");
+#endif
+
+	if (flag < 0xC0UL) {
 		if (end_ptr < ptr + 2) {
 			return(NULL);
 		}
diff --git a/storage/xtradb/os/os0thread.cc b/storage/xtradb/os/os0thread.cc
index aabdd06d76b2f..af826027efc36 100644
--- a/storage/xtradb/os/os0thread.cc
+++ b/storage/xtradb/os/os0thread.cc
@@ -210,14 +210,42 @@ os_thread_create_func(
 #endif
 }
 
+/**
+Waits until the specified thread completes and joins it. Its return value is
+ignored.
+
+@param	thread	thread to join */
+UNIV_INTERN
+void
+os_thread_join(
+	os_thread_t	thread)
+{
+	/* This function is currently only used to work around the glibc bug
+	described in http://bugs.mysql.com/bug.php?id=82886
+
+	On Windows, no workarounds are necessary, all threads
+	are "detached" upon thread exit (handle is closed), so we do
+	nothing.
+	*/
+#ifndef _WIN32
+	int ret	MY_ATTRIBUTE((unused)) = pthread_join(thread, NULL);
+
+	/* Waiting on already-quit threads is allowed */
+	ut_ad(ret == 0 || ret == ESRCH);
+#endif
+}
+
 /*****************************************************************//**
 Exits the current thread. */
 UNIV_INTERN
 void
 os_thread_exit(
 /*===========*/
-	void*	exit_value)	/*!< in: exit value; in Windows this void*
+	void*	exit_value,	/*!< in: exit value; in Windows this void*
 				is cast as a DWORD */
+	bool	detach)		/*!< in: if true, the thread will be detached
+				right before exiting. If false, another thread
+				is responsible for joining this thread. */
 {
 #ifdef UNIV_DEBUG_THREAD_CREATION
 	fprintf(stderr, "Thread exits, id %lu\n",
@@ -233,7 +261,8 @@ os_thread_exit(
 #ifdef __WIN__
 	ExitThread((DWORD) exit_value);
 #else
-	pthread_detach(pthread_self());
+	if (detach)
+		pthread_detach(pthread_self());
 	pthread_exit(exit_value);
 #endif
 }
diff --git a/storage/xtradb/rem/rem0rec.cc b/storage/xtradb/rem/rem0rec.cc
index 75e0bba343dc1..f8eb807654766 100644
--- a/storage/xtradb/rem/rem0rec.cc
+++ b/storage/xtradb/rem/rem0rec.cc
@@ -323,7 +323,8 @@ rec_init_offsets_comp_ordinary(
 			stored in one byte for 0..127.  The length
 			will be encoded in two bytes when it is 128 or
 			more, or when the field is stored externally. */
-			if (UNIV_UNLIKELY(col->len > 255)
+			if (UNIV_UNLIKELY(col->len > 255 -
+			    prtype_get_compression_extra(col->prtype))
 			    || UNIV_UNLIKELY(col->mtype
 					     == DATA_BLOB)) {
 				if (len & 0x80) {
@@ -844,8 +845,12 @@ rec_get_converted_size_comp_prefix_low(
 			continue;
 		}
 
-		ut_ad(len <= col->len || col->mtype == DATA_BLOB
-			|| (col->len == 0 && col->mtype == DATA_VARCHAR));
+		ut_ad(len <= col->len || col->mtype == DATA_BLOB ||
+		  ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY
+		   || col->mtype == DATA_VARMYSQL)
+		   && (col->len == 0
+		       || len <= col->len +
+			  prtype_get_compression_extra(col->prtype))));
 
 		fixed_len = field->fixed_len;
 		if (temp && fixed_len
@@ -877,7 +882,9 @@ rec_get_converted_size_comp_prefix_low(
 			ut_ad(col->len >= 256 || col->mtype == DATA_BLOB);
 			extra_size += 2;
 		} else if (len < 128
-			   || (col->len < 256 && col->mtype != DATA_BLOB)) {
+			   || (col->len < 256 -
+			       prtype_get_compression_extra(col->prtype)
+			       && col->mtype != DATA_BLOB)) {
 			extra_size++;
 		} else {
 			/* For variable-length columns, we look up the
@@ -1272,12 +1279,16 @@ rec_convert_dtuple_to_rec_comp(
 			*lens-- = (byte) (len >> 8) | 0xc0;
 			*lens-- = (byte) len;
 		} else {
-			ut_ad(len <= dtype_get_len(type)
+			ut_ad(len <= dtype_get_len(type) +
+			      prtype_get_compression_extra(
+			        dtype_get_prtype(type))
 			      || dtype_get_mtype(type) == DATA_BLOB
 			      || !strcmp(index->name,
 					 FTS_INDEX_TABLE_IND_NAME));
 			if (len < 128
-			    || (dtype_get_len(type) < 256
+			    || (dtype_get_len(type) < 256 -
+			        prtype_get_compression_extra(
+				  dtype_get_prtype(type))
 				&& dtype_get_mtype(type) != DATA_BLOB)) {
 
 				*lens-- = (byte) len;
diff --git a/storage/xtradb/row/row0ftsort.cc b/storage/xtradb/row/row0ftsort.cc
index 51cc53ff260f8..cb47d60562355 100644
--- a/storage/xtradb/row/row0ftsort.cc
+++ b/storage/xtradb/row/row0ftsort.cc
@@ -961,7 +961,7 @@ fts_parallel_merge(
 	CloseHandle(psort_info->thread_hdl);
 #endif /*__WIN__ */
 
-	os_thread_exit(NULL);
+	os_thread_exit(NULL, false);
 
 	OS_THREAD_DUMMY_RETURN;
 }
diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc
index b84e3113ea8d0..3c5d5773aee85 100644
--- a/storage/xtradb/row/row0log.cc
+++ b/storage/xtradb/row/row0log.cc
@@ -613,7 +613,7 @@ row_log_table_delete(
 		&old_pk_extra_size);
 	ut_ad(old_pk_extra_size < 0x100);
 
-	mrec_size = 4 + old_pk_size;
+	mrec_size = 6 + old_pk_size;
 
 	/* Log enough prefix of the BLOB unless both the
 	old and new table are in COMPACT or REDUNDANT format,
@@ -643,8 +643,8 @@ row_log_table_delete(
 		*b++ = static_cast<byte>(old_pk_extra_size);
 
 		/* Log the size of external prefix we saved */
-		mach_write_to_2(b, ext_size);
-		b += 2;
+		mach_write_to_4(b, ext_size);
+		b += 4;
 
 		rec_convert_dtuple_to_temp(
 			b + old_pk_extra_size, new_index,
@@ -2268,14 +2268,14 @@ row_log_table_apply_op(
 		break;
 
 	case ROW_T_DELETE:
-		/* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */
-		if (mrec + 4 >= mrec_end) {
+		/* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */
+		if (mrec + 6 >= mrec_end) {
 			return(NULL);
 		}
 
 		extra_size = *mrec++;
-		ext_size = mach_read_from_2(mrec);
-		mrec += 2;
+		ext_size = mach_read_from_4(mrec);
+		mrec += 4;
 		ut_ad(mrec < mrec_end);
 
 		/* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc
index 83f6ccb90c308..e397053949e1a 100644
--- a/storage/xtradb/row/row0merge.cc
+++ b/storage/xtradb/row/row0merge.cc
@@ -525,7 +525,12 @@ row_merge_buf_add(
 			dfield_set_len(field, len);
 		}
 
-		ut_ad(len <= col->len || col->mtype == DATA_BLOB);
+		ut_ad(len <= col->len || col->mtype == DATA_BLOB ||
+		  ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY
+		   || col->mtype == DATA_VARMYSQL)
+		   && (col->len == 0
+		       || len <= col->len +
+			  prtype_get_compression_extra(col->prtype))));
 
 		fixed_len = ifield->fixed_len;
 		if (fixed_len && !dict_table_is_comp(index->table)
@@ -554,7 +559,9 @@ row_merge_buf_add(
 		} else if (dfield_is_ext(field)) {
 			extra_size += 2;
 		} else if (len < 128
-			   || (col->len < 256 && col->mtype != DATA_BLOB)) {
+			   || (col->len < 256 -
+			       prtype_get_compression_extra(col->prtype)
+			       && col->mtype != DATA_BLOB)) {
 			extra_size++;
 		} else {
 			/* For variable-length columns, we look up the
@@ -1995,7 +2002,7 @@ row_merge_read_clustered_index(
 		/* Sync fts cache for other fts indexes to keep all
 		fts indexes consistent in sync_doc_id. */
 		err = fts_sync_table(const_cast<dict_table_t*>(new_table),
-				     false, true);
+				     false, true, false);
 
 		if (err == DB_SUCCESS) {
 			fts_update_next_doc_id(
@@ -3823,6 +3830,13 @@ row_merge_build_indexes(
 						" exited when creating FTS"
 						" index '%s'",
 						indexes[i]->name);
+				} else {
+					for (j = 0; j < FTS_NUM_AUX_INDEX;
+					     j++) {
+
+					    os_thread_join(merge_info[j]
+							   .thread_hdl);
+					}
 				}
 			} else {
 				/* This cannot report duplicates; an
diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc
index 733c7fef3744f..46bf523750c6d 100644
--- a/storage/xtradb/row/row0mysql.cc
+++ b/storage/xtradb/row/row0mysql.cc
@@ -63,11 +63,54 @@ Created 9/17/2000 Heikki Tuuri
 #include "row0import.h"
 #include "m_string.h"
 #include "my_sys.h"
+#include "zlib.h"
 #include <algorithm>
 
 /** Provide optional 4.x backwards compatibility for 5.0 and above */
 UNIV_INTERN ibool	row_rollback_on_timeout	= FALSE;
 
+/**
+Z_NO_COMPRESSION = 0
+Z_BEST_SPEED = 1
+Z_BEST_COMPRESSION = 9
+Z_DEFAULT_COMPRESSION = -1
+Compression level to be used by zlib for compressed-blob columns.
+Settable by user.
+*/
+UNIV_INTERN uint	srv_compressed_columns_zip_level = DEFAULT_COMPRESSION_LEVEL;
+/**
+(Z_FILTERED | Z_HUFFMAN_ONLY | Z_RLE | Z_FIXED | Z_DEFAULT_STRATEGY)
+
+The strategy parameter is used to tune the compression algorithm. Use the
+value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
+filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only
+(no string match), or Z_RLE to limit match distances to one
+(run-length encoding). Filtered data consists mostly of small values with a
+somewhat random distribution. In this case, the compression algorithm is
+tuned to compress them better.
+The effect of Z_FILTERED is to force more Huffman coding and less string
+matching; it is somewhat intermediate between Z_DEFAULT_STRATEGY and
+Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as Z_HUFFMAN_ONLY,
+but give better compression for PNG image data. The strategy parameter only
+affects the compression ratio but not the correctness of the compressed
+output even if it is not set appropriately. Z_FIXED prevents the use of
+dynamic Huffman codes, allowing for a simpler decoder for special
+applications.
+*/
+const uint	srv_compressed_columns_zlib_strategy = Z_DEFAULT_STRATEGY;
+/** Compress the column if the data length exceeds this value. */
+UNIV_INTERN ulong	srv_compressed_columns_threshold = 96;
+/**
+Determine if zlib needs to compute adler32 value for the compressed data.
+This variable is similar to page_zip_zlib_wrap, but is only used by
+compressed blob columns.
+*/
+const bool	srv_compressed_columns_zlib_wrap = true;
+/**
+Determine if zlib will use custom memory allocation functions based on
+InnoDB memory heap routines (mem_heap_t*).
+*/
+const bool	srv_compressed_columns_zlib_use_heap = false;
 /** Chain node of the list of tables to drop in the background. */
 struct row_mysql_drop_t{
 	char*				table_name;	/*!< table name */
@@ -171,6 +214,17 @@ row_mysql_prebuilt_free_blob_heap(
 	prebuilt->blob_heap = NULL;
 }
 
+/** Frees the compress heap in prebuilt when no longer needed. */
+UNIV_INTERN
+void
+row_mysql_prebuilt_free_compress_heap(
+	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct of a
+					ha_innobase:: table handle */
+{
+	mem_heap_free(prebuilt->compress_heap);
+	prebuilt->compress_heap = NULL;
+}
+
 /*******************************************************************//**
 Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
 format.
@@ -227,6 +281,425 @@ row_mysql_read_true_varchar(
 	return(field + 1);
 }
 
+/**
+  Compressed BLOB header format:
+  ---------------------------------------------------------------
+  | reserved | wrap | algorithm | len-len | compressed | unused |
+  |      [1] |  [1] |       [5] |     [3] |        [1] |    [5] |
+  ---------------------------------------------------------------
+  | 0      0 | 1  1 | 2       6 | 7     9 | 10      10 | 11  15 |
+  ---------------------------------------------------------------
+  * 'reserved' bit is planned to be used in future versions of the BLOB
+  header. In this version it must always be
+  'default_zip_column_reserved_value' (0).
+  * 'wrap' identifies if compression algorithm calculated a checksum
+  (adler32 in case of zlib) and appended it to the compressed data.
+  * 'algorithm' identifies which algorithm was used to compress this BLOB.
+  Currently, the only value 'default_zip_column_algorithm_value' (0) is
+  supported.
+  * 'len-len' field identifies the length of the column length data portion
+  following this header, stored as (length - 1) (see below).
+  * If 'compressed' bit is set to 1, then this header is immediately followed
+  by 1..8 bytes (depending on the value of 'len-len' bitfield) which
+  determine original (uncompressed) block size. These 'len-len' bytes are
+  followed by compressed representation of the original data.
+  * If 'compressed' bit is set to 0, every other bitfield ('wrap',
+  'algorithm' and 'len-len') must be ignored. In this case the header is
+  immediately followed by uncompressed (original) data.
+*/
+
+/**
+  Currently the only supported value for the 'reserved' field is
+  false (0).
+*/
+static const bool default_zip_column_reserved_value = false;
+
+/**
+  Currently the only supported value for the 'algorithm' field is 0, which
+  means 'zlib'.
+*/
+static const uint default_zip_column_algorithm_value = 0;
+
+static const size_t zip_column_prefix_max_length =
+	ZIP_COLUMN_HEADER_LENGTH + 8;
+static const size_t zip_column_header_length = ZIP_COLUMN_HEADER_LENGTH;
+
+/* 'reserved', bit 0 */
+static const uint zip_column_reserved = 0;
+/* 0000 0000 0000 0001 */
+static const uint zip_column_reserved_mask = 0x0001;
+
+/* 'wrap', bit 1 */
+static const uint zip_column_wrap = 1;
+/* 0000 0000 0000 0010 */
+static const uint zip_column_wrap_mask = 0x0002;
+
+/* 'algorithm', bit 2,3,4,5,6 */
+static const uint zip_column_algorithm = 2;
+/* 0000 0000 0111 1100 */
+static const uint zip_column_algorithm_mask = 0x007C;
+
+/* 'len-len', bit 7,8,9 */
+static const uint zip_column_data_length = 7;
+/* 0000 0011 1000 0000 */
+static const uint zip_column_data_length_mask = 0x0380;
+
+/* 'compressed', bit 10 */
+static const uint zip_column_compressed = 10;
+/* 0000 0100 0000 0000 */
+static const uint zip_column_compressed_mask = 0x0400;
+
+/** Updates compressed block header with the given components */
+static void
+column_set_compress_header(
+	byte*	data,
+	bool	compressed,
+	ulint	lenlen,
+	uint	alg,
+	bool	wrap,
+	bool	reserved)
+{
+	ulint header = 0;
+	header |= (compressed << zip_column_compressed);
+	header |= (lenlen << zip_column_data_length);
+	header |= (alg << zip_column_algorithm);
+	header |= (wrap << zip_column_wrap);
+	header |= (reserved << zip_column_reserved);
+	mach_write_to_2(data, header);
+}
+
+/** Parse compressed block header into components */
+static void
+column_get_compress_header(
+	const byte*	data,
+	bool*		compressed,
+	ulint*		lenlen,
+	uint*		alg,
+	bool*		wrap,
+	bool*		reserved
+)
+{
+	ulint header = mach_read_from_2(data);
+	*compressed = ((header & zip_column_compressed_mask) >>
+		zip_column_compressed);
+	*lenlen = ((header & zip_column_data_length_mask) >>
+		zip_column_data_length);
+	*alg = ((header & zip_column_algorithm_mask) >>
+		zip_column_algorithm);
+	*wrap = ((header & zip_column_wrap_mask) >>
+		zip_column_wrap);
+	*reserved = ((header & zip_column_reserved_mask) >>
+		zip_column_reserved);
+}
+
+/** Allocate memory for zlib. */
+static
+void*
+column_zip_zalloc(
+	void*	opaque,	/*!< in/out: memory heap */
+	uInt	items,	/*!< in: number of items to allocate */
+	uInt	size)	/*!< in: size of an item in bytes */
+{
+	return(mem_heap_zalloc(static_cast<mem_heap_t*>(opaque),
+		items * size));
+}
+
+/** Deallocate memory for zlib. */
+static
+void
+column_zip_free(
+	 void*	opaque MY_ATTRIBUTE((unused)),	/*!< in: memory heap */
+	 void*	address MY_ATTRIBUTE((unused)))	/*!< in: object to free */
+{
+}
+
+/** Configure the zlib allocator to use the given memory heap. */
+UNIV_INTERN
+void
+column_zip_set_alloc(
+	void*		stream,	/*!< in/out: zlib stream */
+	mem_heap_t*	heap)	/*!< in: memory heap to use */
+{
+	z_stream* strm = static_cast<z_stream*>(stream);
+
+	if (srv_compressed_columns_zlib_use_heap) {
+		strm->zalloc = column_zip_zalloc;
+		strm->zfree = column_zip_free;
+		strm->opaque = heap;
+	} else {
+		strm->zalloc = (alloc_func)0;
+		strm->zfree = (free_func)0;
+		strm->opaque = (voidpf)0;
+	}
+}
+
+/** Compress blob/text/varchar column using zlib
+@return pointer to the compressed data */
+byte*
+row_compress_column(
+	const byte*	data,	/*!< in: data in mysql(uncompressed)
+				format */
+	ulint		*len,	/*!< in: data length; out: length of
+				compressed data*/
+	ulint		lenlen,	/*!< in: bytes used to store the length of
+				data */
+	const byte*	dict_data,
+				/*!< in: optional dictionary data used for
+				compression */
+	ulint		dict_data_len,
+				/*!< in: optional dictionary data length */
+	row_prebuilt_t*	prebuilt)
+				/*!< in: use prebuilt->compress_heap only
+				here*/
+{
+	int err = 0;
+	ulint comp_len = *len;
+	ulint buf_len = *len + zip_column_prefix_max_length;
+	byte* buf;
+	byte* ptr;
+	z_stream c_stream;
+	bool wrap = srv_compressed_columns_zlib_wrap;
+
+	int window_bits = wrap ? MAX_WBITS : -MAX_WBITS;
+
+	if (!prebuilt->compress_heap) {
+		prebuilt->compress_heap =
+			mem_heap_create(max(UNIV_PAGE_SIZE, buf_len));
+	}
+
+	buf = static_cast<byte*>(mem_heap_zalloc(
+			prebuilt->compress_heap,buf_len));
+
+	if (*len < srv_compressed_columns_threshold ||
+		srv_compressed_columns_zip_level == Z_NO_COMPRESSION)
+		goto do_not_compress;
+
+	ptr = buf + zip_column_header_length + lenlen;
+
+	/*init deflate object*/
+	c_stream.next_in = const_cast<Bytef*>(data);
+	c_stream.avail_in = *len;
+	c_stream.next_out = ptr;
+	c_stream.avail_out = comp_len;
+
+	column_zip_set_alloc(&c_stream, prebuilt->compress_heap);
+
+	err = deflateInit2(&c_stream, srv_compressed_columns_zip_level,
+		Z_DEFLATED, window_bits, MAX_MEM_LEVEL,
+		srv_compressed_columns_zlib_strategy);
+	ut_a(err == Z_OK);
+
+	if (dict_data != 0 && dict_data_len != 0) {
+		err = deflateSetDictionary(&c_stream, dict_data,
+			dict_data_len);
+		ut_a(err == Z_OK);
+	}
+
+	err = deflate(&c_stream, Z_FINISH);
+	if (err != Z_STREAM_END) {
+		deflateEnd(&c_stream);
+		if (err == Z_OK)
+			err = Z_BUF_ERROR;
+	} else {
+		comp_len = c_stream.total_out;
+		err = deflateEnd(&c_stream);
+	}
+
+	switch (err) {
+	case Z_OK:
+		break;
+	case Z_BUF_ERROR:
+		/* data after compress is larger than uncompressed data*/
+		break;
+	default:
+		ib_logf(IB_LOG_LEVEL_ERROR,
+			"failed to compress the column, error: %d\n", err);
+	}
+
+	/* make sure the compressed data size is smaller than
+	uncompressed data */
+	if (err == Z_OK &&
+		*len > (comp_len + zip_column_header_length + lenlen)) {
+		column_set_compress_header(buf, true, lenlen - 1,
+			default_zip_column_algorithm_value, wrap,
+			default_zip_column_reserved_value);
+		ptr = buf + zip_column_header_length;
+		/*store the uncompressed data length*/
+		switch (lenlen) {
+		case 1:
+			mach_write_to_1(ptr, *len);
+			break;
+		case 2:
+			mach_write_to_2(ptr, *len);
+			break;
+		case 3:
+			mach_write_to_3(ptr, *len);
+			break;
+		case 4:
+			mach_write_to_4(ptr, *len);
+			break;
+		default:
+			ut_error;
+		}
+
+		*len = comp_len + zip_column_header_length + lenlen;
+		return buf;
+	}
+
+do_not_compress:
+	ptr = buf;
+	column_set_compress_header(ptr, false, 0,
+		default_zip_column_algorithm_value, false,
+		default_zip_column_reserved_value);
+	ptr += zip_column_header_length;
+	memcpy(ptr, data, *len);
+	*len += zip_column_header_length;
+	return buf;
+}
+
+/** Uncompress blob/text/varchar column using zlib
+@return pointer to the uncompressed data */
+const byte*
+row_decompress_column(
+	const byte*	data,	/*!< in: data in innodb(compressed) format */
+	ulint		*len,	/*!< in: data length; out: length of
+				decompressed data*/
+	const byte*	dict_data,
+				/*!< in: optional dictionary data used for
+				decompression */
+	ulint		dict_data_len,
+				/*!< in: optional dictionary data length */
+	row_prebuilt_t*	prebuilt)
+				/*!< in: use prebuilt->compress_heap only
+				here*/
+{
+	ulint buf_len = 0;
+	byte* buf;
+	int err = 0;
+	int window_bits = 0;
+	z_stream d_stream;
+	bool is_compressed = false;
+	bool wrap = false;
+	bool reserved = false;
+	ulint lenlen = 0;
+	uint alg = 0;
+
+	ut_ad(*len != ULINT_UNDEFINED);
+	ut_ad(*len >= zip_column_header_length);
+
+	column_get_compress_header(data, &is_compressed, &lenlen, &alg,
+		&wrap, &reserved);
+
+	if (reserved != default_zip_column_reserved_value) {
+		ib_logf(IB_LOG_LEVEL_FATAL,
+			"unsupported compressed BLOB header format\n");
+	}
+
+	if (alg != default_zip_column_algorithm_value) {
+		ib_logf(IB_LOG_LEVEL_FATAL,
+			"unsupported 'algorithm' value in the"
+			" compressed BLOB header\n");
+	}
+
+	ut_a(lenlen < 4);
+
+	data += zip_column_header_length;
+	if (!is_compressed) { /* column not compressed */
+		*len -= zip_column_header_length;
+		return data;
+	}
+
+	lenlen++;
+
+	ulint comp_len = *len - zip_column_header_length - lenlen;
+
+	ulint uncomp_len = 0;
+	switch (lenlen) {
+	case 1:
+		uncomp_len = mach_read_from_1(data);
+		break;
+	case 2:
+		uncomp_len = mach_read_from_2(data);
+		break;
+	case 3:
+		uncomp_len = mach_read_from_3(data);
+		break;
+	case 4:
+		uncomp_len = mach_read_from_4(data);
+		break;
+	default:
+		ut_error;
+	}
+
+	data += lenlen;
+
+	/* data is compressed, decompress it*/
+	if (!prebuilt->compress_heap) {
+		prebuilt->compress_heap =
+			mem_heap_create(max(UNIV_PAGE_SIZE, uncomp_len));
+	}
+
+	buf_len = uncomp_len;
+	buf = static_cast<byte*>(mem_heap_zalloc(
+				 prebuilt->compress_heap, buf_len));
+
+	/* init d_stream */
+	d_stream.next_in = const_cast<Bytef*>(data);
+	d_stream.avail_in = comp_len;
+	d_stream.next_out = buf;
+	d_stream.avail_out = buf_len;
+
+	column_zip_set_alloc(&d_stream, prebuilt->compress_heap);
+
+	window_bits = wrap ? MAX_WBITS : -MAX_WBITS;
+	err = inflateInit2(&d_stream, window_bits);
+	ut_a(err == Z_OK);
+
+	err = inflate(&d_stream, Z_FINISH);
+	if (err == Z_NEED_DICT) {
+		ut_a(dict_data != 0 && dict_data_len != 0);
+		err = inflateSetDictionary(&d_stream, dict_data,
+			dict_data_len);
+		ut_a(err == Z_OK);
+		err = inflate(&d_stream, Z_FINISH);
+	}
+
+	if (err != Z_STREAM_END) {
+		inflateEnd(&d_stream);
+		if (err == Z_BUF_ERROR && d_stream.avail_in == 0)
+			err = Z_DATA_ERROR;
+	} else {
+		buf_len = d_stream.total_out;
+		err = inflateEnd(&d_stream);
+	}
+
+	switch (err) {
+	case Z_OK:
+		break;
+	case Z_BUF_ERROR:
+		ib_logf(IB_LOG_LEVEL_FATAL,
+			"zlib buf error, this shouldn't happen\n");
+		break;
+	default:
+		ib_logf(IB_LOG_LEVEL_FATAL,
+			"failed to decompress column, error: %d\n", err);
+	}
+
+	if (err == Z_OK) {
+		if (buf_len != uncomp_len) {
+			ib_logf(IB_LOG_LEVEL_FATAL,
+				"failed to decompress blob column, may"
+				" be corrupted\n");
+		}
+		*len = buf_len;
+		return buf;
+	}
+
+	*len -= (zip_column_header_length + lenlen);
+	return data;
+}
+
+
 /*******************************************************************//**
 Stores a reference to a BLOB in the MySQL format. */
 UNIV_INTERN
@@ -240,10 +713,21 @@ row_mysql_store_blob_ref(
 				to 4 bytes */
 	const void*	data,	/*!< in: BLOB data; if the value to store
 				is SQL NULL this should be NULL pointer */
-	ulint		len)	/*!< in: BLOB length; if the value to store
+	ulint		len,	/*!< in: BLOB length; if the value to store
 				is SQL NULL this should be 0; remember
 				also to set the NULL bit in the MySQL record
 				header! */
+	bool		need_decompression,
+				/*!< in: if the data needs to be decompressed */
+	const byte*	dict_data,
+				/*!< in: optional compression dictionary
+				data */
+	ulint		dict_data_len,
+				/*!< in: optional compression dictionary data
+				length */
+	row_prebuilt_t*	prebuilt)
+				/*!< in: use prebuilt->compress_heap only
+				here */
 {
 	/* MySQL might assume the field is set to zero except the length and
 	the pointer fields */
@@ -255,13 +739,28 @@ row_mysql_store_blob_ref(
 	In 32-bit architectures we only use the first 4 bytes of the pointer
 	slot. */
 
-	ut_a(col_len - 8 > 1 || len < 256);
-	ut_a(col_len - 8 > 2 || len < 256 * 256);
-	ut_a(col_len - 8 > 3 || len < 256 * 256 * 256);
+	ut_a(col_len - 8 > 1 ||
+		len < 256 +
+		(need_decompression ? ZIP_COLUMN_HEADER_LENGTH : 0));
+	ut_a(col_len - 8 > 2 ||
+		len < 256 * 256 +
+		(need_decompression ? ZIP_COLUMN_HEADER_LENGTH : 0));
+	ut_a(col_len - 8 > 3 ||
+		len < 256 * 256 * 256 +
+		(need_decompression ? ZIP_COLUMN_HEADER_LENGTH : 0));
 
-	mach_write_to_n_little_endian(dest, col_len - 8, len);
+	const byte *ptr = NULL;
 
-	memcpy(dest + col_len - 8, &data, sizeof data);
+	if (need_decompression)
+		ptr = row_decompress_column((const byte*)data, &len,
+			dict_data, dict_data_len, prebuilt);
+
+	if (ptr)
+		memcpy(dest + col_len - 8, &ptr, sizeof ptr);
+	else
+		memcpy(dest + col_len - 8, &data, sizeof data);
+
+	mach_write_to_n_little_endian(dest, col_len - 8, len);
 }
 
 /*******************************************************************//**
@@ -274,15 +773,32 @@ row_mysql_read_blob_ref(
 	ulint*		len,		/*!< out: BLOB length */
 	const byte*	ref,		/*!< in: BLOB reference in the
 					MySQL format */
-	ulint		col_len)	/*!< in: BLOB reference length
+	ulint		col_len,	/*!< in: BLOB reference length
 					(not BLOB length) */
+	bool		need_compression,
+					/*!< in: if the data need to be
+					compressed*/
+	const byte*	dict_data,	/*!< in: optional compression
+					dictionary data */
+	ulint		dict_data_len,	/*!< in: optional compression
+					dictionary data length */
+	row_prebuilt_t*	prebuilt)	/*!< in: use prebuilt->compress_heap
+					only here */
 {
-	byte*	data;
+	byte*	data = NULL;
+	byte*	ptr = NULL;
 
 	*len = mach_read_from_n_little_endian(ref, col_len - 8);
 
 	memcpy(&data, ref + col_len - 8, sizeof data);
 
+	if (need_compression) {
+		ptr = row_compress_column(data, len, col_len - 8, dict_data,
+			dict_data_len, prebuilt);
+		if (ptr)
+			data = ptr;
+	}
+
 	return(data);
 }
 
@@ -365,7 +881,16 @@ row_mysql_store_col_in_innobase_format(
 					necessarily the length of the actual
 					payload data; if the column is a true
 					VARCHAR then this is irrelevant */
-	ulint		comp)		/*!< in: nonzero=compact format */
+	ulint		comp,		/*!< in: nonzero=compact format */
+	bool		need_compression,
+					/*!< in: if the data need to be
+					compressed*/
+	const byte*	dict_data,	/*!< in: optional compression
+					dictionary data */
+	ulint		dict_data_len,	/*!< in: optional compression
+					dictionary data length */
+	row_prebuilt_t*	prebuilt)	/*!< in: use prebuilt->compress_heap
+					only here */
 {
 	const byte*	ptr	= mysql_data;
 	const dtype_t*	dtype;
@@ -418,8 +943,14 @@ row_mysql_store_col_in_innobase_format(
 				lenlen = 2;
 			}
 
-			ptr = row_mysql_read_true_varchar(&col_len, mysql_data,
-							  lenlen);
+			const byte* tmp_ptr = row_mysql_read_true_varchar(
+				&col_len, mysql_data, lenlen);
+			if (need_compression)
+				ptr = row_compress_column(tmp_ptr, &col_len,
+					lenlen, dict_data, dict_data_len,
+					prebuilt);
+			else
+				ptr = tmp_ptr;
 		} else {
 			/* Remove trailing spaces from old style VARCHAR
 			columns. */
@@ -501,7 +1032,9 @@ row_mysql_store_col_in_innobase_format(
 		}
 	} else if (type == DATA_BLOB && row_format_col) {
 
-		ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
+		ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len,
+			need_compression, dict_data, dict_data_len,
+			prebuilt);
 	}
 
 	dfield_set_data(dfield, ptr, col_len);
@@ -559,7 +1092,11 @@ row_mysql_convert_row_to_innobase(
 			TRUE, /* MySQL row format data */
 			mysql_rec + templ->mysql_col_offset,
 			templ->mysql_col_len,
-			dict_table_is_comp(prebuilt->table));
+			dict_table_is_comp(prebuilt->table),
+			templ->compressed,
+			reinterpret_cast<const byte*>(
+				templ->zip_dict_data.str),
+			templ->zip_dict_data.length, prebuilt);
 next_column:
 		;
 	}
@@ -905,6 +1442,10 @@ row_prebuilt_free(
 		mem_heap_free(prebuilt->blob_heap);
 	}
 
+	if (prebuilt->compress_heap) {
+		mem_heap_free(prebuilt->compress_heap);
+	}
+
 	if (prebuilt->old_vers_heap) {
 		mem_heap_free(prebuilt->old_vers_heap);
 	}
@@ -1334,6 +1875,9 @@ row_insert_for_mysql(
 		return(DB_READ_ONLY);
 	}
 
+	if (UNIV_LIKELY_NULL(prebuilt->compress_heap))
+		mem_heap_empty(prebuilt->compress_heap);
+
 	trx->op_info = "inserting";
 
 	row_mysql_delay_if_needed();
@@ -2729,6 +3273,10 @@ row_drop_tables_for_mysql_in_background(void)
 		return(n_tables + n_tables_dropped);
 	}
 
+	DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep",
+		os_thread_sleep(5000000);
+	);
+
 	table = dict_table_open_on_name(drop->table_name, FALSE, FALSE,
 					DICT_ERR_IGNORE_NONE);
 
@@ -2739,6 +3287,16 @@ row_drop_tables_for_mysql_in_background(void)
 		goto already_dropped;
 	}
 
+	if (!table->to_be_dropped) {
+		/* There is a scenario: the old table is dropped
+		just after it's added into drop list, and new
+		table with the same name is created, then we try
+		to drop the new table in background. */
+		dict_table_close(table, FALSE, FALSE);
+
+		goto already_dropped;
+	}
+
 	ut_a(!table->can_be_evicted);
 
 	dict_table_close(table, FALSE, FALSE);
@@ -2869,6 +3427,12 @@ row_mysql_table_id_reassign(
 	pars_info_add_ull_literal(info, "old_id", table->id);
 	pars_info_add_ull_literal(info, "new_id", *new_id);
 
+	/* As micro-SQL does not support int4 == int8 comparisons,
+	old and new IDs are added again under different names as
+	int4 values*/
+	pars_info_add_int4_literal(info, "old_id_narrow", table->id);
+	pars_info_add_int4_literal(info, "new_id_narrow", *new_id);
+
 	err = que_eval_sql(
 		info,
 		"PROCEDURE RENUMBER_TABLE_PROC () IS\n"
@@ -2879,6 +3443,8 @@ row_mysql_table_id_reassign(
 		" WHERE TABLE_ID = :old_id;\n"
 		"UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
 		" WHERE TABLE_ID = :old_id;\n"
+		"UPDATE SYS_ZIP_DICT_COLS SET TABLE_ID = :new_id_narrow\n"
+		" WHERE TABLE_ID = :old_id_narrow;\n"
 		"END;\n", FALSE, trx);
 
 	return(err);
@@ -3645,6 +4211,12 @@ row_truncate_table_for_mysql(
 	pars_info_add_ull_literal(info, "old_id", table->id);
 	pars_info_add_ull_literal(info, "new_id", new_id);
 
+	/* As micro-SQL does not support int4 == int8 comparisons,
+	the old and new IDs are added again under different names as
+	int4 values. */
+	pars_info_add_int4_literal(info, "old_id_narrow", table->id);
+	pars_info_add_int4_literal(info, "new_id_narrow", new_id);
+
 	err = que_eval_sql(info,
 			   "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n"
 			   "BEGIN\n"
@@ -3656,6 +4228,9 @@ row_truncate_table_for_mysql(
 			   "UPDATE SYS_INDEXES"
 			   " SET TABLE_ID = :new_id, SPACE = :new_space\n"
 			   " WHERE TABLE_ID = :old_id;\n"
+			   "UPDATE SYS_ZIP_DICT_COLS\n"
+			   " SET TABLE_ID = :new_id_narrow\n"
+			   " WHERE TABLE_ID = :old_id_narrow;\n"
 			   "END;\n"
 			   , FALSE, trx);
 
@@ -4006,6 +4581,13 @@ row_drop_table_for_mysql(
 		}
 	}
 
+
+	DBUG_EXECUTE_IF("row_drop_table_add_to_background",
+		row_add_table_to_background_drop_list(table->name);
+		err = DB_SUCCESS;
+		goto funct_exit;
+	);
+
 	/* TODO: could we replace the counter n_foreign_key_checks_running
 	with lock checks on the table? Acquire here an exclusive lock on the
 	table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
@@ -4276,6 +4858,19 @@ row_drop_table_for_mysql(
 			filepath = fil_make_ibd_name(tablename, false);
 		}
 
+		/* Remove all compression dictionary references for the
+		table */
+		err = dict_create_remove_zip_dict_references_for_table(
+			table->id, trx);
+		if (err != DB_SUCCESS) {
+			ib_logf(IB_LOG_LEVEL_ERROR, "Error: (%s) not "
+				"able to remove compression dictionary "
+				"references for table %s", ut_strerr(err),
+				tablename);
+
+			goto funct_exit;
+		}
+
 		if (dict_table_has_fts_index(table)
 		    || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
 			ut_ad(table->n_ref_count == 0);
@@ -4623,6 +5218,19 @@ row_drop_database_for_mysql(
 	row_mysql_lock_data_dictionary(trx);
 
 	while ((table_name = dict_get_first_table_name_in_db(name))) {
+		/* Drop the parent table if this is an FTS aux table, to
+		avoid accessing dropped FTS aux tables in the information
+		schema while the parent table still exists.
+		Note: dropping the parent table also drops its FTS aux tables. */
+		char*	parent_table_name;
+		parent_table_name = fts_get_parent_table_name(
+				table_name, strlen(table_name));
+
+		if (parent_table_name != NULL) {
+			mem_free(table_name);
+			table_name = parent_table_name;
+		}
+
 		ut_a(memcmp(table_name, name, namelen) == 0);
 
 		table = dict_table_open_on_name(
diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc
index a42923de8caec..ad1e9e2bf9d83 100644
--- a/storage/xtradb/row/row0sel.cc
+++ b/storage/xtradb/row/row0sel.cc
@@ -2456,9 +2456,11 @@ row_sel_convert_mysql_key_to_innobase(
 		if (UNIV_LIKELY(!is_null)) {
 			buf = row_mysql_store_col_in_innobase_format(
 					dfield, buf,
-					FALSE, /* MySQL key value format col */
+					/* MySQL key value format col */
+					FALSE,
 					key_ptr + data_offset, data_len,
-					dict_table_is_comp(index->table));
+					dict_table_is_comp(index->table),
+					false, 0, 0 ,0);
 			ut_a(buf <= original_buf + buf_len);
 		}
 
@@ -2551,12 +2553,16 @@ row_sel_store_row_id_to_prebuilt(
 
 #ifdef UNIV_DEBUG
 /** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
-# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \
-	row_sel_field_store_in_mysql_format_func(dest,templ,idx,field,src,len)
+# define row_sel_field_store_in_mysql_format( \
+	dest,templ,idx,field,src,len,prebuilt) \
+	row_sel_field_store_in_mysql_format_func \
+	(dest,templ,idx,field,src,len, prebuilt)
 #else /* UNIV_DEBUG */
 /** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
-# define row_sel_field_store_in_mysql_format(dest,templ,idx,field,src,len) \
-	row_sel_field_store_in_mysql_format_func(dest,templ,src,len)
+# define row_sel_field_store_in_mysql_format( \
+	dest,templ,idx,field,src,len,prebuilt) \
+	row_sel_field_store_in_mysql_format_func \
+	(dest,templ,src,len, prebuilt)
 #endif /* UNIV_DEBUG */
 
 /**************************************************************//**
@@ -2586,7 +2592,10 @@ row_sel_field_store_in_mysql_format_func(
 				templ->icp_rec_field_no */
 #endif /* UNIV_DEBUG */
 	const byte*	data,	/*!< in: data to store */
-	ulint		len)	/*!< in: length of the data */
+	ulint		len,	/*!< in: length of the data */
+	row_prebuilt_t* prebuilt)
+				/*!< in: use prebuilt->compress_heap
+				only here */
 {
 	byte*			ptr;
 #ifdef UNIV_DEBUG
@@ -2630,6 +2639,15 @@ row_sel_field_store_in_mysql_format_func(
 		field_end = dest + templ->mysql_col_len;
 
 		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
+			/* If this is a compressed column,
+			decompress it first */
+			if (templ->compressed)
+				data = row_decompress_column(data, &len,
+					reinterpret_cast<const byte*>(
+						templ->zip_dict_data.str),
+					templ->zip_dict_data.length,
+					prebuilt);
+
 			/* This is a >= 5.0.3 type true VARCHAR. Store the
 			length of the data to the first byte or the first
 			two bytes of dest. */
@@ -2680,7 +2698,11 @@ row_sel_field_store_in_mysql_format_func(
 		already copied to the buffer in row_sel_store_mysql_rec */
 
 		row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
-					 len);
+					len, templ->compressed,
+					reinterpret_cast<const byte*>(
+						templ->zip_dict_data.str),
+					templ->zip_dict_data.length,
+					prebuilt);
 		break;
 
 	case DATA_MYSQL:
@@ -2833,7 +2855,7 @@ row_sel_store_mysql_field_func(
 
 		row_sel_field_store_in_mysql_format(
 			mysql_rec + templ->mysql_col_offset,
-			templ, index, field_no, data, len);
+			templ, index, field_no, data, len, prebuilt);
 
 		if (heap != prebuilt->blob_heap) {
 			mem_heap_free(heap);
@@ -2883,7 +2905,7 @@ row_sel_store_mysql_field_func(
 
 		row_sel_field_store_in_mysql_format(
 			mysql_rec + templ->mysql_col_offset,
-			templ, index, field_no, data, len);
+			templ, index, field_no, data, len, prebuilt);
 	}
 
 	ut_ad(len != UNIV_SQL_NULL);
@@ -2931,6 +2953,9 @@ row_sel_store_mysql_rec(
 		prebuilt->blob_heap = NULL;
 	}
 
+	if (UNIV_LIKELY_NULL(prebuilt->compress_heap))
+		mem_heap_empty(prebuilt->compress_heap);
+
 	for (i = 0; i < prebuilt->n_template; i++) {
 		const mysql_row_templ_t*templ = &prebuilt->mysql_template[i];
 		const ulint		field_no
diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc
index a0dd32c203f4d..4a709160ea6b0 100644
--- a/storage/xtradb/srv/srv0mon.cc
+++ b/storage/xtradb/srv/srv0mon.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
 Copyright (c) 2012, Facebook Inc.
 
 This program is free software; you can redistribute it and/or modify it under
@@ -1367,7 +1367,10 @@ srv_mon_set_module_control(
 				module */
 				set_current_module = FALSE;
 			} else if (module_id == MONITOR_ALL_COUNTER) {
-				continue;
+				if (!(innodb_counter_info[ix].monitor_type
+				      & MONITOR_GROUP_MODULE)) {
+					continue;
+				}
 			} else {
 				/* Hitting the next module, stop */
 				break;
diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc
index 7886d705ae4ec..c04b446f4800f 100644
--- a/storage/xtradb/srv/srv0srv.cc
+++ b/storage/xtradb/srv/srv0srv.cc
@@ -195,6 +195,9 @@ UNIV_INTERN char**	srv_data_file_names = NULL;
 /* size in database pages */
 UNIV_INTERN ulint*	srv_data_file_sizes = NULL;
 
+/** Whether the redo log tracking is currently enabled. Note that it is
+possible for the log tracker thread to be running and the tracking to be
+disabled */
 UNIV_INTERN my_bool	srv_track_changed_pages = FALSE;
 
 UNIV_INTERN ulonglong	srv_max_bitmap_file_size = 100 * 1024 * 1024;
@@ -793,6 +796,9 @@ UNIV_INTERN os_event_t	srv_checkpoint_completed_event;
 
 UNIV_INTERN os_event_t	srv_redo_log_tracked_event;
 
+/** Whether the redo log tracker thread has been started. Does not take into
+account whether the tracking is currently enabled (see srv_track_changed_pages
+for that) */
 UNIV_INTERN bool	srv_redo_log_thread_started = false;
 
 /*********************************************************************//**
@@ -2431,13 +2437,8 @@ DECLARE_THREAD(srv_redo_log_follow_thread)(
 		os_event_wait(srv_checkpoint_completed_event);
 		os_event_reset(srv_checkpoint_completed_event);
 
-#ifdef UNIV_DEBUG
-		if (!srv_track_changed_pages) {
-			continue;
-		}
-#endif
-
-		if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
+		if (srv_track_changed_pages
+		    && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
 			if (!log_online_follow_redo_log()) {
 				/* TODO: sync with I_S log tracking status? */
 				ib_logf(IB_LOG_LEVEL_ERROR,
diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc
index dbddb4a7a1c56..82beb83192fbb 100644
--- a/storage/xtradb/srv/srv0start.cc
+++ b/storage/xtradb/srv/srv0start.cc
@@ -2780,6 +2780,12 @@ innobase_start_or_create_for_mysql(void)
 		}
 	}
 
+	/* Create the SYS_ZIP_DICT system table */
+	err = dict_create_or_check_sys_zip_dict();
+	if (err != DB_SUCCESS) {
+		return(err);
+	}
+
 	srv_is_being_started = FALSE;
 
 	ut_a(trx_purge_state() == PURGE_STATE_INIT);
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index bd1e74becaa10..318c248f74220 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1,5 +1,5 @@
 /* Copyright (c) 2003, 2013, Oracle and/or its affiliates
-   Copyright (c) 2009, 2014, SkySQL Ab.
+   Copyright (c) 2009, 2016, MariaDB
 
    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index bcb07daae032e..b4fc6297afd50 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1,5 +1,5 @@
 /* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
-   Copyright (c) 2009, 2013, Monty Program Ab
+   Copyright (c) 2009, 2016, MariaDB
 
    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
diff --git a/support-files/mysql.server.sh b/support-files/mysql.server.sh
index b562354a586ce..8d2e78edbd895 100644
--- a/support-files/mysql.server.sh
+++ b/support-files/mysql.server.sh
@@ -319,7 +319,7 @@ case "$mode" in
     then
       # Give extra arguments to mysqld with the my.cnf file. This script
       # may be overwritten at next upgrade.
-      $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" "$@" >/dev/null 2>&1 &
+      $bindir/mysqld_safe --datadir="$datadir" --pid-file="$mysqld_pid_file_path" "$@" &
       wait_for_ready; return_value=$?
 
       # Make lock for RedHat / SuSE
diff --git a/tests/async_queries.c b/tests/async_queries.c
index 76e884e6a6910..a8889fc8d5ab9 100644
--- a/tests/async_queries.c
+++ b/tests/async_queries.c
@@ -425,7 +425,7 @@ main(int argc, char *argv[])
 
   event_dispatch();
 
-  free(sds);
+  my_free(sds);
 
   mysql_library_end();
 
diff --git a/win/packaging/CMakeLists.txt b/win/packaging/CMakeLists.txt
index 0535a486d57ba..1682bae6986cb 100644
--- a/win/packaging/CMakeLists.txt
+++ b/win/packaging/CMakeLists.txt
@@ -24,10 +24,13 @@ ENDIF()
 
 
 SET(MANUFACTURER "MariaDB Corporation Ab")
-FIND_PATH(WIX_DIR heat.exe
-  "$ENV{ProgramFiles}/WiX Toolset v3.9/bin"
-  "$ENV{ProgramFiles}/WiX Toolset v3.10/bin"
-)
+SET(WIX_BIN_PATHS)
+FOREACH(WIX_VER 3.9 3.10 3.11)
+  LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles}/WiX Toolset v${WIX_VER}/bin")
+  LIST(APPEND WIX_BIN_PATHS "$ENV{ProgramFiles} (x86)/WiX Toolset v${WIX_VER}/bin")
+ENDFOREACH()
+
+FIND_PATH(WIX_DIR heat.exe ${WIX_BIN_PATHS})
 SET(CPACK_WIX_PACKAGE_BASE_NAME "MariaDB")
 IF(CMAKE_SIZEOF_VOID_P EQUAL 4)
  SET(CPACK_WIX_UPGRADE_CODE "49EB7A6A-1CEF-4A1E-9E89-B9A4993963E3")
diff --git a/win/packaging/create_msi.cmake.in b/win/packaging/create_msi.cmake.in
index c2ab648a6dbf2..1f847a396950d 100644
--- a/win/packaging/create_msi.cmake.in
+++ b/win/packaging/create_msi.cmake.in
@@ -434,6 +434,7 @@ EXECUTE_PROCESS(
 IF(SIGNCODE)
   EXECUTE_PROCESS(
   COMMAND ${SIGNTOOL_EXECUTABLE} sign ${SIGNTOOL_PARAMETERS} 
+  /d ${CPACK_PACKAGE_FILE_NAME}.msi
   ${CPACK_PACKAGE_FILE_NAME}.msi
 )
 ENDIF()