Skip to content

Commit 420f8e2

Browse files
committed
MDEV-24854: Change innodb_flush_method=O_DIRECT by default
We have innodb_use_native_aio=ON by default since the introduction of that parameter in commit 2f9fb41 (MySQL 5.5 and MariaDB 5.5). However, to really benefit from the setting, the files should be opened in O_DIRECT mode, to bypass the file system cache. In this way, the reads and writes can be submitted with DMA, using the InnoDB buffer pool directly, and no processor cycles need to be used for copying data. The use of O_DIRECT benefits not only the current libaio implementation, but also liburing. os_file_set_nocache(): Test innodb_flush_method in the function, not in the callers.
1 parent 43b239a commit 420f8e2

File tree

7 files changed

+27
-39
lines changed

7 files changed

+27
-39
lines changed

extra/mariabackup/xtrabackup.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -1506,7 +1506,7 @@ struct my_option xb_server_options[] =
15061506
"With which method to flush data.",
15071507
&srv_file_flush_method, &srv_file_flush_method,
15081508
&innodb_flush_method_typelib, GET_ENUM, REQUIRED_ARG,
1509-
IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_FSYNC), 0, 0, 0, 0, 0},
1509+
IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_O_DIRECT), 0, 0, 0, 0, 0},
15101510

15111511
{"innodb_log_buffer_size", OPT_INNODB_LOG_BUFFER_SIZE,
15121512
"The size of the buffer which InnoDB uses to write log to the log files on disk.",

mysql-test/suite/sys_vars/r/innodb_flush_method_func.result

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
call mtr.add_suppression("InnoDB: Failed to set .*DIRECT");
22
select @@innodb_flush_method;
33
@@innodb_flush_method
4-
fsync
4+
O_DIRECT
55
create table t(a serial) engine=innodb;
66
FLUSH TABLES;
77
# restart: --innodb-flush-method=5

mysql-test/suite/sys_vars/r/sysvars_innodb.result

+1-1
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ READ_ONLY NO
743743
COMMAND_LINE_ARGUMENT OPTIONAL
744744
VARIABLE_NAME INNODB_FLUSH_METHOD
745745
SESSION_VALUE NULL
746-
DEFAULT_VALUE fsync
746+
DEFAULT_VALUE O_DIRECT
747747
VARIABLE_SCOPE GLOBAL
748748
VARIABLE_TYPE ENUM
749749
VARIABLE_COMMENT With which method to flush data.

sql-bench/example

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ machine="Linux-x64"
66

77
# InnoDB tests
88

9-
./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_log_file_size=2000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=1 --skip-innodb-doublewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log
9+
./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_log_file_size=2000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log
1010

11-
./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_log_file_size=2000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=1 --skip-innodb-doublewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log
11+
./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_log_file_size=2000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log
1212

1313
# MyISAM tests
1414

storage/innobase/handler/ha_innodb.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -18308,7 +18308,7 @@ static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
1830818308
static MYSQL_SYSVAR_ENUM(flush_method, innodb_flush_method,
1830918309
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
1831018310
"With which method to flush data.",
18311-
NULL, NULL, IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_FSYNC),
18311+
NULL, NULL, IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_O_DIRECT),
1831218312
&innodb_flush_method_typelib);
1831318313

1831418314
static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,

storage/innobase/include/fil0fil.h

+4-6
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,11 @@ enum srv_flush_t
6161
/** do not flush after writing */
6262
SRV_NOSYNC,
6363
/** invoke os_file_set_nocache() on data files. This implies using
64-
non-buffered IO but still using fsync, the reason for which is that
65-
some FS do not flush meta-data when unbuffered IO happens */
64+
unbuffered I/O but still fdatasync(), because some filesystems might
65+
not flush meta-data on write completion */
6666
SRV_O_DIRECT,
67-
/** do not use fsync() when using direct IO i.e.: it can be set to
68-
avoid the fsync() call that we make when using SRV_UNIX_O_DIRECT.
69-
However, in this case user/DBA should be sure about the integrity of
70-
the meta-data */
67+
/** Like O_DIRECT, but skip fdatasync(), assuming that the data is
68+
durable on write completion */
7169
SRV_O_DIRECT_NO_FSYNC
7270
#ifdef _WIN32
7371
/** Traditional Windows approach to open all files without caching,

storage/innobase/os/os0file.cc

+17-27
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
Copyright (c) 1995, 2019, Oracle and/or its affiliates. All Rights Reserved.
44
Copyright (c) 2009, Percona Inc.
5-
Copyright (c) 2013, 2020, MariaDB Corporation.
5+
Copyright (c) 2013, 2021, MariaDB Corporation.
66
77
Portions of this file contain modifications contributed and copyrighted
88
by Percona Inc.. Those modifications are
@@ -1112,12 +1112,8 @@ os_file_create_simple_func(
11121112
/* This function is always called for data files, we should disable
11131113
OS caching (O_DIRECT) here as we do in os_file_create_func(), so
11141114
we open the same file in the same mode, see man page of open(2). */
1115-
if (!srv_read_only_mode
1116-
&& *success
1117-
&& (srv_file_flush_method == SRV_O_DIRECT
1118-
|| srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)) {
1119-
1120-
os_file_set_nocache(file, name, mode_str);
1115+
if (!srv_read_only_mode && *success) {
1116+
os_file_set_nocache(file, name, mode_str);
11211117
}
11221118

11231119
#ifdef USE_FILE_LOCK
@@ -1426,11 +1422,8 @@ os_file_create_func(
14261422
if (!read_only
14271423
&& *success
14281424
&& type != OS_LOG_FILE && type != OS_DATA_TEMP_FILE
1429-
&& type != OS_DATA_FILE_NO_O_DIRECT
1430-
&& (srv_file_flush_method == SRV_O_DIRECT
1431-
|| srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)) {
1432-
1433-
os_file_set_nocache(file, name, mode_str);
1425+
&& type != OS_DATA_FILE_NO_O_DIRECT) {
1426+
os_file_set_nocache(file, name, mode_str);
14341427
}
14351428

14361429
#ifdef USE_FILE_LOCK
@@ -3484,6 +3477,15 @@ os_file_set_nocache(
34843477
const char* file_name MY_ATTRIBUTE((unused)),
34853478
const char* operation_name MY_ATTRIBUTE((unused)))
34863479
{
3480+
const auto innodb_flush_method = srv_file_flush_method;
3481+
switch (innodb_flush_method) {
3482+
case SRV_O_DIRECT:
3483+
case SRV_O_DIRECT_NO_FSYNC:
3484+
break;
3485+
default:
3486+
return;
3487+
}
3488+
34873489
/* some versions of Solaris may not have DIRECTIO_ON */
34883490
#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
34893491
if (directio(fd, DIRECTIO_ON) == -1) {
@@ -3502,23 +3504,11 @@ os_file_set_nocache(
35023504
if (errno_save == EINVAL) {
35033505
if (!warning_message_printed) {
35043506
warning_message_printed = true;
3505-
# ifdef UNIV_LINUX
3506-
ib::warn()
3507-
<< "Failed to set O_DIRECT on file"
3508-
<< file_name << "; " << operation_name
3509-
<< ": " << strerror(errno_save) << ", "
3510-
"continuing anyway. O_DIRECT is "
3511-
"known to result in 'Invalid argument' "
3512-
"on Linux on tmpfs, "
3513-
"see MySQL Bug#26662.";
3514-
# else /* UNIV_LINUX */
3515-
goto short_warning;
3516-
# endif /* UNIV_LINUX */
3507+
ib::info()
3508+
<< "Setting O_DIRECT on file "
3509+
<< file_name << " failed";
35173510
}
35183511
} else {
3519-
# ifndef UNIV_LINUX
3520-
short_warning:
3521-
# endif
35223512
ib::warn()
35233513
<< "Failed to set O_DIRECT on file "
35243514
<< file_name << "; " << operation_name

0 commit comments

Comments
 (0)