-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement statx(STATX_DIOALIGN)
so applications can discover correct O_DIRECT
alignment
#16972
Open
robn
wants to merge
3
commits into
openzfs:master
Choose a base branch
from
robn:statx-dioalign
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+489
−2
Open
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
*/ | ||
/* | ||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. | ||
* Copyright (c) 2025, Rob Norris <[email protected]> | ||
*/ | ||
|
||
#ifndef _SYS_FS_ZFS_VNOPS_H | ||
|
@@ -42,6 +43,8 @@ extern int zfs_clone_range_replay(znode_t *, uint64_t, uint64_t, uint64_t, | |
extern int zfs_getsecattr(znode_t *, vsecattr_t *, int, cred_t *); | ||
extern int zfs_setsecattr(znode_t *, vsecattr_t *, int, cred_t *); | ||
|
||
extern int zfs_get_direct_alignment(znode_t *, uint64_t *); | ||
|
||
extern int mappedread(znode_t *, int, zfs_uio_t *); | ||
extern int mappedread_sf(znode_t *, int, zfs_uio_t *); | ||
extern void update_pages(znode_t *, int64_t, int, objset_t *); | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
/* | ||
* Copyright (c) 2011, Lawrence Livermore National Security, LLC. | ||
* Copyright (c) 2015 by Chunwei Chen. All rights reserved. | ||
* Copyright (c) 2025, Rob Norris <[email protected]> | ||
*/ | ||
|
||
|
||
|
@@ -30,6 +31,7 @@ | |
#include <sys/zfs_vnops.h> | ||
#include <sys/zfs_znode.h> | ||
#include <sys/dmu_objset.h> | ||
#include <sys/spa_impl.h> | ||
#include <sys/vfs.h> | ||
#include <sys/zpl.h> | ||
#include <sys/file.h> | ||
|
@@ -490,6 +492,17 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask, | |
} | ||
#endif | ||
|
||
#ifdef STATX_DIOALIGN | ||
if (request_mask & STATX_DIOALIGN) { | ||
uint64_t align; | ||
if (zfs_get_direct_alignment(zp, &align) == 0) { | ||
stat->dio_mem_align = PAGE_SIZE; | ||
stat->dio_offset_align = align; | ||
stat->result_mask |= STATX_DIOALIGN; | ||
} | ||
} | ||
#endif | ||
|
||
#ifdef STATX_ATTR_IMMUTABLE | ||
if (zp->z_pflags & ZFS_IMMUTABLE) | ||
stat->attributes |= STATX_ATTR_IMMUTABLE; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,7 @@ | |
* Copyright (c) 2015 by Chunwei Chen. All rights reserved. | ||
* Copyright 2017 Nexenta Systems, Inc. | ||
* Copyright (c) 2021, 2022 by Pawel Jakub Dawidek | ||
* Copyright (c) 2025, Rob Norris <[email protected]> | ||
*/ | ||
|
||
/* Portions Copyright 2007 Jeremy Teo */ | ||
|
@@ -1083,6 +1084,24 @@ zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr) | |
return (error); | ||
} | ||
|
||
/* | ||
* Get the optimal alignment to ensure direct IO can be performed without | ||
* incurring any RMW penalty on write. If direct IO is not enabled for this | ||
* file, returns an error. | ||
*/ | ||
int | ||
zfs_get_direct_alignment(znode_t *zp, uint64_t *alignp) | ||
{ | ||
zfsvfs_t *zfsvfs = ZTOZSB(zp); | ||
|
||
if (!zfs_dio_enabled || zfsvfs->z_os->os_direct == ZFS_DIRECT_DISABLED) | ||
return (SET_ERROR(EOPNOTSUPP)); | ||
|
||
*alignp = MAX(zp->z_blksz, PAGE_SIZE); | ||
|
||
return (0); | ||
} | ||
|
||
#ifdef ZFS_DEBUG | ||
static int zil_fault_io = 0; | ||
#endif | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I may be misremembering, but I am not sure `z_blksz` is always a power of 2 for files of one block, and correspondingly always a multiple of PAGE_SIZE. I wonder if this still needs some logic from `zfs_write()`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should be fine with `z_blksz` always being a power of two, even for files with a single block. But as mentioned before, I think we'll see some confusion/problems with the file offset alignment restriction changing as that first block grows.
Thinking about this some more, it seems like the best thing to do is return `MAX(zp->z_blksz, PAGE_SIZE)` once multiple blocks have been allocated and thus the block size is fixed for the lifetime of the file. For files with only a single block we'd return `MAX(zfsvfs->z_max_blksz, PAGE_SIZE)`. This has a couple of advantages:
- It ensures that for new files, and files smaller than the max record size, the file offset alignment returned will work for Direct I/O even when the file size grows and multiple blocks are required.
- For existing files which already have multiple blocks we'll return the optimal size for the file.
The downside is that it's a bit more restrictive than strictly required, but I think that's preferable to changing the alignment requirement after the application has checked for it using `statx()`.
We'd definitely want to update `statx_dioalign.ksh` to include some version of this test.
direct=standard
.statx(2)
to grab the alignment.