diff --git a/kernel/arch/riscv64/syscall_table.json b/kernel/arch/riscv64/syscall_table.json index 589184e28..5c4f36557 100644 --- a/kernel/arch/riscv64/syscall_table.json +++ b/kernel/arch/riscv64/syscall_table.json @@ -2736,5 +2736,22 @@ ], "return_type": "int", "abi": "c" + }, + { + "name": "umount2", + "nr": 156, + "nr_args": 2, + "args": [ + [ + "const char *", + "target" + ], + [ + "int", + "flags" + ] + ], + "return_type": "int", + "abi": "c" } ] diff --git a/kernel/arch/x86_64/syscall_table.json b/kernel/arch/x86_64/syscall_table.json index 4050ffb96..cffbbcab4 100644 --- a/kernel/arch/x86_64/syscall_table.json +++ b/kernel/arch/x86_64/syscall_table.json @@ -2780,5 +2780,23 @@ ], "return_type": "int", "abi": "c" + }, + { + "name": "umount2", + "nr": 156, + "nr_args": 2, + "args": [ + [ + "const char *", + "target" + ], + [ + "int", + "flags" + ] + ], + "return_type": "int", + "abi": "c" } + ] diff --git a/kernel/include/onyx/dentry.h b/kernel/include/onyx/dentry.h index 87b4531a1..3da2cd8ab 100644 --- a/kernel/include/onyx/dentry.h +++ b/kernel/include/onyx/dentry.h @@ -130,6 +130,16 @@ struct dcache_shrink_result enum lru_walk_ret scan_dcache_lru_one(struct lru_list *lru, struct list_head *object, void *data); enum lru_walk_ret shrink_dcache_lru_one(struct lru_list *lru, struct list_head *object, void *data); +void dentry_shrink_subtree(struct dentry *dentry); + +/** + * @brief Do the final unref on a whole subtree + * Should _only_ be used by in-memory filesystems that use the dcache as their directories. 
+ * + * @param dentry Root dentry + */ +void dentry_unref_subtree(struct dentry *dentry); + __END_CDECLS #ifdef __cplusplus diff --git a/kernel/include/onyx/rcupdate.h b/kernel/include/onyx/rcupdate.h index 12bff54c0..7936c503a 100644 --- a/kernel/include/onyx/rcupdate.h +++ b/kernel/include/onyx/rcupdate.h @@ -27,15 +27,17 @@ void synchronize_rcu(); void __kfree_rcu(struct rcu_head *head, unsigned long off); #ifdef __cplusplus -#define _Static_assert(x) static_assert(x) +#define _Static_assert(x, m) static_assert(x, m) #endif #define is_kfree_rcu_off(off) ((off) < 4096) -#define kfree_rcu(ptr, head) \ - ({ \ - unsigned long off = offsetof(__typeof__(*(ptr)), head); \ - _Static_assert(is_kfree_rcu_off(offsetof(__typeof__(*(ptr)), head))); \ - __kfree_rcu(&(ptr)->head, off); \ +#define kfree_rcu(ptr, head) \ + ({ \ + unsigned long off = offsetof(__typeof__(*(ptr)), head); \ + _Static_assert( \ + is_kfree_rcu_off(offsetof(__typeof__(*(ptr)), head)), \ + "kfree_rcu's rcu_head needs to be within 4096 bytes off the start of the struct"); \ + __kfree_rcu(&(ptr)->head, off); \ }) /** diff --git a/kernel/include/onyx/superblock.h b/kernel/include/onyx/superblock.h index 95aac6b09..835cf92d4 100644 --- a/kernel/include/onyx/superblock.h +++ b/kernel/include/onyx/superblock.h @@ -15,12 +15,14 @@ #include #include #include +#include #include #include struct file; struct bio_req; struct blockdev; +struct mount; #define SB_FLAG_NODIRTY (1 << 0) #define SB_FLAG_IN_MEMORY (1 << 1) @@ -34,13 +36,18 @@ struct superblock int (*flush_inode)(struct inode *inode, bool in_sync); int (*kill_inode)(struct inode *inode); int (*statfs)(struct statfs *buf, struct superblock *sb); + int (*umount)(struct mount *mnt); + int (*shutdown)(struct superblock *sb); unsigned int s_block_size; struct blockdev *s_bdev; dev_t s_devnr; unsigned long s_flags; struct mutex s_rename_lock; struct lru_list s_dcache_lru; - struct shrinker s_shrinker; + union { + struct shrinker s_shrinker; + struct rcu_head 
s_rcu; + }; }; __BEGIN_CDECLS @@ -51,6 +58,8 @@ void superblock_add_inode_unlocked(struct superblock *sb, struct inode *inode); void superblock_add_inode(struct superblock *sb, struct inode *inode); void superblock_remove_inode(struct superblock *sb, struct inode *inode); void superblock_kill(struct superblock *sb); +void sb_shutdown(struct superblock *sb); +int sb_generic_shutdown(struct superblock *sb); struct page_iov; diff --git a/kernel/include/onyx/tmpfs.h b/kernel/include/onyx/tmpfs.h index 17aea05a2..dbb4e2bfd 100644 --- a/kernel/include/onyx/tmpfs.h +++ b/kernel/include/onyx/tmpfs.h @@ -36,15 +36,12 @@ class tmpfs_superblock : public superblock dev_t fs_minor; - list_head_cpp fs_list_node; - const file_ops *tmpfs_ops_; atomic nblocks; atomic ino_nr; tmpfs_superblock() - : superblock{}, curr_inode{}, fs_minor{++curr_minor_number}, fs_list_node{this}, - tmpfs_ops_{&tmpfs_fops} + : superblock{}, curr_inode{}, fs_minor{++curr_minor_number}, tmpfs_ops_{&tmpfs_fops} { superblock_init(this); s_block_size = PAGE_SIZE; diff --git a/kernel/kernel/fs/dentry.cpp b/kernel/kernel/fs/dentry.cpp index 57f568fa0..48a1f42cd 100644 --- a/kernel/kernel/fs/dentry.cpp +++ b/kernel/kernel/fs/dentry.cpp @@ -785,6 +785,8 @@ void dentry_do_unlink(dentry *entry) auto parent = entry->d_parent; DCHECK(spin_lock_held(&parent->d_lock)); dput_locked(parent); + if ((entry->d_flags & (DENTRY_FLAG_LRU | DENTRY_FLAG_SHRINK)) == DENTRY_FLAG_LRU) + d_remove_lru(entry); entry->d_parent = nullptr; if (!d_is_negative(entry)) @@ -795,7 +797,6 @@ void dentry_do_unlink(dentry *entry) { inode_dec_nlink(parent->d_inode); inode_dec_nlink(entry->d_inode); - dentry_shrink_subtree(entry); } } @@ -865,6 +866,8 @@ void dentry_do_rename_unlink(dentry *entry) auto parent = entry->d_parent; DCHECK(spin_lock_held(&parent->d_lock)); WARN_ON(dput_locked(parent) == 0); + if ((entry->d_flags & (DENTRY_FLAG_LRU | DENTRY_FLAG_SHRINK)) == DENTRY_FLAG_LRU) + d_remove_lru(entry); entry->d_parent = nullptr; /* The dcache 
buckets are already locked, so we don't grab the lock again. Just open-code the @@ -920,7 +923,10 @@ void dentry_rename(dentry *dent, const char *name, dentry *parent, spin_lock(&dentry_ht_locks[oldi]); } + spin_lock(&parent->d_lock); dentry_do_rename_unlink(dst); + spin_unlock(&parent->d_lock); + spin_lock(&dent->d_lock); DCHECK(dentry_is_in_chain(dent, oldi)); @@ -1263,6 +1269,47 @@ void dentry_shrink_subtree(struct dentry *dentry) } } +static d_walk_ret find_unref(void *data, struct dentry *dentry) +{ + struct shrink_data *s = (struct shrink_data *) data; + if (!(dentry->d_flags & DENTRY_FLAG_SHRINK)) + { + if (dentry->d_flags & DENTRY_FLAG_LRU) + d_remove_lru(dentry); + + list_add_tail(&dentry->d_lru, &s->shrink_list); + dentry->d_flags |= DENTRY_FLAG_SHRINK | DENTRY_FLAG_LRU; + } + + return D_WALK_CONTINUE; +} + +static void unref_list(struct shrink_data *s) +{ + list_for_every_safe (&s->shrink_list) + { + struct dentry *dentry = container_of(l, struct dentry, d_lru); + list_remove(&dentry->d_lru); + dentry->d_flags &= ~(DENTRY_FLAG_SHRINK | DENTRY_FLAG_LRU); + dput(dentry); + } +} + +/** + * @brief Do the final unref on a whole subtree + * Should _only_ be used by in-memory filesystems that use the dcache as their directories. 
+ * + * @param dentry Root dentry + */ +void dentry_unref_subtree(struct dentry *dentry) +{ + struct shrink_data data; + INIT_LIST_HEAD(&data.shrink_list); + d_walk(dentry, &data, find_unref); + if (!list_is_empty(&data.shrink_list)) + unref_list(&data); +} + enum lru_walk_ret scan_dcache_lru_one(struct lru_list *lru, struct list_head *object, void *data) { struct dentry *dentry = container_of(object, struct dentry, d_lru); diff --git a/kernel/kernel/fs/ext2/ext2.cpp b/kernel/kernel/fs/ext2/ext2.cpp index f2c7fbc0a..42040184e 100644 --- a/kernel/kernel/fs/ext2/ext2.cpp +++ b/kernel/kernel/fs/ext2/ext2.cpp @@ -656,6 +656,16 @@ int ext2_statfs(struct statfs *buf, superblock *sb) return ((ext2_superblock *) sb)->stat_fs(buf); } +static int ext2_shutdown_sb(struct superblock *sb_) +{ + ext2_superblock *sb = (ext2_superblock *) sb_; + /* Shutdown the sb generically first, then tear down the ext2_superblock. This is required for + * e.g sync purposes. */ + sb_generic_shutdown(sb); + sb->~ext2_superblock(); + return 0; +} + struct superblock *ext2_mount_partition(struct vfs_mount_info *info) { struct blockdev *dev = info->bdev; @@ -782,6 +792,7 @@ struct superblock *ext2_mount_partition(struct vfs_mount_info *info) sb->flush_inode = ext2_flush_inode; sb->kill_inode = ext2_kill_inode; sb->statfs = ext2_statfs; + sb->shutdown = ext2_shutdown_sb; sb->sb->s_mtime = clock_get_posix_time(); sb->sb->s_mnt_count++; diff --git a/kernel/kernel/fs/mount.c b/kernel/kernel/fs/mount.c index f87cc7453..2383c66a3 100644 --- a/kernel/kernel/fs/mount.c +++ b/kernel/kernel/fs/mount.c @@ -7,6 +7,7 @@ */ #include +#include #include #include @@ -208,7 +209,6 @@ static int mnt_commit(struct mount *mnt, const char *target) __atomic_or_fetch(&mnt->mnt_point->d_flags, DENTRY_FLAG_MOUNTPOINT, __ATOMIC_RELEASE); write_sequnlock(&mount_lock); - pr_info("mounted %p on %s\n", mnt, target); return 0; } @@ -316,6 +316,101 @@ int sys_mount(const char *usource, const char *utarget, const char *ufilesystemt 
return ret; } +/* HACK */ +#define LOOKUP_NOFOLLOW (1 << 0) +#define LOOKUP_FAIL_IF_LINK (1 << 1) +#define LOOKUP_MUST_BE_DIR (1 << 2) +#define LOOKUP_INTERNAL_TRAILING_SLASH (1 << 3) +#define LOOKUP_EMPTY_PATH (1 << 4) +#define LOOKUP_DONT_DO_LAST_NAME (1 << 5) +#define LOOKUP_INTERNAL_SAW_LAST_NAME (1U << 31) + +static bool attempt_disconnect(struct mount *mount) +{ + bool ok = false; + write_seqlock(&mount_lock); + /* No one can grab a reference to a mount while we hold mount_lock. As such, checking the refs + * here is mostly safe. Note that we can spuriously see a ref-up here, but that's not _really_ a + * problem. We expect a mnt_count of 1 for the struct path we hold. */ + if (mount->mnt_count == 1) + { + struct dentry *mp = mount->mnt_point; + list_remove(&mount->mnt_mp_node); + list_remove(&mount->mnt_node); + ok = true; + + /* Check if we have nothing mounted at mp anymore. If so, unset DENTRY_FLAG_MOUNTPOINT. + * There's no race because MOUNTPOINT is only set while holding mount_lock in write mode. */ + if (!mnt_find_by_mp(mp)) + __atomic_and_fetch(&mp->d_flags, ~DENTRY_FLAG_MOUNTPOINT, __ATOMIC_RELEASE); + } + + write_sequnlock(&mount_lock); + return ok; +} + +static int do_umount_path(struct path *path, int flags) +{ + int err = -EINVAL; + struct mount *mount = path->mount; + + /* Check if the path given is actually a mountpoint */ + if (path->mount->mnt_root != path->dentry) + goto out_put_path; + + err = -EBUSY; + if (!attempt_disconnect(mount)) + goto out_put_path; + + /* Mount was disconnected. No one should hold a reference to one of this mount's dentries after + * this. */ + path_put(path); + + if (mount->mnt_sb->umount) + mount->mnt_sb->umount(mount); + + dentry_shrink_subtree(mount->mnt_root); + dput(mount->mnt_point); + + WARN_ON(mount->mnt_root->d_ref != 1); + + /* Undo our fake d_parent... 
+ */ + mount->mnt_root->d_parent = NULL; + /* Finally, put our root */ + dput(mount->mnt_root); + + /* Now shutdown the superblock */ + sb_shutdown(mount->mnt_sb); + kfree_rcu(mount, mnt_rcu); + return 0; +out_put_path: + path_put(path); + return err; +} + +int sys_umount2(const char *utarget, int flags) +{ + if (!is_root_user()) + return -EPERM; + if (flags & ~UMOUNT_NOFOLLOW) /* validate flags before copying the path so no exit leaks target */ + return -EINVAL; + const char *target = strcpy_from_user(utarget); + if (!target) + return -errno; + + struct path path; + int err = + path_openat(AT_FDCWD, target, + LOOKUP_MUST_BE_DIR | (flags & UMOUNT_NOFOLLOW ? LOOKUP_NOFOLLOW : 0), &path); + if (err < 0) + goto out; + + err = do_umount_path(&path, flags); +out: + free((void *) target); + return err; +} + static __init void mount_init(void) { for (int i = 0; i < MT_HASH_SIZE; i++) diff --git a/kernel/kernel/fs/namei.cpp b/kernel/kernel/fs/namei.cpp index b008966f0..322866267 100644 --- a/kernel/kernel/fs/namei.cpp +++ b/kernel/kernel/fs/namei.cpp @@ -1079,6 +1079,8 @@ int unlink_vfs(const char *path, int flags, int dirfd) spin_lock(&dentry->d_lock); dentry_do_unlink(child); spin_unlock(&dentry->d_lock); + if (dentry_is_dir(child)) + dentry_shrink_subtree(child); } out2: diff --git a/kernel/kernel/fs/superblock.cpp b/kernel/kernel/fs/superblock.cpp index adcfdb874..a33a8bf89 100644 --- a/kernel/kernel/fs/superblock.cpp +++ b/kernel/kernel/fs/superblock.cpp @@ -26,8 +26,10 @@ void superblock_init(struct superblock *sb) sb->s_shrinker.flags = SHRINKER_NEEDS_IO; sb->s_shrinker.scan_objects = sb_scan_objects; sb->s_shrinker.shrink_objects = sb_shrink_objects; - shrinker_register(&sb->s_shrinker); + + sb->umount = nullptr; + sb->shutdown = sb_generic_shutdown; } int sb_read_bio(struct superblock *sb, struct page_iov *vec, size_t nr_vecs, size_t block_number) @@ -94,3 +96,57 @@ static int sb_shrink_objects(struct shrinker *s, struct shrink_control *ctl) shrink_list(&sdata); return 0; } + +void inode_release(struct inode *); + +static void 
sb_reap_inodes(struct superblock *sb) +{ + /* TODO: All of this code really is super suspicious. The hope is that these inodes are so dead + * and buried that no one else looks at this. */ + DEFINE_LIST(reap_list); + spin_lock(&sb->s_ilock); + + /* Let's be careful, the fs might be using the other inodes for stuff. */ + list_for_every_safe (&sb->s_inodes) + { + struct inode *ino = container_of(l, inode, i_sb_list_node); + spin_lock(&ino->i_lock); + if (ino->i_refc != 0) + { + spin_unlock(&ino->i_lock); + continue; + } + + ino->i_flags |= I_FREEING; + list_remove(&ino->i_sb_list_node); + list_add_tail(&ino->i_sb_list_node, &reap_list); + + spin_unlock(&ino->i_lock); + } + + spin_unlock(&sb->s_ilock); + + list_for_every_safe (&reap_list) + { + struct inode *ino = container_of(l, inode, i_sb_list_node); + inode_release(ino); + } +} + +int sb_generic_shutdown(struct superblock *sb) +{ + sb_reap_inodes(sb); + return 0; +} + +void sb_shutdown(struct superblock *sb) +{ + sb->shutdown(sb); + if (sb->s_bdev) + bdev_release(sb->s_bdev); + + shrinker_unregister(&sb->s_shrinker); + WARN_ON(!list_is_empty(&sb->s_inodes)); + WARN_ON(sb->s_ref != 1); + kfree_rcu(sb, s_rcu); +} diff --git a/kernel/kernel/fs/tmpfs.cpp b/kernel/kernel/fs/tmpfs.cpp index 389ec4fa5..cc2486ed5 100644 --- a/kernel/kernel/fs/tmpfs.cpp +++ b/kernel/kernel/fs/tmpfs.cpp @@ -31,9 +31,6 @@ // TODO: Parts of this should definitely be separated as they're generic enough // for every pseudo filesystem we might want to stick in Onyx -static DECLARE_MUTEX(tmpfs_list_lock); -static struct list_head filesystems = LIST_HEAD_INIT(filesystems); - atomic tmpfs_superblock::curr_minor_number{1}; tmpfs_inode *tmpfs_create_inode(mode_t mode, struct dentry *dir, dev_t rdev = 0) @@ -370,13 +367,10 @@ tmpfs_inode *tmpfs_superblock::create_inode(mode_t mode, dev_t rdev) return inode; } -static void tmpfs_append(tmpfs_superblock *fs) +static int tmpfs_umount(struct mount *mnt) { - mutex_lock(&tmpfs_list_lock); - - 
list_add_tail(&fs->fs_list_node, &filesystems); - - mutex_unlock(&tmpfs_list_lock); + dentry_unref_subtree(mnt->mnt_root); + return 0; } tmpfs_superblock *tmpfs_create_sb() @@ -384,8 +378,7 @@ tmpfs_superblock *tmpfs_create_sb() tmpfs_superblock *new_fs = new tmpfs_superblock{}; if (!new_fs) return nullptr; - - tmpfs_append(new_fs); + new_fs->umount = tmpfs_umount; return new_fs; } @@ -421,7 +414,9 @@ struct superblock *tmpfs_mount(struct vfs_mount_info *info) return (struct superblock *) ERR_PTR(-ENOMEM); } + node->i_nlink = 2; d_positiveize(info->root_dir, node); + dget(info->root_dir); return new_sb; } diff --git a/kernel/kernel/fs/writeback.cpp b/kernel/kernel/fs/writeback.cpp index f4ba83cd2..ad683b8cd 100644 --- a/kernel/kernel/fs/writeback.cpp +++ b/kernel/kernel/fs/writeback.cpp @@ -231,42 +231,7 @@ void flush_do_sync() } } -enum d_walk_ret -{ - D_WALK_CONTINUE, - D_WALK_QUIT, - D_WALK_NORETRY, - D_WALK_SKIP, - __D_WALK_RESTART -}; - -void d_walk(struct dentry *parent, void *data, - enum d_walk_ret (*enter)(void *data, struct dentry *)); - -void kasan_check_memory(unsigned long addr, size_t size, bool write); - -static enum d_walk_ret enter(void *data, struct dentry *dentry) -{ - kasan_check_memory((unsigned long) dentry, sizeof(struct dentry), false); - pr_info("dentry %s refs %lx\n", dentry->d_name, dentry->d_ref); - (*((int *) data))++; - return D_WALK_CONTINUE; -} - -void dentry_shrink_subtree(struct dentry *dentry); - void sys_sync() { flush_do_sync(); - struct path p = get_filesystem_root(); - int dentries = 0; - d_walk(p.dentry, &dentries, enter); - pr_info("seen %d dentries\n", dentries); - DCHECK(!sched_is_preemption_disabled()); - dentry_shrink_subtree(p.dentry); - DCHECK(!sched_is_preemption_disabled()); - dentries = 0; - d_walk(p.dentry, &dentries, enter); - pr_info("seen %d dentries\n", dentries); - DCHECK(!sched_is_preemption_disabled()); } diff --git a/musl b/musl index ff0d5bb86..aac18fa53 160000 --- a/musl +++ b/musl @@ -1 +1 @@ -Subproject 
commit ff0d5bb86c4ef55eb37e1196e96dc5051a868e31 +Subproject commit aac18fa5305d6c0642110d86e032a7d192fc354c