From 6a4076219f9c8a17a573c2fdccac4ca9b7b7fa7d Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Sun, 4 Aug 2024 23:38:55 +0100 Subject: [PATCH] dentry: Add proper shrinking, LRU Add proper shrinking, LRU, with some dentry fixes sprinkled around. Shrinkers aren't great atm, but they Work(tm) ish. Signed-off-by: Pedro Falcato --- kernel/include/onyx/dentry.h | 26 ++- kernel/include/onyx/list.h | 14 ++ kernel/include/onyx/lru.h | 51 +++++ kernel/include/onyx/mm/shrinker.h | 4 + kernel/include/onyx/rcupdate.h | 4 + kernel/include/onyx/seqlock.h | 16 +- kernel/include/onyx/superblock.h | 4 + kernel/kernel/Makefile | 2 +- kernel/kernel/fs/anon_inode.cpp | 3 +- kernel/kernel/fs/dentry.cpp | 348 +++++++++++++++++++++++++----- kernel/kernel/fs/dev.cpp | 2 +- kernel/kernel/fs/mount.c | 8 +- kernel/kernel/fs/pipe.cpp | 1 + kernel/kernel/fs/superblock.cpp | 48 ++++- kernel/kernel/fs/writeback.cpp | 35 +++ kernel/kernel/ktrace.cpp | 4 +- kernel/kernel/lru.c | 50 +++++ kernel/kernel/mm/anon.cpp | 1 + kernel/kernel/mm/reclaim.c | 1 + kernel/kernel/net/socket.cpp | 1 + kernel/kernel/proc_event.cpp | 2 + 21 files changed, 564 insertions(+), 61 deletions(-) create mode 100644 kernel/include/onyx/lru.h create mode 100644 kernel/kernel/lru.c diff --git a/kernel/include/onyx/dentry.h b/kernel/include/onyx/dentry.h index d42635b49..87b4531a1 100644 --- a/kernel/include/onyx/dentry.h +++ b/kernel/include/onyx/dentry.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,9 @@ __BEGIN_CDECLS #define DENTRY_FLAG_FAILED (1 << 3) #define DENTRY_FLAG_NEGATIVE (1 << 4) #define DENTRY_FLAG_HASHED (1 << 5) +#define DENTRY_FLAG_SHRINK (1 << 6) +#define DENTRY_FLAG_LRU (1 << 7) +#define DENTRY_FLAG_REFERENCED (1 << 8) struct dentry_operations { @@ -56,7 +60,11 @@ struct dentry struct list_head d_cache_node; struct list_head d_children_head; const struct dentry_operations *d_ops; - struct rcu_head d_rcu; + union { + struct rcu_head d_rcu; + struct list_head d_lru; + 
}; + unsigned long d_private; #ifdef __cplusplus atomic d_flags; @@ -106,6 +114,22 @@ __always_inline bool dentry_is_symlink(const struct dentry *d) return S_ISLNK(d->d_inode->i_mode); } +struct dcache_scan_result +{ + unsigned long scanned_bytes; + unsigned long scanned_objs; +}; + +struct dcache_shrink_result +{ + unsigned long to_shrink_bytes; + unsigned long to_shrink_objs; + struct list_head reclaim_list; +}; + +enum lru_walk_ret scan_dcache_lru_one(struct lru_list *lru, struct list_head *object, void *data); +enum lru_walk_ret shrink_dcache_lru_one(struct lru_list *lru, struct list_head *object, void *data); + __END_CDECLS #ifdef __cplusplus diff --git a/kernel/include/onyx/list.h b/kernel/include/onyx/list.h index 6513dcc72..0308dba60 100644 --- a/kernel/include/onyx/list.h +++ b/kernel/include/onyx/list.h @@ -204,6 +204,20 @@ static inline void list_splice_tail(struct list_head *src, struct list_head *dst list_splice_internal(src, dst->prev, dst); } +static inline int list_is_head(const struct list_head *list, const struct list_head *head) +{ + return list == head; +} + +#define list_entry(ptr, type, member) container_of(ptr, type, member) +#define list_next_entry(pos, member) list_entry((pos)->member.next, __typeof__(*(pos)), member) +#define list_prepare_entry(pos, head, member) ((pos) ?: list_entry(head, __typeof__(*pos), member)) +#define list_entry_is_head(pos, head, member) list_is_head(&pos->member, (head)) + +#define list_for_each_entry_continue(pos, head, member) \ + for (pos = list_next_entry(pos, member); !list_entry_is_head(pos, head, member); \ + pos = list_next_entry(pos, member)) + /* * TODO: This code is weird, inconsistent, and needs to be rewritten * and re-thought. 
diff --git a/kernel/include/onyx/lru.h b/kernel/include/onyx/lru.h new file mode 100644 index 000000000..b33522dda --- /dev/null +++ b/kernel/include/onyx/lru.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024 Pedro Falcato + * This file is part of Onyx, and is released under the terms of the GPLv2 License + * check LICENSE at the root directory for more information + * + * SPDX-License-Identifier: GPL-2.0-only + */ + +#ifndef _ONYX_LRU_H +#define _ONYX_LRU_H + +#include +#include + +/** + * @brief Simple LRU list implementation + * + */ +struct lru_list +{ + struct list_head obj_list; + struct spinlock lock; +}; + +__BEGIN_CDECLS + +static inline void lru_list_init(struct lru_list *lru) +{ + INIT_LIST_HEAD(&lru->obj_list); + spinlock_init(&lru->lock); +} + +void lru_list_add(struct lru_list *lru, struct list_head *object); +void lru_list_remove(struct lru_list *lru, struct list_head *object); + +enum lru_walk_ret +{ + LRU_WALK_ROTATE = 0, + LRU_WALK_SKIP, + LRU_WALK_STOP, + LRU_WALK_REMOVED, +}; + +void lru_list_walk(struct lru_list *lru, + enum lru_walk_ret (*walk)(struct lru_list *lru, struct list_head *obj, + void *data), + void *data); + +__END_CDECLS + +#endif diff --git a/kernel/include/onyx/mm/shrinker.h b/kernel/include/onyx/mm/shrinker.h index 726246542..122b0a073 100644 --- a/kernel/include/onyx/mm/shrinker.h +++ b/kernel/include/onyx/mm/shrinker.h @@ -10,6 +10,8 @@ #include +__BEGIN_CDECLS + struct shrink_control { /** @@ -41,4 +43,6 @@ struct shrinker void shrinker_register(struct shrinker *shr); void shrinker_unregister(struct shrinker *shr); +__END_CDECLS + #endif diff --git a/kernel/include/onyx/rcupdate.h b/kernel/include/onyx/rcupdate.h index a0d9a413b..12bff54c0 100644 --- a/kernel/include/onyx/rcupdate.h +++ b/kernel/include/onyx/rcupdate.h @@ -26,6 +26,10 @@ void call_rcu(struct rcu_head *head, void (*callback)(struct rcu_head *head)); void synchronize_rcu(); void __kfree_rcu(struct rcu_head *head, unsigned long off); +#ifdef __cplusplus +#define 
_Static_assert(...) static_assert(__VA_ARGS__) /* variadic so the two-argument (expr, "msg") form also works from C++ */ +#endif + #define is_kfree_rcu_off(off) ((off) < 4096) #define kfree_rcu(ptr, head) \ ({ \ diff --git a/kernel/include/onyx/seqlock.h b/kernel/include/onyx/seqlock.h index db9bf1971..6aa97e7d1 100644 --- a/kernel/include/onyx/seqlock.h +++ b/kernel/include/onyx/seqlock.h @@ -31,7 +31,21 @@ static inline unsigned int read_seqbegin(seqlock_t *sl) static inline bool read_seqretry(seqlock_t *sl, unsigned int old) { - return read_seqcount_retry(&sl->seqcount, old); + return !(old & 1) && read_seqcount_retry(&sl->seqcount, old); +} + +static inline void read_seqbegin_or_lock(seqlock_t *sl, unsigned int *seq) +{ + if (unlikely(*seq & 1)) + spin_lock(&sl->lock); + else + *seq = read_seqbegin(sl); +} + +static inline void done_seqretry(seqlock_t *sl, unsigned int seq) +{ + if (unlikely(seq & 1)) + spin_unlock(&sl->lock); } #endif diff --git a/kernel/include/onyx/superblock.h b/kernel/include/onyx/superblock.h index 8194d70bd..95aac6b09 100644 --- a/kernel/include/onyx/superblock.h +++ b/kernel/include/onyx/superblock.h @@ -12,6 +12,8 @@ #include #include +#include +#include #include #include #include @@ -37,6 +39,8 @@ struct superblock dev_t s_devnr; unsigned long s_flags; struct mutex s_rename_lock; + struct lru_list s_dcache_lru; + struct shrinker s_shrinker; }; __BEGIN_CDECLS diff --git a/kernel/kernel/Makefile b/kernel/kernel/Makefile index 2fcbf1ee8..5d70cae79 100644 --- a/kernel/kernel/Makefile +++ b/kernel/kernel/Makefile @@ -5,7 +5,7 @@ kern-y+= arc4random.o binfmt.o compression.o copy.o cppnew.o cpprt.o crc32.o dev smp.o spinlock.o symbol.o tasklet.o time.o timer.o utils.o wait_queue.o \ worker.o cred.o list.o softirq.o cputime.o rlimit.o handle.o ctor.o internal_abi.o ssp.o \ cmdline.o syscall_thunk.o vdso.o sysinfo.o memstream.o perf.o radix.o rcupdate.o iovec_iter.o \ - maple_tree.o bug.o + maple_tree.o bug.o lru.o kern-$(CONFIG_UBSAN)+= ubsan.o diff --git a/kernel/kernel/fs/anon_inode.cpp b/kernel/kernel/fs/anon_inode.cpp 
index f9e65ce7b..fc147917b 100644 --- a/kernel/kernel/fs/anon_inode.cpp +++ b/kernel/kernel/fs/anon_inode.cpp @@ -64,6 +64,7 @@ struct file *anon_inode_open(mode_t file_type, struct file_ops *ops, const char dentry = dentry_create(name, ino, nullptr); if (!dentry) goto err; + dget(dentry); f = inode_to_file(ino); if (!f) @@ -73,7 +74,7 @@ struct file *anon_inode_open(mode_t file_type, struct file_ops *ops, const char return f; err: if (dentry) - dentry_put(dentry); + dput(dentry); if (ino) inode_unref(ino); return nullptr; diff --git a/kernel/kernel/fs/dentry.cpp b/kernel/kernel/fs/dentry.cpp index 2e20c0a43..57f568fa0 100644 --- a/kernel/kernel/fs/dentry.cpp +++ b/kernel/kernel/fs/dentry.cpp @@ -70,7 +70,6 @@ static inline int d_revalidate(struct dentry *dentry, unsigned int flags) } void dentry_remove_from_cache(dentry *dent, dentry *parent); -void dentry_kill_unlocked(dentry *entry); static dentry *d_lookup_internal(dentry *dent, std::string_view name) { @@ -177,7 +176,6 @@ static struct dentry *dentry_add_to_cache_careful(dentry *dent, dentry *parent) list_add_tail_rcu(&dent->d_cache_node, dentry_ht.get_hashtable(index)); dent->d_flags |= DENTRY_FLAG_HASHED; - dget(dent); spin_unlock(&dentry_ht_locks[index]); return dent; } @@ -262,6 +260,42 @@ static inline void d_unfreeze_refs(struct dentry *dentry) __atomic_and_fetch(&dentry->d_ref, ~D_REF_LOCKED, __ATOMIC_RELEASE); } +static inline void d_add_lru(struct dentry *dentry) +{ + DCHECK(spin_lock_held(&dentry->d_lock)); + DCHECK(!(dentry->d_flags & (DENTRY_FLAG_LRU | DENTRY_FLAG_SHRINK))); + struct superblock *sb; + /* Sniff out the sb from our inode, or our parent's inode. This _should_ be safe, our parent's + * inode can't go away magically. 
*/ + + if (dentry->d_inode) + sb = dentry->d_inode->i_sb; + else + sb = dentry->d_parent->d_inode->i_sb; + DCHECK(sb != nullptr); + + dentry->d_flags |= DENTRY_FLAG_LRU; + lru_list_add(&sb->s_dcache_lru, &dentry->d_lru); +} + +static inline void d_remove_lru(struct dentry *dentry) +{ + DCHECK((dentry->d_flags & (DENTRY_FLAG_LRU | DENTRY_FLAG_SHRINK)) == DENTRY_FLAG_LRU); + DCHECK(spin_lock_held(&dentry->d_lock)); + + struct superblock *sb; + /* Sniff out the sb from our inode, or our parent's inode. This _should_ be safe, our parent's + * inode can't go away magically. */ + + if (dentry->d_inode) + sb = dentry->d_inode->i_sb; + else + sb = dentry->d_parent->d_inode->i_sb; + + lru_list_remove(&sb->s_dcache_lru, &dentry->d_lru); + dentry->d_flags &= ~DENTRY_FLAG_LRU; +} + static bool d_should_retain(struct dentry *dentry, bool locked) { unsigned long flags; @@ -271,6 +305,16 @@ static bool d_should_retain(struct dentry *dentry, bool locked) if (!(flags & DENTRY_FLAG_HASHED)) return false; + if (!(flags & DENTRY_FLAG_LRU)) + { + /* If not in an LRU, try to add it (if locked) */ + if (!locked) + return false; + d_add_lru(dentry); + } + else if (!(flags & DENTRY_FLAG_REFERENCED)) + dentry->d_flags |= DENTRY_FLAG_REFERENCED; + return true; } @@ -337,6 +381,9 @@ static struct dentry *d_destroy(struct dentry *dentry) if (dentry->d_flags & DENTRY_FLAG_HASHED) dentry_remove_from_cache(dentry, dentry->d_parent); + if ((dentry->d_flags & (DENTRY_FLAG_LRU | DENTRY_FLAG_SHRINK)) == DENTRY_FLAG_LRU) + d_remove_lru(dentry); + if (dentry->d_inode) { /* Lets take this moment to gather the inode, release the lock and _then_ put the inode */ @@ -728,53 +775,7 @@ char *dentry_to_file_name(struct dentry *dentry) return nullptr; } -static void dentry_shrink_subtree(struct dentry *dentry) -{ -#if 0 - /* Keep shrinking this subtree while we can find able children. Dentries with reference counts - * != 1 are skipped. 
*/ - DEFINE_LIST(queue); - for (; dentry != nullptr; - dentry = list_is_empty(&queue) - ? nullptr - : container_of(list_first_element(&queue), struct dentry, d_parent_dir_node)) - { - if (!dentry_is_dir(dentry)) - continue; - list_for_every_safe (&dentry->d_children_head) - { - if (dentry->d_flags & DENTRY_FLAG_HASHED) - { - dentry_remove_from_cache(dentry, dentry->d_parent); - dentry_put(dentry); - } - } - } -#endif - /* TODO. For now, axe entries in a single level and bugger off */ - list_for_every_safe (&dentry->d_children_head) - { - struct dentry *child = container_of(l, struct dentry, d_parent_dir_node); - spin_lock(&child->d_lock); - unsigned long predicted_refs = 1; - - /* This is racey, we don't have lockref... TODO? */ - if (child->d_ref != predicted_refs) - { - spin_unlock(&child->d_lock); - continue; - } - - if (child->d_flags & DENTRY_FLAG_HASHED) - dentry_remove_from_cache(child, dentry); - - list_remove(&child->d_parent_dir_node); - child->d_parent = nullptr; - spin_unlock(&child->d_lock); - dput(dentry); - dput(child); - } -} +void dentry_shrink_subtree(struct dentry *dentry); void dentry_do_unlink(dentry *entry) { @@ -1063,3 +1064,252 @@ void d_positiveize(struct dentry *dentry, struct inode *inode) dentry->d_inode = inode; dentry->d_flags.and_fetch(~DENTRY_FLAG_NEGATIVE, mem_order::release); } + +enum d_walk_ret +{ + D_WALK_CONTINUE, + D_WALK_QUIT, + D_WALK_NORETRY, + D_WALK_SKIP, + __D_WALK_RESTART +}; + +struct d_walk_state +{ + struct dentry *root; + struct dentry *parent; + struct dentry *dentry; + unsigned int seq; + enum d_walk_ret stop; + bool retry; +}; + +static void d_ascend(struct d_walk_state *state) +{ + struct dentry *parent = state->parent; + struct dentry *parent2 = parent->d_parent; + + if (state->root == parent) + { + state->stop = D_WALK_QUIT; + return; + } + + rcu_read_lock(); + spin_unlock(&parent->d_lock); + spin_lock(&parent2->d_lock); + /* Let's be careful going up... 
If we had a rename at the same time, restart the whole + * process *with rename_lock held* */ + if (read_seqretry(&rename_lock, state->seq)) + { + state->stop = D_WALK_QUIT; + state->parent = parent2; /* parent2 stays locked on quit: d_walk's exit path unlocks state->parent */ + if (state->retry) + { + spin_unlock(&parent2->d_lock); + state->seq = 1; /* an odd seq means rename_lock is held, so only set it when we take it */ + read_seqbegin_or_lock(&rename_lock, &state->seq); + state->stop = __D_WALK_RESTART; + } + rcu_read_unlock(); + return; + } + + state->parent = parent2; + state->dentry = parent; + rcu_read_unlock(); +} + +void d_walk(struct dentry *parent, void *data, + enum d_walk_ret (*enter)(void *data, struct dentry *)) +{ + struct d_walk_state state; + enum d_walk_ret ret; + state.seq = 0; + state.retry = true; + read_seqbegin_or_lock(&rename_lock, &state.seq); +restart: + state.root = parent; + state.parent = parent; + state.dentry = NULL; + state.stop = D_WALK_CONTINUE; + spin_lock(&state.parent->d_lock); + + ret = enter(data, state.parent); + switch (ret) + { + case D_WALK_CONTINUE: + break; + case D_WALK_NORETRY: + state.retry = false; + break; + case D_WALK_SKIP: + case D_WALK_QUIT: + goto out; + case __D_WALK_RESTART: + spin_unlock(&state.parent->d_lock); + goto restart; + } + + while (state.stop != D_WALK_QUIT) + { + repeat: + state.dentry = + list_prepare_entry(state.dentry, &state.parent->d_children_head, d_parent_dir_node); + + list_for_each_entry_continue(state.dentry, &state.parent->d_children_head, + d_parent_dir_node) + { + spin_lock(&state.dentry->d_lock); + ret = enter(data, state.dentry); + switch (ret) + { + case D_WALK_CONTINUE: + break; + case D_WALK_NORETRY: + state.retry = false; + break; + case D_WALK_SKIP: + spin_unlock(&state.dentry->d_lock); + continue; + case D_WALK_QUIT: + spin_unlock(&state.dentry->d_lock); + goto out; + case __D_WALK_RESTART: + spin_unlock(&state.dentry->d_lock); + goto restart; + } + + if (!list_is_empty(&state.dentry->d_children_head)) + { + spin_unlock(&state.parent->d_lock); + state.parent = state.dentry; + state.dentry = NULL; + goto repeat; + } + + 
spin_unlock(&state.dentry->d_lock); + } + + d_ascend(&state); + if (state.stop == __D_WALK_RESTART) + goto restart; + } + +out: + spin_unlock(&state.parent->d_lock); + done_seqretry(&rename_lock, state.seq); +} + +struct shrink_data +{ + struct list_head shrink_list; +}; + +static d_walk_ret find_shrink(void *data, struct dentry *dentry) +{ + struct shrink_data *s = (struct shrink_data *) data; + if (dentry->d_ref == 0) + { + if (!(dentry->d_flags & DENTRY_FLAG_SHRINK)) + { + if (dentry->d_flags & DENTRY_FLAG_LRU) + d_remove_lru(dentry); + + list_add_tail(&dentry->d_lru, &s->shrink_list); + dentry->d_flags |= DENTRY_FLAG_SHRINK | DENTRY_FLAG_LRU; + } + } + + return D_WALK_CONTINUE; +} + +static void kill_one(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + d_freeze_refs(dentry); + + if (d_refs(dentry) != 0) + { + d_unfreeze_refs(dentry); + spin_unlock(&dentry->d_lock); + return; + } + + while ((dentry = d_destroy(dentry))) + ; +} + +void shrink_list(struct shrink_data *s) +{ + list_for_every_safe (&s->shrink_list) + { + struct dentry *dentry = container_of(l, struct dentry, d_lru); + list_remove(&dentry->d_lru); + kill_one(dentry); + } +} + +void dentry_shrink_subtree(struct dentry *dentry) +{ + struct shrink_data data; + INIT_LIST_HEAD(&data.shrink_list); + for (;;) + { + d_walk(dentry, &data, find_shrink); + if (list_is_empty(&data.shrink_list)) + break; + shrink_list(&data); + } +} + +enum lru_walk_ret scan_dcache_lru_one(struct lru_list *lru, struct list_head *object, void *data) +{ + struct dentry *dentry = container_of(object, struct dentry, d_lru); + struct dcache_scan_result *scan_res = (struct dcache_scan_result *) data; + if (spin_try_lock(&dentry->d_lock)) + return LRU_WALK_SKIP; + d_freeze_refs(dentry); + + if (d_refs(dentry) == 0) + { + scan_res->scanned_bytes += sizeof(struct dentry) + dentry->d_name_length; + scan_res->scanned_objs++; + } + + d_unfreeze_refs(dentry); + spin_unlock(&dentry->d_lock); + return LRU_WALK_SKIP; +} + +enum 
lru_walk_ret shrink_dcache_lru_one(struct lru_list *lru, struct list_head *object, void *data) +{ + struct dentry *dentry = container_of(object, struct dentry, d_lru); + struct dcache_shrink_result *shrink_res = (struct dcache_shrink_result *) data; + if (!shrink_res->to_shrink_objs) + return LRU_WALK_STOP; + + if (spin_try_lock(&dentry->d_lock)) + return LRU_WALK_SKIP; + + if (dentry->d_flags & DENTRY_FLAG_REFERENCED) + { + dentry->d_flags &= ~DENTRY_FLAG_REFERENCED; + spin_unlock(&dentry->d_lock); + return LRU_WALK_ROTATE; + } + + /* No need to freeze refs, shrink_list will take care of the final check */ + if (d_refs(dentry) > 0) + { + spin_unlock(&dentry->d_lock); + return LRU_WALK_SKIP; + } + + dentry->d_flags |= DENTRY_FLAG_SHRINK; + list_remove(&dentry->d_lru); + list_add_tail(&dentry->d_lru, &shrink_res->reclaim_list); + spin_unlock(&dentry->d_lock); + shrink_res->to_shrink_objs--; + return LRU_WALK_REMOVED; +} diff --git a/kernel/kernel/fs/dev.cpp b/kernel/kernel/fs/dev.cpp index 62d211604..1afa45845 100644 --- a/kernel/kernel/fs/dev.cpp +++ b/kernel/kernel/fs/dev.cpp @@ -674,7 +674,7 @@ struct superblock *devfs_mount(struct vfs_mount_info *info) dev_unregister_dev(ex.value(), true); return nullptr; } - + superblock_init(new_fs.get()); new_fs->s_devnr = ex.value()->dev(); new_fs->s_flags |= SB_FLAG_NODIRTY; diff --git a/kernel/kernel/fs/mount.c b/kernel/kernel/fs/mount.c index 11a1e3b31..f87cc7453 100644 --- a/kernel/kernel/fs/mount.c +++ b/kernel/kernel/fs/mount.c @@ -61,7 +61,7 @@ static struct mount *mnt_find_by_mp(struct dentry *mountpoint) { /* rcu_read_lock held */ unsigned int bucket = fnv_hash(&mountpoint, sizeof(void *)) & (MT_HASH_SIZE - 1); - list_for_every_rcu(&mp_hashtable[bucket]) + list_for_every_rcu (&mp_hashtable[bucket]) { struct mount *mnt = container_of(l, struct mount, mnt_mp_node); if (mnt->mnt_point == mountpoint) @@ -179,7 +179,7 @@ static int mnt_commit(struct mount *mnt, const char *target) } mnt->mnt_point = filp->f_dentry; - 
dentry_get(mnt->mnt_point); + dget(mnt->mnt_point); /* Another hack... */ mnt->mnt_root->d_parent = mnt->mnt_point; /* TODO: This isn't quite safe when we get proper mnt putting and umount */ @@ -202,7 +202,7 @@ static int mnt_commit(struct mount *mnt, const char *target) list_add_tail(&mnt->mnt_node, &mount_hashtable[mnt_hashbucket(mnt)]); /* Ref up for the mount root */ - dentry_get(mnt->mnt_root); + dget(mnt->mnt_root); if (mnt->mnt_point) __atomic_or_fetch(&mnt->mnt_point->d_flags, DENTRY_FLAG_MOUNTPOINT, __ATOMIC_RELEASE); @@ -263,7 +263,7 @@ int do_mount(const char *source, const char *target, const char *fstype, unsigne mnt = NULL; out3: if (root_dentry) - dentry_put(root_dentry); + dput(root_dentry); out2: if (mnt) kfree(mnt); diff --git a/kernel/kernel/fs/pipe.cpp b/kernel/kernel/fs/pipe.cpp index 2eca2c4ad..6b17e460c 100644 --- a/kernel/kernel/fs/pipe.cpp +++ b/kernel/kernel/fs/pipe.cpp @@ -733,6 +733,7 @@ static int pipe_create(struct file **pipe_readable, struct file **pipe_writeable // Get new refs for the second fd dget(anon_pipe_dent); + dget(anon_pipe_dent); inode_ref(anon_pipe_ino); *pipe_readable = rd; diff --git a/kernel/kernel/fs/superblock.cpp b/kernel/kernel/fs/superblock.cpp index 534e7ab7e..adcfdb874 100644 --- a/kernel/kernel/fs/superblock.cpp +++ b/kernel/kernel/fs/superblock.cpp @@ -1,12 +1,19 @@ /* - * Copyright (c) 2017 Pedro Falcato + * Copyright (c) 2017 - 2024 Pedro Falcato * This file is part of Onyx, and is released under the terms of the GPLv2 License * check LICENSE at the root directory for more information + * + * SPDX-License-Identifier: GPL-2.0-only */ + #include +#include #include #include +static int sb_scan_objects(struct shrinker *s, struct shrink_control *ctl); +static int sb_shrink_objects(struct shrinker *s, struct shrink_control *ctl); + void superblock_init(struct superblock *sb) { INIT_LIST_HEAD(&sb->s_inodes); @@ -14,6 +21,13 @@ void superblock_init(struct superblock *sb) spinlock_init(&sb->s_ilock); sb->s_flags = 
0; mutex_init(&sb->s_rename_lock); + lru_list_init(&sb->s_dcache_lru); + sb->s_shrinker.name = "superblock"; + sb->s_shrinker.flags = SHRINKER_NEEDS_IO; + sb->s_shrinker.scan_objects = sb_scan_objects; + sb->s_shrinker.shrink_objects = sb_shrink_objects; + + shrinker_register(&sb->s_shrinker); } int sb_read_bio(struct superblock *sb, struct page_iov *vec, size_t nr_vecs, size_t block_number) @@ -48,3 +62,35 @@ int sb_write_bio(struct superblock *sb, struct page_iov *vec, size_t nr_vecs, si bio_put(r); return st; } + +#define shrinker_to_sb(s) container_of(s, struct superblock, s_shrinker) + +static int sb_scan_objects(struct shrinker *s, struct shrink_control *ctl) +{ + struct superblock *sb = shrinker_to_sb(s); + struct dcache_scan_result res = {}; + lru_list_walk(&sb->s_dcache_lru, scan_dcache_lru_one, &res); + ctl->target_objs = res.scanned_objs; /* an object count, not bytes: sb_shrink_objects uses it as to_shrink_objs */ + return 0; +} + +struct shrink_data +{ + struct list_head shrink_list; +}; + +void shrink_list(struct shrink_data *s); + +static int sb_shrink_objects(struct shrinker *s, struct shrink_control *ctl) +{ + struct superblock *sb = shrinker_to_sb(s); + struct dcache_shrink_result res = {0, ctl->target_objs}; + INIT_LIST_HEAD(&res.reclaim_list); + lru_list_walk(&sb->s_dcache_lru, shrink_dcache_lru_one, &res); + ctl->nr_freed = ctl->target_objs - res.to_shrink_objs; + struct shrink_data sdata; + INIT_LIST_HEAD(&sdata.shrink_list); + list_move(&sdata.shrink_list, &res.reclaim_list); + shrink_list(&sdata); + return 0; +} diff --git a/kernel/kernel/fs/writeback.cpp b/kernel/kernel/fs/writeback.cpp index ad683b8cd..f4ba83cd2 100644 --- a/kernel/kernel/fs/writeback.cpp +++ b/kernel/kernel/fs/writeback.cpp @@ -231,7 +231,42 @@ void flush_do_sync() } } +enum d_walk_ret +{ + D_WALK_CONTINUE, + D_WALK_QUIT, + D_WALK_NORETRY, + D_WALK_SKIP, + __D_WALK_RESTART +}; + +void d_walk(struct dentry *parent, void *data, + enum d_walk_ret (*enter)(void *data, struct dentry *)); + +void kasan_check_memory(unsigned long addr, size_t size, bool 
write); + +static enum d_walk_ret enter(void *data, struct dentry *dentry) +{ + kasan_check_memory((unsigned long) dentry, sizeof(struct dentry), false); + pr_info("dentry %s refs %lx\n", dentry->d_name, dentry->d_ref); + (*((int *) data))++; + return D_WALK_CONTINUE; +} + +void dentry_shrink_subtree(struct dentry *dentry); + void sys_sync() { flush_do_sync(); + struct path p = get_filesystem_root(); + int dentries = 0; + d_walk(p.dentry, &dentries, enter); + pr_info("seen %d dentries\n", dentries); + DCHECK(!sched_is_preemption_disabled()); + dentry_shrink_subtree(p.dentry); + DCHECK(!sched_is_preemption_disabled()); + dentries = 0; + d_walk(p.dentry, &dentries, enter); + pr_info("seen %d dentries\n", dentries); + DCHECK(!sched_is_preemption_disabled()); } diff --git a/kernel/kernel/ktrace.cpp b/kernel/kernel/ktrace.cpp index c2970a582..3e28d395a 100644 --- a/kernel/kernel/ktrace.cpp +++ b/kernel/kernel/ktrace.cpp @@ -450,7 +450,7 @@ static int buffd_create(struct file **pfd, u32 cpu_nr) wr->f_dentry = anon_dent; // Get new refs for the second fd - dentry_get(anon_dent); + dget(anon_dent); inode_ref(anon_ino); *pfd = rd; @@ -458,7 +458,7 @@ static int buffd_create(struct file **pfd, u32 cpu_nr) anon_ino->i_helper = (void *) (unsigned long) cpu_nr; return 0; err2: - dentry_put(anon_dent); + dput(anon_dent); err0: if (anon_ino) close_vfs(anon_ino); diff --git a/kernel/kernel/lru.c b/kernel/kernel/lru.c new file mode 100644 index 000000000..f9d6d800c --- /dev/null +++ b/kernel/kernel/lru.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024 Pedro Falcato + * This file is part of Onyx, and is released under the terms of the GPLv2 License + * check LICENSE at the root directory for more information + * + * SPDX-License-Identifier: GPL-2.0-only + */ + +#include + +void lru_list_add(struct lru_list *lru, struct list_head *object) +{ + spin_lock(&lru->lock); + list_add_tail(object, &lru->obj_list); + spin_unlock(&lru->lock); +} + +void lru_list_remove(struct lru_list *lru, struct 
list_head *object) +{ + spin_lock(&lru->lock); + list_remove(object); + spin_unlock(&lru->lock); +} + +void lru_list_walk(struct lru_list *lru, + enum lru_walk_ret (*walk)(struct lru_list *lru, struct list_head *obj, + void *data), + void *data) +{ + spin_lock(&lru->lock); + list_for_every_safe (&lru->obj_list) + { + enum lru_walk_ret ret = walk(lru, l, data); + switch (ret) + { + case LRU_WALK_ROTATE: + list_remove(l); + list_add_tail(l, &lru->obj_list); + break; + case LRU_WALK_SKIP: + case LRU_WALK_REMOVED: + continue; + case LRU_WALK_STOP: + goto out; + } + } + +out: + spin_unlock(&lru->lock); +} diff --git a/kernel/kernel/mm/anon.cpp b/kernel/kernel/mm/anon.cpp index ea4326129..47f217e3b 100644 --- a/kernel/kernel/mm/anon.cpp +++ b/kernel/kernel/mm/anon.cpp @@ -116,6 +116,7 @@ struct file *anon_get_shmem(size_t len) dentry = dentry_create("[anon_shmem]", ino, nullptr); if (!dentry) goto err; + dget(dentry); f = inode_to_file(ino); if (!f) diff --git a/kernel/kernel/mm/reclaim.c b/kernel/kernel/mm/reclaim.c index 35922faef..b988a0d62 100644 --- a/kernel/kernel/mm/reclaim.c +++ b/kernel/kernel/mm/reclaim.c @@ -80,6 +80,7 @@ static void shrink_objects(struct reclaim_data *data, unsigned long free_page_ta if (st == SHRINK_STOP) continue; + pr_info("shrinker %s freed %lu objects\n", shrinker->name, control.nr_freed); needed_bytes -= control.nr_freed * average_object_size; } diff --git a/kernel/kernel/net/socket.cpp b/kernel/kernel/net/socket.cpp index 50ab181fe..af4e02a80 100644 --- a/kernel/kernel/net/socket.cpp +++ b/kernel/kernel/net/socket.cpp @@ -907,6 +907,7 @@ file *socket_inode_to_file(inode *ino) return nullptr; } + dget(dent); f->f_dentry = dent; return f; } diff --git a/kernel/kernel/proc_event.cpp b/kernel/kernel/proc_event.cpp index 7cfcd2d3f..b4cb49344 100644 --- a/kernel/kernel/proc_event.cpp +++ b/kernel/kernel/proc_event.cpp @@ -168,6 +168,8 @@ int sys_proc_event_attach(pid_t pid, unsigned long flags) return -ENOMEM; } + dget(d); + struct file *f = 
inode_to_file(ino); if (!f) {