diff --git a/.bcachefs_revision b/.bcachefs_revision index 2a12d1f7f..afaeb7c5f 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -2be6fc9b111cad37da8838e39c66244767bc7d0a +47d1e7a86242b814ec9ff4f426d521832515f115 diff --git a/include/linux/console.h b/include/linux/console.h index d01aa9a2d..31aaa087e 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -1,7 +1,8 @@ #ifndef _LINUX_CONSOLE_H_ #define _LINUX_CONSOLE_H_ -#define console_lock() -#define console_unlock() +#define console_lock() do {} while (0) +#define console_trylock() true +#define console_unlock() do {} while (0) #endif /* _LINUX_CONSOLE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index fedb1c3c2..99d6a47af 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -60,6 +60,7 @@ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ #define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. 
See memalloc_nofs_save() */ #define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 9ff822e4f..d9c5a92fa 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -2384,6 +2384,7 @@ void bch2_recalc_capacity(struct bch_fs *c) reserved_sectors = min(reserved_sectors, capacity); + c->reserved = reserved_sectors; c->capacity = capacity - reserved_sectors; c->bucket_size_max = bucket_size_max; diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 49a5789d7..cabf866c7 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -626,14 +626,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, if (usage->d[BCH_DATA_need_gc_gens].buckets > avail) bch2_gc_gens_async(c); - if (should_invalidate_buckets(ca, *usage)) { + if (should_invalidate_buckets(ca, *usage)) bch2_dev_do_invalidates(ca); - rcu_read_lock(); - struct task_struct *t = rcu_dereference(c->copygc_thread); - if (t) - wake_up_process(t); - rcu_read_unlock(); - } if (!avail) { if (cl && !waiting) { @@ -1595,7 +1589,7 @@ void bch2_fs_allocator_foreground_init(struct bch_fs *c) } } -static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob) +void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob) { struct bch_dev *ca = ob_dev(c, ob); unsigned data_type = ob->data_type; @@ -1712,6 +1706,8 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c) printbuf_tabstops_reset(out); printbuf_tabstop_push(out, 24); + prt_printf(out, "capacity\t%llu\n", c->capacity); + prt_printf(out, "reserved\t%llu\n", c->reserved); prt_printf(out, "hidden\t%llu\n", percpu_u64_get(&c->usage->hidden)); 
prt_printf(out, "btree\t%llu\n", percpu_u64_get(&c->usage->btree)); prt_printf(out, "data\t%llu\n", percpu_u64_get(&c->usage->data)); diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h index a42c9730d..6da9e7e29 100644 --- a/libbcachefs/alloc_foreground.h +++ b/libbcachefs/alloc_foreground.h @@ -222,6 +222,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp void bch2_fs_allocator_foreground_init(struct bch_fs *); +void bch2_open_bucket_to_text(struct printbuf *, struct bch_fs *, struct open_bucket *); void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *); void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *); diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 96217e8d0..3cc02479a 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -762,12 +762,12 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, for (enum btree_id btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) { - unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1; + unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 
0 : 1; struct btree_iter iter; struct btree *b; - if (!((1U << btree) & btree_leaf_mask) && - !((1U << btree) & btree_interior_mask)) + if (!(BIT_ULL(btree) & btree_leaf_mask) && + !(BIT_ULL(btree) & btree_interior_mask)) continue; bch2_trans_begin(trans); @@ -951,8 +951,8 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) while (1) { ret = bch2_get_btree_in_memory_pos(trans, - (1U << BTREE_ID_extents)| - (1U << BTREE_ID_reflink), + BIT_ULL(BTREE_ID_extents)| + BIT_ULL(BTREE_ID_reflink), ~0, start, &end); if (ret) diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 65e46225e..91361a167 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -872,6 +872,7 @@ struct bch_fs { struct bch_devs_mask rw_devs[BCH_DATA_NR]; u64 capacity; /* sectors */ + u64 reserved; /* sectors */ /* * When capacity _decreases_ (due to a disk being removed), we @@ -980,7 +981,7 @@ struct bch_fs { struct bch_fs_rebalance rebalance; /* COPYGC */ - struct task_struct __rcu *copygc_thread; + struct task_struct *copygc_thread; struct write_point copygc_write_point; s64 copygc_wait_at; s64 copygc_wait; diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 1e4c3e309..6cbf2aa6a 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -676,6 +676,9 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in /* root */ do { +retry_root: + bch2_trans_begin(trans); + struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, btree, POS_MIN, 0, bch2_btree_id_root(c, btree)->b->c.level, 0); @@ -686,7 +689,7 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in if (b != btree_node_root(c, b)) { bch2_trans_iter_exit(trans, &iter); - continue; + goto retry_root; } gc_pos_set(c, gc_pos_btree(btree, b->c.level + 1, SPOS_MAX)); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index e092f541c..db700caf8 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1006,6 
+1006,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); unsigned u64s; unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); + u64 max_journal_seq = 0; struct printbuf buf = PRINTBUF; int ret = 0, retry_read = 0, write = READ; u64 start_time = local_clock(); @@ -1181,6 +1182,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, sort_iter_add(iter, vstruct_idx(i, 0), vstruct_last(i)); + + max_journal_seq = max(max_journal_seq, le64_to_cpu(i->journal_seq)); } if (ptr_written) { @@ -1217,6 +1220,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, swap(sorted, b->data); set_btree_bset(b, b->set, &b->data->keys); b->nsets = 1; + b->data->keys.journal_seq = cpu_to_le64(max_journal_seq); BUG_ON(b->nr.live_u64s != u64s); diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c index dd52372ac..8bdfe573e 100644 --- a/libbcachefs/btree_locking.c +++ b/libbcachefs/btree_locking.c @@ -231,7 +231,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) prt_newline(&buf); } - bch2_print_string_as_lines(KERN_ERR, buf.buf); + bch2_print_string_as_lines_nonblocking(KERN_ERR, buf.buf); printbuf_exit(&buf); BUG(); } diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index b9fd14e31..8dbceec8e 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -13,8 +13,6 @@ #include "btree_iter.h" #include "six.h" -#include - void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags); #ifdef CONFIG_LOCKDEP diff --git a/libbcachefs/btree_node_scan.c b/libbcachefs/btree_node_scan.c index 2cb0442f6..ddc5ada3e 100644 --- a/libbcachefs/btree_node_scan.c +++ b/libbcachefs/btree_node_scan.c @@ -76,6 +76,7 @@ static bool found_btree_node_is_readable(struct btree_trans *trans, return ret; f->sectors_written = b->written; + f->journal_seq = 
le64_to_cpu(b->data->keys.journal_seq); six_unlock_read(&b->c.lock); /* @@ -105,7 +106,8 @@ static int found_btree_node_cmp_cookie(const void *_l, const void *_r) static int found_btree_node_cmp_time(const struct found_btree_node *l, const struct found_btree_node *r) { - return cmp_int(l->seq, r->seq); + return cmp_int(l->seq, r->seq) ?: + cmp_int(l->journal_seq, r->journal_seq); } static int found_btree_node_cmp_pos(const void *_l, const void *_r) diff --git a/libbcachefs/btree_node_scan_types.h b/libbcachefs/btree_node_scan_types.h index 5cfaeb5ac..b6c36c45d 100644 --- a/libbcachefs/btree_node_scan_types.h +++ b/libbcachefs/btree_node_scan_types.h @@ -11,6 +11,7 @@ struct found_btree_node { u8 level; unsigned sectors_written; u32 seq; + u64 journal_seq; u64 cookie; struct bpos min_key; diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 9575fb65b..31ee50184 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -2647,6 +2647,28 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c, return end; } +static void bch2_btree_alloc_to_text(struct printbuf *out, + struct bch_fs *c, + struct btree_alloc *a) +{ + printbuf_indent_add(out, 2); + bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&a->k)); + prt_newline(out); + + struct open_bucket *ob; + unsigned i; + open_bucket_for_each(c, &a->ob, ob, i) + bch2_open_bucket_to_text(out, c, ob); + + printbuf_indent_sub(out, 2); +} + +void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c) +{ + for (unsigned i = 0; i < c->btree_reserve_cache_nr; i++) + bch2_btree_alloc_to_text(out, c, &c->btree_reserve_cache[i]); +} + void bch2_fs_btree_interior_update_exit(struct bch_fs *c) { if (c->btree_node_rewrite_worker) diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h index b5b76ce01..02c6ecada 100644 --- a/libbcachefs/btree_update_interior.h +++ b/libbcachefs/btree_update_interior.h @@ -335,6 +335,8 @@ struct 
jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, void bch2_do_pending_node_rewrites(struct bch_fs *); void bch2_free_pending_node_rewrites(struct bch_fs *); +void bch2_btree_reserve_cache_to_text(struct printbuf *, struct bch_fs *); + void bch2_fs_btree_interior_update_exit(struct bch_fs *); void bch2_fs_btree_interior_update_init_early(struct bch_fs *); int bch2_fs_btree_interior_update_init(struct bch_fs *); diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 25549c231..2650a0d24 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -426,7 +426,7 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" "while marking %s", ptr->dev, bucket_nr, b_gen, - *bucket_gen(ca, bucket_nr), + bucket_gen_get(ca, bucket_nr), bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen, (printbuf_reset(&buf), diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 4a14741b8..2d35eeb24 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -116,6 +116,14 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b) return gens->b + b; } +static inline u8 bucket_gen_get(struct bch_dev *ca, size_t b) +{ + rcu_read_lock(); + u8 gen = *bucket_gen(ca, b); + rcu_read_unlock(); + return gen; +} + static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca, const struct bch_extent_ptr *ptr) { diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 3ea8dbc4d..24fba256e 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -2140,8 +2140,7 @@ int __init bch2_vfs_init(void) { int ret = -ENOMEM; - bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT | - SLAB_ACCOUNT); + bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT); if (!bch2_inode_cache) goto err; diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index 679f5f5e5..da0e4a745 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -88,7 
+88,7 @@ struct bkey_inode_buf { #define x(_name, _bits) + 8 + _bits / 8 u8 _pad[0 + BCH_INODE_FIELDS_v3()]; #undef x -} __packed __aligned(8); +}; void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *); int bch2_inode_unpack(struct bkey_s_c, struct bch_inode_unpacked *); diff --git a/libbcachefs/io_misc.c b/libbcachefs/io_misc.c index 4ec979b4b..2cf629775 100644 --- a/libbcachefs/io_misc.c +++ b/libbcachefs/io_misc.c @@ -125,8 +125,12 @@ int bch2_extent_fallocate(struct btree_trans *trans, bch2_bkey_buf_exit(&old, c); if (closure_nr_remaining(&cl) != 1) { - bch2_trans_unlock(trans); - closure_sync(&cl); + bch2_trans_unlock_long(trans); + + if (closure_sync_timeout(&cl, HZ * 10)) { + bch2_print_allocator_stuck(c); + closure_sync(&cl); + } } return ret; diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index f7aa835d7..deef4f024 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -357,18 +357,19 @@ static int bch2_copygc_thread(void *arg) } last = atomic64_read(&clock->now); - wait = max_t(long, 0, bch2_copygc_wait_amount(c) - clock->max_slop); + wait = bch2_copygc_wait_amount(c); - if (wait > 0) { + if (wait > clock->max_slop) { c->copygc_wait_at = last; c->copygc_wait = last + wait; move_buckets_wait(&ctxt, buckets, true); - trace_and_count(c, copygc_wait, c, wait, c->copygc_wait); - bch2_io_clock_schedule_timeout(clock, c->copygc_wait); + trace_and_count(c, copygc_wait, c, wait, last + wait); + bch2_kthread_io_clock_wait(clock, last + wait, + MAX_SCHEDULE_TIMEOUT); continue; } - c->copygc_wait = c->copygc_wait_at = 0; + c->copygc_wait = 0; c->copygc_running = true; ret = bch2_copygc(&ctxt, buckets, &did_work); @@ -383,7 +384,8 @@ static int bch2_copygc_thread(void *arg) min_member_capacity = 128 * 2048; bch2_trans_unlock_long(ctxt.trans); - bch2_io_clock_schedule_timeout(clock, last + (min_member_capacity >> 8)); + bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6), + MAX_SCHEDULE_TIMEOUT); } } 
@@ -399,10 +401,9 @@ static int bch2_copygc_thread(void *arg) void bch2_copygc_stop(struct bch_fs *c) { - struct task_struct *t = rcu_dereference_protected(c->copygc_thread, true); - if (t) { - kthread_stop(t); - put_task_struct(t); + if (c->copygc_thread) { + kthread_stop(c->copygc_thread); + put_task_struct(c->copygc_thread); } c->copygc_thread = NULL; } @@ -429,8 +430,8 @@ int bch2_copygc_start(struct bch_fs *c) get_task_struct(t); - rcu_assign_pointer(c->copygc_thread, t); - wake_up_process(t); + c->copygc_thread = t; + wake_up_process(c->copygc_thread); return 0; } diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 7c9cafdcf..1c0d1fb20 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -17,6 +17,7 @@ #include "btree_iter.h" #include "btree_key_cache.h" #include "btree_update.h" +#include "btree_update_interior.h" #include "btree_gc.h" #include "buckets.h" #include "clock.h" @@ -144,6 +145,7 @@ write_attribute(trigger_journal_flush); write_attribute(trigger_journal_writes); write_attribute(trigger_btree_cache_shrink); write_attribute(trigger_btree_key_cache_shrink); +write_attribute(trigger_freelist_wakeup); rw_attribute(gc_gens_pos); read_attribute(uuid); @@ -170,6 +172,7 @@ read_attribute(compression_stats); read_attribute(journal_debug); read_attribute(btree_cache); read_attribute(btree_key_cache); +read_attribute(btree_reserve_cache); read_attribute(stripes_heap); read_attribute(open_buckets); read_attribute(open_buckets_partial); @@ -357,6 +360,9 @@ SHOW(bch2_fs) if (attr == &sysfs_btree_key_cache) bch2_btree_key_cache_to_text(out, &c->btree_key_cache); + if (attr == &sysfs_btree_reserve_cache) + bch2_btree_reserve_cache_to_text(out, c); + if (attr == &sysfs_stripes_heap) bch2_stripes_heap_to_text(out, c); @@ -475,6 +481,9 @@ STORE(bch2_fs) if (attr == &sysfs_trigger_journal_writes) bch2_journal_do_writes(&c->journal); + if (attr == &sysfs_trigger_freelist_wakeup) + closure_wake_up(&c->freelist_wait); + #ifdef CONFIG_BCACHEFS_TESTS if 
(attr == &sysfs_perf_test) { char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; @@ -577,6 +586,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_journal_debug, &sysfs_btree_cache, &sysfs_btree_key_cache, + &sysfs_btree_reserve_cache, &sysfs_new_stripes, &sysfs_stripes_heap, &sysfs_open_buckets, @@ -596,6 +606,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_trigger_journal_writes, &sysfs_trigger_btree_cache_shrink, &sysfs_trigger_btree_key_cache_shrink, + &sysfs_trigger_freelist_wakeup, &sysfs_gc_gens_pos, diff --git a/libbcachefs/util.c b/libbcachefs/util.c index de331dec2..4ec7e44d6 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -252,8 +252,10 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v) bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1); } -void bch2_print_string_as_lines(const char *prefix, const char *lines) +static void __bch2_print_string_as_lines(const char *prefix, const char *lines, + bool nonblocking) { + bool locked = false; const char *p; if (!lines) { @@ -261,7 +263,13 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines) return; } - console_lock(); + if (!nonblocking) { + console_lock(); + locked = true; + } else { + locked = console_trylock(); + } + while (1) { p = strchrnul(lines, '\n'); printk("%s%.*s\n", prefix, (int) (p - lines), lines); @@ -269,7 +277,18 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines) break; lines = p + 1; } - console_unlock(); + if (locked) + console_unlock(); +} + +void bch2_print_string_as_lines(const char *prefix, const char *lines) +{ + return __bch2_print_string_as_lines(prefix, lines, false); +} + +void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines) +{ + return __bch2_print_string_as_lines(prefix, lines, true); } int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr, diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 76ffe08e7..2def4f761 100644 --- 
a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -315,6 +315,7 @@ void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned); void bch2_prt_u64_base2(struct printbuf *, u64); void bch2_print_string_as_lines(const char *prefix, const char *lines); +void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines); typedef DARRAY(unsigned long) bch_stacktrace; int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t);