Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(accounts-db): support fast loading #324

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions scripts/view_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import random

def random_color_generator():
# take [r, g, b] from colormap
# Dark2 chosen for visibility from
# https://matplotlib.org/stable/users/explain/colors/colormaps.html#qualitative
return plt.colormaps["Dark2"](random.random())[:3]
r = random.randint(0, 255)
g = random.randint(0, 255)
b = random.randint(0, 255)
return (r / 255, g / 255, b / 255)

def view_results(paths):
path = paths[0]
Expand Down Expand Up @@ -40,8 +40,8 @@ def view_results(paths):
for i in range(len(benchmark_names)):
plt.clf()
plt.title(benchmark_names[i], wrap=True)

for df_i, df in enumerate(dfs):
label = paths[df_i]
# remove the header and the benchmark name
benchmark_runtimes = df.T[2:][i]
benchmark_runtimes.replace('', np.nan, inplace=True)
Expand All @@ -51,7 +51,7 @@ def view_results(paths):

color = colors[df_i]
mean = np.mean(benchmark_runtimes)
plt.scatter(benchmark_runtimes, np.zeros_like(benchmark_runtimes), color=color, label=paths[df_i])
plt.scatter(benchmark_runtimes, np.zeros_like(benchmark_runtimes), color=color, label=label)

var = np.var(benchmark_runtimes)
plt.errorbar(mean, 1, xerr=np.sqrt(var), fmt='o', color=color) # mean
Expand Down
584 changes: 321 additions & 263 deletions src/accountsdb/db.zig

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/accountsdb/fuzz.zig
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ pub fn run(seed: u64, args: *std.process.ArgIterator) !void {
.number_of_index_shards = sig.accounts_db.db.ACCOUNT_INDEX_SHARDS,
.use_disk_index = use_disk,
.lru_size = 10_000,
.prealloc_number_of_accounts = 1_000_000,
// TODO: other things we can fuzz (number of shards, ...)
},
null,
Expand Down Expand Up @@ -334,7 +335,7 @@ pub fn run(seed: u64, args: *std.process.ArgIterator) !void {
var alt_accounts_db = try AccountsDB.init(allocator, .noop, alternative_snapshot_dir, accounts_db.config, null);
defer alt_accounts_db.deinit();

_ = try alt_accounts_db.loadWithDefaults(allocator, &snapshot_fields, 1, true, 1_500);
_ = try alt_accounts_db.loadWithDefaults(allocator, &snapshot_fields, 1, true, 500);
const maybe_inc_slot = if (snapshot_files.incremental_snapshot) |inc| inc.slot else null;
logger.info().logf("loaded and validated snapshot at slot: {} (and inc snapshot @ slot {any})", .{ snapshot_info.slot, maybe_inc_slot });
}
Expand Down
436 changes: 350 additions & 86 deletions src/accountsdb/index.zig

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions src/accountsdb/swiss_map.zig
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,35 @@ pub fn SwissMapUnmanaged(
}
unreachable;
}

pub fn setFromMemory(self: *Self, memory: []u8) void {
// from ensureTotalCapacity:
// memory.len === n_groups * (@sizeOf([GROUP_SIZE]KeyValue) + @sizeOf([GROUP_SIZE]State))
const n_groups = memory.len / (@sizeOf([GROUP_SIZE]KeyValue) + @sizeOf([GROUP_SIZE]State));

const group_size = n_groups * @sizeOf([GROUP_SIZE]KeyValue);
const group_ptr: [*][GROUP_SIZE]KeyValue = @alignCast(@ptrCast(memory.ptr));
const groups = group_ptr[0..n_groups];
const states_ptr: [*]@Vector(GROUP_SIZE, u8) = @alignCast(@ptrCast(memory.ptr + group_size));
const states = states_ptr[0..n_groups];

self._capacity = n_groups * GROUP_SIZE;
self.groups = groups;
self.states = states;
self.memory = memory;
self.bit_mask = n_groups - 1;

self._count = 0;
for (0..self.groups.len) |i| {
const state_vec = self.states[i];
for (0..GROUP_SIZE) |j| {
const state: State = @bitCast(state_vec[j]);
if (state.state == .occupied) {
self._count += 1;
}
}
}
}
};
}

Expand Down
4 changes: 2 additions & 2 deletions src/benchmarks.zig
Original file line number Diff line number Diff line change
Expand Up @@ -418,9 +418,9 @@ pub fn benchmarkCSV(

if (j == runtime_info.fields.len - 1) {
// dont print trailing comma
try writer_average.print("{d}, {d}, {any}, {any}", .{ f_max, f_min, f_mean, f_std_dev });
try writer_average.print("{d}, {d}, {any}, {any}", .{ f_min, f_max, f_mean, f_std_dev });
} else {
try writer_average.print("{d}, {d}, {any}, {any}, ", .{ f_max, f_min, f_mean, f_std_dev });
try writer_average.print("{d}, {d}, {any}, {any}, ", .{ f_min, f_max, f_mean, f_std_dev });
}
}
try writer_average.print("\n", .{});
Expand Down
75 changes: 16 additions & 59 deletions src/bincode/bincode.zig
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,22 @@ test "bincode: custom field serialization" {
try std.testing.expect(r.skip_me == 20);
}

test "bincode: test arraylist with ptr struct" {
var array = std.ArrayList(sig.accounts_db.index.AccountRef).init(std.testing.allocator);
defer array.deinit();

var ref = sig.accounts_db.index.AccountRef.DEFAULT;
ref.slot = 10;
try array.append(ref);

var buf: [1024]u8 = undefined;
const bytes = try writeToSlice(&buf, array, .{});
var array2 = try readFromSlice(std.testing.allocator, std.ArrayList(sig.accounts_db.index.AccountRef), bytes, .{});
defer array2.deinit();

try std.testing.expectEqualSlices(sig.accounts_db.index.AccountRef, array.items, array2.items);
}

test "bincode: test arraylist" {
var array = std.ArrayList(u8).init(std.testing.allocator);
defer array.deinit();
Expand Down Expand Up @@ -959,65 +975,6 @@ test "bincode: (legacy) serialize an array" {
}, buffer.items);
}

// TODO: Fix bincode.Option to use ? instead
// test "bincode: decode arbitrary object" {
// const Mint = struct {
// authority: bincode.Option([32]u8),
// supply: u64,
// decimals: u8,
// is_initialized: bool,
// freeze_authority: bincode.Option([32]u8),
// };

// const bytes = [_]u8{
// 1, 0, 0, 0, 83, 18, 223, 14, 150, 112, 155, 39, 143, 181,
// 58, 12, 16, 228, 56, 110, 253, 193, 149, 16, 253, 81, 214, 206,
// 246, 126, 227, 182, 123, 225, 246, 203, 1, 0, 0, 0, 0, 0,
// 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 83, 18, 223,
// 14, 150, 112, 155, 39, 143, 181, 58, 12, 16, 228, 56, 110, 253,
// 193, 149, 16, 253, 81, 214, 206, 246, 126, 227, 182, 123,
// };
// const mint = try bincode.readFromSlice(testing.allocator, Mint, &bytes, .{});
// defer bincode.free(testing.allocator, mint);

// try std.testing.expectEqual(@as(u64, 1), mint.supply);
// try std.testing.expectEqual(@as(u8, 0), mint.decimals);
// try std.testing.expectEqual(true, mint.is_initialized);
// try std.testing.expect(mint.authority == .some);
// try std.testing.expect(mint.freeze_authority == .some);
// }

// TODO: Fix bincode.Option to use ? instead
// test "bincode: option serialize and deserialize" {
// const Mint = struct {
// authority: bincode.Option([32]u8),
// supply: u64,
// decimals: u8,
// is_initialized: bool,
// freeze_authority: bincode.Option([32]u8),
// };

// var buffer = std.ArrayList(u8).init(testing.allocator);
// defer buffer.deinit();

// const expected: Mint = .{
// .authority = bincode.Option([32]u8).from([_]u8{ 1, 2, 3, 4 } ** 8),
// .supply = 1,
// .decimals = 0,
// .is_initialized = true,
// .freeze_authority = bincode.Option([32]u8).from([_]u8{ 5, 6, 7, 8 } ** 8),
// };

// try bincode.write(buffer.writer(), expected, .{});

// try std.testing.expectEqual(@as(usize, 82), buffer.items.len);

// const actual = try bincode.readFromSlice(testing.allocator, Mint, buffer.items, .{});
// defer bincode.free(testing.allocator, actual);

// try std.testing.expectEqual(expected, actual);
// }

test "bincode: serialize and deserialize" {
var buffer = std.ArrayList(u8).init(testing.allocator);
defer buffer.deinit();
Expand Down
17 changes: 14 additions & 3 deletions src/cmd/cmd.zig
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,14 @@ pub fn run() !void {
.value_name = "accounts_per_file_estimate",
};

var prealloc_number_of_accounts_option = cli.Option{
.long_name = "max-number-of-accounts",
.help = "maximum number of accounts to store in the accounts db",
.value_ref = cli.mkRef(&config.current.accounts_db.prealloc_number_of_accounts),
.required = false,
.value_name = "prealloc_number_of_accounts",
};

// geyser options
var enable_geyser_option = cli.Option{
.long_name = "enable-geyser",
Expand Down Expand Up @@ -396,6 +404,7 @@ pub fn run() !void {
&number_of_index_shards_option,
&genesis_file_path,
&accounts_per_file_estimate,
&prealloc_number_of_accounts_option,
// geyser
&enable_geyser_option,
&geyser_pipe_path_option,
Expand All @@ -414,14 +423,14 @@ pub fn run() !void {
&cli.Command{
.name = "shred-collector",
.description = .{ .one_line = "Run the shred collector to collect and store shreds", .detailed =
\\ This command runs the shred collector without running the full validator
\\ This command runs the shred collector without running the full validator
\\ (mainly excluding the accounts-db setup).
\\
\\ NOTE: this means that this command *requires* a leader schedule to be provided
\\ (which would usually be derived from the accountsdb snapshot).
\\
\\ NOTE: this command also requires `start_slot` (`--test-repair-for-slot`) to be given as well (
\\ which is usually derived from the accountsdb snapshot). This can be done
\\ which is usually derived from the accountsdb snapshot). This can be done
\\ with `--test-repair-for-slot $(solana slot -u testnet)` for testnet or another `-u` for mainnet/devnet.
},
.options = &.{
Expand Down Expand Up @@ -492,6 +501,7 @@ pub fn run() !void {
&number_of_index_shards_option,
&genesis_file_path,
&accounts_per_file_estimate,
&prealloc_number_of_accounts_option,
// geyser
&enable_geyser_option,
&geyser_pipe_path_option,
Expand Down Expand Up @@ -585,7 +595,7 @@ pub fn run() !void {
.description = .{
.one_line = "Test transaction sender service",
.detailed =
\\Simulates a stream of transaction being sent to the transaction sender by
\\Simulates a stream of transaction being sent to the transaction sender by
\\running a mock transaction generator thread. For the moment this just sends
\\transfer transactions between to hard coded testnet accounts.
,
Expand Down Expand Up @@ -1475,6 +1485,7 @@ fn loadSnapshot(
.number_of_index_shards = config.current.accounts_db.number_of_index_shards,
.use_disk_index = config.current.accounts_db.use_disk_index,
.lru_size = 10_000,
.prealloc_number_of_accounts = config.current.accounts_db.prealloc_number_of_accounts,
},
geyser_writer,
);
Expand Down
4 changes: 4 additions & 0 deletions src/cmd/config.zig
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ pub const AccountsDBConfig = struct {
force_new_snapshot_download: bool = false,
/// estimate of the number of accounts per file (used for preallocation)
accounts_per_file_estimate: u64 = 1_500,
/// maximum number of accounts to manage references to for the account index. if loading from a snapshot,
/// the number of accounts supported will be max(this, accounts_per_file_estimate * number_of_account_files).
/// NOTE: usually number_of_account_files is ~400K.
prealloc_number_of_accounts: u64 = 0,
};

pub const GeyserConfig = struct {
Expand Down
Loading