From 33a5fe63bf92909f71bdcc6666ab7b3bc5b3b860 Mon Sep 17 00:00:00 2001 From: Ian Kerins Date: Thu, 23 Nov 2023 10:33:50 -0500 Subject: [PATCH] Overhaul /repositories Increase performance on instances with large number of repositories by using the same limiting semantics we use on the search page. The difference is that this limiting is done entirely in neogrok, as zoekt has no scoring/truncating parameters for repository search. So this limit does nothing to reduce API bandwidth, only to help render performance. Rendering an HTML table with 5000 rows is simply too expensive to do on every keypress. Make the table sortable by columns. Clarify the table layout and expand the documentation to describe what shards are. --- .changeset/slimy-eyes-kneel.md | 5 + src/lib/server/zoekt-list-repositories.ts | 15 +- src/routes/about/+page.svelte | 64 +++++-- src/routes/repositories/+page.svelte | 2 +- src/routes/repositories/branches.svelte | 23 +++ .../repositories/repositories-list.svelte | 165 +++++++++++++++--- .../repositories/repository-name.svelte | 10 ++ src/routes/repositories/repository.svelte | 45 ----- src/routes/repositories/route-list-query.ts | 39 ++++- src/routes/repositories/search-form.svelte | 96 ++++++---- .../sortable-column-header.svelte | 24 +++ src/routes/repositories/table-sorting.ts | 42 +++++ 12 files changed, 394 insertions(+), 136 deletions(-) create mode 100644 .changeset/slimy-eyes-kneel.md create mode 100644 src/routes/repositories/branches.svelte create mode 100644 src/routes/repositories/repository-name.svelte delete mode 100644 src/routes/repositories/repository.svelte create mode 100644 src/routes/repositories/sortable-column-header.svelte create mode 100644 src/routes/repositories/table-sorting.ts diff --git a/.changeset/slimy-eyes-kneel.md b/.changeset/slimy-eyes-kneel.md new file mode 100644 index 0000000..373abe1 --- /dev/null +++ b/.changeset/slimy-eyes-kneel.md @@ -0,0 +1,5 @@ +--- +"neogrok": minor +--- + +Enhance the repositories list 
page, making it more performant on instances with large numbers of repositories, and make columns sortable by clicking on their headers diff --git a/src/lib/server/zoekt-list-repositories.ts b/src/lib/server/zoekt-list-repositories.ts index 8ba8f89..967e2d6 100644 --- a/src/lib/server/zoekt-list-repositories.ts +++ b/src/lib/server/zoekt-list-repositories.ts @@ -47,15 +47,18 @@ export async function listRepositories( const statsSchema = v .object({ + Shards: v.number(), Documents: v.number(), IndexBytes: v.number(), ContentBytes: v.number(), }) - .map(({ Documents, IndexBytes, ContentBytes }) => ({ + .map(({ Shards, Documents, IndexBytes, ContentBytes }) => ({ + shardCount: Shards, fileCount: Documents, indexBytes: IndexBytes, contentBytes: ContentBytes, })); +export type RepoStats = v.Infer; const dateSchema = v.string().chain((str) => { const date = new Date(str); @@ -125,10 +128,10 @@ const listResultSchema = v.object({ })), Stats: statsSchema, }) - .map(({ Repository, IndexMetadata: { lastIndexed }, Stats }) => ({ + .map(({ Repository, IndexMetadata, Stats }) => ({ ...Repository, - lastIndexed, - stats: Stats, + ...IndexMetadata, + ...Stats, })), ), ) @@ -147,7 +150,5 @@ const listResultSchema = v.object({ const toISOStringWithoutMs = (d: Date) => d.toISOString().replace(/\.\d{3}Z$/, "Z"); -export type ListResults = ReadonlyDeep< - v.Infer["List"] ->; +export type ListResults = v.Infer["List"]; export type Repository = ListResults["repositories"][number]; diff --git a/src/routes/about/+page.svelte b/src/routes/about/+page.svelte index 22516e8..a1903bf 100644 --- a/src/routes/about/+page.svelte +++ b/src/routes/about/+page.svelte @@ -107,20 +107,56 @@ the repositories indexed in the backing zoekt instance, including a variety of data about them.

-

- Note that the search input on this page has the same semantics as the - search input on the main search page: you are writing a full zoekt query, but instead of getting normal search results, you get repositories that - contain any results matching the query. So, filters the table to repositories with "linux" in their name, while filters the table to repositories with linux in their - contents. -

+
+ Repository search +

+ Note that the search input on this page has the same semantics as the + search input on the main search page: you are writing a full zoekt query, but instead of getting normal search results, you get repositories + that contain any results matching the query. So, filters the table to repositories with "linux" in their name, while filters the table to repositories with linux in their + contents. +

+

+ To improve page performance on deployments with large numbers of + repositories, there is a repos input that limits the number of + displayed repositories in the same way that the files and + matches inputs on the search page do. +

+
+
+ Repository stats +

+ The tabulated data includes links to the repository and its indexed + branches, the times the repository was last indexed and that it was last + committed to, and data about the index shards and their contents. + The table can be sorted by clicking on column headers: the first click will + sort in ascending order, the second in descending, and the third will restore + the status quo. +

+

+ Shards are what zoekt calls the files emitted from its indexer, and + they're all that's used by the zoekt-webserver backing neogrok to handle + neogrok's API requests; they contain the above-described repository metadata, + indexes used to quickly search repository content, and the repository content + itself (file names and contents). Indexing a repository typically results + in a single shard, but zoekt limits shard files to be about 100MiB in size, + so big repositories get more than one shard. +

+

+ When you search repository contents (i.e. make a non-repo: + query), you are in fact searching repository shards, and so for a + repository with more than one shard, you will see that the counts of + shards and associated data in the table go down when you enter a query + that matches content in only some of its shards. +

+
diff --git a/src/routes/repositories/+page.svelte b/src/routes/repositories/+page.svelte index e8be39d..f0822f7 100644 --- a/src/routes/repositories/+page.svelte +++ b/src/routes/repositories/+page.svelte @@ -32,6 +32,6 @@ ? data.listOutcome.results : previousListResults ?? { repositories: [], - stats: { fileCount: 0, contentBytes: 0, indexBytes: 0 }, + stats: { shardCount: 0, fileCount: 0, contentBytes: 0, indexBytes: 0 }, }} /> diff --git a/src/routes/repositories/branches.svelte b/src/routes/repositories/branches.svelte new file mode 100644 index 0000000..84e27cd --- /dev/null +++ b/src/routes/repositories/branches.svelte @@ -0,0 +1,23 @@ + + +{#each branches as { name: branchName, version }} + {branchName}@ + {#if commitUrlTemplate} + {abbreviateVersion(version)} + {:else} + {abbreviateVersion(version)} + {/if} + +{/each} diff --git a/src/routes/repositories/repositories-list.svelte b/src/routes/repositories/repositories-list.svelte index 98f8df5..53404e9 100644 --- a/src/routes/repositories/repositories-list.svelte +++ b/src/routes/repositories/repositories-list.svelte @@ -1,41 +1,156 @@ - - -

- {repositories.length} - {repositories.length === 1 ? "repository" : "repositories"} containing - {fileCount} files consuming - {prettyBytes(indexBytes + contentBytes, { space: false })} of RAM +

+ + zoekt: {results.repositories.length} + {results.repositories.length === 1 ? "repository" : "repositories"} / + {results.stats.shardCount} + {results.stats.shardCount === 1 ? "shard" : "shards"} / + {results.stats.fileCount} + {results.stats.fileCount === 1 ? "file" : "files"} / + {prettyBytes(results.stats.indexBytes + results.stats.contentBytes, { + space: false, + binary: true, + })} RAM + + neogrok: {truncated.length} + {truncated.length === 1 ? "repository" : "repositories"} + / + {truncatedStats.shardCount} + {truncatedStats.shardCount === 1 ? "shard" : "shards"} / + {truncatedStats.fileCount} + {truncatedStats.fileCount === 1 ? "file" : "files"} / + {prettyBytes(truncatedStats.indexBytes + truncatedStats.contentBytes, { + space: false, + binary: true, + })} RAM +

+
- - - - - +
RepositoryFile count
+ + + + + + + + + + - - - - + + + + + + - {#each repositories as repository} - + {#each truncated as { name, url, branches, commitUrlTemplate, shardCount, fileCount, indexBytes, contentBytes, lastIndexed, lastCommit }} + + + + + + + + + + {/each}
Index shard files
Repository
BranchesContent size in RAMIndex size in RAMLast indexedLast commitShard countContained filesIndex size in RAMContent size in RAMLast indexedLast commit
{shardCount}{fileCount}{prettyBytes(indexBytes, { space: false, binary: true })}{prettyBytes(contentBytes, { space: false, binary: true })}{lastIndexed}{lastCommit}
diff --git a/src/routes/repositories/repository-name.svelte b/src/routes/repositories/repository-name.svelte new file mode 100644 index 0000000..4c745bf --- /dev/null +++ b/src/routes/repositories/repository-name.svelte @@ -0,0 +1,10 @@ + + +
+ {#if url}{name}{:else}{name}{/if} +
diff --git a/src/routes/repositories/repository.svelte b/src/routes/repositories/repository.svelte deleted file mode 100644 index 173d1a0..0000000 --- a/src/routes/repositories/repository.svelte +++ /dev/null @@ -1,45 +0,0 @@ - - - - - {#if url.length > 0}{name}{:else}{name}{/if} - - {fileCount} - - {#each branches as { name: branchName, version }} - {branchName}@ - {#if commitUrlTemplate} - {abbreviateVersion(version)} - {:else} - {abbreviateVersion(version)} - {/if} - - {/each} - - {prettyBytes(contentBytes, { space: false })} - {prettyBytes(indexBytes, { space: false })} - {lastIndexed} - {lastCommit} - diff --git a/src/routes/repositories/route-list-query.ts b/src/routes/repositories/route-list-query.ts index 3c82f4b..36b8e0f 100644 --- a/src/routes/repositories/route-list-query.ts +++ b/src/routes/repositories/route-list-query.ts @@ -1,18 +1,31 @@ import { goto } from "$app/navigation"; import { navigating, page } from "$app/stores"; import type { SearchType } from "$lib/preferences"; +import type { ListQuery } from "$lib/server/zoekt-list-repositories"; import { derived, get } from "svelte/store"; -type RouteListQuery = { - readonly query: string | undefined; +const defaultQueryOptions: RouteListQuery = Object.freeze({ repos: 100 }); + +type RouteListQuery = ListQuery & { + // This is only used in the frontend, there is no support for truncation in + // the zoekt repositories list API, because there is no sorting. + readonly repos: number; }; export const parseSearchParams = ( searchParams: URLSearchParams, -): RouteListQuery => ({ +): RouteListQuery => { + const parsedRepos = Number.parseInt(searchParams.get("repos") ?? "", 10); + // coerce the empty string to undefined - query: searchParams.get("q") || undefined, -}); + const query = searchParams.get("q") || undefined; + const repos = parsedRepos >= 0 ? 
parsedRepos : defaultQueryOptions.repos; + + return { + query, + repos, + }; +}; export const routeListQuery = derived(page, (p) => parseSearchParams(p.url.searchParams), @@ -23,9 +36,11 @@ export const routeListQuery = derived(page, (p) => let lastNavigateTime = 0; export const updateRouteListQuery = ({ query, + repos, searchType, }: { query?: string; + repos?: number; searchType: SearchType; }) => { // SvelteKit "buffers" ongoing navigations - navigations complete, _then_ the @@ -38,17 +53,25 @@ export const updateRouteListQuery = ({ const listQuery = parseSearchParams(baselineUrl.searchParams); const queryChanged = (query || undefined) !== listQuery.query; + const reposChanged = + repos !== undefined && repos >= 0 && repos !== listQuery.repos; - if (queryChanged) { + if (queryChanged || reposChanged) { const now = Date.now(); const next = new URL(baselineUrl); - if (query) { + if (queryChanged && query) { next.searchParams.set("q", query); - } else { + } else if (queryChanged) { next.searchParams.delete("q"); } + if (reposChanged && repos === defaultQueryOptions.repos) { + next.searchParams.delete("repos"); + } else if (reposChanged) { + next.searchParams.set("repos", repos.toString()); + } + goto(next, { replaceState: searchType === "live" && now - lastNavigateTime < 2000, keepFocus: true, diff --git a/src/routes/repositories/search-form.svelte b/src/routes/repositories/search-form.svelte index e8cd923..b0ac05a 100644 --- a/src/routes/repositories/search-form.svelte +++ b/src/routes/repositories/search-form.svelte @@ -6,26 +6,34 @@ import { routeListQuery, updateRouteListQuery } from "./route-list-query"; import ToggleSearchType from "$lib/toggle-search-type.svelte"; import LoadingEllipsis from "$lib/loading-ellipsis.svelte"; + import IntegerInput from "$lib/integer-input.svelte"; export let queryError: string | null; const searchType = acquireSearchTypeStore(); let query: string | undefined; + let repos: number; const unsubscribe = 
routeListQuery.subscribe((rq) => { // Sync form values with route state whenever a navigation _not_ related to // direct user interactions with the form. Those are inherently already // covered by the relevant input bindings, and the resulting navigations // can conflict with those bindings. if ($navigating?.type !== "goto") { - ({ query } = rq); + ({ query, repos } = rq); } }); onDestroy(unsubscribe); + const shouldLiveSearch = () => + $searchType === "live" && + // Same trigram efficiency rules as on the main search page. + (!query || query.length >= 3); + const manualSubmit = () => { updateRouteListQuery({ query, + repos, searchType: $searchType, }); }; @@ -42,6 +50,7 @@ // These all indicate when form changes with manual search are not yet submitted. $: queryPending = $navigating === null && ($routeListQuery.query ?? "") !== (query ?? ""); + $: reposPending = $navigating === null && $routeListQuery.repos !== repos;
-