Skip to content

Commit 911d142

Browse files
jonathantanmygitster
authored andcommitted
index-pack --promisor: dedup before checking links
Commit c08589e (index-pack: repack local links into promisor packs, 2024-11-01) fixed a bug with what was believed to be a negligible decrease in performance [1] [2]. But at $DAYJOB, with at least one repo, it was found that the decrease in performance was very significant. Looking at the patch, whenever we parse an object in the packfile to be indexed, we check the targets of all its outgoing links for its existence. However, this could be optimized by first collecting all such targets into an oidset (thus deduplicating them) before checking. Teach Git to do that. On a certain fetch from the aforementioned repo, this improved performance from approximately 7 hours to 24m47.815s. This number will be further reduced in a subsequent patch. [1] https://lore.kernel.org/git/CAG1j3zGiNMbri8rZNaF0w+yP+6OdMz0T8+8_Wgd1R_p1HzVasg@mail.gmail.com/ [2] https://lore.kernel.org/git/[email protected]/ Signed-off-by: Jonathan Tan <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent cc01bad commit 911d142

File tree

1 file changed

+42
-33
lines changed

1 file changed

+42
-33
lines changed

builtin/index-pack.c

Lines changed: 42 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -155,11 +155,11 @@ static int input_fd, output_fd;
155155
static const char *curr_pack;
156156

157157
/*
158-
* local_links is guarded by read_mutex, and record_local_links is read-only in
159-
* a thread.
158+
* outgoing_links is guarded by read_mutex, and record_outgoing_links is
159+
* read-only in a thread.
160160
*/
161-
static struct oidset local_links = OIDSET_INIT;
162-
static int record_local_links;
161+
static struct oidset outgoing_links = OIDSET_INIT;
162+
static int record_outgoing_links;
163163

164164
static struct thread_local_data *thread_data;
165165
static int nr_dispatched;
@@ -812,18 +812,12 @@ static int check_collison(struct object_entry *entry)
812812
return 0;
813813
}
814814

815-
static void record_if_local_object(const struct object_id *oid)
815+
static void record_outgoing_link(const struct object_id *oid)
816816
{
817-
struct object_info info = OBJECT_INFO_INIT;
818-
if (oid_object_info_extended(the_repository, oid, &info, 0))
819-
/* Missing; assume it is a promisor object */
820-
return;
821-
if (info.whence == OI_PACKED && info.u.packed.pack->pack_promisor)
822-
return;
823-
oidset_insert(&local_links, oid);
817+
oidset_insert(&outgoing_links, oid);
824818
}
825819

826-
static void do_record_local_links(struct object *obj)
820+
static void do_record_outgoing_links(struct object *obj)
827821
{
828822
if (obj->type == OBJ_TREE) {
829823
struct tree *tree = (struct tree *)obj;
@@ -837,16 +831,16 @@ static void do_record_local_links(struct object *obj)
837831
*/
838832
return;
839833
while (tree_entry_gently(&desc, &entry))
840-
record_if_local_object(&entry.oid);
834+
record_outgoing_link(&entry.oid);
841835
} else if (obj->type == OBJ_COMMIT) {
842836
struct commit *commit = (struct commit *) obj;
843837
struct commit_list *parents = commit->parents;
844838

845839
for (; parents; parents = parents->next)
846-
record_if_local_object(&parents->item->object.oid);
840+
record_outgoing_link(&parents->item->object.oid);
847841
} else if (obj->type == OBJ_TAG) {
848842
struct tag *tag = (struct tag *) obj;
849-
record_if_local_object(get_tagged_oid(tag));
843+
record_outgoing_link(get_tagged_oid(tag));
850844
}
851845
}
852846

@@ -896,7 +890,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
896890
free(has_data);
897891
}
898892

899-
if (strict || do_fsck_object || record_local_links) {
893+
if (strict || do_fsck_object || record_outgoing_links) {
900894
read_lock();
901895
if (type == OBJ_BLOB) {
902896
struct blob *blob = lookup_blob(the_repository, oid);
@@ -928,8 +922,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
928922
die(_("fsck error in packed object"));
929923
if (strict && fsck_walk(obj, NULL, &fsck_options))
930924
die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
931-
if (record_local_links)
932-
do_record_local_links(obj);
925+
if (record_outgoing_links)
926+
do_record_outgoing_links(obj);
933927

934928
if (obj->type == OBJ_TREE) {
935929
struct tree *item = (struct tree *) obj;
@@ -1779,28 +1773,43 @@ static void repack_local_links(void)
17791773
struct strbuf line = STRBUF_INIT;
17801774
struct oidset_iter iter;
17811775
struct object_id *oid;
1782-
char *base_name;
1776+
char *base_name = NULL;
17831777

1784-
if (!oidset_size(&local_links))
1778+
if (!oidset_size(&outgoing_links))
17851779
return;
17861780

1787-
base_name = mkpathdup("%s/pack/pack", repo_get_object_directory(the_repository));
1781+
oidset_iter_init(&outgoing_links, &iter);
1782+
while ((oid = oidset_iter_next(&iter))) {
1783+
struct object_info info = OBJECT_INFO_INIT;
1784+
if (oid_object_info_extended(the_repository, oid, &info, 0))
1785+
/* Missing; assume it is a promisor object */
1786+
continue;
1787+
if (info.whence == OI_PACKED && info.u.packed.pack->pack_promisor)
1788+
continue;
17881789

1789-
strvec_push(&cmd.args, "pack-objects");
1790-
strvec_push(&cmd.args, "--exclude-promisor-objects-best-effort");
1791-
strvec_push(&cmd.args, base_name);
1792-
cmd.git_cmd = 1;
1793-
cmd.in = -1;
1794-
cmd.out = -1;
1795-
if (start_command(&cmd))
1796-
die(_("could not start pack-objects to repack local links"));
1790+
if (!cmd.args.nr) {
1791+
base_name = mkpathdup(
1792+
"%s/pack/pack",
1793+
repo_get_object_directory(the_repository));
1794+
strvec_push(&cmd.args, "pack-objects");
1795+
strvec_push(&cmd.args,
1796+
"--exclude-promisor-objects-best-effort");
1797+
strvec_push(&cmd.args, base_name);
1798+
cmd.git_cmd = 1;
1799+
cmd.in = -1;
1800+
cmd.out = -1;
1801+
if (start_command(&cmd))
1802+
die(_("could not start pack-objects to repack local links"));
1803+
}
17971804

1798-
oidset_iter_init(&local_links, &iter);
1799-
while ((oid = oidset_iter_next(&iter))) {
18001805
if (write_in_full(cmd.in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 ||
18011806
write_in_full(cmd.in, "\n", 1) < 0)
18021807
die(_("failed to feed local object to pack-objects"));
18031808
}
1809+
1810+
if (!cmd.args.nr)
1811+
return;
1812+
18041813
close(cmd.in);
18051814

18061815
out = xfdopen(cmd.out, "r");
@@ -1899,7 +1908,7 @@ int cmd_index_pack(int argc,
18991908
} else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) {
19001909
; /* nothing to do */
19011910
} else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) {
1902-
record_local_links = 1;
1911+
record_outgoing_links = 1;
19031912
} else if (starts_with(arg, "--threads=")) {
19041913
char *end;
19051914
nr_threads = strtoul(arg+10, &end, 0);

0 commit comments

Comments
 (0)