Skip to content

Commit c739a7d

Browse files
committed
last-modified: use Bloom filters when available
Our 'git last-modified' performs a revision walk, and computes a diff at each point in the walk to figure out whether a given revision changed any of the paths it considers interesting.

When changed-path Bloom filters are available, we can avoid computing many such diffs. Before computing a diff, we first check if any of the remaining paths of interest were possibly changed at a given commit by consulting its Bloom filter. If any of them are, we are resigned to computing the diff.

If none of those queries returned "maybe", we know that the given commit doesn't contain any changed paths which are interesting to us. So, we can avoid computing the diff in this case.

Comparing the perf test results on git.git:

Test                                        HEAD~             HEAD
------------------------------------------------------------------------------------
8020.1: top-level last-modified             4.49(4.34+0.11)   2.22(2.05+0.09) -50.6%
8020.2: top-level recursive last-modified   5.64(5.45+0.11)   5.62(5.30+0.11) -0.4%
8020.3: subdir last-modified                0.11(0.06+0.04)   0.07(0.03+0.04) -36.4%

Based-on-patch-by: Taylor Blau <[email protected]>
Signed-off-by: Toon Claes <[email protected]>
1 parent a017f2c commit c739a7d

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

builtin/last-modified.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include "git-compat-util.h"
2+
#include "bloom.h"
23
#include "builtin.h"
4+
#include "commit-graph.h"
35
#include "commit.h"
46
#include "config.h"
57
#include "diff.h"
@@ -17,6 +19,7 @@
1719
/*
 * One path that is still being tracked. Entries are stored in a hashmap
 * and are hashed by their path string.
 */
struct last_modified_entry {
1820
/* Hashmap linkage; initialized from the hash of 'path'. */
struct hashmap_entry hashent;
1921
/* Object id recorded for this path when the entry is created. */
struct object_id oid;
22+
/*
 * Bloom key for 'path'. Only filled in when Bloom filter settings are
 * available; it is cleared with clear_bloom_key() before the entry is
 * freed.
 */
struct bloom_key key;
2023
/* The path itself, allocated inline as a flexible array member. */
const char path[FLEX_ARRAY];
2124
};
2225

@@ -40,6 +43,12 @@ struct last_modified {
4043

4144
/*
 * Release everything held by 'lm': the Bloom key of every remaining path
 * entry, the path entries themselves, and the revision-walk state.
 */
static void last_modified_release(struct last_modified *lm)
4245
{
46+
struct hashmap_iter iter;
47+
struct last_modified_entry *ent;
48+
49+
/*
 * Clear each entry's Bloom key first; the entries are freed by the
 * hashmap_clear_and_free() call below, after which the keys would no
 * longer be reachable.
 */
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent)
50+
clear_bloom_key(&ent->key);
51+
4352
hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent);
4453
release_revisions(&lm->rev);
4554
}
@@ -67,6 +76,9 @@ static void add_path_from_diff(struct diff_queue_struct *q,
6776

6877
FLEX_ALLOC_STR(ent, path, path);
6978
oidcpy(&ent->oid, &p->two->oid);
79+
if (lm->rev.bloom_filter_settings)
80+
fill_bloom_key(path, strlen(path), &ent->key,
81+
lm->rev.bloom_filter_settings);
7082
hashmap_entry_init(&ent->hashent, strhash(ent->path));
7183
hashmap_add(&lm->paths, &ent->hashent);
7284
}
@@ -126,6 +138,7 @@ static void mark_path(const char *path, const struct object_id *oid,
126138
data->callback(path, data->commit, data->callback_data);
127139

128140
hashmap_remove(data->paths, &ent->hashent, path);
141+
clear_bloom_key(&ent->key);
129142
free(ent);
130143
}
131144

@@ -169,6 +182,28 @@ static void last_modified_diff(struct diff_queue_struct *q,
169182
}
170183
}
171184

185+
186+
/*
 * Ask the changed-path Bloom filter of 'origin' whether any of the paths
 * still tracked in lm->paths may have been changed by that commit.
 *
 * Returns 1 when a diff still has to be computed: no Bloom filter
 * settings are available, 'origin' has no Bloom filter, or the filter
 * reports a possible match for at least one remaining path.
 * Returns 0 only when the filter reports no match for every remaining
 * path.
 *
 * NOTE(review): a changed-path filter presumably describes changes
 * relative to the commit's parent. Callers that diff 'origin' against
 * something else (e.g. a boundary commit diffed against the empty tree)
 * should confirm that acting on a 0 result is safe for them.
 */
static int maybe_changed_path(struct last_modified *lm, struct commit *origin)
187+
{
188+
struct bloom_filter *filter;
189+
struct last_modified_entry *ent;
190+
struct hashmap_iter iter;
191+
192+
/* Without Bloom settings no keys were filled in; fall back to diffing. */
if (!lm->rev.bloom_filter_settings)
193+
return 1;
194+
195+
filter = get_bloom_filter(lm->rev.repo, origin);
196+
/* No filter for this commit: we cannot rule anything out. */
if (!filter)
197+
return 1;
198+
199+
/* A single "maybe" for any remaining path is enough to require the diff. */
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) {
200+
if (bloom_filter_contains(filter, &ent->key,
201+
lm->rev.bloom_filter_settings))
202+
return 1;
203+
}
204+
return 0;
205+
}
206+
172207
static int last_modified_run(struct last_modified *lm,
173208
last_modified_callback cb, void *cbdata)
174209
{
@@ -189,6 +224,9 @@ static int last_modified_run(struct last_modified *lm,
189224
if (!data.commit)
190225
break;
191226

227+
if (!maybe_changed_path(lm, data.commit))
228+
continue;
229+
192230
if (data.commit->object.flags & BOUNDARY) {
193231
diff_tree_oid(lm->rev.repo->hash_algo->empty_tree,
194232
&data.commit->object.oid, "",
@@ -238,6 +276,13 @@ static int last_modified_init(struct last_modified *lm, struct repository *r,
238276
return argc;
239277
}
240278

279+
/*
280+
* We're not interested in generation numbers here,
281+
* but calling this function to prepare the commit-graph.
282+
*/
283+
(void)generation_numbers_enabled(lm->rev.repo);
284+
lm->rev.bloom_filter_settings = get_bloom_filter_settings(lm->rev.repo);
285+
241286
if (populate_paths_from_revs(lm) < 0)
242287
return error(_("unable to setup last-modified"));
243288

0 commit comments

Comments
 (0)