Skip to content

Commit 897afa4

Browse files
author
Gal Ben David
committed
- removed compile_rules method. it is now possible to add rules at any
time - fixed a concurrency issue with RE2 where it has locked during specific regex patterns - fixed a seg fault bug with libgit2 where access to specific field inside the git_commit object.
1 parent 789f68f commit 897afa4

File tree

5 files changed

+53
-84
lines changed

5 files changed

+53
-84
lines changed

README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,6 @@ grs.add_rule(
8080
match_whitelist_patterns=[],
8181
match_blacklist_patterns=[],
8282
)
83-
# Compiles the rules. Should be called only once after all the rules were added
84-
grs.compile_rules()
8583

8684
# Add file extensions to ignore during the search
8785
grs.add_ignored_file_extension(

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name='PyRepScan',
8-
version='0.4.0',
8+
version='0.4.1',
99
author='Gal Ben David',
1010
author_email='[email protected]',
1111
url='https://github.com/intsights/PyRepScan',

src/git_repository_scanner.cpp

Lines changed: 29 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,6 @@ class GitRepositoryScanner {
3333
this->rules_manager.add_rule(name, match_pattern, match_whitelist_patterns, match_blacklist_patterns);
3434
}
3535

36-
void compile_rules() {
37-
this->rules_manager.compile_rules();
38-
}
39-
4036
void add_ignored_file_extension(
4137
std::string file_extension
4238
) {
@@ -89,23 +85,13 @@ class GitRepositoryScanner {
8985
return;
9086
}
9187

92-
const git_oid * current_commit_id = git_commit_id(current_commit);
93-
char current_commit_id_string[41] = {0};
94-
git_oid_fmt(current_commit_id_string, current_commit_id);
95-
96-
const git_signature * current_commit_author = git_commit_author(current_commit);
97-
std::string current_commit_message = git_commit_message(current_commit);
98-
99-
git_time_t commit_time = git_commit_time(current_commit);
100-
10188
if (current_commit_parent_count == 1) {
10289
git_commit_parent(&parent_commit, current_commit, 0);
10390
git_commit_tree(&current_git_tree, current_commit);
10491
git_commit_tree(&parent_git_tree, parent_commit);
10592

10693
git_diff_tree_to_tree(&diff, git_repo, parent_git_tree, current_git_tree, NULL);
10794

108-
git_commit_free(current_commit);
10995
git_commit_free(parent_commit);
11096
git_tree_free(current_git_tree);
11197
git_tree_free(parent_git_tree);
@@ -114,7 +100,6 @@ class GitRepositoryScanner {
114100

115101
git_diff_tree_to_tree(&diff, git_repo, NULL, current_git_tree, NULL);
116102

117-
git_commit_free(current_commit);
118103
git_tree_free(current_git_tree);
119104
}
120105

@@ -136,25 +121,33 @@ class GitRepositoryScanner {
136121
std::string content((const char *)git_blob_rawcontent(blob));
137122
auto matches = this->rules_manager.scan_content(content);
138123
if (matches.has_value()) {
139-
for (auto & match : matches.value()) {
140-
char new_file_oid[41] = {0};
141-
git_oid_fmt(new_file_oid, &delta->new_file.id);
124+
this->results_mutex.lock();
125+
126+
const git_oid * current_commit_id = git_commit_id(current_commit);
127+
char current_commit_id_string[41] = {0};
128+
git_oid_fmt(current_commit_id_string, current_commit_id);
142129

143-
std::string current_commit_author_name = "";
144-
if (nullptr != current_commit_author->name) {
145-
current_commit_author_name = current_commit_author->name;
146-
}
147-
std::string current_commit_author_email = "";
148-
if (nullptr != current_commit_author->email) {
149-
current_commit_author_email = current_commit_author->email;
150-
}
130+
const git_signature * current_commit_author = git_commit_author(current_commit);
131+
std::string current_commit_message = git_commit_message(current_commit);
151132

152-
this->results_mutex.lock();
133+
char new_file_oid[41] = {0};
134+
git_oid_fmt(new_file_oid, &delta->new_file.id);
135+
136+
std::string current_commit_author_name = "";
137+
if (nullptr != current_commit_author->name) {
138+
current_commit_author_name = current_commit_author->name;
139+
}
140+
std::string current_commit_author_email = "";
141+
if (nullptr != current_commit_author->email) {
142+
current_commit_author_email = current_commit_author->email;
143+
}
153144

154-
std::tm * commit_time_tm = std::gmtime(&commit_time);
155-
std::ostringstream commit_time_ss;
156-
commit_time_ss << std::put_time(commit_time_tm, "%FT%T");
145+
git_time_t commit_time = git_commit_time(current_commit);
146+
std::tm * commit_time_tm = std::gmtime(&commit_time);
147+
std::ostringstream commit_time_ss;
148+
commit_time_ss << std::put_time(commit_time_tm, "%FT%T");
157149

150+
for (auto & match : matches.value()) {
158151
results.push_back(
159152
{
160153
{"commit_id", std::string(current_commit_id_string)},
@@ -168,14 +161,15 @@ class GitRepositoryScanner {
168161
{"match", match["match"]},
169162
}
170163
);
171-
172-
this->results_mutex.unlock();
173164
}
165+
166+
this->results_mutex.unlock();
174167
}
175168
git_blob_free(blob);
176169
}
177170
}
178171

172+
git_commit_free(current_commit);
179173
git_diff_free(diff);
180174
}
181175

@@ -210,6 +204,8 @@ class GitRepositoryScanner {
210204
tf::Executor executor;
211205
executor.run(taskflow).wait();
212206

207+
git_repository_free(git_repo);
208+
213209
return results;
214210
}
215211

@@ -264,11 +260,6 @@ PYBIND11_MODULE(pyrepscan, m) {
264260
pybind11::arg("match_whitelist_patterns"),
265261
pybind11::arg("match_blacklist_patterns")
266262
)
267-
.def(
268-
"compile_rules",
269-
&RulesManager::compile_rules,
270-
""
271-
)
272263
.def(
273264
"add_ignored_file_extension",
274265
&RulesManager::add_ignored_file_extension,
@@ -313,15 +304,10 @@ PYBIND11_MODULE(pyrepscan, m) {
313304
pybind11::arg("repository_path"),
314305
pybind11::arg("branch_glob_pattern")
315306
)
316-
.def(
317-
"compile_rules",
318-
&GitRepositoryScanner::compile_rules,
319-
"Compile all the added rules to make them available for a scan.\nCall this function after you finished adding all the rules"
320-
)
321307
.def(
322308
"add_rule",
323309
&GitRepositoryScanner::add_rule,
324-
"Adding a rule to the list of rules.\nAfter a compile_rules call, no more rules can be added.",
310+
"Adding a rule to the list of rules.",
325311
pybind11::arg("name"),
326312
pybind11::arg("match_pattern"),
327313
pybind11::arg("match_whitelist_patterns"),

src/rules_manager.hpp

Lines changed: 23 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,10 @@ class RulesManager {
7171
this->match_whitelist_pattern_set.push_back(match_whitelists_pattern_set);
7272
}
7373

74-
this->match_pattern_set.Add(match_pattern, NULL);
7574
this->match_patterns.push_back(std::make_shared<re2::RE2>(match_pattern));
7675
this->rule_names.push_back(name);
7776
}
7877

79-
void compile_rules() {
80-
this->match_pattern_set.Compile();
81-
}
82-
8378
void add_ignored_file_extension(
8479
const std::string &file_extension
8580
) {
@@ -114,39 +109,37 @@ class RulesManager {
114109
inline std::optional<std::vector<std::map<std::string, std::string>>> scan_content(
115110
const std::string &content
116111
) {
117-
std::vector<std::int32_t> matched_regexes;
112+
std::vector<std::map<std::string, std::string>> matches;
113+
std::string match;
118114

119-
if (this->match_pattern_set.Match(content, &matched_regexes)) {
120-
std::vector<std::map<std::string, std::string>> matches;
121-
std::string match;
115+
for (std::uint32_t pattern_index = 0; pattern_index < this->match_patterns.size(); ++pattern_index) {
116+
re2::StringPiece input(content);
117+
while (re2::RE2::FindAndConsume(&input, *this->match_patterns[pattern_index], &match)) {
118+
if (this->match_blacklist_pattern_set[pattern_index]->Match(match, NULL)) {
119+
continue;
120+
}
122121

123-
for (const auto & match_index : matched_regexes) {
124-
re2::StringPiece input(content);
125-
while (re2::RE2::FindAndConsume(&input, *this->match_patterns[match_index], &match)) {
126-
if (this->match_blacklist_pattern_set[match_index]->Match(match, NULL)) {
127-
continue;
128-
}
122+
if (
123+
this->match_whitelist_pattern_set[pattern_index] != nullptr &&
124+
!this->match_whitelist_pattern_set[pattern_index]->Match(match, NULL)
125+
) {
126+
continue;
127+
}
129128

130-
if (
131-
this->match_whitelist_pattern_set[match_index] != nullptr &&
132-
!this->match_whitelist_pattern_set[match_index]->Match(match, NULL)
133-
) {
134-
continue;
129+
matches.push_back(
130+
{
131+
{"rule_name", this->rule_names[pattern_index]},
132+
{"match", match},
135133
}
136-
137-
matches.push_back(
138-
{
139-
{"rule_name", this->rule_names[match_index]},
140-
{"match", match},
141-
}
142-
);
143-
}
134+
);
144135
}
136+
}
145137

138+
if (matches.size() == 0) {
139+
return std::nullopt;
140+
} else {
146141
return matches;
147142
}
148-
149-
return std::nullopt;
150143
}
151144

152145
inline std::vector<std::string> check_pattern(
@@ -178,8 +171,4 @@ class RulesManager {
178171
std::vector<std::shared_ptr<re2::RE2::Set>> match_whitelist_pattern_set;
179172
std::vector<std::shared_ptr<re2::RE2::Set>> match_blacklist_pattern_set;
180173
std::vector<std::shared_ptr<re2::RE2>> match_patterns;
181-
re2::RE2::Set match_pattern_set = re2::RE2::Set(
182-
re2::RE2::Quiet,
183-
re2::RE2::UNANCHORED
184-
);
185174
};

tests/test_pyrepscan.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ def test_add_rule_one(
7777
match_whitelist_patterns=[],
7878
match_blacklist_patterns=[],
7979
)
80-
rules_manager.compile_rules()
8180

8281
matches = rules_manager.scan_content(
8382
content='first line\nsecond line\nthird line',
@@ -125,7 +124,6 @@ def test_add_rule_two(
125124
r'line',
126125
],
127126
)
128-
rules_manager.compile_rules()
129127

130128
matches = rules_manager.scan_content(
131129
content='first line\nsecond line\nthird line',
@@ -164,7 +162,6 @@ def test_add_rule_three(
164162
r'line',
165163
],
166164
)
167-
rules_manager.compile_rules()
168165

169166
matches = rules_manager.scan_content(
170167
content='first line\nsecond line\nthird line',
@@ -359,7 +356,6 @@ def test_scan(
359356
match_whitelist_patterns=[],
360357
match_blacklist_patterns=[],
361358
)
362-
grs.compile_rules()
363359

364360
grs.add_ignored_file_extension('py')
365361
grs.add_ignored_file_path('test_')

0 commit comments

Comments
 (0)