Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
};

use color_eyre::eyre::WrapErr as _;
use itertools::Itertools as _;

Check warning on line 10 in src/output.rs

View workflow job for this annotation

GitHub Actions / rustfmt

Diff in /home/runner/work/proxy-scraper-checker/proxy-scraper-checker/src/output.rs

use crate::{
HashMap,
config::Config,
ipdb,
proxy::{Proxy, ProxyType},
utils::is_docker,

Check warning on line 16 in src/output.rs

View workflow job for this annotation

GitHub Actions / rustfmt

Diff in /home/runner/work/proxy-scraper-checker/proxy-scraper-checker/src/output.rs
HashMap,
};

fn compare_timeout(a: &Proxy, b: &Proxy) -> Ordering {
Expand Down Expand Up @@ -70,9 +70,26 @@
if config.output.sort_by_speed {
proxies.sort_unstable_by(compare_timeout);
} else {
proxies.sort_unstable_by(compare_natural);

Check warning on line 73 in src/output.rs

View workflow job for this annotation

GitHub Actions / rustfmt

Diff in /home/runner/work/proxy-scraper-checker/proxy-scraper-checker/src/output.rs
}

// Deduplicate proxies by exit_ip when available. Different proxies can exit via the same IP.
// We do this after sorting so that if sorted by speed, the fastest one is kept.
// Track seen exit_ip per protocol to avoid cross-protocol removal
let mut seen: std::collections::HashSet<(ProxyType, String)> =
std::collections::HashSet::new();
Comment on lines +79 to +80
Copy link

Copilot AI Sep 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The deduplication logic spells out the fully qualified std::collections::HashSet inline, whereas the rest of the file imports the project's HashMap alias through `use crate::{...}`. For consistency with the existing codebase, consider either importing HashSet with a `use` declaration at the top of the file or adding a crate-level HashSet alias that mirrors the existing HashMap one.

Copilot uses AI. Check for mistakes.
let mut deduped = Vec::with_capacity(proxies.len());
for p in proxies {
if let Some(ip) = &p.exit_ip {
let key = (p.protocol, ip.clone());
if !seen.insert(key) {
continue;
}
}
deduped.push(p);
}
Comment on lines +82 to +90
Copy link

Copilot AI Sep 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

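The deduplication logic clones the IP string for each proxy when creating the key. Consider borrowing instead to avoid unnecessary string allocations: change the HashSet type to HashSet<(ProxyType, &str)> and use ip.as_str() instead of ip.clone() in the key. Note that this is not a drop-in change: the loop moves each proxy into `deduped` after inserting its key, and the borrow checker will reject a key that still borrows from a value being moved, so the loop would have to be restructured for the borrowed key to compile.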

Copilot uses AI. Check for mistakes.
proxies = deduped;

if config.output.json.enabled {
let (maybe_asn_db, maybe_geo_db) = tokio::try_join!(
async {
Expand Down
3 changes: 1 addition & 2 deletions src/parsers.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
use std::sync::LazyLock;

pub static PROXY_REGEX: LazyLock<fancy_regex::Regex> = LazyLock::new(|| {
let pattern = r"(?:^|[^0-9A-Za-z])(?:(?P<protocol>https?|socks[45]):\/\/)?(?:(?P<username>[0-9A-Za-z]{1,64}):(?P<password>[0-9A-Za-z]{1,64})@)?(?P<host>[A-Za-z][\-\.A-Za-z]{0,251}[A-Za-z]|[A-Za-z]|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(?:\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])){3}):(?P<port>[0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])(?=[^0-9A-Za-z]|$)";
let pattern = r"(?:^|[^0-9A-Za-z])(?:(?P<protocol>https?|socks[45]):\/\/)?(?:(?P<username>[0-9A-Za-z._~\-]{1,256}):(?P<password>[0-9A-Za-z._~\-]{1,256})@)?(?P<host>[A-Za-z][\-\.A-Za-z]{0,251}[A-Za-z]|[A-Za-z]|(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(?:\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])){3}):(?P<port>[0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])(?=[^0-9A-Za-z]|$)";
Copy link

Copilot AI Sep 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The regex now allows hyphens in usernames and passwords. In regex character classes, a hyphen must be escaped or placed at the beginning or end of the class to avoid being interpreted as a range operator. The new class [0-9A-Za-z._~\-] already satisfies this: the hyphen is both escaped and the last character, so it matches a literal hyphen. The escape is therefore redundant; [0-9A-Za-z._~-] is an equivalent, slightly simpler spelling if you prefer to drop it.

Copilot uses AI. Check for mistakes.
fancy_regex::RegexBuilder::new(pattern)
.backtrack_limit(usize::MAX)
.build()
.unwrap()
});

static IPV4_REGEX: LazyLock<fancy_regex::Regex> = LazyLock::new(|| {
let pattern = r"^\s*(?P<host>(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])(?:\.(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])){3})(?::(?:[0-9]|[1-9][0-9]{1,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5]))?\s*$";
fancy_regex::Regex::new(pattern).unwrap()
Expand Down
Loading