From cf59306acf47555a0a9fbba48882dd75cb2d9e4f Mon Sep 17 00:00:00 2001 From: Ax333l Date: Mon, 16 Jun 2025 17:12:17 +0200 Subject: [PATCH 01/17] implement fcrdns challenge --- data/crawlers/bingbot.yaml | 35 +--- data/crawlers/googlebot.yaml | 264 +------------------------------ data/crawlers/mojeekbot.yaml | 4 +- data/crawlers/qwantbot.yaml | 6 +- internal/fcrdns/fcrdns.go | 107 +++++++++++++ internal/fcrdns/fcrdns_test.go | 97 ++++++++++++ lib/anubis.go | 22 ++- lib/config.go | 2 + lib/policy/bot.go | 10 +- lib/policy/config/config.go | 21 ++- lib/policy/config/config_test.go | 19 +++ lib/policy/policy.go | 9 ++ 12 files changed, 291 insertions(+), 305 deletions(-) create mode 100644 internal/fcrdns/fcrdns.go create mode 100644 internal/fcrdns/fcrdns_test.go diff --git a/data/crawlers/bingbot.yaml b/data/crawlers/bingbot.yaml index 2f7885dd..2d2ba9f2 100644 --- a/data/crawlers/bingbot.yaml +++ b/data/crawlers/bingbot.yaml @@ -1,34 +1,5 @@ - name: bingbot user_agent_regex: \+http\://www\.bing\.com/bingbot\.htm - action: ALLOW - # https://www.bing.com/toolbox/bingbot.json - remote_addresses: [ - "157.55.39.0/24", - "207.46.13.0/24", - "40.77.167.0/24", - "13.66.139.0/24", - "13.66.144.0/24", - "52.167.144.0/24", - "13.67.10.16/28", - "13.69.66.240/28", - "13.71.172.224/28", - "139.217.52.0/28", - "191.233.204.224/28", - "20.36.108.32/28", - "20.43.120.16/28", - "40.79.131.208/28", - "40.79.186.176/28", - "52.231.148.0/28", - "20.79.107.240/28", - "51.105.67.0/28", - "20.125.163.80/28", - "40.77.188.0/22", - "65.55.210.0/24", - "199.30.24.0/23", - "40.77.202.0/24", - "40.77.139.0/25", - "20.74.197.0/28", - "20.15.133.160/27", - "40.77.177.0/24", - "40.77.178.0/23" - ] + action: FCRDNS + # https://www.bing.com/webmasters/help/how-to-verify-bingbot-3905dc26 + domain_regex: \.search\.msn\.com$ diff --git a/data/crawlers/googlebot.yaml b/data/crawlers/googlebot.yaml index f1735126..28ff64ba 100644 --- a/data/crawlers/googlebot.yaml +++ b/data/crawlers/googlebot.yaml @@ -1,263 +1,5 @@ - name: googlebot user_agent_regex: \+http\://www\.google\.com/bot\.html - action: ALLOW - # https://developers.google.com/static/search/apis/ipranges/googlebot.json - remote_addresses: [ - "2001:4860:4801:10::/64", - "2001:4860:4801:11::/64", - "2001:4860:4801:12::/64", - "2001:4860:4801:13::/64", - "2001:4860:4801:14::/64", - "2001:4860:4801:15::/64", - "2001:4860:4801:16::/64", - "2001:4860:4801:17::/64", - "2001:4860:4801:18::/64", - "2001:4860:4801:19::/64", - "2001:4860:4801:1a::/64", - "2001:4860:4801:1b::/64", - "2001:4860:4801:1c::/64", - "2001:4860:4801:1d::/64", - "2001:4860:4801:1e::/64", - "2001:4860:4801:1f::/64", - "2001:4860:4801:20::/64", - "2001:4860:4801:21::/64", - "2001:4860:4801:22::/64", - "2001:4860:4801:23::/64", - "2001:4860:4801:24::/64", - "2001:4860:4801:25::/64", - "2001:4860:4801:26::/64", - "2001:4860:4801:27::/64", - "2001:4860:4801:28::/64", - "2001:4860:4801:29::/64", - "2001:4860:4801:2::/64", - "2001:4860:4801:2a::/64", - "2001:4860:4801:2b::/64", - "2001:4860:4801:2c::/64", - "2001:4860:4801:2d::/64", - "2001:4860:4801:2e::/64", - "2001:4860:4801:2f::/64", - "2001:4860:4801:31::/64", - "2001:4860:4801:32::/64", - "2001:4860:4801:33::/64", - "2001:4860:4801:34::/64", - "2001:4860:4801:35::/64", - "2001:4860:4801:36::/64", - "2001:4860:4801:37::/64", - "2001:4860:4801:38::/64", - "2001:4860:4801:39::/64", - "2001:4860:4801:3a::/64", - "2001:4860:4801:3b::/64", - "2001:4860:4801:3c::/64", - "2001:4860:4801:3d::/64", - "2001:4860:4801:3e::/64", - "2001:4860:4801:40::/64", - "2001:4860:4801:41::/64", - "2001:4860:4801:42::/64", - "2001:4860:4801:43::/64", - "2001:4860:4801:44::/64", - "2001:4860:4801:45::/64", - "2001:4860:4801:46::/64", - "2001:4860:4801:47::/64", - "2001:4860:4801:48::/64", - "2001:4860:4801:49::/64", - "2001:4860:4801:4a::/64", - "2001:4860:4801:4b::/64", - "2001:4860:4801:4c::/64", - "2001:4860:4801:50::/64", - "2001:4860:4801:51::/64", - "2001:4860:4801:52::/64", - "2001:4860:4801:53::/64", - "2001:4860:4801:54::/64", - "2001:4860:4801:55::/64", - "2001:4860:4801:56::/64", - "2001:4860:4801:60::/64", - "2001:4860:4801:61::/64", - "2001:4860:4801:62::/64", - "2001:4860:4801:63::/64", - "2001:4860:4801:64::/64", - "2001:4860:4801:65::/64", - "2001:4860:4801:66::/64", - "2001:4860:4801:67::/64", - "2001:4860:4801:68::/64", - "2001:4860:4801:69::/64", - "2001:4860:4801:6a::/64", - "2001:4860:4801:6b::/64", - "2001:4860:4801:6c::/64", - "2001:4860:4801:6d::/64", - "2001:4860:4801:6e::/64", - "2001:4860:4801:6f::/64", - "2001:4860:4801:70::/64", - "2001:4860:4801:71::/64", - "2001:4860:4801:72::/64", - "2001:4860:4801:73::/64", - "2001:4860:4801:74::/64", - "2001:4860:4801:75::/64", - "2001:4860:4801:76::/64", - "2001:4860:4801:77::/64", - "2001:4860:4801:78::/64", - "2001:4860:4801:79::/64", - "2001:4860:4801:80::/64", - "2001:4860:4801:81::/64", - "2001:4860:4801:82::/64", - "2001:4860:4801:83::/64", - "2001:4860:4801:84::/64", - "2001:4860:4801:85::/64", - "2001:4860:4801:86::/64", - "2001:4860:4801:87::/64", - "2001:4860:4801:88::/64", - "2001:4860:4801:90::/64", - "2001:4860:4801:91::/64", - "2001:4860:4801:92::/64", - "2001:4860:4801:93::/64", - "2001:4860:4801:94::/64", - "2001:4860:4801:95::/64", - "2001:4860:4801:96::/64", - "2001:4860:4801:a0::/64", - "2001:4860:4801:a1::/64", - "2001:4860:4801:a2::/64", - "2001:4860:4801:a3::/64", - "2001:4860:4801:a4::/64", - "2001:4860:4801:a5::/64", - "2001:4860:4801:c::/64", - "2001:4860:4801:f::/64", - "192.178.5.0/27", - "192.178.6.0/27", - "192.178.6.128/27", - "192.178.6.160/27", - "192.178.6.192/27", - "192.178.6.32/27", - "192.178.6.64/27", - "192.178.6.96/27", - "34.100.182.96/28", - "34.101.50.144/28", - "34.118.254.0/28", - "34.118.66.0/28", - "34.126.178.96/28", - "34.146.150.144/28", - "34.147.110.144/28", - "34.151.74.144/28", - "34.152.50.64/28", - "34.154.114.144/28", - "34.155.98.32/28", - "34.165.18.176/28", - "34.175.160.64/28", - "34.176.130.16/28", - "34.22.85.0/27", - "34.64.82.64/28", - "34.65.242.112/28", - "34.80.50.80/28", - "34.88.194.0/28", - "34.89.10.80/28", - "34.89.198.80/28", - "34.96.162.48/28", - "35.247.243.240/28", - "66.249.64.0/27", - "66.249.64.128/27", - "66.249.64.160/27", - "66.249.64.224/27", - "66.249.64.32/27", - "66.249.64.64/27", - "66.249.64.96/27", - "66.249.65.0/27", - "66.249.65.128/27", - "66.249.65.160/27", - "66.249.65.192/27", - "66.249.65.224/27", - "66.249.65.32/27", - "66.249.65.64/27", - "66.249.65.96/27", - "66.249.66.0/27", - "66.249.66.128/27", - "66.249.66.160/27", - "66.249.66.192/27", - "66.249.66.224/27", - "66.249.66.32/27", - "66.249.66.64/27", - "66.249.66.96/27", - "66.249.68.0/27", - "66.249.68.128/27", - "66.249.68.32/27", - "66.249.68.64/27", - "66.249.68.96/27", - "66.249.69.0/27", - "66.249.69.128/27", - "66.249.69.160/27", - "66.249.69.192/27", - "66.249.69.224/27", - "66.249.69.32/27", - "66.249.69.64/27", - "66.249.69.96/27", - "66.249.70.0/27", - "66.249.70.128/27", - "66.249.70.160/27", - "66.249.70.192/27", - "66.249.70.224/27", - "66.249.70.32/27", - "66.249.70.64/27", - "66.249.70.96/27", - "66.249.71.0/27", - "66.249.71.128/27", - "66.249.71.160/27", - "66.249.71.192/27", - "66.249.71.224/27", - "66.249.71.32/27", - "66.249.71.64/27", - "66.249.71.96/27", - "66.249.72.0/27", - "66.249.72.128/27", - "66.249.72.160/27", - "66.249.72.192/27", - "66.249.72.224/27", - "66.249.72.32/27", - "66.249.72.64/27", - "66.249.72.96/27", - "66.249.73.0/27", - "66.249.73.128/27", - "66.249.73.160/27", - "66.249.73.192/27", - "66.249.73.224/27", - "66.249.73.32/27", - "66.249.73.64/27", - "66.249.73.96/27", - "66.249.74.0/27", - "66.249.74.128/27", - "66.249.74.160/27", - "66.249.74.192/27", - "66.249.74.32/27", - "66.249.74.64/27", - "66.249.74.96/27", - "66.249.75.0/27", - "66.249.75.128/27", - "66.249.75.160/27", - "66.249.75.192/27", - "66.249.75.224/27", - "66.249.75.32/27", - "66.249.75.64/27", - "66.249.75.96/27", - "66.249.76.0/27", - "66.249.76.128/27", - "66.249.76.160/27", - "66.249.76.192/27", - "66.249.76.224/27", - "66.249.76.32/27", - "66.249.76.64/27", - "66.249.76.96/27", - "66.249.77.0/27", - "66.249.77.128/27", - "66.249.77.160/27", - "66.249.77.192/27", - "66.249.77.224/27", - "66.249.77.32/27", - "66.249.77.64/27", - "66.249.77.96/27", - "66.249.78.0/27", - "66.249.78.32/27", - "66.249.79.0/27", - "66.249.79.128/27", - "66.249.79.160/27", - "66.249.79.192/27", - "66.249.79.224/27", - "66.249.79.32/27", - "66.249.79.64/27", - "66.249.79.96/27" - ] + action: FCRDNS + # https://developers.google.com/search/docs/crawling-indexing/verifying-googlebot + domain_regex: \.googlebot\.com$ diff --git a/data/crawlers/mojeekbot.yaml b/data/crawlers/mojeekbot.yaml index 40661203..640296a4 100644 --- a/data/crawlers/mojeekbot.yaml +++ b/data/crawlers/mojeekbot.yaml @@ -1,5 +1,5 @@ - name: mojeekbot user_agent_regex: \+https\://www\.mojeek\.com/bot\.html - action: ALLOW + action: FCRDNS # https://www.mojeek.com/bot.html - remote_addresses: [ "5.102.173.71/32" ] \ No newline at end of file + domain_regex: \.mojeek\.com$ diff --git a/data/crawlers/qwantbot.yaml b/data/crawlers/qwantbot.yaml index a4021549..ac1a64c0 100644 --- a/data/crawlers/qwantbot.yaml +++ b/data/crawlers/qwantbot.yaml @@ -1,5 +1,5 @@ - name: qwantbot user_agent_regex: \+https\://help\.qwant\.com/bot/ - action: ALLOW - # https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json - remote_addresses: [ "91.242.162.0/24" ] + action: FCRDNS + # https://help.qwant.com/bot/ + domain_regex: \.qwant\.com diff --git a/internal/fcrdns/fcrdns.go b/internal/fcrdns/fcrdns.go new file mode 100644 index 00000000..d2d8fd01 --- /dev/null +++ b/internal/fcrdns/fcrdns.go @@ -0,0 +1,107 @@ +package fcrdns + +import ( + "context" + "net" + "net/netip" + "regexp" + "strings" + "time" + + "github.com/TecharoHQ/anubis/decaymap" +) + +type FCrDNS struct { + resolver *net.Resolver + forwardLookupCacheV4 *decaymap.Impl[string, []netip.Addr] + forwardLookupCacheV6 *decaymap.Impl[string, []netip.Addr] + reverseLookupCache *decaymap.Impl[string, []string] +} + +func NewFCrDNS() *FCrDNS { + return &FCrDNS{ + resolver: &net.Resolver{}, + forwardLookupCacheV4: decaymap.New[string, []netip.Addr](), + forwardLookupCacheV6: decaymap.New[string, []netip.Addr](), + reverseLookupCache: decaymap.New[string, []string](), + } +} + +func (f *FCrDNS) Check(ip string, allowedDomainRegex *regexp.Regexp) (bool, error) { + clientAddr, err := netip.ParseAddr(ip) + if err != nil { + return false, err + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + hosts, err := f.reverseLookup(ctx, ip) + if err != nil { + return false, err + } + + for _, host := range hosts { + if !allowedDomainRegex.MatchString(host) { + continue + } + + addresses, err := f.forwardLookup(ctx, host, clientAddr.Is6()) + if err != nil { + return false, err + } + for _, addr := range addresses { + if addr == clientAddr { + return true, nil + } + } + } + + return false, nil +} + +func (f *FCrDNS) Cleanup() { + f.forwardLookupCacheV4.Cleanup() + f.forwardLookupCacheV6.Cleanup() + f.reverseLookupCache.Cleanup() +} + +func (f *FCrDNS) reverseLookup(ctx context.Context, addr string) ([]string, error) { + if result, ok := f.reverseLookupCache.Get(addr); ok { + return result, nil + } + + rawHosts, err := f.resolver.LookupAddr(ctx, addr) + if err != nil { + return []string{}, err + } + + hosts := []string{} + for _, host := range rawHosts { + hosts = append(hosts, strings.TrimSuffix(host, ".")) + } + + f.reverseLookupCache.Set(addr, hosts, time.Hour) + return hosts, nil +} + +func (f *FCrDNS) forwardLookup(ctx context.Context, host string, ipv6 bool) ([]netip.Addr, error) { + cache := f.forwardLookupCacheV4 + network := "ip4" + if ipv6 { + cache = f.forwardLookupCacheV6 + network = "ip6" + } + + if result, ok := cache.Get(host); ok { + return result, nil + } + + result, err := f.resolver.LookupNetIP(ctx, network, host) + if err != nil { + return []netip.Addr{}, err + } + + cache.Set(host, result, time.Hour) + return result, nil +} diff --git a/internal/fcrdns/fcrdns_test.go b/internal/fcrdns/fcrdns_test.go new file mode 100644 index 00000000..04500c71 --- /dev/null +++ b/internal/fcrdns/fcrdns_test.go @@ -0,0 +1,97 @@ +package fcrdns + +import ( + "net/netip" + "regexp" + "testing" + "time" +) + +func TestFCrDNSCheck(t *testing.T) { + localhostRegex := regexp.MustCompile("^localhost$") + localhost := netip.MustParseAddr("127.0.0.1") + localhostV6 := netip.MustParseAddr("::1") + + tests := []struct { + name string + clientIp string + host string + hostIp netip.Addr + regexp *regexp.Regexp + expected bool + }{ + { + name: "IPv4", + clientIp: "127.0.0.1", + host: "localhost", + hostIp: localhost, + regexp: localhostRegex, + expected: true, + }, + { + name: "IPv6", + clientIp: "::1", + host: "localhost", + hostIp: localhostV6, + regexp: localhostRegex, + expected: true, + }, + { + name: "No regexp match", + clientIp: "127.0.0.1", + host: "localhost", + hostIp: localhost, + regexp: regexp.MustCompile("^remotehost$"), + expected: false, + }, + { + name: "No reverse DNS record", + clientIp: "127.0.0.1", + regexp: localhostRegex, + expected: false, + }, + { + name: "No forward DNS record", + clientIp: "127.0.0.1", + host: "localhost", + regexp: localhostRegex, + expected: false, + }, + { + name: "IP mismatch", + clientIp: "127.0.0.1", + host: "localhost", + hostIp: netip.IPv4Unspecified(), + regexp: localhostRegex, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := NewFCrDNS() + f.resolver = nil // There shouldn't be any network real requests for these tests. + + if tt.host != "" { + f.reverseLookupCache.Set(tt.clientIp, []string{tt.host}, time.Hour) + + if !tt.hostIp.IsValid() { + f.forwardLookupCacheV4.Set(tt.host, []netip.Addr{}, time.Hour) + f.forwardLookupCacheV6.Set(tt.host, []netip.Addr{}, time.Hour) + } else if tt.hostIp.Is6() { + f.forwardLookupCacheV6.Set(tt.host, []netip.Addr{tt.hostIp}, time.Hour) + } else { + f.forwardLookupCacheV4.Set(tt.host, []netip.Addr{tt.hostIp}, time.Hour) + } + } else { + f.reverseLookupCache.Set(tt.clientIp, []string{}, time.Hour) + } + + if ok, err := f.Check(tt.clientIp, tt.regexp); err != nil { + t.Errorf("unexpected error: %v", err) + } else if ok != tt.expected { + t.Errorf("expected: %t, got: %t", tt.expected, ok) + } + }) + } +} diff --git a/lib/anubis.go b/lib/anubis.go index bc142849..58b614aa 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -25,6 +25,7 @@ import ( "github.com/TecharoHQ/anubis/internal" "github.com/TecharoHQ/anubis/internal/dnsbl" "github.com/TecharoHQ/anubis/internal/ogtags" + "github.com/TecharoHQ/anubis/internal/fcrdns" "github.com/TecharoHQ/anubis/lib/policy" "github.com/TecharoHQ/anubis/lib/policy/config" ) @@ -66,6 +67,7 @@ type Server struct { opts Options DNSBLCache *decaymap.Impl[string, dnsbl.DroneBLResponse] OGTags *ogtags.OGTagCache + FCrDNS *fcrdns.FCrDNS } func (s *Server) challengeFor(r *http.Request, difficulty int) string { @@ -112,7 +114,7 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS return } - if s.checkRules(w, r, cr, lg, rule) { + if s.checkRules(w, r, cr, lg, rule, ip) { return } @@ -153,7 +155,7 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS s.ServeHTTPNext(w, r) } -func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.CheckResult, lg *slog.Logger, rule *policy.Bot) bool { +func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.CheckResult, lg *slog.Logger, rule *policy.Bot, ip string) bool { switch cr.Rule { case config.RuleAllow: lg.Debug("allowing traffic to origin (explicit)") @@ -174,6 +176,21 @@ func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.Ch return true case config.RuleChallenge: lg.Debug("challenge requested") + case config.RuleDns: + lg.Debug("performing reverse dns check") + + if passed, err := s.FCrDNS.Check(ip, rule.DomainRegex); err != nil { + lg.Error("got error while performing reverse dns check", "err", err) + s.respondWithError(w, r, fmt.Sprintf("Could not verify reverse DNS: %s", err.Error())) + } else if passed { + lg.Debug("allowing traffic to origin (reverse dns check passed)") + s.ServeHTTPNext(w, r) + } else { + lg.Debug("denying traffic (reverse dns check failed)") + s.respondWithStatus(w, r, "Access Denied: You appear to be impersonating a bot. Try disabling any User-Agent switchers", http.StatusOK) + } + + return true case config.RuleBenchmark: lg.Debug("serving benchmark page") s.RenderBench(w, r) @@ -420,4 +437,5 @@ func (s *Server) check(r *http.Request) (policy.CheckResult, *policy.Bot, error) func (s *Server) CleanupDecayMap() { s.DNSBLCache.Cleanup() s.OGTags.Cleanup() + s.FCrDNS.Cleanup() } diff --git a/lib/config.go b/lib/config.go index 44b64791..2ccdf3b5 100644 --- a/lib/config.go +++ b/lib/config.go @@ -16,6 +16,7 @@ import ( "github.com/TecharoHQ/anubis/decaymap" "github.com/TecharoHQ/anubis/internal" "github.com/TecharoHQ/anubis/internal/dnsbl" + "github.com/TecharoHQ/anubis/internal/fcrdns" "github.com/TecharoHQ/anubis/internal/ogtags" "github.com/TecharoHQ/anubis/lib/policy" "github.com/TecharoHQ/anubis/web" @@ -91,6 +92,7 @@ func New(opts Options) (*Server, error) { opts: opts, DNSBLCache: decaymap.New[string, dnsbl.DroneBLResponse](), OGTags: ogtags.NewOGTagCache(opts.Target, opts.OGPassthrough, opts.OGTimeToLive, opts.OGCacheConsidersHost), + FCrDNS: fcrdns.NewFCrDNS(), } mux := http.NewServeMux() diff --git a/lib/policy/bot.go b/lib/policy/bot.go index 3a436557..954e98f0 100644 --- a/lib/policy/bot.go +++ b/lib/policy/bot.go @@ -2,16 +2,18 @@ package policy import ( "fmt" + "regexp" "github.com/TecharoHQ/anubis/internal" "github.com/TecharoHQ/anubis/lib/policy/config" ) type Bot struct { - Name string - Action config.Rule - Challenge *config.ChallengeRules - Rules Checker + Name string + Action config.Rule + Challenge *config.ChallengeRules + Rules Checker + DomainRegex *regexp.Regexp } func (b Bot) Hash() string { diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go index c670baca..f879a1f8 100644 --- a/lib/policy/config/config.go +++ b/lib/policy/config/config.go @@ -23,6 +23,7 @@ var ( ErrInvalidUserAgentRegex = errors.New("config.Bot: invalid user agent regex") ErrInvalidPathRegex = errors.New("config.Bot: invalid path regex") ErrInvalidHeadersRegex = errors.New("config.Bot: invalid headers regex") + ErrInvalidDomainRegex = errors.New("config.Bot: invalid domain regex") ErrInvalidCIDR = errors.New("config.Bot: invalid CIDR") ErrRegexEndsWithNewline = errors.New("config.Bot: regular expression ends with newline (try >- instead of > in yaml)") ErrInvalidImportStatement = errors.New("config.ImportStatement: invalid source file") @@ -37,6 +38,7 @@ const ( RuleAllow Rule = "ALLOW" RuleDeny Rule = "DENY" RuleChallenge Rule = "CHALLENGE" + RuleDns Rule = "FCRDNS" RuleBenchmark Rule = "DEBUG_BENCHMARK" ) @@ -56,6 +58,7 @@ type BotConfig struct { Action Rule `json:"action"` RemoteAddr []string `json:"remote_addresses"` Challenge *ChallengeRules `json:"challenge,omitempty"` + DomainRegex *string `json:"domain_regex"` } func (b BotConfig) Zero() bool { @@ -67,6 +70,7 @@ func (b BotConfig) Zero() bool { b.Action != "", len(b.RemoteAddr) != 0, b.Challenge != nil, + b.DomainRegex != nil, } { if cond { return false @@ -134,9 +138,21 @@ func (b BotConfig) Valid() error { } } } + if b.Action == RuleDns { + if b.DomainRegex == nil { + errs = append(errs, ErrDnsTestNoDomains) + } else if _, err := regexp.Compile(*b.DomainRegex); err != nil { + errs = append(errs, ErrInvalidDomainRegex, err) + } + if b.UserAgentRegex == nil { + errs = append(errs, ErrDnsTestNoUserAgent) + } + } else if b.DomainRegex != nil { + errs = append(errs, ErrDnsTestInvalidAction) + } switch b.Action { - case RuleAllow, RuleBenchmark, RuleChallenge, RuleDeny: + case RuleAllow, RuleBenchmark, RuleChallenge, RuleDns, RuleDeny: // okay default: errs = append(errs, fmt.Errorf("%w: %q", ErrUnknownAction, b.Action)) @@ -165,6 +181,9 @@ var ( ErrChallengeRuleHasWrongAlgorithm = errors.New("config.Bot.ChallengeRules: algorithm is invalid") ErrChallengeDifficultyTooLow = errors.New("config.Bot.ChallengeRules: difficulty is too low (must be >= 1)") ErrChallengeDifficultyTooHigh = errors.New("config.Bot.ChallengeRules: difficulty is too high (must be <= 64)") + ErrDnsTestInvalidAction = errors.New("config.Bot.DnsTest: specifying domain regex is only supported for FCRDNS rules") + ErrDnsTestNoDomains = errors.New("config.Bot.DnsTest: FCRDNS rules must specify a domain regex") + ErrDnsTestNoUserAgent = errors.New("config.Bot.DnsTest: FCRDNS rules must specify a user agent regex") ) func (cr ChallengeRules) Valid() error { diff --git a/lib/policy/config/config_test.go b/lib/policy/config/config_test.go index 86c490ee..05532c52 100644 --- a/lib/policy/config/config_test.go +++ b/lib/policy/config/config_test.go @@ -182,6 +182,25 @@ func TestBotValid(t *testing.T) { }, err: nil, }, + { + name: "no user agent regex", + bot: BotConfig{ + Name: "search-bot", + Action: RuleAllow, + Domains: []string{"example.com"}, + }, + err: ErrDnsTestNoUserAgent, + }, + { + name: "reverse dns", + bot: BotConfig{ + Name: "search-bot", + Action: RuleAllow, + UserAgentRegex: p("SearchBot"), + Domains: []string{"example.com"}, + }, + err: nil, + }, } for _, cs := range tests { diff --git a/lib/policy/policy.go b/lib/policy/policy.go index 7c45ff6f..97b382c7 100644 --- a/lib/policy/policy.go +++ b/lib/policy/policy.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "io" + "regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -92,6 +93,14 @@ func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedCon } } + if b.DomainRegex != nil { + if rex, err := regexp.Compile(*b.DomainRegex); err != nil { + validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s domain regex: %w", b.Name, err)) + } else { + parsedBot.DomainRegex = rex + } + } + if b.Challenge == nil { parsedBot.Challenge = &config.ChallengeRules{ Difficulty: defaultDifficulty, From 6d71ea217d9fda829cb8047c57e73b1b3eb970c6 Mon Sep 17 00:00:00 2001 From: Ax333l Date: Mon, 16 Jun 2025 20:56:57 +0200 Subject: [PATCH 02/17] use algorithm instead --- data/crawlers/applebot.yaml | 4 +++- data/crawlers/bingbot.yaml | 2 ++ data/crawlers/googlebot.yaml | 2 ++ data/crawlers/mojeekbot.yaml | 2 ++ data/crawlers/qwantbot.yaml | 4 +++- lib/config.go | 3 ++- lib/policy/config/config.go | 22 ++++++++++-------- lib/policy/config/config_test.go | 40 +++++++++++++++++++++++++++----- 8 files changed, 60 insertions(+), 19 deletions(-) diff --git a/data/crawlers/applebot.yaml b/data/crawlers/applebot.yaml index b29d7267..65a47769 100644 --- a/data/crawlers/applebot.yaml +++ b/data/crawlers/applebot.yaml @@ -2,5 +2,7 @@ # https://support.apple.com/en-us/119829 - name: applebot user_agent_regex: Applebot - action: CHALLENGE domain_regex: \.applebot\.apple\.com$ + action: CHALLENGE + challenge: + algorithm: "fcrdns" diff --git a/data/crawlers/bingbot.yaml b/data/crawlers/bingbot.yaml index 700421fe..5dd2d778 100644 --- a/data/crawlers/bingbot.yaml +++ b/data/crawlers/bingbot.yaml @@ -3,3 +3,5 @@ action: CHALLENGE # https://www.bing.com/webmasters/help/how-to-verify-bingbot-3905dc26 domain_regex: \.search\.msn\.com$ + challenge: + algorithm: "fcrdns" diff --git a/data/crawlers/googlebot.yaml b/data/crawlers/googlebot.yaml index e280c223..ee8cac38 100644 --- a/data/crawlers/googlebot.yaml +++ b/data/crawlers/googlebot.yaml @@ -3,3 +3,5 @@ action: CHALLENGE # https://developers.google.com/search/docs/crawling-indexing/verifying-googlebot domain_regex: \.googlebot\.com$ + challenge: + algorithm: "fcrdns" diff --git a/data/crawlers/mojeekbot.yaml b/data/crawlers/mojeekbot.yaml index e3582226..f26fa35b 100644 --- a/data/crawlers/mojeekbot.yaml +++ b/data/crawlers/mojeekbot.yaml @@ -3,3 +3,5 @@ action: CHALLENGE # https://www.mojeek.com/bot.html domain_regex: \.mojeek\.com$ + challenge: + algorithm: "fcrdns" diff --git a/data/crawlers/qwantbot.yaml b/data/crawlers/qwantbot.yaml index 42eb1246..1d867443 100644 --- a/data/crawlers/qwantbot.yaml +++ b/data/crawlers/qwantbot.yaml @@ -2,4 +2,6 @@ user_agent_regex: \+https\://help\.qwant\.com/bot/ action: CHALLENGE # https://help.qwant.com/bot/ - domain_regex: \.qwant\.com + domain_regex: \.qwant\.com$ + challenge: + algorithm: "fcrdns" diff --git a/lib/config.go b/lib/config.go index a5e1273a..ef503852 100644 --- a/lib/config.go +++ b/lib/config.go @@ -21,6 +21,7 @@ import ( "github.com/TecharoHQ/anubis/internal/ogtags" "github.com/TecharoHQ/anubis/lib/challenge" "github.com/TecharoHQ/anubis/lib/policy" + "github.com/TecharoHQ/anubis/lib/policy/config" "github.com/TecharoHQ/anubis/web" "github.com/TecharoHQ/anubis/xess" ) @@ -75,7 +76,7 @@ func LoadPoliciesOrDefault(fname string, defaultDifficulty int) (*policy.ParsedC var validationErrs []error for _, b := range anubisPolicy.Bots { - if _, ok := challenge.Get(b.Challenge.Algorithm); !ok { + if _, ok := challenge.Get(b.Challenge.Algorithm); !ok && b.Challenge.Algorithm != config.FCrDNSAlgorithm { validationErrs = append(validationErrs, fmt.Errorf("%w %s", policy.ErrChallengeRuleHasWrongAlgorithm, b.Challenge.Algorithm)) } } diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go index 910a7390..a407a703 100644 --- a/lib/policy/config/config.go +++ b/lib/policy/config/config.go @@ -51,7 +51,7 @@ type BotConfig struct { UserAgentRegex *string `json:"user_agent_regex,omitempty" yaml:"user_agent_regex,omitempty"` PathRegex *string `json:"path_regex,omitempty" yaml:"path_regex,omitempty"` HeadersRegex map[string]string `json:"headers_regex,omitempty" yaml:"headers_regex,omitempty"` - DomainRegex *string `json:"domain_regex" yaml:"domain_regex,omitempty"` + DomainRegex *string `json:"domain_regex,omitempty" yaml:"domain_regex,omitempty"` Expression *ExpressionOrList `json:"expression,omitempty" yaml:"expression,omitempty"` Challenge *ChallengeRules `json:"challenge,omitempty" yaml:"challenge,omitempty"` Weight *Weight `json:"weight,omitempty" yaml:"weight,omitempty"` @@ -142,7 +142,7 @@ func (b *BotConfig) Valid() error { } } } - if b.Action == RuleChallenge && b.Challenge.Algorithm == FCrDNSAlgorithm { + if b.Action == RuleChallenge && b.Challenge != nil && b.Challenge.Algorithm == FCrDNSAlgorithm { if b.DomainRegex == nil { errs = append(errs, ErrChallengeNoDomains) } else if _, err := regexp.Compile(*b.DomainRegex); err != nil { @@ -195,20 +195,22 @@ var ( ErrChallengeRuleHasWrongAlgorithm = errors.New("config.Bot.ChallengeRules: algorithm is invalid") ErrChallengeDifficultyTooLow = errors.New("config.Bot.ChallengeRules: difficulty is too low (must be >= 1)") ErrChallengeDifficultyTooHigh = errors.New("config.Bot.ChallengeRules: difficulty is too high (must be <= 64)") - ErrChallengeDomainUnsupported = errors.New("config.Bot.ChallengeRules: specifying domain regex is only supported for FCRDNS rules") - ErrChallengeNoDomains = errors.New("config.Bot.ChallengeRules: FCRDNS rules must specify a domain regex") - ErrChallengeNoUserAgent = errors.New("config.Bot.ChallengeRules: FCRDNS rules must specify a user agent regex or expression") + ErrChallengeDomainUnsupported = errors.New("config.Bot.ChallengeRules: specifying domain regex is only supported for challenge rules with the \"fcrdns\" algorithm") + ErrChallengeNoDomains = errors.New("config.Bot.ChallengeRules: FCrDNS rules must specify a domain regex") + ErrChallengeNoUserAgent = errors.New("config.Bot.ChallengeRules: FCrDNS rules must specify a user agent regex or expression") ) func (cr ChallengeRules) Valid() error { var errs []error - if cr.Difficulty < 1 { - errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty)) - } + if cr.Algorithm != FCrDNSAlgorithm { + if cr.Difficulty < 1 { + errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty)) + } - if cr.Difficulty > 64 { - errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooHigh, cr.Difficulty)) + if cr.Difficulty > 64 { + errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooHigh, cr.Difficulty)) + } } if len(errs) != 0 { diff --git a/lib/policy/config/config_test.go b/lib/policy/config/config_test.go index c176b918..5b70bf12 100644 --- a/lib/policy/config/config_test.go +++ b/lib/policy/config/config_test.go @@ -171,19 +171,47 @@ func TestBotValid(t *testing.T) { { name: "no user agent regex", bot: BotConfig{ - Name: "search-bot", - Action: RuleAllow, - Domains: []string{"example.com"}, + Name: "search-bot", + Action: RuleChallenge, + DomainRegex: p("example.com"), + Challenge: &ChallengeRules{ + Algorithm: "fcrdns", + }, + }, + err: ErrChallengeNoUserAgent, + }, + { + name: "no domain regex", + bot: BotConfig{ + Name: "search-bot", + Action: RuleChallenge, + UserAgentRegex: p("SearchBot"), + Challenge: &ChallengeRules{ + Algorithm: "fcrdns", + }, }, - err: ErrDnsTestNoUserAgent, + err: ErrChallengeNoDomains, + }, + { + name: "no fcrdnsalgorithm", + bot: BotConfig{ + Name: "search-bot", + Action: RuleChallenge, + UserAgentRegex: p("SearchBot"), + DomainRegex: p("example.com"), + }, + err: ErrChallengeDomainUnsupported, }, { name: "reverse dns", bot: BotConfig{ Name: "search-bot", - Action: RuleAllow, + Action: RuleChallenge, UserAgentRegex: p("SearchBot"), - Domains: []string{"example.com"}, + DomainRegex: p("example.com"), + Challenge: &ChallengeRules{ + Algorithm: "fcrdns", + }, }, err: nil, }, From 1d3ed05711f2f6665d5a694199a9dcfc6d0cdd68 Mon Sep 17 00:00:00 2001 From: Ax333l Date: Tue, 17 Jun 2025 17:12:10 +0200 Subject: [PATCH 03/17] add documentation --- docs/docs/CHANGELOG.md | 1 + .../admin/configuration/challenges/fcrdns.mdx | 23 +++++++++++++++++++ lib/policy/config/config_test.go | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 docs/docs/admin/configuration/challenges/fcrdns.mdx diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index b029cb9e..03f410e5 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Implement a DNS-based challenge method: [`fcrdns`](./admin/configuration/challenges/fcrdns.mdx) ([#431](https://github.com/TecharoHQ/anubis/issues/431)) - Remove the unused `/test-error` endpoint and update the testing endpoint `/make-challenge` to only be enabled in development - Add `--xff-strip-private` flag/envvar to toggle skipping X-Forwarded-For private addresses or not diff --git a/docs/docs/admin/configuration/challenges/fcrdns.mdx b/docs/docs/admin/configuration/challenges/fcrdns.mdx new file mode 100644 index 00000000..75ba05da --- /dev/null +++ b/docs/docs/admin/configuration/challenges/fcrdns.mdx @@ -0,0 +1,23 @@ +# FCrDNS (No JavaScript) + +The `fcrdns` challenge dynamically verifies some legitimate bots using DNS and the client IP address. It works in three steps: + +1. Reverse DNS records for the client IP are looked up. +2. Any records that match the `domain_regex` are queried. +3. The client passes the challenge if the domain has an `A` or `AAAA` record that matches the client IP. + +You will need to do some searching to find out what the reverse DNS of legitimate requests should look like. Use `remote_addresses` instead if the bot you are working with cannot be identified with reverse DNS. +This challenge only makes sense for bots, so all rules using the `fcrdns` method must have a `user_agent_regex` or `expression` to identify them. + +To use it in your Anubis configuration: + +```yaml +# Example rule for Qwantbot: https://help.qwant.com/bot/ +- name: qwantbot + user_agent_regex: >- + Qwantbot + action: CHALLENGE + domain_regex: \.qwant\.com$ # The reverse domain expected for legitimate requests (see the link above) + challenge: + algorithm: fcrdns +``` diff --git a/lib/policy/config/config_test.go b/lib/policy/config/config_test.go index 5b70bf12..7bcb2f64 100644 --- a/lib/policy/config/config_test.go +++ b/lib/policy/config/config_test.go @@ -193,7 +193,7 @@ func TestBotValid(t *testing.T) { err: ErrChallengeNoDomains, }, { - name: "no fcrdnsalgorithm", + name: "no fcrdns algorithm", bot: BotConfig{ Name: "search-bot", Action: RuleChallenge, From c6869b301ae7a65f69d4391cb42e05f4f93da168 Mon Sep 17 00:00:00 2001 From: Ax333l Date: Sun, 22 Jun 2025 19:02:54 +0200 Subject: [PATCH 04/17] implement as a checker instead --- cmd/anubis/main.go | 6 +++++- data/crawlers/applebot.yaml | 4 +--- data/crawlers/bingbot.yaml | 4 +--- data/crawlers/googlebot.yaml | 4 +--- data/crawlers/mojeekbot.yaml | 4 +--- data/crawlers/qwantbot.yaml | 4 +--- internal/fcrdns/context.go | 14 ++++++++++++ lib/anubis.go | 19 +--------------- lib/config.go | 6 +++--- lib/policy/checker.go | 28 ++++++++++++++++++++++++ lib/policy/checker/checker.go | 7 ++---- lib/policy/config/config.go | 28 ++++++------------------ lib/policy/config/config_test.go | 37 -------------------------------- lib/policy/policy.go | 22 +++++++++++-------- 14 files changed, 78 insertions(+), 109 deletions(-) create mode 100644 internal/fcrdns/context.go diff --git a/cmd/anubis/main.go b/cmd/anubis/main.go index bc21473e..b2084933 100644 --- a/cmd/anubis/main.go +++ b/cmd/anubis/main.go @@ -30,6 +30,7 @@ import ( "github.com/TecharoHQ/anubis" "github.com/TecharoHQ/anubis/data" "github.com/TecharoHQ/anubis/internal" + "github.com/TecharoHQ/anubis/internal/fcrdns" "github.com/TecharoHQ/anubis/internal/thoth" libanubis "github.com/TecharoHQ/anubis/lib" botPolicy "github.com/TecharoHQ/anubis/lib/policy" @@ -239,7 +240,8 @@ func main() { } } - ctx := context.Background() + fdns := fcrdns.NewFCrDNS() + ctx := fcrdns.With(context.Background(), fdns) // Thoth configuration switch { @@ -347,6 +349,8 @@ func main() { Target: *target, WebmasterEmail: *webmasterEmail, OGCacheConsidersHost: *ogCacheConsiderHost, + FCrDNS: fdns, + }) if err != nil { log.Fatalf("can't construct libanubis.Server: %v", err) diff --git a/data/crawlers/applebot.yaml b/data/crawlers/applebot.yaml index 65a47769..4add0264 100644 --- a/data/crawlers/applebot.yaml +++ b/data/crawlers/applebot.yaml @@ -3,6 +3,4 @@ - name: applebot user_agent_regex: Applebot domain_regex: \.applebot\.apple\.com$ - action: CHALLENGE - challenge: - algorithm: "fcrdns" + action: ALLOW diff --git a/data/crawlers/bingbot.yaml b/data/crawlers/bingbot.yaml index 5dd2d778..0f3074d3 100644 --- a/data/crawlers/bingbot.yaml +++ b/data/crawlers/bingbot.yaml @@ -1,7 +1,5 @@ - name: bingbot user_agent_regex: \+http\://www\.bing\.com/bingbot\.htm - action: CHALLENGE + action: ALLOW # https://www.bing.com/webmasters/help/how-to-verify-bingbot-3905dc26 domain_regex: \.search\.msn\.com$ - challenge: - algorithm: "fcrdns" diff --git a/data/crawlers/googlebot.yaml b/data/crawlers/googlebot.yaml index ee8cac38..9f9ec8c2 100644 --- a/data/crawlers/googlebot.yaml +++ b/data/crawlers/googlebot.yaml @@ -1,7 +1,5 @@ - name: googlebot user_agent_regex: \+http\://www\.google\.com/bot\.html - action: CHALLENGE + action: ALLOW # https://developers.google.com/search/docs/crawling-indexing/verifying-googlebot domain_regex: \.googlebot\.com$ - challenge: - algorithm: "fcrdns" diff --git a/data/crawlers/mojeekbot.yaml b/data/crawlers/mojeekbot.yaml index f26fa35b..97d26aa8 100644 --- a/data/crawlers/mojeekbot.yaml +++ b/data/crawlers/mojeekbot.yaml @@ -1,7 +1,5 @@ - name: mojeekbot user_agent_regex: \+https\://www\.mojeek\.com/bot\.html - action: CHALLENGE + action: ALLOW # https://www.mojeek.com/bot.html domain_regex: \.mojeek\.com$ - challenge: - algorithm: "fcrdns" diff --git a/data/crawlers/qwantbot.yaml b/data/crawlers/qwantbot.yaml index 1d867443..a65f5352 100644 --- a/data/crawlers/qwantbot.yaml +++ b/data/crawlers/qwantbot.yaml @@ -1,7 +1,5 @@ - name: qwantbot user_agent_regex: \+https\://help\.qwant\.com/bot/ - action: CHALLENGE + action: ALLOW # https://help.qwant.com/bot/ domain_regex: \.qwant\.com$ - challenge: - algorithm: "fcrdns" diff --git a/internal/fcrdns/context.go b/internal/fcrdns/context.go new file mode 100644 index 00000000..61073344 --- /dev/null +++ b/internal/fcrdns/context.go @@ -0,0 +1,14 @@ +package fcrdns + +import "context" + +type ctxKey struct{} + +func With(ctx context.Context, fcrdns *FCrDNS) context.Context { + return context.WithValue(ctx, ctxKey{}, fcrdns) +} + +func FromContext(ctx context.Context) (*FCrDNS, bool) { + cli, ok := ctx.Value(ctxKey{}).(*FCrDNS) + return cli, ok +} diff --git a/lib/anubis.go b/lib/anubis.go index c520b2fc..03e5b452 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -23,8 +23,8 @@ import ( "github.com/TecharoHQ/anubis/decaymap" "github.com/TecharoHQ/anubis/internal" "github.com/TecharoHQ/anubis/internal/dnsbl" - "github.com/TecharoHQ/anubis/internal/ogtags" "github.com/TecharoHQ/anubis/internal/fcrdns" + "github.com/TecharoHQ/anubis/internal/ogtags" "github.com/TecharoHQ/anubis/lib/challenge" "github.com/TecharoHQ/anubis/lib/policy" "github.com/TecharoHQ/anubis/lib/policy/checker" @@ -220,23 +220,6 @@ func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.Ch s.respondWithStatus(w, r, fmt.Sprintf("Access Denied: error code %s", hash), s.policy.StatusCodes.Deny) return true case config.RuleChallenge: - if rule.Challenge.Algorithm == config.FCrDNSAlgorithm { - lg.Debug("performing reverse dns check") - - if passed, err := s.FCrDNS.Check(ip, rule.DomainRegex); err != nil { - lg.Error("got error while performing reverse dns check", "err", err) - s.respondWithError(w, r, fmt.Sprintf("Could not verify reverse DNS: %s", err.Error())) - } else if passed { - lg.Debug("allowing traffic to origin (reverse dns check passed)") - s.ServeHTTPNext(w, r) - } else { - lg.Debug("denying traffic (reverse dns check failed)") - s.respondWithStatus(w, r, "Access Denied: You appear to be impersonating a bot. Try disabling any User-Agent switchers", http.StatusOK) - } - - return true - } - lg.Debug("challenge requested") case config.RuleBenchmark: lg.Debug("serving benchmark page") diff --git a/lib/config.go b/lib/config.go index 2e1a85c6..8c08454d 100644 --- a/lib/config.go +++ b/lib/config.go @@ -22,7 +22,6 @@ import ( "github.com/TecharoHQ/anubis/internal/ogtags" "github.com/TecharoHQ/anubis/lib/challenge" "github.com/TecharoHQ/anubis/lib/policy" - "github.com/TecharoHQ/anubis/lib/policy/config" "github.com/TecharoHQ/anubis/web" "github.com/TecharoHQ/anubis/xess" ) @@ -30,6 +29,7 @@ import ( type Options struct { Next http.Handler Policy *policy.ParsedConfig + FCrDNS *fcrdns.FCrDNS Target string CookieDomain string CookieName string @@ -77,7 +77,7 @@ func LoadPoliciesOrDefault(ctx context.Context, fname string, defaultDifficulty var validationErrs []error for _, b := range anubisPolicy.Bots { - if _, ok := challenge.Get(b.Challenge.Algorithm); !ok && b.Challenge.Algorithm != config.FCrDNSAlgorithm { + if _, ok := challenge.Get(b.Challenge.Algorithm); !ok { validationErrs = append(validationErrs, fmt.Errorf("%w %s", policy.ErrChallengeRuleHasWrongAlgorithm, b.Challenge.Algorithm)) } } @@ -115,7 +115,7 @@ func New(opts Options) (*Server, error) { opts: opts, DNSBLCache: decaymap.New[string, dnsbl.DroneBLResponse](), OGTags: ogtags.NewOGTagCache(opts.Target, opts.OGPassthrough, opts.OGTimeToLive, opts.OGCacheConsidersHost), - FCrDNS: fcrdns.NewFCrDNS(), + FCrDNS: opts.FCrDNS, cookieName: cookieName, } diff --git a/lib/policy/checker.go b/lib/policy/checker.go index 33b58d4f..d87ce373 100644 --- a/lib/policy/checker.go +++ b/lib/policy/checker.go @@ -9,6 +9,7 @@ import ( "strings" "github.com/TecharoHQ/anubis/internal" + "github.com/TecharoHQ/anubis/internal/fcrdns" "github.com/TecharoHQ/anubis/lib/policy/checker" "github.com/yl2chen/cidranger" ) @@ -186,3 +187,30 @@ func NewHeadersChecker(headermap map[string]string) (checker.Impl, error) { return result, nil } + +type FCrDNSChecker struct { + fcrdns *fcrdns.FCrDNS + regexp *regexp.Regexp + hash string +} + +func NewFCrDNSChecker(fcrdns *fcrdns.FCrDNS, domainRexStr string) (checker.Impl, error) { + rex, err := regexp.Compile(strings.TrimSpace(domainRexStr)) + if err != nil { + return nil, fmt.Errorf("%w: regex %s failed parse: %w", ErrMisconfiguration, domainRexStr, err) + } + return &FCrDNSChecker{fcrdns, rex, internal.FastHash(domainRexStr)}, nil +} + +func (fdc *FCrDNSChecker) Check(r *http.Request) (bool, error) { + host := r.Header.Get("X-Real-Ip") + if host == "" { + return false, fmt.Errorf("%w: header X-Real-Ip is not set", ErrMisconfiguration) + } + + return fdc.fcrdns.Check(host, fdc.regexp) +} + +func (fdc *FCrDNSChecker) Hash() string { + return fdc.hash +} diff --git a/lib/policy/checker/checker.go b/lib/policy/checker/checker.go index 1ee276aa..6c4a6c3b 100644 --- a/lib/policy/checker/checker.go +++ b/lib/policy/checker/checker.go @@ -19,15 +19,12 @@ type List []Impl func (l List) Check(r *http.Request) (bool, error) { for _, c := range l { ok, err := c.Check(r) - if err != nil { + if err != nil || !ok { return ok, err } - if ok { - return ok, nil - } } - return false, nil + return true, nil } func (l List) Hash() string { diff --git a/lib/policy/config/config.go b/lib/policy/config/config.go index ca7d71b3..7809df33 100644 --- a/lib/policy/config/config.go +++ b/lib/policy/config/config.go @@ -45,7 +45,6 @@ const ( ) const DefaultAlgorithm = "fast" -const FCrDNSAlgorithm = "fcrdns" type BotConfig struct { UserAgentRegex *string `json:"user_agent_regex,omitempty" yaml:"user_agent_regex,omitempty"` @@ -150,19 +149,11 @@ func (b *BotConfig) Valid() error { } } } - if b.Action == RuleChallenge && b.Challenge != nil && b.Challenge.Algorithm == FCrDNSAlgorithm { - if b.DomainRegex == nil { - errs = append(errs, ErrChallengeNoDomains) - } else if _, err := regexp.Compile(*b.DomainRegex); err != nil { + if b.DomainRegex != nil { + if _, err := regexp.Compile(*b.DomainRegex); err != nil { errs = append(errs, ErrInvalidDomainRegex, err) } - if b.UserAgentRegex == nil && b.Expression == nil { - errs = append(errs, ErrChallengeNoUserAgent) - } - } else if b.DomainRegex != nil { - errs = append(errs, ErrChallengeDomainUnsupported) } - if b.Expression != nil { if err := b.Expression.Valid(); err != nil { errs = append(errs, err) @@ -203,22 +194,17 @@ var ( ErrChallengeRuleHasWrongAlgorithm = errors.New("config.Bot.ChallengeRules: algorithm is invalid") ErrChallengeDifficultyTooLow = errors.New("config.Bot.ChallengeRules: difficulty is too low (must be >= 1)") ErrChallengeDifficultyTooHigh = errors.New("config.Bot.ChallengeRules: difficulty is too high (must be <= 64)") - ErrChallengeDomainUnsupported = errors.New("config.Bot.ChallengeRules: specifying domain regex is only supported for challenge rules with the \"fcrdns\" algorithm") - ErrChallengeNoDomains = errors.New("config.Bot.ChallengeRules: FCrDNS rules must specify a domain regex") - ErrChallengeNoUserAgent = errors.New("config.Bot.ChallengeRules: FCrDNS rules must specify a user agent regex or expression") ) func (cr ChallengeRules) Valid() error { var errs []error - if cr.Algorithm != FCrDNSAlgorithm { - if cr.Difficulty < 1 { - errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty)) - } + if cr.Difficulty < 1 { + errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty)) + } - if cr.Difficulty > 64 { - errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooHigh, cr.Difficulty)) - } + if cr.Difficulty > 64 { + errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooHigh, cr.Difficulty)) } if len(errs) != 0 { diff --git a/lib/policy/config/config_test.go b/lib/policy/config/config_test.go index 7bcb2f64..85876248 100644 --- a/lib/policy/config/config_test.go +++ b/lib/policy/config/config_test.go @@ -168,40 +168,6 @@ func TestBotValid(t *testing.T) { }, err: nil, }, - { - name: "no user agent regex", - bot: BotConfig{ - Name: "search-bot", - Action: RuleChallenge, - DomainRegex: p("example.com"), - Challenge: &ChallengeRules{ - Algorithm: "fcrdns", - }, - }, - err: ErrChallengeNoUserAgent, - }, - { - name: "no domain regex", - bot: BotConfig{ - Name: "search-bot", - Action: RuleChallenge, - UserAgentRegex: p("SearchBot"), - Challenge: &ChallengeRules{ - Algorithm: "fcrdns", - }, - }, - err: ErrChallengeNoDomains, - }, - { - name: "no fcrdns algorithm", - bot: BotConfig{ - Name: "search-bot", - Action: RuleChallenge, - UserAgentRegex: p("SearchBot"), - DomainRegex: p("example.com"), - }, - err: ErrChallengeDomainUnsupported, - }, { name: "reverse dns", bot: BotConfig{ @@ -209,9 +175,6 @@ func TestBotValid(t *testing.T) { Action: RuleChallenge, UserAgentRegex: p("SearchBot"), DomainRegex: p("example.com"), - Challenge: &ChallengeRules{ - Algorithm: "fcrdns", - }, }, err: nil, }, diff --git a/lib/policy/policy.go b/lib/policy/policy.go index d0be8871..8a466ba6 100644 --- a/lib/policy/policy.go +++ b/lib/policy/policy.go @@ -5,9 +5,9 @@ import ( "errors" "fmt" "io" - "regexp" "log/slog" + "github.com/TecharoHQ/anubis/internal/fcrdns" "github.com/TecharoHQ/anubis/internal/thoth" "github.com/TecharoHQ/anubis/lib/policy/checker" "github.com/TecharoHQ/anubis/lib/policy/config" @@ -49,6 +49,7 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic var validationErrs []error tc, hasThothClient := thoth.FromContext(ctx) + fcrdns, hasFCrDNS := fcrdns.FromContext(ctx) result := NewParsedConfig(c) result.DefaultDifficulty = defaultDifficulty @@ -102,14 +103,6 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic } } - if b.DomainRegex != nil { - if rex, err := regexp.Compile(*b.DomainRegex); err != nil { - validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s domain regex: %w", b.Name, err)) - } else { - parsedBot.DomainRegex = rex - } - } - if b.Expression != nil { c, err := NewCELChecker(b.Expression) if err != nil { @@ -119,6 +112,7 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic } } + // These checkers may require network requests and should run last. if b.ASNs != nil { if !hasThothClient { slog.Warn("You have specified a Thoth specific check but you have no Thoth client configured. Please read https://anubis.techaro.lol/docs/admin/thoth for more information", "check", "asn", "settings", b.ASNs) @@ -137,6 +131,16 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic cl = append(cl, tc.GeoIPCheckerFor(b.GeoIP.Countries)) } + if b.DomainRegex != nil { + if !hasFCrDNS { + validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s: no FCrDNS client in the context. This is a bug", b.Name)) + } else if c, err := NewFCrDNSChecker(fcrdns, *b.DomainRegex); err != nil { + validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s domain regex: %w", b.Name, err)) + } else { + cl = append(cl, c) + } + } + if b.Challenge == nil { parsedBot.Challenge = &config.ChallengeRules{ Difficulty: defaultDifficulty, From 17cfb245d3dbf37e9c7f3f195974d629ac91ce05 Mon Sep 17 00:00:00 2001 From: Ax333l Date: Sun, 22 Jun 2025 21:51:58 +0200 Subject: [PATCH 05/17] cleanup --- lib/anubis.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/anubis.go b/lib/anubis.go index 02ae0f1d..530da4f0 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -131,7 +131,7 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS return } - if s.checkRules(w, r, cr, lg, rule, ip) { + if s.checkRules(w, r, cr, lg, rule) { return } @@ -195,7 +195,7 @@ func (s *Server) maybeReverseProxy(w http.ResponseWriter, r *http.Request, httpS s.ServeHTTPNext(w, r) } -func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.CheckResult, lg *slog.Logger, rule *policy.Bot, ip string) bool { +func (s *Server) checkRules(w http.ResponseWriter, r *http.Request, cr policy.CheckResult, lg *slog.Logger, rule *policy.Bot) bool { // Adjust cookie path if base prefix is not empty cookiePath := "/" if anubis.BasePrefix != "" { From 5fbb70f72515f0ec80613ddd33b3d51bf78a0530 Mon Sep 17 00:00:00 2001 From: Ax333l Date: Wed, 25 Jun 2025 14:53:34 +0200 Subject: [PATCH 06/17] update documentation --- .../admin/configuration/challenges/fcrdns.mdx | 23 ----------- docs/docs/admin/policies.mdx | 39 +++++++++++++++++++ 2 files changed, 39 insertions(+), 23 deletions(-) delete mode 100644 docs/docs/admin/configuration/challenges/fcrdns.mdx diff --git a/docs/docs/admin/configuration/challenges/fcrdns.mdx b/docs/docs/admin/configuration/challenges/fcrdns.mdx deleted file mode 100644 index 75ba05da..00000000 --- a/docs/docs/admin/configuration/challenges/fcrdns.mdx +++ /dev/null @@ -1,23 +0,0 @@ -# FCrDNS (No JavaScript) - -The `fcrdns` challenge dynamically verifies some legitimate bots using DNS and the client IP address. It works in three steps: - -1. Reverse DNS records for the client IP are looked up. -2. Any records that match the `domain_regex` are queried. -3. The client passes the challenge if the domain has an `A` or `AAAA` record that matches the client IP. - -You will need to do some searching to find out what the reverse DNS of legitimate requests should look like. Use `remote_addresses` instead if the bot you are working with cannot be identified with reverse DNS. -This challenge only makes sense for bots, so all rules using the `fcrdns` method must have a `user_agent_regex` or `expression` to identify them. - -To use it in your Anubis configuration: - -```yaml -# Example rule for Qwantbot: https://help.qwant.com/bot/ -- name: qwantbot - user_agent_regex: >- - Qwantbot - action: CHALLENGE - domain_regex: \.qwant\.com$ # The reverse domain expected for legitimate requests (see the link above) - challenge: - algorithm: fcrdns -``` diff --git a/docs/docs/admin/policies.mdx b/docs/docs/admin/policies.mdx index 4633cdeb..4a095746 100644 --- a/docs/docs/admin/policies.mdx +++ b/docs/docs/admin/policies.mdx @@ -233,6 +233,45 @@ remote_addresses: +### Reverse DNS based filtering + +The `domain_regex` field can be used to verify some legitimate bots using DNS and the client IP address. It works in three steps: + +1. Reverse DNS records for the client IP are looked up. +2. Any records that match the `domain_regex` are queried. +3. The client passes the challenge if the domain has an `A` or `AAAA` record that matches the client IP. + +You will need to look at the bot owner's website to find out what the reverse DNS of legitimate requests should look like. Use `remote_addresses` instead if the bot you are working with cannot be identified with reverse DNS. +Rules that use `domain_regex` should also have a `user_agent_regex` or `expression` to avoid unnecessary DNS requests. + +Example: + + + + +```json +{ + "name": "qwantbot", + "user_agent_regex": "\\+https\\:\\/\\/help\\.qwant\\.com/bot/", + "action": "ALLOW", + "domain_regex": "\\.qwant\\.com$" +} +``` + + + + +```yaml +- name: qwantbot + user_agent_regex: \+https\://help\.qwant\.com/bot/ + action: ALLOW + # The reverse domain expected for legitimate requests (see https://help.qwant.com/bot/) + domain_regex: \.qwant\.com$ +``` + + + + ## Imprint / Impressum support Anubis has support for showing imprint / impressum information. This is defined in the `impressum` block of your configuration. See [Imprint / Impressum configuration](./configuration/impressum.mdx) for more information. From fee4bd9f1b17841e512070d37ce525d6cfb156e6 Mon Sep 17 00:00:00 2001 From: Ax333l Date: Thu, 26 Jun 2025 21:16:42 +0200 Subject: [PATCH 07/17] add fcrdns function to cel --- data/crawlers/applebot.yaml | 6 +- data/crawlers/bingbot.yaml | 8 ++- data/crawlers/googlebot.yaml | 8 ++- data/crawlers/mojeekbot.yaml | 8 ++- data/crawlers/qwantbot.yaml | 8 ++- docs/docs/admin/configuration/expressions.mdx | 21 ++++++ docs/docs/admin/policies.mdx | 17 ++--- lib/policy/celchecker.go | 14 ++-- lib/policy/expressions/environment.go | 43 ++++++++++++- lib/policy/expressions/fcrdns.go | 64 +++++++++++++++++++ lib/policy/policy.go | 6 +- 11 files changed, 171 insertions(+), 32 deletions(-) create mode 100644 lib/policy/expressions/fcrdns.go diff --git a/data/crawlers/applebot.yaml b/data/crawlers/applebot.yaml index 4add0264..05e2a2a0 100644 --- a/data/crawlers/applebot.yaml +++ b/data/crawlers/applebot.yaml @@ -1,6 +1,8 @@ # Indexing for search and Siri # https://support.apple.com/en-us/119829 - name: applebot - user_agent_regex: Applebot - domain_regex: \.applebot\.apple\.com$ + expression: + all: + - userAgent.matches("Applebot") + - fcrdns.check("\\.applebot\\.apple\\.com$") action: ALLOW diff --git a/data/crawlers/bingbot.yaml b/data/crawlers/bingbot.yaml index 0f3074d3..9ae11e50 100644 --- a/data/crawlers/bingbot.yaml +++ b/data/crawlers/bingbot.yaml @@ -1,5 +1,7 @@ - name: bingbot - user_agent_regex: \+http\://www\.bing\.com/bingbot\.htm - action: ALLOW # https://www.bing.com/webmasters/help/how-to-verify-bingbot-3905dc26 - domain_regex: \.search\.msn\.com$ + expression: + all: + - userAgent.matches("\\+http\\://www\\.bing\\.com/bingbot\\.htm") + - fcrdns.check("\\.search\\.msn\\.com$") + action: ALLOW diff --git a/data/crawlers/googlebot.yaml b/data/crawlers/googlebot.yaml index 9f9ec8c2..b92751eb 100644 --- a/data/crawlers/googlebot.yaml +++ b/data/crawlers/googlebot.yaml @@ -1,5 +1,7 @@ - name: googlebot - user_agent_regex: \+http\://www\.google\.com/bot\.html - action: ALLOW # https://developers.google.com/search/docs/crawling-indexing/verifying-googlebot - domain_regex: \.googlebot\.com$ + expression: + all: + - userAgent.matches("\\+http\\://www\\.google\\.com/bot\\.html") + - fcrdns.check("\\.googlebot\\.com$") + action: ALLOW diff --git a/data/crawlers/mojeekbot.yaml b/data/crawlers/mojeekbot.yaml index 97d26aa8..24fb5bf3 100644 --- a/data/crawlers/mojeekbot.yaml +++ b/data/crawlers/mojeekbot.yaml @@ -1,5 +1,7 @@ - name: mojeekbot - user_agent_regex: \+https\://www\.mojeek\.com/bot\.html - action: ALLOW # https://www.mojeek.com/bot.html - domain_regex: \.mojeek\.com$ + expression: + all: + - userAgent.matches("\\+https\\://www\\.mojeek\\.com/bot\\.html") + - fcrdns.check("\\.mojeek\\.com$") + action: ALLOW diff --git a/data/crawlers/qwantbot.yaml b/data/crawlers/qwantbot.yaml index a65f5352..8c0e1ac5 100644 --- a/data/crawlers/qwantbot.yaml +++ b/data/crawlers/qwantbot.yaml @@ -1,5 +1,7 @@ - name: qwantbot - user_agent_regex: \+https\://help\.qwant\.com/bot/ - action: ALLOW # https://help.qwant.com/bot/ - domain_regex: \.qwant\.com$ + expression: + all: + - userAgent.matches("\\+https\\://help\\.qwant\\.com/bot/") + - fcrdns.check("\\.qwant\\.com$") + action: ALLOW diff --git a/docs/docs/admin/configuration/expressions.mdx b/docs/docs/admin/configuration/expressions.mdx index 0786c220..e210173a 100644 --- a/docs/docs/admin/configuration/expressions.mdx +++ b/docs/docs/admin/configuration/expressions.mdx @@ -108,6 +108,7 @@ Anubis exposes the following variables to expressions: | `query` | `map[string, string]` | The [query parameters](https://web.dev/articles/url-parts#query) of the request being processed. | `?foo=bar` -> `{"foo": "bar"}` | | `remoteAddress` | `string` | The IP address of the client. | `1.1.1.1` | | `userAgent` | `string` | The [`User-Agent`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/User-Agent) string in the request being processed. | `Mozilla/5.0 Gecko/20100101 Firefox/137.0` | +| `fcrdns` | `fcrdns` | Custom object for performing the forward-confirmed reverse DNS check. | `fcrdns.check("\\.qwant\\.com$")` | Of note: in many languages when you look up a key in a map and there is nothing there, the language will return some "falsy" value like `undefined` in JavaScript, `None` in Python, or the zero value of the type in Go. In CEL, if you try to look up a value that does not exist, execution of the expression will fail and Anubis will return an error. @@ -167,6 +168,26 @@ This is best applied when doing explicit block rules, eg: It seems counter-intuitive to allow known bad clients through sometimes, but this allows you to confuse attackers by making Anubis' behavior random. Adjust the thresholds and numbers as facts and circumstances demand. +### `fcrdns.check` + +```ts +function check(pattern: string): bool; +``` + +`fcrdns.check` accepts a regex string and performs the same check as the `domain_regex` field. For more details, read [Reverse DNS based filtering](../policies#reverse-dns-based-filtering). + +Example for search engine bots: + +```yaml +- name: qwantbot + # https://help.qwant.com/bot/ + expression: + all: + - userAgent.matches("\\+https\\://help\\.qwant\\.com/bot/") + - fcrdns.check("\\.qwant\\.com$") + action: ALLOW +``` + ## Life advice Expressions are very powerful. This is a benefit and a burden. If you are not careful with your expression targeting, you will be liable to get yourself into trouble. If you are at all in doubt, throw a `CHALLENGE` over a `DENY`. Legitimate users can easily work around a `CHALLENGE` result with a [proof of work challenge](../../design/why-proof-of-work.mdx). Bots are less likely to be able to do this. diff --git a/docs/docs/admin/policies.mdx b/docs/docs/admin/policies.mdx index 4a095746..2f687fdf 100644 --- a/docs/docs/admin/policies.mdx +++ b/docs/docs/admin/policies.mdx @@ -242,7 +242,7 @@ The `domain_regex` field can be used to verify some legitimate bots using DNS an 3. The client passes the challenge if the domain has an `A` or `AAAA` record that matches the client IP. You will need to look at the bot owner's website to find out what the reverse DNS of legitimate requests should look like. Use `remote_addresses` instead if the bot you are working with cannot be identified with reverse DNS. -Rules that use `domain_regex` should also have a `user_agent_regex` or `expression` to avoid unnecessary DNS requests. +It is recommended to use an `expression` with `fcrdns.check` instead to avoid unnecessary DNS requests if you are making a rule for search engine bots. An example rule for that case can be found [here](./configuration/expressions.mdx#fcrdnscheck) Example: @@ -251,10 +251,9 @@ Example: ```json { - "name": "qwantbot", - "user_agent_regex": "\\+https\\:\\/\\/help\\.qwant\\.com/bot/", - "action": "ALLOW", - "domain_regex": "\\.qwant\\.com$" + "name": "deny-bad-isp", + "action": "DENY", + "domain_regex": "\\.bad-isp\\.com$" } ``` @@ -262,11 +261,9 @@ Example: ```yaml -- name: qwantbot - user_agent_regex: \+https\://help\.qwant\.com/bot/ - action: ALLOW - # The reverse domain expected for legitimate requests (see https://help.qwant.com/bot/) - domain_regex: \.qwant\.com$ +- name: deny-bad-isp + action: DENY + domain_regex: \.bad-isp\.com$ ``` diff --git a/lib/policy/celchecker.go b/lib/policy/celchecker.go index c2cc3356..ada2e623 100644 --- a/lib/policy/celchecker.go +++ b/lib/policy/celchecker.go @@ -5,6 +5,7 @@ import ( "net/http" "github.com/TecharoHQ/anubis/internal" + "github.com/TecharoHQ/anubis/internal/fcrdns" "github.com/TecharoHQ/anubis/lib/policy/config" "github.com/TecharoHQ/anubis/lib/policy/expressions" "github.com/google/cel-go/cel" @@ -13,10 +14,11 @@ import ( type CELChecker struct { program cel.Program + fcrdns *fcrdns.FCrDNS src string } -func NewCELChecker(cfg *config.ExpressionOrList) (*CELChecker, error) { +func NewCELChecker(cfg *config.ExpressionOrList, fcrdns *fcrdns.FCrDNS) (*CELChecker, error) { env, err := expressions.BotEnvironment() if err != nil { return nil, err @@ -28,8 +30,9 @@ func NewCELChecker(cfg *config.ExpressionOrList) (*CELChecker, error) { } return &CELChecker{ - src: cfg.String(), - program: program, + program, + fcrdns, + cfg.String(), }, nil } @@ -38,7 +41,7 @@ func (cc *CELChecker) Hash() string { } func (cc *CELChecker) Check(r *http.Request) (bool, error) { - result, _, err := cc.program.ContextEval(r.Context(), &CELRequest{r}) + result, _, err := cc.program.ContextEval(r.Context(), &CELRequest{r, cc.fcrdns}) if err != nil { return false, err @@ -53,6 +56,7 @@ func (cc *CELChecker) Check(r *http.Request) (bool, error) { type CELRequest struct { *http.Request + fcrdns *fcrdns.FCrDNS } func (cr *CELRequest) Parent() cel.Activation { return nil } @@ -73,6 +77,8 @@ func (cr *CELRequest) ResolveName(name string) (any, bool) { return expressions.URLValues{Values: cr.URL.Query()}, true case "headers": return expressions.HTTPHeaders{Header: cr.Header}, true + case "fcrdns": + return expressions.FCrDNS{FCrDNS: cr.fcrdns, Ip: cr.Header.Get("X-Real-Ip")}, true default: return nil, false } diff --git a/lib/policy/expressions/environment.go b/lib/policy/expressions/environment.go index 6f46377a..6e766c46 100644 --- a/lib/policy/expressions/environment.go +++ b/lib/policy/expressions/environment.go @@ -2,11 +2,13 @@ package expressions import ( "math/rand/v2" + "regexp" "github.com/google/cel-go/cel" "github.com/google/cel-go/common/types" "github.com/google/cel-go/common/types/ref" "github.com/google/cel-go/ext" + "github.com/google/cel-go/interpreter" ) // BotEnvironment creates a new CEL environment, this is the set of @@ -15,6 +17,7 @@ import ( // of blowing up at runtime. func BotEnvironment() (*cel.Env, error) { return New( + cel.Types(FCrDNSType), // Variables exposed to CEL programs: cel.Variable("remoteAddress", cel.StringType), cel.Variable("host", cel.StringType), @@ -23,6 +26,20 @@ func BotEnvironment() (*cel.Env, error) { cel.Variable("path", cel.StringType), cel.Variable("query", cel.MapType(cel.StringType, cel.StringType)), cel.Variable("headers", cel.MapType(cel.StringType, cel.StringType)), + cel.Variable("fcrdns", FCrDNSType), + cel.Function("check", cel.MemberOverload("fcrdns_check_string", []*cel.Type{FCrDNSType, cel.StringType}, cel.BoolType, + cel.BinaryBinding(func(lhs, rhs ref.Val) ref.Val { + f, ok := lhs.Value().(FCrDNS) + if !ok { + return types.ValOrErr(types.False, "receiver is not an fcrdns instance, but is %T", lhs) + } + pattern, ok := rhs.Value().(string) + if !ok { + return types.ValOrErr(types.False, "value is not a string, but is %T", rhs) + } + + return f.check(pattern) + }))), ) } @@ -64,6 +81,29 @@ func New(opts ...cel.EnvOption) (*cel.Env, error) { return cel.NewEnv(args...) } +var fcrdnsRegexOptimization = &interpreter.RegexOptimization{ + Function: "check", + RegexIndex: 1, + Factory: func(call interpreter.InterpretableCall, regexPattern string) (interpreter.InterpretableCall, error) { + compiledRegex, err := regexp.Compile(regexPattern) + if err != nil { + return nil, err + } + + return interpreter.NewCall(call.ID(), call.Function(), call.OverloadID(), call.Args(), func(values ...ref.Val) ref.Val { + if len(values) != 2 { + return types.NoSuchOverloadErr() + } + + in, ok := values[0].Value().(FCrDNS) + if !ok { + return types.NoSuchOverloadErr() + } + return in.checkOptimized(compiledRegex) + }), nil + }, +} + // Compile takes CEL environment and syntax tree then emits an optimized // Program for execution. func Compile(env *cel.Env, src string) (cel.Program, error) { @@ -79,8 +119,9 @@ func Compile(env *cel.Env, src string) (cel.Program, error) { return env.Program( ast, + // optimize regular expressions right now instead of on the fly + cel.OptimizeRegex(interpreter.MatchesRegexOptimization, fcrdnsRegexOptimization), cel.EvalOptions( - // optimize regular expressions right now instead of on the fly cel.OptOptimize, ), ) diff --git a/lib/policy/expressions/fcrdns.go b/lib/policy/expressions/fcrdns.go new file mode 100644 index 00000000..d45ffcd1 --- /dev/null +++ b/lib/policy/expressions/fcrdns.go @@ -0,0 +1,64 @@ +package expressions + +import ( + "reflect" + "regexp" + + "github.com/TecharoHQ/anubis/internal/fcrdns" + "github.com/google/cel-go/common/types" + "github.com/google/cel-go/common/types/ref" +) + +var FCrDNSType *types.Type = types.NewObjectType("fcrdns") + +// FCrDNS is a type to expose the FCrDNS test to CEL programs. +type FCrDNS struct { + FCrDNS *fcrdns.FCrDNS + Ip string +} + +func (f FCrDNS) ConvertToNative(typeDesc reflect.Type) (any, error) { + return nil, ErrNotImplemented +} + +func (f FCrDNS) ConvertToType(typeVal ref.Type) ref.Val { + switch typeVal { + case FCrDNSType: + return f + case types.TypeType: + return FCrDNSType + } + + return types.NewErr("can't convert from %q to %q", FCrDNSType, typeVal) +} + +func (f FCrDNS) Equal(other ref.Val) ref.Val { + return types.Bool(false) // We don't want to compare FCrDNS instances +} + +func (f FCrDNS) Type() ref.Type { + return FCrDNSType +} + +func (f FCrDNS) Value() any { return f } + +func (f FCrDNS) check(pattern string) ref.Val { + rex, err := regexp.Compile(pattern) + if err != nil { + return types.ValOrErr(types.False, "%w: Failed to compiler regexp pattern %s", err, pattern) + } + return f.checkOptimized(rex) +} + +func (f FCrDNS) checkOptimized(pattern *regexp.Regexp) ref.Val { + if f.Ip == "" { + return types.ValOrErr(types.False, "header X-Real-Ip is not set") + } + + res, err := f.FCrDNS.Check(f.Ip, pattern) + v := types.Bool(res) + if err != nil { + return types.ValOrErr(v, "%w: FCrDNS check failed", err) + } + return v +} diff --git a/lib/policy/policy.go b/lib/policy/policy.go index b43e66cc..b398ef1f 100644 --- a/lib/policy/policy.go +++ b/lib/policy/policy.go @@ -110,15 +110,15 @@ func ParseConfig(ctx context.Context, fin io.Reader, fname string, defaultDiffic } if b.Expression != nil { - c, err := NewCELChecker(b.Expression) - if err != nil { + if !hasFCrDNS { + validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s: no FCrDNS client in the context. This is a bug", b.Name)) + } else if c, err := NewCELChecker(b.Expression, fcrdns); err != nil { validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s expressions: %w", b.Name, err)) } else { cl = append(cl, c) } } - // These checkers may require network requests and should run last. if b.ASNs != nil { if !hasThothClient { slog.Warn("You have specified a Thoth specific check but you have no Thoth client configured. Please read https://anubis.techaro.lol/docs/admin/thoth for more information", "check", "asn", "settings", b.ASNs) From c44a0302c8211d48159ce4848a000ff0607fff8a Mon Sep 17 00:00:00 2001 From: Ax333l Date: Thu, 26 Jun 2025 21:18:37 +0200 Subject: [PATCH 08/17] revert multiple checker behavior --- lib/policy/checker/checker.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/policy/checker/checker.go b/lib/policy/checker/checker.go index 6c4a6c3b..1bef3700 100644 --- a/lib/policy/checker/checker.go +++ b/lib/policy/checker/checker.go @@ -19,12 +19,12 @@ type List []Impl func (l List) Check(r *http.Request) (bool, error) { for _, c := range l { ok, err := c.Check(r) - if err != nil || !ok { + if err != nil { return ok, err } } - return true, nil + return false, nil } func (l List) Hash() string { From 8018a5b7c8a56f0438b530753a0d3dc09c314072 Mon Sep 17 00:00:00 2001 From: Ax333l Date: Thu, 26 Jun 2025 21:20:02 +0200 Subject: [PATCH 09/17] fully revert multi checker behavior --- lib/policy/checker/checker.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/policy/checker/checker.go b/lib/policy/checker/checker.go index 1bef3700..1ee276aa 100644 --- a/lib/policy/checker/checker.go +++ b/lib/policy/checker/checker.go @@ -22,6 +22,9 @@ func (l List) Check(r *http.Request) (bool, error) { if err != nil { return ok, err } + if ok { + return ok, nil + } } return false, nil From 5feae378804a8443e69b366bfa33d7918986b998 Mon Sep 17 00:00:00 2001 From: Ax333l Date: Thu, 26 Jun 2025 21:27:54 +0200 Subject: [PATCH 10/17] fix formatting --- lib/anubis.go | 2 +- lib/config.go | 4 ++-- lib/policy/expressions/environment.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/anubis.go b/lib/anubis.go index f663afde..def53ef9 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -69,7 +69,7 @@ type Server struct { policy *policy.ParsedConfig DNSBLCache *decaymap.Impl[string, dnsbl.DroneBLResponse] OGTags *ogtags.OGTagCache - FCrDNS *fcrdns.FCrDNS + FCrDNS *fcrdns.FCrDNS cookieName string ed25519Priv ed25519.PrivateKey hs512Secret []byte diff --git a/lib/config.go b/lib/config.go index f71b9255..e4975a3f 100644 --- a/lib/config.go +++ b/lib/config.go @@ -30,7 +30,7 @@ import ( type Options struct { Next http.Handler - FCrDNS *fcrdns.FCrDNS + FCrDNS *fcrdns.FCrDNS Policy *policy.ParsedConfig Target string CookieDynamicDomain bool @@ -117,7 +117,7 @@ func New(opts Options) (*Server, error) { opts: opts, DNSBLCache: decaymap.New[string, dnsbl.DroneBLResponse](), OGTags: ogtags.NewOGTagCache(opts.Target, opts.Policy.OpenGraph), - FCrDNS: opts.FCrDNS, + FCrDNS: opts.FCrDNS, cookieName: cookieName, } diff --git a/lib/policy/expressions/environment.go b/lib/policy/expressions/environment.go index 6e766c46..10512b6c 100644 --- a/lib/policy/expressions/environment.go +++ b/lib/policy/expressions/environment.go @@ -84,7 +84,7 @@ func New(opts ...cel.EnvOption) (*cel.Env, error) { var fcrdnsRegexOptimization = &interpreter.RegexOptimization{ Function: "check", RegexIndex: 1, - Factory: func(call interpreter.InterpretableCall, regexPattern string) (interpreter.InterpretableCall, error) { + Factory: func(call interpreter.InterpretableCall, regexPattern string) (interpreter.InterpretableCall, error) { compiledRegex, err := regexp.Compile(regexPattern) if err != nil { return nil, err From be7900bc562757ec5446e6958e2b17b98fdb5f76 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 27 Jun 2025 14:04:45 -0400 Subject: [PATCH 11/17] chore(lib/policy): run go tool goimports Signed-off-by: Xe Iaso --- lib/policy/bot.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/policy/bot.go b/lib/policy/bot.go index 8680df5c..d7556fe9 100644 --- a/lib/policy/bot.go +++ b/lib/policy/bot.go @@ -10,11 +10,11 @@ import ( ) type Bot struct { - Rules checker.Impl - Challenge *config.ChallengeRules - Weight *config.Weight - Name string - Action config.Rule + Rules checker.Impl + Challenge *config.ChallengeRules + Weight *config.Weight + Name string + Action config.Rule DomainRegex *regexp.Regexp } From c7263816e95af610208686059eef3d0b4e65d062 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 27 Jun 2025 14:12:29 -0400 Subject: [PATCH 12/17] docs(CHANGELOG): update changelog notes Signed-off-by: Xe Iaso --- docs/docs/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index e0e11811..6228804a 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -- Implement a DNS-based challenge method: [`fcrdns`](./admin/configuration/challenges/fcrdns.mdx) ([#431](https://github.com/TecharoHQ/anubis/issues/431)) +- Implement a [forward-confirming reverse DNS-based check method](./admin/policies.mdx#reverse-dns-based-filtering) and [expression method](./admin/configuration/expressions.mdx#fcrdnscheck) ([#431](https://github.com/TecharoHQ/anubis/issues/431)) - Fix OpenGraph passthrough ([#717](https://github.com/TecharoHQ/anubis/issues/717)) ## v1.20.0: Thancred Waters From c5bb34b1f063223b743cc4e7c238539d9d761288 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 27 Jun 2025 14:13:11 -0400 Subject: [PATCH 13/17] feat(data/crawlers): add weight to clients that lie If a client claims to be Googlebot but isn't from Google, that's kinda suspicious and should be treated as such. Signed-off-by: Xe Iaso --- data/crawlers/applebot.yaml | 8 ++++++++ data/crawlers/bingbot.yaml | 8 ++++++++ data/crawlers/googlebot.yaml | 8 ++++++++ data/crawlers/mojeekbot.yaml | 8 ++++++++ data/crawlers/qwantbot.yaml | 9 +++++++++ docs/docs/CHANGELOG.md | 1 + 6 files changed, 42 insertions(+) diff --git a/data/crawlers/applebot.yaml b/data/crawlers/applebot.yaml index 05e2a2a0..27e3fd2a 100644 --- a/data/crawlers/applebot.yaml +++ b/data/crawlers/applebot.yaml @@ -6,3 +6,11 @@ - userAgent.matches("Applebot") - fcrdns.check("\\.applebot\\.apple\\.com$") action: ALLOW +- name: not-applebot + expression: + all: + - userAgent.matches("Applebot") + - '!(fcrdns.check("\\.applebot\\.apple\\.com$"))' + action: WEIGH + weight: + adjust: 5 diff --git a/data/crawlers/bingbot.yaml b/data/crawlers/bingbot.yaml index 9ae11e50..c78c0360 100644 --- a/data/crawlers/bingbot.yaml +++ b/data/crawlers/bingbot.yaml @@ -5,3 +5,11 @@ - userAgent.matches("\\+http\\://www\\.bing\\.com/bingbot\\.htm") - fcrdns.check("\\.search\\.msn\\.com$") action: ALLOW +- name: not-bingbot + expression: + all: + - userAgent.matches("\\+http\\://www\\.bing\\.com/bingbot\\.htm") + - '!(fcrdns.check("\\.search\\.msn\\.com$"))' + action: WEIGH + weight: + adjust: 5 diff --git a/data/crawlers/googlebot.yaml b/data/crawlers/googlebot.yaml index b92751eb..ea873a63 100644 --- a/data/crawlers/googlebot.yaml +++ b/data/crawlers/googlebot.yaml @@ -5,3 +5,11 @@ - userAgent.matches("\\+http\\://www\\.google\\.com/bot\\.html") - fcrdns.check("\\.googlebot\\.com$") action: ALLOW +- name: not-googlebot + expression: + all: + - userAgent.matches("\\+http\\://www\\.google\\.com/bot\\.html") + - fcrdns.check("\\.googlebot\\.com$") + action: WEIGH + weight: + adjust: 5 diff --git a/data/crawlers/mojeekbot.yaml b/data/crawlers/mojeekbot.yaml index 24fb5bf3..82d0c69a 100644 --- a/data/crawlers/mojeekbot.yaml +++ b/data/crawlers/mojeekbot.yaml @@ -5,3 +5,11 @@ - userAgent.matches("\\+https\\://www\\.mojeek\\.com/bot\\.html") - fcrdns.check("\\.mojeek\\.com$") action: ALLOW +- name: not-mojeekbot + expression: + all: + - userAgent.matches("\\+https\\://www\\.mojeek\\.com/bot\\.html") + - '!(fcrdns.check("\\.mojeek\\.com$"))' + action: WEIGH + weight: + adjust: 5 diff --git a/data/crawlers/qwantbot.yaml b/data/crawlers/qwantbot.yaml index 8c0e1ac5..271f2597 100644 --- a/data/crawlers/qwantbot.yaml +++ b/data/crawlers/qwantbot.yaml @@ -5,3 +5,12 @@ - userAgent.matches("\\+https\\://help\\.qwant\\.com/bot/") - fcrdns.check("\\.qwant\\.com$") action: ALLOW +- name: not-qwantbot + # https://help.qwant.com/bot/ + expression: + all: + - userAgent.matches("\\+https\\://help\\.qwant\\.com/bot/") + - '!(fcrdns.check("\\.qwant\\.com$"))' + action: WEIGH + weight: + adjust: 5 diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index 6228804a..daf60496 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - Implement a [forward-confirming reverse DNS-based check method](./admin/policies.mdx#reverse-dns-based-filtering) and [expression method](./admin/configuration/expressions.mdx#fcrdnscheck) ([#431](https://github.com/TecharoHQ/anubis/issues/431)) +- Add default rules for increasing the weight of clients that pretend to be search engines but are not valid - Fix OpenGraph passthrough ([#717](https://github.com/TecharoHQ/anubis/issues/717)) ## v1.20.0: Thancred Waters From 01b8b87cf1a2d9e5f29b9d3049340242dae7f219 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 27 Jun 2025 14:17:32 -0400 Subject: [PATCH 14/17] chore: spelling Signed-off-by: Xe Iaso --- .github/actions/spelling/expect.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 2a3663d2..ec57f6ca 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -87,6 +87,11 @@ extldflags facebookgo Factset fastcgi +FCr +fcrdns +fcrdnscheck +fdc +fdns fediverse finfos Firecrawl @@ -136,6 +141,7 @@ impressum inp IPTo iptoasn +isp iss isset ivh @@ -227,6 +233,7 @@ redir redirectscheme refactors relayd +remotehost reputational reqmeta risc From dfac61225bde091975af29b0a9a53ab1c75441a3 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 27 Jun 2025 14:22:40 -0400 Subject: [PATCH 15/17] chore(data/crawlers/googlebot): fix typo Signed-off-by: Xe Iaso --- data/crawlers/googlebot.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/crawlers/googlebot.yaml b/data/crawlers/googlebot.yaml index ea873a63..f12dc8d4 100644 --- a/data/crawlers/googlebot.yaml +++ b/data/crawlers/googlebot.yaml @@ -9,7 +9,7 @@ expression: all: - userAgent.matches("\\+http\\://www\\.google\\.com/bot\\.html") - - fcrdns.check("\\.googlebot\\.com$") + - '!(fcrdns.check("\\.googlebot\\.com$"))' action: WEIGH weight: adjust: 5 From 64989c4868660cf7e310e11f8463d5ebce9ace9a Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 27 Jun 2025 14:31:56 -0400 Subject: [PATCH 16/17] chore: fix spelling errors Signed-off-by: Xe Iaso --- docs/docs/admin/policies.mdx | 3 +-- lib/policy/expressions/environment.go | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/docs/admin/policies.mdx b/docs/docs/admin/policies.mdx index 2f687fdf..9ba0326b 100644 --- a/docs/docs/admin/policies.mdx +++ b/docs/docs/admin/policies.mdx @@ -241,8 +241,7 @@ The `domain_regex` field can be used to verify some legitimate bots using DNS an 2. Any records that match the `domain_regex` are queried. 3. The client passes the challenge if the domain has an `A` or `AAAA` record that matches the client IP. -You will need to look at the bot owner's website to find out what the reverse DNS of legitimate requests should look like. Use `remote_addresses` instead if the bot you are working with cannot be identified with reverse DNS. -It is recommended to use an `expression` with `fcrdns.check` instead to avoid unnecessary DNS requests if you are making a rule for search engine bots. An example rule for that case can be found [here](./configuration/expressions.mdx#fcrdnscheck) +You will need to look at the bot owner's website to find out what the reverse DNS of legitimate requests should look like. Use `remote_addresses` instead if the bot you are working with cannot be identified with reverse DNS. It is recommended to use an `expression` with [`fcrdns.check`](./configuration/expressions.mdx#fcrdnscheck) instead to avoid unnecessary DNS requests if you are making a rule for search engine bots. Example: diff --git a/lib/policy/expressions/environment.go b/lib/policy/expressions/environment.go index 10512b6c..7ea3febf 100644 --- a/lib/policy/expressions/environment.go +++ b/lib/policy/expressions/environment.go @@ -31,7 +31,7 @@ func BotEnvironment() (*cel.Env, error) { cel.BinaryBinding(func(lhs, rhs ref.Val) ref.Val { f, ok := lhs.Value().(FCrDNS) if !ok { - return types.ValOrErr(types.False, "receiver is not an fcrdns instance, but is %T", lhs) + return types.ValOrErr(types.False, "receiver is not a fcrdns instance, but is %T", lhs) } pattern, ok := rhs.Value().(string) if !ok { From ff978d20b7cc650bc95a0e9ce9c1a30f0832ecfa Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 27 Jun 2025 14:40:10 -0400 Subject: [PATCH 17/17] tests(lib): inject fcrdns into the test contexts Signed-off-by: Xe Iaso --- lib/anubis_test.go | 9 ++++++++- lib/config_test.go | 12 ++++++++++-- lib/policy/policy_test.go | 17 +++++++++++++---- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/lib/anubis_test.go b/lib/anubis_test.go index e3089a84..f76a5c6f 100644 --- a/lib/anubis_test.go +++ b/lib/anubis_test.go @@ -15,6 +15,7 @@ import ( "github.com/TecharoHQ/anubis" "github.com/TecharoHQ/anubis/data" "github.com/TecharoHQ/anubis/internal" + "github.com/TecharoHQ/anubis/internal/fcrdns" "github.com/TecharoHQ/anubis/internal/thoth/thothmock" "github.com/TecharoHQ/anubis/lib/policy" "github.com/TecharoHQ/anubis/lib/policy/config" @@ -29,6 +30,9 @@ func loadPolicies(t *testing.T, fname string, difficulty int) *policy.ParsedConf ctx := thothmock.WithMockThoth(t) + fdns := fcrdns.NewFCrDNS() + ctx = fcrdns.With(ctx, fdns) + if fname == "" { fname = "./testdata/test_config.yaml" } @@ -175,7 +179,10 @@ func TestLoadPolicies(t *testing.T) { } defer fin.Close() - if _, err := policy.ParseConfig(t.Context(), fin, fname, 4); err != nil { + fdns := fcrdns.NewFCrDNS() + ctx := fcrdns.With(t.Context(), fdns) + + if _, err := policy.ParseConfig(ctx, fin, fname, 4); err != nil { t.Fatal(err) } }) diff --git a/lib/config_test.go b/lib/config_test.go index 71cb7347..711a32ad 100644 --- a/lib/config_test.go +++ b/lib/config_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/TecharoHQ/anubis" + "github.com/TecharoHQ/anubis/internal/fcrdns" "github.com/TecharoHQ/anubis/internal/thoth/thothmock" "github.com/TecharoHQ/anubis/lib/policy" ) @@ -26,7 +27,10 @@ func TestBadConfigs(t *testing.T) { for _, st := range finfos { st := st t.Run(st.Name(), func(t *testing.T) { - if _, err := LoadPoliciesOrDefault(t.Context(), filepath.Join("policy", "config", "testdata", "bad", st.Name()), anubis.DefaultDifficulty); err == nil { + fdns := fcrdns.NewFCrDNS() + ctx := fcrdns.With(t.Context(), fdns) + + if _, err := LoadPoliciesOrDefault(ctx, filepath.Join("policy", "config", "testdata", "bad", st.Name()), anubis.DefaultDifficulty); err == nil { t.Fatal(err) } else { t.Log(err) @@ -46,13 +50,17 @@ func TestGoodConfigs(t *testing.T) { t.Run(st.Name(), func(t *testing.T) { t.Run("with-thoth", func(t *testing.T) { ctx := thothmock.WithMockThoth(t) + fdns := fcrdns.NewFCrDNS() + ctx = fcrdns.With(ctx, fdns) if _, err := LoadPoliciesOrDefault(ctx, filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err != nil { t.Fatal(err) } }) t.Run("without-thoth", func(t *testing.T) { - if _, err := LoadPoliciesOrDefault(t.Context(), filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err != nil { + fdns := fcrdns.NewFCrDNS() + ctx := fcrdns.With(t.Context(), fdns) + if _, err := LoadPoliciesOrDefault(ctx, filepath.Join("policy", "config", "testdata", "good", st.Name()), anubis.DefaultDifficulty); err != nil { t.Fatal(err) } }) diff --git a/lib/policy/policy_test.go b/lib/policy/policy_test.go index 9ada1c95..a5451bf9 100644 --- a/lib/policy/policy_test.go +++ b/lib/policy/policy_test.go @@ -7,11 +7,14 @@ import ( "github.com/TecharoHQ/anubis" "github.com/TecharoHQ/anubis/data" + "github.com/TecharoHQ/anubis/internal/fcrdns" "github.com/TecharoHQ/anubis/internal/thoth/thothmock" ) func TestDefaultPolicyMustParse(t *testing.T) { ctx := thothmock.WithMockThoth(t) + fdns := fcrdns.NewFCrDNS() + ctx = fcrdns.With(ctx, fdns) fin, err := data.BotPolicies.Open("botPolicies.json") if err != nil { @@ -25,7 +28,6 @@ func TestDefaultPolicyMustParse(t *testing.T) { } func TestGoodConfigs(t *testing.T) { - finfos, err := os.ReadDir("config/testdata/good") if err != nil { t.Fatal(err) @@ -42,6 +44,8 @@ func TestGoodConfigs(t *testing.T) { defer fin.Close() ctx := thothmock.WithMockThoth(t) + fdns := fcrdns.NewFCrDNS() + ctx = fcrdns.With(ctx, fdns) if _, err := ParseConfig(ctx, fin, fin.Name(), anubis.DefaultDifficulty); err != nil { t.Fatal(err) } @@ -54,7 +58,10 @@ func TestGoodConfigs(t *testing.T) { } defer fin.Close() - if _, err := ParseConfig(t.Context(), fin, fin.Name(), anubis.DefaultDifficulty); err != nil { + fdns := fcrdns.NewFCrDNS() + ctx := fcrdns.With(t.Context(), fdns) + + if _, err := ParseConfig(ctx, fin, fin.Name(), anubis.DefaultDifficulty); err != nil { t.Fatal(err) } }) @@ -63,8 +70,6 @@ func TestGoodConfigs(t *testing.T) { } func TestBadConfigs(t *testing.T) { - ctx := thothmock.WithMockThoth(t) - finfos, err := os.ReadDir("config/testdata/bad") if err != nil { t.Fatal(err) @@ -79,6 +84,10 @@ func TestBadConfigs(t *testing.T) { } defer fin.Close() + ctx := thothmock.WithMockThoth(t) + fdns := fcrdns.NewFCrDNS() + ctx = fcrdns.With(ctx, fdns) + if _, err := ParseConfig(ctx, fin, fin.Name(), anubis.DefaultDifficulty); err == nil { t.Fatal(err) } else {