diff --git a/data/crawlers/seekport.yaml b/data/crawlers/seekport.yaml new file mode 100644 index 00000000..3f5daecd --- /dev/null +++ b/data/crawlers/seekport.yaml @@ -0,0 +1,401 @@ +# Alternative German search engine +# https://bot.seekport.com/ +- name: seekportbot + user_agent_regex: SeekportBot + action: ALLOW + # https://bot.seekport.com/seekportbot_ips.txt + remote_addresses: [ + "8.6.8.194/32", + "45.32.64.84/32", + "45.32.71.141/32", + "45.32.76.221/32", + "45.32.79.194/32", + "45.32.79.200/32", + "45.32.80.202/32", + "45.32.86.191/32", + "45.32.86.192/32", + "45.32.90.31/32", + "45.32.94.222/32", + "45.63.49.97/32", + "45.63.53.35/32", + "45.63.53.92/32", + "45.63.54.35/32", + "45.76.67.107/32", + "45.76.75.127/32", + "45.76.76.169/32", + "45.76.173.93/32", + "45.76.173.185/32", + "45.77.85.70/32", + "45.77.122.130/32", + "46.101.178.140/32", + "46.101.217.136/32", + "51.15.128.42/32", + "51.15.196.148/32", + "51.15.206.253/32", + "51.15.217.228/32", + "51.15.218.154/32", + "51.15.227.225/32", + "51.15.232.133/32", + "51.15.234.161/32", + "51.68.89.88/32", + "51.68.164.64/32", + "51.68.164.98/32", + "51.68.164.132/32", + "51.68.164.224/32", + "51.68.165.11/32", + "51.68.165.38/32", + "51.68.165.184/32", + "51.68.165.201/32", + "51.68.165.223/32", + "51.68.166.37/32", + "51.68.166.168/32", + "51.68.166.195/32", + "51.68.166.201/32", + "51.68.167.15/32", + "51.68.167.77/32", + "51.68.167.111/32", + "51.68.167.245/32", + "51.75.62.3/32", + "51.75.62.184/32", + "51.75.168.198/32", + "51.77.108.244/32", + "51.79.25.241/32", + "51.79.29.202/32", + "51.79.88.205/32", + "51.79.91.41/32", + "51.79.91.43/32", + "51.79.91.170/32", + "51.79.91.233/32", + "51.79.91.239/32", + "51.79.92.45/32", + "51.79.92.212/32", + "51.79.93.156/32", + "51.79.100.85/32", + "51.79.103.17/32", + "51.79.103.248/32", + "51.83.34.35/32", + "51.83.34.51/32", + "51.83.34.90/32", + "51.83.34.104/32", + "51.83.34.114/32", + "51.83.34.183/32", + "51.83.34.193/32", + "51.83.34.224/32", + 
"51.83.35.9/32", + "51.83.35.39/32", + "51.83.35.86/32", + "51.83.35.92/32", + "51.83.35.94/32", + "51.83.35.127/32", + "51.83.35.160/32", + "51.83.35.163/32", + "51.83.35.173/32", + "51.83.35.207/32", + "51.83.224.16/32", + "51.83.224.23/32", + "51.83.224.179/32", + "51.83.224.227/32", + "51.83.224.236/32", + "51.83.225.82/32", + "51.83.225.218/32", + "51.83.225.251/32", + "51.83.225.252/32", + "51.83.226.9/32", + "51.83.226.53/32", + "51.83.226.105/32", + "51.83.226.111/32", + "51.83.226.204/32", + "51.83.227.59/32", + "51.83.227.148/32", + "51.83.227.226/32", + "51.89.224.158/32", + "51.158.79.36/32", + "51.158.107.242/32", + "51.158.120.235/32", + "51.158.123.235/32", + "51.161.80.5/32", + "51.161.82.53/32", + "51.161.83.91/32", + "51.161.83.127/32", + "51.161.83.248/32", + "54.37.0.32/32", + "54.37.0.77/32", + "54.37.0.174/32", + "54.37.0.218/32", + "54.37.1.207/32", + "54.37.1.234/32", + "54.37.2.4/32", + "54.37.2.145/32", + "54.37.4.36/32", + "54.37.5.1/32", + "54.37.5.234/32", + "54.37.6.165/32", + "54.37.7.64/32", + "54.37.7.201/32", + "54.37.7.218/32", + "54.37.77.164/32", + "65.21.35.179/32", + "65.21.35.180/30", + "65.21.35.184/30", + "65.21.35.188/31", + "65.21.35.190/32", + "65.21.35.194/31", + "65.21.35.196/30", + "65.21.35.200/30", + "65.21.35.204/31", + "65.21.35.223/32", + "65.21.35.224/28", + "65.21.35.240/29", + "65.21.35.248/30", + "65.21.35.252/31", + "65.21.35.254/32", + "65.21.74.6/32", + "65.21.91.185/32", + "65.21.136.197/32", + "65.21.177.80/32", + "65.21.179.1/32", + "65.21.180.65/32", + "65.21.197.13/32", + "65.21.199.133/32", + "65.21.202.117/32", + "65.21.205.62/32", + "65.21.206.24/32", + "65.21.207.198/32", + "65.21.231.77/32", + "65.21.237.6/32", + "65.21.237.134/32", + "65.108.2.73/32", + "65.108.6.5/32", + "65.108.6.6/32", + "65.108.6.231/32", + "65.108.8.9/32", + "65.108.8.132/32", + "65.108.11.66/32", + "65.108.13.42/32", + "65.108.13.103/32", + "65.108.13.104/32", + "65.108.41.9/32", + "65.108.41.10/32", + "65.108.44.74/32", + 
"65.108.45.180/32", + "65.108.66.11/32", + "65.108.67.8/32", + "65.108.71.126/32", + "65.108.72.9/32", + "65.108.74.44/32", + "65.108.74.119/32", + "65.108.74.120/32", + "65.108.75.167/32", + "65.108.77.15/32", + "65.108.77.16/32", + "65.108.79.171/32", + "65.108.99.40/29", + "65.108.99.48/29", + "65.108.99.56/31", + "65.108.99.58/32", + "65.108.99.107/32", + "65.108.99.108/30", + "65.108.99.112/29", + "65.108.99.120/30", + "65.108.99.124/31", + "65.108.99.126/32", + "65.108.99.172/30", + "65.108.99.176/29", + "65.108.99.184/30", + "65.108.99.188/31", + "65.108.99.190/32", + "65.108.103.50/31", + "65.108.103.52/32", + "65.108.103.96/28", + "65.108.103.112/30", + "65.108.103.116/32", + "66.42.97.195/32", + "66.42.110.246/32", + "68.183.65.188/32", + "95.217.72.254/32", + "95.217.199.97/32", + "95.217.203.110/32", + "104.207.153.148/32", + "104.207.154.9/32", + "104.238.141.198/32", + "104.248.16.236/32", + "104.248.25.93/32", + "135.125.245.119/32", + "135.181.3.203/32", + "135.181.4.50/32", + "135.181.4.54/32", + "135.181.4.69/32", + "135.181.61.178/32", + "135.181.62.173/32", + "135.181.72.179/32", + "135.181.73.252/32", + "135.181.74.194/31", + "135.181.74.196/32", + "135.181.76.42/32", + "135.181.78.178/31", + "135.181.78.180/31", + "135.181.78.182/32", + "135.181.78.184/32", + "135.181.79.61/32", + "135.181.79.99/32", + "135.181.79.114/32", + "135.181.82.112/32", + "135.181.85.126/32", + "135.181.85.180/32", + "135.181.85.192/31", + "135.181.87.126/32", + "135.181.113.179/32", + "135.181.114.163/32", + "135.181.114.235/32", + "135.181.119.46/32", + "135.181.136.229/32", + "135.181.136.240/31", + "135.181.137.2/32", + "135.181.137.52/32", + "135.181.137.58/31", + "135.181.137.60/31", + "135.181.137.62/32", + "135.181.137.98/31", + "135.181.137.100/32", + "135.181.137.102/32", + "135.181.137.110/32", + "135.181.137.115/32", + "135.181.138.2/31", + "135.181.138.4/30", + "135.181.138.8/31", + "135.181.138.45/32", + "135.181.138.46/31", + "135.181.138.49/32", + 
"135.181.138.58/32", + "135.181.140.19/32", + "135.181.140.21/32", + "135.181.140.22/32", + "135.181.140.34/31", + "135.181.140.36/32", + "135.181.140.38/32", + "135.181.140.112/32", + "135.181.140.124/32", + "135.181.140.143/32", + "135.181.140.163/32", + "135.181.140.173/32", + "135.181.140.183/32", + "135.181.140.185/32", + "135.181.140.186/32", + "135.181.140.251/32", + "135.181.140.252/32", + "135.181.177.220/32", + "138.197.181.193/32", + "141.94.104.30/32", + "141.94.104.105/32", + "141.94.104.115/32", + "141.94.104.138/32", + "141.94.105.77/32", + "141.94.105.81/32", + "141.94.105.141/32", + "141.94.105.186/32", + "141.94.105.203/32", + "141.94.106.21/32", + "141.94.106.23/32", + "141.94.106.55/32", + "141.94.106.71/32", + "141.94.106.127/32", + "141.94.107.54/32", + "141.94.107.83/32", + "141.94.107.146/32", + "141.94.107.230/32", + "141.94.169.120/32", + "142.93.105.87/32", + "142.93.168.165/32", + "142.93.168.199/32", + "144.202.114.74/32", + "144.202.117.163/32", + "144.202.119.127/32", + "144.202.121.92/32", + "144.202.123.60/32", + "149.28.68.230/32", + "149.28.83.109/32", + "149.28.85.67/32", + "149.28.89.240/32", + "149.248.4.32/32", + "149.248.13.145/32", + "149.248.13.240/32", + "149.248.17.24/32", + "149.248.17.135/32", + "149.248.17.243/32", + "149.248.21.146/32", + "159.223.29.210/32", + "159.223.30.219/32", + "163.172.129.78/32", + "164.92.252.7/32", + "164.92.252.156/32", + "164.92.254.30/32", + "165.22.23.21/32", + "167.71.43.145/32", + "167.172.96.99/32", + "167.172.96.105/32", + "167.172.96.192/32", + "167.172.96.205/32", + "167.172.97.4/32", + "167.172.97.46/32", + "167.172.97.136/32", + "167.172.98.25/32", + "167.172.98.50/32", + "167.172.100.9/32", + "167.172.100.234/32", + "167.172.100.236/32", + "167.172.102.155/32", + "167.172.102.172/32", + "167.172.102.189/32", + "167.172.104.5/32", + "167.172.104.75/32", + "167.172.104.126/32", + "167.172.104.227/32", + "167.172.104.243/32", + "167.172.104.245/32", + "167.172.105.77/32", + 
"167.172.106.24/32", + "167.172.106.28/32", + "167.172.106.99/32", + "167.172.106.119/32", + "167.172.108.98/32", + "167.172.108.165/32", + "167.172.108.192/32", + "167.172.110.37/32", + "167.172.110.123/32", + "167.172.110.160/32", + "167.172.177.184/32", + "167.172.177.208/32", + "167.172.179.51/32", + "167.172.179.142/32", + "167.172.179.178/32", + "167.172.182.89/32", + "167.172.182.251/32", + "167.172.183.41/32", + "167.172.183.73/32", + "167.172.187.15/32", + "167.172.187.173/32", + "167.172.187.250/32", + "167.172.190.4/32", + "167.172.190.41/32", + "167.172.190.44/32", + "167.172.190.125/32", + "167.172.190.139/32", + "178.128.206.198/32", + "198.244.143.44/32", + "206.81.18.244/32", + "206.81.26.33/32", + "206.81.29.118/32", + "207.246.96.241/32", + "207.246.100.87/32", + "207.246.102.173/32", + "207.246.103.10/32", + "207.246.106.44/32", + "207.246.109.253/32", + "212.47.237.184/32", + "212.47.240.195/32", + "212.47.242.4/32", + "212.47.244.167/32", + "212.47.245.230/32", + "212.47.249.219/32", + "212.47.251.165/32", + ] diff --git a/data/services/data-mining/_deny-all.yaml b/data/services/data-mining/_deny-all.yaml new file mode 100644 index 00000000..713af6ab --- /dev/null +++ b/data/services/data-mining/_deny-all.yaml @@ -0,0 +1,6 @@ +- import: (data)/services/data-mining/builtwith.yaml +- import: (data)/services/data-mining/checkmark-network.yaml +- import: (data)/services/data-mining/mixrank.yaml +- import: (data)/services/data-mining/pandalytics.yaml +- import: (data)/services/data-mining/twingly.yaml +- import: (data)/services/data-mining/zoominfo.yaml diff --git a/data/services/data-mining/builtwith.yaml b/data/services/data-mining/builtwith.yaml new file mode 100644 index 00000000..98e0e13d --- /dev/null +++ b/data/services/data-mining/builtwith.yaml @@ -0,0 +1,9 @@ +# The BuiltWith system visits a website to determine the technology profile it +# is using by looking at the publicly visible code on a website. 
+# +# Website: https://builtwith.com +# Bot info: https://builtwith.com/biup + +- name: builtwith + user_agent_regex: BW/\d+\.\d+; rb\.gy/ + action: DENY diff --git a/data/services/data-mining/checkmark-network.yaml b/data/services/data-mining/checkmark-network.yaml new file mode 100644 index 00000000..d0eb4c37 --- /dev/null +++ b/data/services/data-mining/checkmark-network.yaml @@ -0,0 +1,8 @@ +# Checkmark Network is a brand and intellectual property protection company. +# +# Website: https://www.checkmarknetwork.com +# Crawler info: https://www.checkmarknetwork.com/spider.html/ + +- name: checkmark-network + user_agent_regex: ^CheckMarkNetwork/ + action: DENY diff --git a/data/services/data-mining/mixrank.yaml b/data/services/data-mining/mixrank.yaml new file mode 100644 index 00000000..9197f8fd --- /dev/null +++ b/data/services/data-mining/mixrank.yaml @@ -0,0 +1,7 @@ +# MixRank is a commercial data mining entity for large enterprise customers. +# +# Website: https://mixrank.com + +- name: mixrank + user_agent_regex: MixrankBot + action: DENY diff --git a/data/services/data-mining/pandalytics.yaml b/data/services/data-mining/pandalytics.yaml new file mode 100644 index 00000000..58ae3228 --- /dev/null +++ b/data/services/data-mining/pandalytics.yaml @@ -0,0 +1,9 @@ +# Domainsbot collects information on domains and website data for intellectual +# property disputes. +# +# Website: https://domainsbot.com +# Crawler info: https://domainsbot.com/pandalytics/ + +- name: pandalytics + user_agent_regex: ^Pandalytics/ + action: DENY diff --git a/data/services/data-mining/twingly.yaml b/data/services/data-mining/twingly.yaml new file mode 100644 index 00000000..c9cd8e2b --- /dev/null +++ b/data/services/data-mining/twingly.yaml @@ -0,0 +1,8 @@ +# Twingly is a Swedish social data mining and media monitoring company. 
+# +# Website: https://www.twingly.com +# Crawler info: https://app.twingly.com/public-docs/crawler + +- name: twingly + user_agent_regex: Twingly Recon + action: DENY diff --git a/data/services/data-mining/zoominfo.yaml b/data/services/data-mining/zoominfo.yaml new file mode 100644 index 00000000..0401c217 --- /dev/null +++ b/data/services/data-mining/zoominfo.yaml @@ -0,0 +1,9 @@ +# ZoomInfo is a data mining and sales platform for enterprise use, feeding +# the gathered information into a machine learning model, offering AI services +# to customers. +# +# Website: https://www.zoominfo.com + +- name: zoominfo + user_agent_regex: ZoominfoBot + action: DENY diff --git a/data/services/monitoring/statuscake.yaml b/data/services/monitoring/statuscake.yaml new file mode 100644 index 00000000..e8c643cf --- /dev/null +++ b/data/services/monitoring/statuscake.yaml @@ -0,0 +1,287 @@ +# Statuscake is a website and endpoint monitoring service. +# +# Website: https://www.statuscake.com + +- name: statuscake + user_agent_regex: StatusCake + action: ALLOW + # https://www.statuscake.com/kb/knowledge-base/what-are-your-ips/ + remote_addresses: [ + "13.48.110.214/32", + "13.48.238.157/32", + "13.49.141.132/32", + "13.245.199.158/32", + "13.245.222.203/32", + "13.246.54.122/32", + "13.247.17.60/32", + "15.160.68.179/32", + "15.161.88.159/32", + "16.170.114.19/32", + "18.102.1.62/32", + "18.102.253.202/32", + "34.13.166.3/32", + "34.13.185.113/32", + "34.13.188.75/32", + "34.22.96.192/32", + "34.34.97.213/32", + "34.34.185.236/32", + "34.40.39.31/32", + "34.65.97.99/32", + "34.65.97.251/32", + "34.65.116.80/32", + "34.65.146.108/32", + "34.78.123.120/32", + "34.92.43.64/32", + "34.92.171.119/32", + "34.94.175.122/32", + "34.94.216.215/32", + "34.95.163.121/32", + "34.95.214.187/32", + "34.96.139.85/32", + "34.102.121.250/32", + "34.107.112.44/32", + "34.116.156.130/32", + "34.116.229.102/32", + "34.118.53.12/32", + "34.118.59.148/32", + "34.141.16.202/32", + "34.147.219.142/32", + 
"34.150.35.14/32", + "34.155.58.180/32", + "34.155.143.57/32", + "34.155.232.229/32", + "34.159.178.196/32", + "34.163.147.250/32", + "34.165.13.190/32", + "34.165.80.145/32", + "34.165.120.108/32", + "34.165.219.192/32", + "34.174.9.202/32", + "34.174.16.112/32", + "34.174.16.200/32", + "34.174.49.169/32", + "34.175.19.199/32", + "34.175.21.165/32", + "34.175.49.24/32", + "34.175.86.21/32", + "35.187.119.100/32", + "35.195.188.224/32", + "35.197.195.213/32", + "35.197.232.17/32", + "35.197.251.116/32", + "35.198.232.244/32", + "35.199.96.243/32", + "35.235.115.146/32", + "35.247.248.27/32", + "45.32.7.22/32", + "45.32.36.158/32", + "45.32.69.14/32", + "45.32.128.80/32", + "45.32.141.163/32", + "45.32.145.79/32", + "45.32.151.21/32", + "45.32.160.172/32", + "45.32.166.195/32", + "45.32.171.24/32", + "45.32.183.128/32", + "45.32.193.13/32", + "45.32.195.186/32", + "45.32.195.225/32", + "45.32.212.56/32", + "45.63.26.78/32", + "45.63.51.63/32", + "45.63.61.213/32", + "45.63.76.68/32", + "45.63.78.84/32", + "45.63.86.120/32", + "45.63.88.213/32", + "45.63.96.68/32", + "45.63.104.11/32", + "45.63.121.159/32", + "45.76.1.44/32", + "45.76.3.112/32", + "45.76.122.71/32", + "45.76.123.211/32", + "45.76.130.43/32", + "45.76.134.85/32", + "45.76.134.164/32", + "45.76.192.50/32", + "45.77.235.246/32", + "52.48.52.95/32", + "54.72.247.193/32", + "54.170.7.14/32", + "54.194.6.253/32", + "68.183.39.102/32", + "104.156.229.24/32", + "104.156.255.184/32", + "104.236.163.90/32", + "104.238.164.105/32", + "107.170.235.240/32", + "107.191.47.131/32", + "107.191.57.237/32", + "108.61.119.153/32", + "108.61.162.214/32", + "108.61.173.0/32", + "108.61.205.201/32", + "108.61.212.141/32", + "108.61.215.179/32", + "108.61.252.147/32", + "128.199.105.174/32", + "138.68.83.231/32", + "138.68.252.209/32", + "138.197.201.67/32", + "140.82.4.219/32", + "141.164.35.233/32", + "141.164.48.161/32", + "141.164.58.9/32", + "143.110.177.18/32", + "143.110.177.252/32", + "143.110.177.254/32", + 
"143.110.216.47/32", + "143.110.216.64/32", + "143.110.216.159/32", + "143.110.216.228/32", + "143.198.153.99/32", + "143.244.178.189/32", + "146.190.20.1/32", + "146.190.20.113/32", + "149.28.79.140/32", + "159.65.19.28/32", + "159.223.74.121/32", + "159.223.175.210/32", + "162.243.37.40/32", + "162.243.141.135/32", + "165.22.125.189/32", + "165.22.210.218/32", + "165.227.74.224/32", + "167.71.61.88/32", + "167.71.143.76/32", + "167.172.161.111/32", + "167.172.163.51/32", + "167.172.163.238/32", + "167.172.171.10/32", + "167.172.171.89/32", + "167.172.171.150/32", + "167.172.173.165/32", + "167.172.175.168/32", + "174.138.57.253/32", + "178.62.40.182/32", + "178.62.83.142/32", + "178.62.85.105/32", + "188.166.158.224/32", + "192.241.243.72/32", + "198.199.100.13/32", + "198.199.112.67/32", + "198.211.121.217/32", + "198.211.123.207/32", + "199.247.12.100/32", + "206.189.178.14/32", + "209.97.132.240/32", + "209.97.171.44/32", + "209.97.178.115/32", + "216.238.73.102/32", + "216.238.76.215/32", + "216.238.77.111/32", + "216.238.84.207/32", + "2001:19f0:5:7b1:5400:4ff:fe14:f77b/128", + "2001:19f0:5:2e88:5400:4ff:fe15:3ea1/128", + "2001:19f0:5:5f7c:5400:4ff:fe14:f77c/128", + "2001:19f0:1000:3df6:5400:5ff:fe24:5efa/128", + "2001:19f0:5401:834:5400:4ff:fe15:88ca/128", + "2001:19f0:5401:925:5400:4ff:fe14:e3c0/128", + "2001:19f0:5401:190e:5400:4ff:fe15:88c9/128", + "2001:19f0:5401:204a:5400:4ff:fe14:e3c1/128", + "2001:19f0:5801:5db:5400:4ff:fe15:8c39/128", + "2001:19f0:5801:c8a:5400:4ff:fe15:3359/128", + "2001:19f0:5801:ca9:5400:4ff:fe15:3358/128", + "2001:19f0:5801:1107:5400:4ff:fe15:8c38/128", + "2001:19f0:5801:15d2:5400:4ff:fe15:3fdf/128", + "2001:19f0:5801:15e1:5400:4ff:fe15:3fde/128", + "2001:19f0:5c00:151f:5400:4ff:fe14:e873/128", + "2001:19f0:5c00:1f6f:5400:4ff:fe14:e874/128", + "2001:19f0:5c00:1fbf:5400:4ff:fe14:e875/128", + "2001:19f0:5c01:c87:5400:4ff:fe15:8940/128", + "2001:19f0:6001:327c:5400:4ff:fe14:ec29/128", + "2001:19f0:6001:4930:5400:4ff:fe15:8c19/128", 
+ "2001:19f0:6001:4a76:5400:4ff:fe14:ec2a/128", + "2001:19f0:6001:584d:5400:4ff:fe15:8c18/128", + "2001:19f0:6401:43f:5400:4ff:fe14:e9dd/128", + "2001:19f0:6401:745:5400:4ff:fe14:e9de/128", + "2001:19f0:6401:d69:5400:4ff:fe14:e9dc/128", + "2001:19f0:6401:ef2:5400:4ff:fe15:3e39/128", + "2001:19f0:6801:1aa:5400:4ff:fe15:402b/128", + "2001:19f0:6801:1f84:5400:4ff:fe14:f8d3/128", + "2001:19f0:7001:1299:5400:4ff:fe15:3620/128", + "2001:19f0:7001:38c4:5400:4ff:fe15:3622/128", + "2001:19f0:7001:50a4:5400:4ff:fe15:8c64/128", + "2001:19f0:7002:599:5400:4ff:fe15:3621/128", + "2001:19f0:7400:89bb:5400:4ff:fe15:44d5/128", + "2001:19f0:7401:8cbe:5400:4ff:fe15:8ed5/128", + "2001:19f0:7401:8cbf:5400:4ff:fe15:8ed6/128", + "2001:19f0:7402:261:5400:4ff:fe15:8ed3/128", + "2001:19f0:7402:bf4:5400:4ff:fe15:8ed2/128", + "2001:19f0:9002:689:5400:4ff:fe15:3e71/128", + "2001:19f0:9002:b50:5400:4ff:fe14:f368/128", + "2001:19f0:9002:17f4:5400:4ff:fe14:f36a/128", + "2001:19f0:9002:2778:5400:4ff:fe14:f369/128", + "2001:19f0:ac01:122:5400:4ff:fe14:fb1a/128", + "2001:19f0:ac01:3c3:5400:4ff:fe14:fb1c/128", + "2001:19f0:ac01:518:5400:4ff:fe14:fb1b/128", + "2001:19f0:ac01:154a:5400:4ff:fe14:fb1e/128", + "2001:19f0:ac01:1dc9:5400:4ff:fe14:fb1d/128", + "2001:19f0:b400:100b:5400:4ff:fe14:efa7/128", + "2001:19f0:b400:1056:5400:4ff:fe14:efa8/128", + "2001:19f0:b400:2036:5400:5ff:fe24:5eb9/128", + "2001:19f0:b400:205e:5400:5ff:fe24:5eb8/128", + "2400:6180:0:d0::11e9:9001/128", + "2400:6180:0:d0::141d:9001/128", + "2400:6180:0:d0::144f:8001/128", + "2400:6180:100:d0::9cd:d001/128", + "2400:6180:100:d0::a6a:6001/128", + "2400:6180:100:d0::afd:8001/128", + "2400:6180:100:d0::b91:1001/128", + "2401:c080:1c02:be:5400:4ff:fe14:fce0/128", + "2401:c080:1c02:ba6:5400:4ff:fe14:fcdf/128", + "2401:c080:1c02:ef0:5400:4ff:fe14:fcde/128", + "2604:a880:0:1010::21:1001/128", + "2604:a880:0:1010::4e:2001/128", + "2604:a880:1:20::69:e001/128", + "2604:a880:1:20::6e:3001/128", + "2604:a880:2:d0::203d:a001/128", + 
"2604:a880:2:d0::203e:4001/128", + "2604:a880:4:1d0::3ae:8000/128", + "2604:a880:4:1d0::414:4000/128", + "2604:a880:400:d0::21b0:1/128", + "2604:a880:400:d0::21d4:b001/128", + "2604:a880:800:10::607:8001/128", + "2604:a880:800:10::a31:e001/128", + "2604:a880:cad:d0::2aa:9001/128", + "2604:a880:cad:d0::c91:6001/128", + "2604:a880:cad:d0::c95:7001/128", + "2604:a880:cad:d0::c9f:4001/128", + "2a03:b0c0:0:1010::8:e001/128", + "2a03:b0c0:0:1010::15:a001/128", + "2a03:b0c0:1:d0::1216:4001/128", + "2a03:b0c0:1:d0::1229:2001/128", + "2a03:b0c0:1:d0::1229:c001/128", + "2a03:b0c0:1:d0::1231:c001/128", + "2a03:b0c0:1:d0::123d:4001/128", + "2a03:b0c0:1:d0::1240:c001/128", + "2a03:b0c0:1:d0::1242:1001/128", + "2a03:b0c0:1:d0::1243:9001/128", + "2a03:b0c0:2:d0::129b:7001/128", + "2a03:b0c0:2:d0::12d1:1/128", + "2a03:b0c0:3:d0::195c:7001/128", + "2a03:b0c0:3:d0::199a:1/128", + "2a03:b0c0:3:d0::199a:4001/128", + "2a03:b0c0:3:d0::199b:2001/128", + "2a03:b0c0:3:d0::199c:2001/128", + "2a03:b0c0:3:d0::19a5:4001/128", + "2a03:b0c0:3:d0::19a7:7001/128", + "2a03:b0c0:3:d0::19b4:1/128", + "2a03:b0c0:3:d0::19b4:f001/128", + "2a03:b0c0:3:d0::19b6:f001/128", + "2a05:f480:1000:d63:5400:4ff:fe15:44de/128", + "2a05:f480:1c00:280:5400:4ff:fe14:f8d1/128", + "2a05:f480:1c00:b4a:5400:4ff:fe14:f8d4/128", + ] diff --git a/data/services/uptime-robot.yaml b/data/services/monitoring/uptime-robot.yaml similarity index 98% rename from data/services/uptime-robot.yaml rename to data/services/monitoring/uptime-robot.yaml index b0d9e089..516c0da8 100644 --- a/data/services/uptime-robot.yaml +++ b/data/services/monitoring/uptime-robot.yaml @@ -1,3 +1,7 @@ +# Uptime Robot is a website and endpoint monitoring service. 
+# +# Website: https://uptimerobot.com + - name: uptime-robot user_agent_regex: UptimeRobot action: ALLOW diff --git a/data/services/security/_deny-all.yaml b/data/services/security/_deny-all.yaml new file mode 100644 index 00000000..ecea2672 --- /dev/null +++ b/data/services/security/_deny-all.yaml @@ -0,0 +1 @@ +- import: (data)/services/security/bitsightbot.yaml diff --git a/data/services/security/bitsightbot.yaml b/data/services/security/bitsightbot.yaml new file mode 100644 index 00000000..86d5dd4f --- /dev/null +++ b/data/services/security/bitsightbot.yaml @@ -0,0 +1,9 @@ +# BitSightBot is a web crawler operated by BitSight Technologies, a company +# specializing in cybersecurity ratings. This bot is designed to scan and +# collect data from websites to assess their security posture. +# +# Website: https://www.bitsight.com + +- name: bitsightbot + user_agent_regex: BitSightBot + action: DENY diff --git a/data/services/seo/_deny-all.yaml b/data/services/seo/_deny-all.yaml new file mode 100644 index 00000000..040dd634 --- /dev/null +++ b/data/services/seo/_deny-all.yaml @@ -0,0 +1,7 @@ +- import: (data)/services/seo/ahrefs.yaml +- import: (data)/services/seo/dataforseo.yaml +- import: (data)/services/seo/mj12bot.yaml +- import: (data)/services/seo/screaming-frog.yaml +- import: (data)/services/seo/seokicks.yaml +- import: (data)/services/seo/serpstat.yaml +- import: (data)/services/seo/zoombot.yaml diff --git a/data/services/seo/ahrefs-allow.yaml b/data/services/seo/ahrefs-allow.yaml new file mode 100644 index 00000000..5c3b5c72 --- /dev/null +++ b/data/services/seo/ahrefs-allow.yaml @@ -0,0 +1,62 @@ +# Ahrefs is a large SEO company used by single bloggers to large enterprises. 
+# +# Website: https://ahrefs.com +# Crawler info: https://ahrefs.com/robot + +- name: ahrefs + user_agent_regex: (AhrefsBot|AhrefsSiteAudit)/ + action: ALLOW + # https://api.ahrefs.com/v3/public/crawler-ip-ranges + remote_addresses: [ + "5.39.1.224/27", + "5.39.109.160/27", + "15.235.27.0/24", + "15.235.96.0/24", + "15.235.98.0/24", + "37.59.204.128/27", + "51.68.247.192/27", + "51.75.236.128/27", + "51.89.129.0/24", + "51.161.37.0/24", + "51.161.65.0/24", + "51.195.183.0/24", + "51.195.215.0/24", + "51.195.244.0/24", + "51.222.95.0/24", + "51.222.168.0/24", + "51.222.253.0/26", + "54.36.148.0/23", + "54.37.118.64/27", + "54.38.147.0/24", + "54.39.0.0/24", + "54.39.6.0/24", + "54.39.89.0/24", + "54.39.136.0/24", + "54.39.203.0/24", + "54.39.210.0/24", + "92.222.104.192/27", + "92.222.108.96/27", + "94.23.188.192/27", + "142.44.220.0/24", + "142.44.225.0/24", + "142.44.228.0/24", + "142.44.233.0/24", + "148.113.128.0/24", + "148.113.130.0/24", + "167.114.139.0/24", + "168.100.149.0/24", + "176.31.139.0/27", + "198.244.168.0/24", + "198.244.183.0/24", + "198.244.186.193/32", + "198.244.186.194/31", + "198.244.186.196/30", + "198.244.186.200/31", + "198.244.186.202/32", + "198.244.226.0/24", + "198.244.240.0/24", + "198.244.242.0/24", + "202.8.40.0/22", + "202.94.84.110/31", + "202.94.84.112/31", + ] diff --git a/data/services/seo/ahrefs.yaml b/data/services/seo/ahrefs.yaml new file mode 100644 index 00000000..0dd07e3d --- /dev/null +++ b/data/services/seo/ahrefs.yaml @@ -0,0 +1,8 @@ +# Ahrefs is a large SEO company used by single bloggers to large enterprises. +# +# Website: https://ahrefs.com +# Crawler info: https://ahrefs.com/robot + +- name: ahrefs + user_agent_regex: (AhrefsBot|AhrefsSiteAudit)/ + action: DENY diff --git a/data/services/seo/dataforseo.yaml b/data/services/seo/dataforseo.yaml new file mode 100644 index 00000000..0c65cbf9 --- /dev/null +++ b/data/services/seo/dataforseo.yaml @@ -0,0 +1,8 @@ +# DataForSEO is a Ukrainian SEO company. 
+# +# Website: https://dataforseo.com +# Crawler info: https://dataforseo.com/dataforseo-bot + +- name: dataforseo + user_agent_regex: DataForSeoBot + action: DENY diff --git a/data/services/seo/mj12bot.yaml b/data/services/seo/mj12bot.yaml new file mode 100644 index 00000000..f46a1c4f --- /dev/null +++ b/data/services/seo/mj12bot.yaml @@ -0,0 +1,10 @@ +# Majestic is a UK-based specialist search engine and commercial SEO entity. +# They claim to "spider the Web for the purpose of building a search engine" +# with a distributed crawler. +# +# Website: https://majestic.com +# Crawler info: https://mj12bot.com + +- name: mj12bot + user_agent_regex: MJ12bot/ + action: DENY diff --git a/data/services/seo/screaming-frog.yaml b/data/services/seo/screaming-frog.yaml new file mode 100644 index 00000000..bb84999b --- /dev/null +++ b/data/services/seo/screaming-frog.yaml @@ -0,0 +1,9 @@ +# Screaming Frog is a smaller actor in the SEO space and their crawler +# occasionally attempts to access content despite being explicitly excluded via +# robots.txt directives. +# +# Company website: https://www.screamingfrog.co.uk + +- name: screaming-frog + user_agent_regex: ^Screaming Frog SEO Spider/ + action: DENY diff --git a/data/services/seo/seokicks.yaml b/data/services/seo/seokicks.yaml new file mode 100644 index 00000000..cfcca047 --- /dev/null +++ b/data/services/seo/seokicks.yaml @@ -0,0 +1,8 @@ +# SEOkicks is a German commercial SEO company. +# +# Website: https://www.seokicks.de +# Crawler info: https://www.seokicks.de/robot.html + +- name: seokicks + user_agent_regex: SEOkicks + action: DENY diff --git a/data/services/seo/serpstat.yaml b/data/services/seo/serpstat.yaml new file mode 100644 index 00000000..e66e2702 --- /dev/null +++ b/data/services/seo/serpstat.yaml @@ -0,0 +1,8 @@ +# Serpstat is a US-based SEO company. 
+# +# Website: https://serpstat.com +# Crawler info: https://serpstatbot.com + +- name: serpstatbot + user_agent_regex: serpstatbot/ + action: DENY diff --git a/data/services/seo/zoombot.yaml b/data/services/seo/zoombot.yaml new file mode 100644 index 00000000..22b40e5b --- /dev/null +++ b/data/services/seo/zoombot.yaml @@ -0,0 +1,8 @@ +# SEOZoom is an Italian SEO company. +# +# Website: https://www.seozoom.com +# Crawler info: https://www.seozoom.it/bot/ + +- name: zoombot + user_agent_regex: ^zoombot + action: DENY