diff --git a/.gitignore b/.gitignore index 242b4aa..357d719 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,5 @@ __pycache__ .pytest_cache .python-version *.egg-info +*.vscode dist diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 955e9c8..ddf51d3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -101,7 +101,7 @@ repos: rev: 22.3.0 hooks: - id: black - - repo: https://gitlab.com/pycqa/flake8 + - repo: https://github.com/PyCQA/flake8 rev: 3.9.2 hooks: - id: flake8 @@ -117,6 +117,8 @@ repos: - id: mypy additional_dependencies: - types-setuptools + - types-PyYAML + - types-requests - repo: https://github.com/asottile/pyupgrade rev: v2.31.1 hooks: diff --git a/setup.py b/setup.py index c978428..132dc2c 100644 --- a/setup.py +++ b/setup.py @@ -94,8 +94,10 @@ def get_version(version_file): "schema", "setuptools >= 24.2.0", "tqdm", - "urllib3", "validators", + "PyYAML", + "requests", + "loguru" ], extras_require={ "test": [ diff --git a/src/findcdn/__init__.py b/src/findcdn/__init__.py index 0d4a92b..bd6f70e 100644 --- a/src/findcdn/__init__.py +++ b/src/findcdn/__init__.py @@ -4,14 +4,14 @@ # package_name.__version__, which is used to get version information about this # Python package. 
from ._version import __version__ # noqa: F401 -from .findcdn import interactive, main -from .findcdn_err import FileWriteError, InvalidDomain, OutputFileExists + +# from .findcdn import interactive, main +# from .findcdn_err import FileWriteError, InvalidDomain, OutputFileExists +from .cdnEngine import analyze_domain, analyze_domains +# __all__ lists only names bound in this module; "main"/"interactive" are dropped with their import. __all__ = [ - "main", "__version__", - "interactive", - "OutputFileExists", - "InvalidDomain", - "FileWriteError", + "analyze_domain", + "analyze_domains", ] diff --git a/src/findcdn/_version.py b/src/findcdn/_version.py index 5eb9b0e..de155d7 100644 --- a/src/findcdn/_version.py +++ b/src/findcdn/_version.py @@ -1,2 +1,2 @@ """This file defines the version of this module.""" -__version__ = "0.1.0" +__version__ = "1.0.0" diff --git a/src/findcdn/cdnEngine/__init__.py b/src/findcdn/cdnEngine/__init__.py index ce0487b..741452f 100644 --- a/src/findcdn/cdnEngine/__init__.py +++ b/src/findcdn/cdnEngine/__init__.py @@ -1,6 +1,6 @@ -"""cdnEngine library.""" -from . import detectCDN -from .cdnEngine import Chef, DomainPot, run_checks +"""cdnEngine Logic.""" -"""Define public exports.""" -__all__ = ["DomainPot", "Chef", "run_checks", "detectCDN"] +from .analyzers import ARGS +from .cdnEngine import ANALYZERS, analyze_domain, analyze_domains + +__all__ = ["ANALYZERS", "analyze_domain", "analyze_domains", "ARGS"] diff --git a/src/findcdn/cdnEngine/analyzers/__cdn_config__.py b/src/findcdn/cdnEngine/analyzers/__cdn_config__.py new file mode 100644 index 0000000..f940c0d --- /dev/null +++ b/src/findcdn/cdnEngine/analyzers/__cdn_config__.py @@ -0,0 +1,1008 @@ +"""Configuration file to define CDN fingerprinting information.""" + +# Standard Python Libraries +from ipaddress import IPv4Network, IPv6Network, ip_network +from typing import Dict, List, Union + +"""Top 14 CDNs most commonly used.""" +CDN_RANGES: Dict[str, List[Union[str, IPv4Network, IPv6Network]]] = { + "Incapsula": [ # Imperva is Incapsula + ip_network("199.83.128.0/21"), + 
ip_network("198.143.32.0/19"), + ip_network("149.126.72.0/21"), + ip_network("103.28.248.0/22"), + ip_network("45.64.64.0/22"), + ip_network("45.64.64.0/22"), + ip_network("192.230.64.0/18"), + ip_network("107.154.0.0/16"), + ip_network("107.154.0.0/16"), + ip_network("45.223.0.0/16"), + ], + "Cloudflare": [ + ip_network("173.245.48.0/20"), + ip_network("103.21.244.0/22"), + ip_network("103.22.200.0/22"), + ip_network("103.31.4.0/22"), + ip_network("141.101.64.0/18"), + ip_network("108.162.192.0/18"), + ip_network("190.93.240.0/20"), + ip_network("188.114.96.0/20"), + ip_network("197.234.240.0/22"), + ip_network("198.41.128.0/17"), + ip_network("162.158.0.0/15"), + ip_network("104.16.0.0/13"), + ip_network("104.24.0.0/14"), + ip_network("172.64.0.0/13"), + ip_network("131.0.72.0/22"), + ], + "Akamai": [ + ip_network("23.32.0.0/11"), + ip_network("23.192.0.0/11"), + ip_network("2.16.0.0/13"), + ip_network("104.64.0.0/10"), + ip_network("184.24.0.0/13"), + ip_network("23.0.0.0/12"), + ip_network("95.100.0.0/15"), + ip_network("92.122.0.0/15"), + ip_network("184.50.0.0/15"), + ip_network("88.221.0.0/16"), + ip_network("23.64.0.0/14"), + ip_network("72.246.0.0/15"), + ip_network("96.16.0.0/15"), + ip_network("96.6.0.0/15"), + ip_network("69.192.0.0/16"), + ip_network("23.72.0.0/13"), + ip_network("173.222.0.0/15"), + ip_network("118.214.0.0/16"), + ip_network("184.84.0.0/14"), + ], + "Cloudfront": [ # https://d7uri8nf7uskq.cloudfront.net/tools/list-cloudfront-ips + ip_network("108.138.0.0/15"), + ip_network("108.156.0.0/14"), + ip_network("116.129.226.0/25"), + ip_network("116.129.226.128/26"), + ip_network("118.193.97.128/25"), + ip_network("118.193.97.64/26"), + ip_network("119.147.182.0/25"), + ip_network("119.147.182.128/26"), + ip_network("120.232.236.0/25"), + ip_network("120.232.236.128/26"), + ip_network("120.253.240.192/26"), + ip_network("120.253.241.160/27"), + ip_network("120.253.245.128/26"), + ip_network("120.253.245.192/27"), + 
ip_network("120.52.12.64/26"), + ip_network("120.52.153.192/26"), + ip_network("120.52.22.96/27"), + ip_network("120.52.39.128/27"), + ip_network("13.113.196.64/26"), + ip_network("13.113.203.0/24"), + ip_network("13.124.199.0/24"), + ip_network("13.210.67.128/26"), + ip_network("13.224.0.0/14"), + ip_network("13.228.69.0/24"), + ip_network("13.233.177.192/26"), + ip_network("13.249.0.0/16"), + ip_network("13.32.0.0/15"), + ip_network("13.35.0.0/16"), + ip_network("13.48.32.0/24"), + ip_network("13.54.63.128/26"), + ip_network("13.59.250.0/26"), + ip_network("130.176.0.0/17"), + ip_network("130.176.128.0/18"), + ip_network("130.176.192.0/19"), + ip_network("130.176.224.0/20"), + ip_network("143.204.0.0/16"), + ip_network("144.220.0.0/16"), + ip_network("15.158.0.0/16"), + ip_network("15.188.184.0/24"), + ip_network("15.207.13.128/25"), + ip_network("15.207.213.128/25"), + ip_network("18.154.0.0/15"), + ip_network("18.160.0.0/15"), + ip_network("18.164.0.0/15"), + ip_network("18.172.0.0/15"), + ip_network("18.192.142.0/23"), + ip_network("18.200.212.0/23"), + ip_network("18.216.170.128/25"), + ip_network("18.229.220.192/26"), + ip_network("18.238.0.0/15"), + ip_network("18.244.0.0/15"), + ip_network("18.64.0.0/14"), + ip_network("18.68.0.0/16"), + ip_network("180.163.57.0/25"), + ip_network("180.163.57.128/26"), + ip_network("204.246.164.0/22"), + ip_network("204.246.168.0/22"), + ip_network("204.246.172.0/24"), + ip_network("204.246.173.0/24"), + ip_network("204.246.174.0/23"), + ip_network("204.246.176.0/20"), + ip_network("205.251.200.0/21"), + ip_network("205.251.208.0/20"), + ip_network("205.251.249.0/24"), + ip_network("205.251.250.0/23"), + ip_network("205.251.252.0/23"), + ip_network("205.251.254.0/24"), + ip_network("216.137.32.0/19"), + ip_network("223.71.11.0/27"), + ip_network("223.71.71.128/25"), + ip_network("223.71.71.96/27"), + ip_network("3.10.17.128/25"), + ip_network("3.101.158.0/23"), + ip_network("3.11.53.0/24"), + ip_network("3.128.93.0/24"), + 
ip_network("3.134.215.0/24"), + ip_network("3.231.2.0/25"), + ip_network("3.234.232.224/27"), + ip_network("3.236.169.192/26"), + ip_network("3.236.48.0/23"), + ip_network("3.35.130.128/25"), + ip_network("34.195.252.0/24"), + ip_network("34.216.51.0/25"), + ip_network("34.223.12.224/27"), + ip_network("34.223.80.192/26"), + ip_network("34.226.14.0/24"), + ip_network("35.158.136.0/24"), + ip_network("35.162.63.192/26"), + ip_network("35.167.191.128/26"), + ip_network("36.103.232.0/25"), + ip_network("36.103.232.128/26"), + ip_network("44.227.178.0/24"), + ip_network("44.234.108.128/25"), + ip_network("44.234.90.252/30"), + ip_network("52.124.128.0/17"), + ip_network("52.15.127.128/26"), + ip_network("52.199.127.192/26"), + ip_network("52.212.248.0/26"), + ip_network("52.220.191.0/26"), + ip_network("52.222.128.0/17"), + ip_network("52.46.0.0/18"), + ip_network("52.47.139.0/24"), + ip_network("52.52.191.128/26"), + ip_network("52.56.127.0/25"), + ip_network("52.57.254.0/24"), + ip_network("52.66.194.128/26"), + ip_network("52.78.247.128/26"), + ip_network("52.82.128.0/19"), + ip_network("52.84.0.0/15"), + ip_network("54.182.0.0/16"), + ip_network("54.192.0.0/16"), + ip_network("54.230.0.0/17"), + ip_network("54.230.128.0/18"), + ip_network("54.230.200.0/21"), + ip_network("54.230.208.0/20"), + ip_network("54.230.224.0/19"), + ip_network("54.233.255.128/26"), + ip_network("54.239.128.0/18"), + ip_network("54.239.192.0/19"), + ip_network("54.240.128.0/18"), + ip_network("58.254.138.0/25"), + ip_network("58.254.138.128/26"), + ip_network("64.252.128.0/18"), + ip_network("64.252.64.0/18"), + ip_network("65.8.0.0/16"), + ip_network("65.9.0.0/17"), + ip_network("65.9.128.0/18"), + ip_network("70.132.0.0/18"), + ip_network("71.152.0.0/17"), + ip_network("99.79.169.0/24"), + ip_network("99.84.0.0/16"), + ip_network("99.86.0.0/16"), + ], + "CacheFly": [ + ip_network("167.88.158.0/24"), + ip_network("167.88.159.0/24"), + ip_network("204.93.142.0/24"), + 
ip_network("204.93.143.0/24"), + ip_network("204.93.145.0/24"), + ip_network("204.93.146.0/24"), + ip_network("204.93.150.0/24"), + ip_network("204.93.230.0/24"), + ip_network("204.93.231.0/24"), + ip_network("205.234.149.0/24"), + ip_network("205.234.155.0/24"), + ip_network("205.234.175.0/24"), + ip_network("205.234.216.0/24"), + ip_network("205.234.229.0/24"), + ip_network("205.234.240.0/24"), + ip_network("216.246.100.0/24"), + ip_network("216.246.104.0/24"), + ip_network("216.246.118.0/24"), + ip_network("216.246.119.0/24"), + ip_network("216.246.19.0/24"), + ip_network("216.246.40.0/24"), + ip_network("45.88.132.0/22"), + ip_network("50.31.196.0/24"), + ip_network("50.31.197.0/24"), + ip_network("50.31.201.0/24"), + ip_network("50.31.238.0/24"), + ip_network("66.225.197.0/24"), + ip_network("66.225.208.0/24"), + ip_network("66.225.214.0/24"), + ip_network("66.225.222.0/24"), + ip_network("66.225.243.0/24"), + ip_network("69.22.157.0/24"), + ip_network("69.31.3.0/24"), + ip_network("75.102.32.0/24"), + ip_network("75.102.33.0/24"), + ip_network("75.102.42.0/24"), + ], + "Airee": [ + ip_network("185.107.72.0/24"), # WEBO LLC + ip_network("185.107.73.0/24"), # WEBO LLC + ip_network("185.107.75.0/24"), # airee + ], + "Edgecast": [ + ip_network("108.161.240.0/20"), + ip_network("110.232.176.0/22"), + ip_network("117.103.183.0/24"), + ip_network("117.18.232.0/21"), + ip_network("121.189.46.0/23"), + ip_network("180.240.184.0/24"), + ip_network("192.16.0.0/18"), + ip_network("192.229.128.0/17"), + ip_network("192.30.0.0/19"), + ip_network("194.255.210.64/26"), + ip_network("198.7.16.0/20"), + ip_network("203.74.4.64/26"), + ip_network("213.64.234.0/26"), + ip_network("46.22.64.0/20"), + ip_network("46.22.64.0/23"), + ip_network("46.22.66.0/23"), + ip_network("46.22.70.0/23"), + ip_network("46.22.72.0/22"), + ip_network("5.104.64.0/21"), + ip_network("68.232.32.0/20"), + ip_network("72.21.80.0/20"), + ip_network("88.194.45.128/26"), + ip_network("93.184.208.0/20"), + 
], + "MaxCDN": [ + ip_network("108.161.176.0/20"), + ip_network("94.46.144.0/20"), + ip_network("146.88.128.0/20"), + ip_network("198.232.124.0/22"), + ip_network("23.111.8.0/22"), + ip_network("217.22.28.0/22"), + ip_network("64.125.76.64/27"), + ip_network("64.125.76.96/27"), + ip_network("64.125.78.96/27"), + ip_network("64.125.78.192/27"), + ip_network("64.125.78.224/27"), + ip_network("64.125.102.32/27"), + ip_network("64.125.102.64/27"), + ip_network("64.125.102.96/27"), + ip_network("94.31.27.64/27"), + ip_network("94.31.33.128/27"), + ip_network("94.31.33.160/27"), + ip_network("94.31.33.192/27"), + ip_network("94.31.56.160/27"), + ip_network("177.54.148.0/24"), + ip_network("185.18.207.64/26"), + ip_network("50.31.249.224/27"), + ip_network("50.31.251.32/28"), + ip_network("119.81.42.192/27"), + ip_network("119.81.104.96/28"), + ip_network("119.81.67.8/29"), + ip_network("119.81.0.104/30"), + ip_network("119.81.1.144/30"), + ip_network("27.50.77.226/32"), + ip_network("27.50.79.130/32"), + ip_network("119.81.131.130/32"), + ip_network("119.81.131.131/32"), + ip_network("216.12.211.59/32"), + ip_network("216.12.211.60/32"), + ip_network("37.58.110.67/32"), + ip_network("37.58.110.68/32"), + ip_network("158.85.206.228/32"), + ip_network("158.85.206.231/32"), + ip_network("174.36.204.195/32"), + ip_network("174.36.204.196/32"), + ip_network("151.139.0.0/19"), + ip_network("103.66.28.0/22"), + ip_network("103.228.104.0/22"), + ip_network("108.168.175.204/32"), + ], + "Beluga": [ # NuCDN + ip_network("104.37.176.0/21"), + ip_network("104.37.178.0/24"), + ip_network("104.37.179.0/24"), + ip_network("104.37.183.0/24"), + ip_network("162.253.156.0/22"), + ip_network("162.253.159.0/24"), + ip_network("162.255.24.0/22"), + ip_network("162.255.24.0/24"), + ip_network("162.255.25.0/24"), + ip_network("199.167.64.0/22"), + ip_network("199.167.64.0/24"), + ip_network("199.167.65.0/24"), + ip_network("199.167.66.0/24"), + ip_network("68.169.76.0/24"), + 
ip_network("8.20.241.0/24"), + ip_network("8.20.243.0/24"), + ip_network("8.20.247.0/24"), + ip_network("8.26.56.0/24"), + ip_network("92.61.240.0/21"), + ip_network("92.61.248.0/24"), + ], + "Limelight": [ + ip_network("111.119.0.0/22"), + ip_network("111.119.11.0/24"), + ip_network("111.119.16.0/23"), + ip_network("111.119.20.0/23"), + ip_network("111.119.22.0/23"), + ip_network("111.119.24.0/21"), + ip_network("111.119.4.0/22"), + ip_network("117.121.248.0/21"), + ip_network("117.121.248.0/22"), + ip_network("117.121.248.0/23"), + ip_network("117.121.250.0/23"), + ip_network("117.121.254.0/23"), + ip_network("178.249.104.0/21"), + ip_network("178.249.105.0/24"), + ip_network("178.249.110.0/24"), + ip_network("178.79.192.0/18"), + ip_network("178.79.196.0/23"), + ip_network("178.79.203.0/24"), + ip_network("178.79.214.0/23"), + ip_network("178.79.228.0/23"), + ip_network("178.79.230.0/23"), + ip_network("178.79.232.0/22"), + ip_network("178.79.236.0/22"), + ip_network("178.79.240.0/21"), + ip_network("178.79.248.0/21"), + ip_network("185.116.100.0/22"), + ip_network("185.178.52.0/22"), + ip_network("203.77.184.0/22"), + ip_network("203.77.188.0/22"), + ip_network("203.77.188.0/23"), + ip_network("203.9.176.0/21"), + ip_network("203.9.177.0/24"), + ip_network("203.9.178.0/24"), + ip_network("206.223.120.0/24"), + ip_network("208.111.128.0/18"), + ip_network("208.111.131.0/24"), + ip_network("208.111.146.0/24"), + ip_network("208.111.152.0/23"), + ip_network("208.111.154.0/24"), + ip_network("208.111.157.0/24"), + ip_network("208.111.172.0/24"), + ip_network("208.111.180.0/22"), + ip_network("208.111.184.0/24"), + ip_network("208.111.188.0/22"), + ip_network("208.48.140.0/24"), + ip_network("208.69.177.0/24"), + ip_network("208.69.178.0/24"), + ip_network("208.69.179.0/24"), + ip_network("208.69.180.0/24"), + ip_network("208.69.181.0/24"), + ip_network("208.69.182.0/24"), + ip_network("208.69.183.0/24"), + ip_network("216.247.120.0/24"), + 
ip_network("216.247.121.0/24"), + ip_network("216.247.123.0/24"), + ip_network("41.63.64.0/18"), + ip_network("41.63.64.0/22"), + ip_network("41.63.68.0/23"), + ip_network("41.63.96.0/23"), + ip_network("41.63.99.0/24"), + ip_network("46.183.88.0/21"), + ip_network("46.228.144.0/20"), + ip_network("46.228.145.0/24"), + ip_network("46.228.148.0/24"), + ip_network("46.228.149.0/24"), + ip_network("46.228.150.0/24"), + ip_network("68.142.100.0/24"), + ip_network("68.142.115.0/24"), + ip_network("68.142.123.0/24"), + ip_network("68.142.126.0/23"), + ip_network("68.142.64.0/18"), + ip_network("68.142.68.0/22"), + ip_network("68.142.72.0/23"), + ip_network("68.142.74.0/24"), + ip_network("68.142.82.0/24"), + ip_network("68.142.91.0/24"), + ip_network("69.164.0.0/18"), + ip_network("69.164.0.0/24"), + ip_network("69.164.17.0/24"), + ip_network("69.164.22.0/23"), + ip_network("69.164.24.0/22"), + ip_network("69.164.28.0/22"), + ip_network("69.164.32.0/23"), + ip_network("69.164.40.0/23"), + ip_network("69.164.5.0/24"), + ip_network("69.164.52.0/24"), + ip_network("69.164.54.0/23"), + ip_network("69.164.58.0/24"), + ip_network("69.164.6.0/23"), + ip_network("69.28.128.0/18"), + ip_network("69.28.131.0/24"), + ip_network("69.28.134.0/24"), + ip_network("69.28.139.0/24"), + ip_network("69.28.142.0/24"), + ip_network("69.28.143.0/24"), + ip_network("69.28.147.0/24"), + ip_network("69.28.168.0/24"), + ip_network("69.28.169.0/24"), + ip_network("69.28.170.0/24"), + ip_network("69.28.174.0/23"), + ], + "Fastly": [ # https://api.fastly.com/public-ip-list + ip_network("185.199.110.0/24"), + ip_network("23.235.32.0/20"), + ip_network("43.249.72.0/22"), + ip_network("103.244.50.0/24"), + ip_network("103.245.222.0/23"), + ip_network("103.245.224.0/24"), + ip_network("104.156.80.0/20"), + ip_network("140.248.64.0/18"), + ip_network("140.248.128.0/17"), + ip_network("146.75.0.0/17"), + ip_network("151.101.0.0/16"), + ip_network("157.52.64.0/18"), + ip_network("167.82.0.0/17"), + 
ip_network("167.82.128.0/20"), + ip_network("167.82.160.0/20"), + ip_network("167.82.224.0/20"), + ip_network("172.111.64.0/18"), + ip_network("185.31.16.0/22"), + ip_network("199.27.72.0/21"), + ip_network("199.232.0.0/16"), + ip_network("103.245.222.0/24"), + ip_network("103.245.224.0/24"), + ip_network("104.156.80.0/24"), + ip_network("104.156.81.0/24"), + ip_network("104.156.82.0/24"), + ip_network("104.156.83.0/24"), + ip_network("104.156.84.0/24"), + ip_network("104.156.85.0/24"), + ip_network("104.156.86.0/24"), + ip_network("104.156.87.0/24"), + ip_network("104.156.89.0/24"), + ip_network("104.156.90.0/24"), + ip_network("104.156.91.0/24"), + ip_network("104.156.92.0/24"), + ip_network("104.156.93.0/24"), + ip_network("104.156.94.0/24"), + ip_network("104.156.95.0/24"), + ip_network("104.244.43.0/24"), + ip_network("140.248.0.0/18"), + ip_network("140.248.0.0/22"), + ip_network("140.248.10.0/24"), + ip_network("140.248.11.0/24"), + ip_network("140.248.12.0/22"), + ip_network("140.248.128.0/18"), + ip_network("140.248.16.0/24"), + ip_network("140.248.17.0/24"), + ip_network("140.248.18.0/24"), + ip_network("140.248.19.0/24"), + ip_network("140.248.192.0/18"), + ip_network("140.248.192.0/19"), + ip_network("140.248.193.0/24"), + ip_network("140.248.194.0/24"), + ip_network("140.248.20.0/23"), + ip_network("140.248.22.0/23"), + ip_network("140.248.224.0/19"), + ip_network("140.248.224.0/24"), + ip_network("140.248.225.0/24"), + ip_network("140.248.226.0/24"), + ip_network("140.248.24.0/24"), + ip_network("140.248.25.0/24"), + ip_network("140.248.28.0/24"), + ip_network("140.248.29.0/24"), + ip_network("140.248.30.0/23"), + ip_network("140.248.32.0/24"), + ip_network("140.248.33.0/24"), + ip_network("140.248.34.0/23"), + ip_network("140.248.38.0/24"), + ip_network("140.248.39.0/24"), + ip_network("140.248.40.0/24"), + ip_network("140.248.4.0/22"), + ip_network("140.248.65.0/24"), + ip_network("140.248.66.0/24"), + ip_network("140.248.67.0/24"), + 
ip_network("140.248.68.0/24"), + ip_network("140.248.69.0/24"), + ip_network("140.248.70.0/24"), + ip_network("140.248.71.0/24"), + ip_network("140.248.72.0/24"), + ip_network("140.248.73.0/24"), + ip_network("140.248.74.0/24"), + ip_network("140.248.75.0/24"), + ip_network("140.248.77.0/24"), + ip_network("140.248.78.0/24"), + ip_network("140.248.79.0/24"), + ip_network("140.248.8.0/24"), + ip_network("140.248.83.0/24"), + ip_network("140.248.9.0/24"), + ip_network("146.75.0.0/17"), + ip_network("146.75.0.0/22"), + ip_network("146.75.100.0/22"), + ip_network("146.75.104.0/22"), + ip_network("146.75.108.0/22"), + ip_network("146.75.112.0/22"), + ip_network("146.75.116.0/22"), + ip_network("146.75.12.0/22"), + ip_network("146.75.128.0/17"), + ip_network("146.75.132.0/23"), + ip_network("146.75.134.0/23"), + ip_network("146.75.136.0/23"), + ip_network("146.75.140.0/23"), + ip_network("146.75.142.0/23"), + ip_network("146.75.146.0/23"), + ip_network("146.75.148.0/23"), + ip_network("146.75.152.0/23"), + ip_network("146.75.154.0/23"), + ip_network("146.75.158.0/23"), + ip_network("146.75.160.0/23"), + ip_network("146.75.16.0/22"), + ip_network("146.75.162.0/23"), + ip_network("146.75.164.0/23"), + ip_network("146.75.166.0/24"), + ip_network("146.75.167.0/24"), + ip_network("146.75.168.0/24"), + ip_network("146.75.169.0/24"), + ip_network("146.75.172.0/23"), + ip_network("146.75.174.0/24"), + ip_network("146.75.175.0/24"), + ip_network("146.75.176.0/23"), + ip_network("146.75.178.0/24"), + ip_network("146.75.179.0/24"), + ], + "Myracloud": [ + ip_network("103.51.164.0/24"), + ip_network("103.51.165.0/24"), + ip_network("103.51.166.0/24"), + ip_network("103.51.167.0/24"), + ip_network("141.15.11.0/24"), + ip_network("141.15.127.0/24"), + ip_network("141.15.3.0/24"), + ip_network("141.15.34.0/24"), + ip_network("141.15.35.0/24"), + ip_network("141.15.64.0/24"), + ip_network("141.15.65.0/24"), + ip_network("185.5.80.0/24"), + ip_network("185.5.81.0/24"), + 
ip_network("185.5.82.0/24"), + ip_network("185.5.83.0/24"), + ip_network("185.85.0.0/24"), + ip_network("185.85.1.0/24"), + ip_network("185.85.2.0/24"), + ip_network("185.85.3.0/24"), + ip_network("193.25.234.0/24"), + ip_network("195.234.0.0/24"), + ip_network("217.66.49.0/24"), + ip_network("217.66.50.0/24"), + ip_network("37.143.40.0/24"), + ip_network("37.143.41.0/24"), + ip_network("37.143.42.0/24"), + ip_network("37.143.43.0/24"), + ip_network("37.143.44.0/24"), + ip_network("37.143.45.0/24"), + ip_network("37.143.46.0/24"), + ip_network("37.143.47.0/24"), + ip_network("45.112.84.0/24"), + ip_network("45.112.85.0/24"), + ip_network("45.112.87.0/24"), + ip_network("45.91.156.0/24"), + ip_network("45.91.157.0/24"), + ip_network("45.91.158.0/24"), + ip_network("45.91.159.0/24"), + ip_network("62.201.172.0/24"), + ip_network("80.90.0.0/24"), + ip_network("80.90.10.0/24"), + ip_network("80.90.1.0/24"), + ip_network("80.90.11.0/24"), + ip_network("80.90.12.0/24"), + ip_network("80.90.13.0/24"), + ip_network("80.90.14.0/24"), + ip_network("80.90.15.0/24"), + ip_network("80.90.16.0/24"), + ip_network("80.90.17.0/24"), + ip_network("80.90.18.0/24"), + ip_network("80.90.19.0/24"), + ip_network("80.90.20.0/24"), + ip_network("80.90.21.0/24"), + ip_network("80.90.22.0/24"), + ip_network("80.90.23.0/24"), + ip_network("80.90.24.0/24"), + ip_network("80.90.25.0/24"), + ip_network("80.90.26.0/24"), + ip_network("80.90.27.0/24"), + ip_network("80.90.28.0/24"), + ip_network("80.90.29.0/24"), + ip_network("80.90.30.0/24"), + ip_network("80.90.31.0/24"), + ip_network("80.90.4.0/24"), + ip_network("80.90.5.0/24"), + ip_network("80.90.6.0/24"), + ip_network("80.90.8.0/24"), + ip_network("80.90.9.0/24"), + ip_network("91.236.122.0/24"), + ], + "Azure": [ + ip_network("101.226.203.0"), + ip_network("108.161.240.0"), + ip_network("110.164.36.0"), + ip_network("110.232.176.0"), + ip_network("117.103.183.0"), + ip_network("117.18.232.0"), + ip_network("119.46.85.0"), + 
ip_network("120.132.137.0"), + ip_network("121.156.59.224"), + ip_network("121.189.46.0"), + ip_network("136.228.144.0"), + ip_network("152.190.247.0"), + ip_network("152.195.0.0"), + ip_network("152.199.0.0"), + ip_network("180.240.184.0"), + ip_network("192.16.0.0"), + ip_network("192.229.128.0"), + ip_network("192.30.0.0"), + ip_network("194.255.210.64"), + ip_network("194.255.242.160"), + ip_network("195.67.219.64"), + ip_network("198.7.16.0"), + ip_network("203.66.205.0"), + ip_network("203.74.4.64"), + ip_network("213.175.80.0"), + ip_network("213.64.234.0"), + ip_network("213.65.58.0"), + ip_network("36.67.255.152"), + ip_network("46.22.64.0"), + ip_network("49.231.126.0"), + ip_network("5.104.64.0"), + ip_network("61.221.181.64"), + ip_network("64.12.0.0"), + ip_network("65.198.79.64"), + ip_network("65.199.146.192"), + ip_network("65.200.151.160"), + ip_network("65.200.157.192"), + ip_network("65.200.46.128"), + ip_network("65.222.137.0"), + ip_network("65.222.145.128"), + ip_network("68.130.0.0"), + ip_network("68.130.128.0"), + ip_network("68.130.136.0"), + ip_network("68.140.206.0"), + ip_network("68.232.32.0"), + ip_network("72.21.80.0"), + ip_network("88.194.45.128"), + ip_network("88.194.47.224"), + ip_network("93.184.208.0"), + ], + "Clever-cloud": [], # Possibly just the same thing as cloudflare cdn + "FastCDN": [ + ip_network("136.228.152.0/24"), + ip_network("136.228.153.0/24"), + ip_network("136.228.154.0/24"), + ip_network("136.228.155.0/24"), + ], + "ArvanCloud": [ # AS208006 + ip_network("130.185.120.0/23"), + ip_network("130.185.122.0/24"), + ip_network("130.185.123.0/24"), + ip_network("185.204.168.0/22"), + ip_network("185.215.234.0/24"), + ip_network("185.215.235.0/24"), + ip_network("185.235.40.0/22"), + ip_network("194.5.192.0/23"), + ip_network("194.5.206.0/23"), + ], + "Kingsoft": [ + ip_network("104.166.180.0/23"), + ip_network("104.250.32.0/23"), + ip_network("104.250.34.0/23"), + ip_network("104.250.36.0/23"), + 
ip_network("104.250.38.0/23"), + ip_network("104.250.40.0/23"), + ip_network("104.250.42.0/23"), + ip_network("104.250.44.0/24"), + ip_network("104.250.46.0/23"), + ip_network("104.250.48.0/23"), + ip_network("104.250.50.0/23"), + ip_network("104.250.52.0/22"), + ip_network("104.250.56.0/23"), + ip_network("104.250.58.0/23"), + ip_network("104.250.60.0/23"), + ip_network("104.250.60.0/24"), + ip_network("104.250.62.0/23"), + ip_network("107.155.51.0/24"), + ip_network("107.155.52.0/23"), + ip_network("107.155.53.0/24"), + ip_network("120.131.0.0/23"), + ip_network("120.131.10.0/23"), + ip_network("120.131.12.0/23"), + ip_network("120.131.14.0/23"), + ip_network("120.131.2.0/23"), + ip_network("120.131.4.0/23"), + ip_network("120.131.6.0/23"), + ip_network("120.131.8.0/23"), + ip_network("120.92.0.0/23"), + ip_network("120.92.10.0/23"), + ip_network("120.92.104.0/21"), + ip_network("120.92.112.0/21"), + ip_network("120.92.120.0/21"), + ip_network("120.92.12.0/23"), + ip_network("120.92.128.0/21"), + ip_network("120.92.136.0/21"), + ip_network("120.92.14.0/23"), + ip_network("120.92.144.0/21"), + ip_network("120.92.152.0/21"), + ip_network("120.92.160.0/21"), + ip_network("120.92.16.0/21"), + ip_network("120.92.168.0/21"), + ip_network("120.92.176.0/21"), + ip_network("120.92.184.0/21"), + ip_network("120.92.184.0/23"), + ip_network("120.92.186.0/23"), + ip_network("120.92.192.0/23"), + ip_network("120.92.194.0/23"), + ip_network("120.92.196.0/23"), + ip_network("120.92.200.0/21"), + ip_network("120.92.2.0/23"), + ip_network("120.92.208.0/20"), + ip_network("120.92.224.0/23"), + ip_network("120.92.226.0/23"), + ip_network("120.92.228.0/23"), + ip_network("120.92.230.0/23"), + ip_network("120.92.232.0/23"), + ip_network("120.92.234.0/23"), + ip_network("120.92.236.0/23"), + ip_network("120.92.238.0/23"), + ip_network("120.92.24.0/21"), + ip_network("120.92.32.0/21"), + ip_network("120.92.40.0/21"), + ip_network("120.92.4.0/23"), + ip_network("120.92.48.0/21"), + 
ip_network("120.92.56.0/21"), + ip_network("120.92.6.0/23"), + ip_network("120.92.64.0/21"), + ip_network("120.92.72.0/21"), + ip_network("120.92.80.0/21"), + ip_network("120.92.8.0/23"), + ip_network("120.92.88.0/21"), + ip_network("120.92.96.0/21"), + ip_network("169.197.116.0/23"), + ip_network("203.69.19.0/24"), + ip_network("203.69.33.0/24"), + ip_network("213.255.228.0/23"), + ip_network("213.255.230.0/24"), + ], + "Tencent": [], + "Google": [ + ip_network("8.34.208.0/20"), + ip_network("8.35.192.0/21"), + ip_network("8.35.200.0/23"), + ip_network("23.236.48.0/20"), + ip_network("23.251.128.0/19"), + ip_network("34.100.0.0/16"), + ip_network("34.102.0.0/15"), + ip_network("34.104.0.0/14"), + ip_network("34.124.0.0/18"), + ip_network("34.124.64.0/20"), + ip_network("34.124.80.0/23"), + ip_network("34.124.84.0/22"), + ip_network("34.124.88.0/23"), + ip_network("34.124.92.0/22"), + ip_network("34.125.0.0/16"), + ip_network("34.64.0.0/11"), + ip_network("34.96.0.0/14"), + ip_network("35.184.0.0/14"), + ip_network("35.188.0.0/15"), + ip_network("35.190.0.0/17"), + ip_network("35.190.128.0/18"), + ip_network("35.190.192.0/19"), + ip_network("35.190.224.0/20"), + ip_network("35.190.240.0/22"), + ip_network("35.192.0.0/14"), + ip_network("35.196.0.0/15"), + ip_network("35.198.0.0/16"), + ip_network("35.199.0.0/17"), + ip_network("35.199.128.0/18"), + ip_network("35.200.0.0/13"), + ip_network("35.208.0.0/13"), + ip_network("35.216.0.0/15"), + ip_network("35.220.0.0/14"), + ip_network("35.224.0.0/13"), + ip_network("35.232.0.0/15"), + ip_network("35.234.0.0/16"), + ip_network("35.235.0.0/17"), + ip_network("35.235.192.0/20"), + ip_network("35.235.216.0/21"), + ip_network("35.235.224.0/20"), + ip_network("35.236.0.0/14"), + ip_network("35.240.0.0/13"), + ip_network("104.154.0.0/15"), + ip_network("104.196.0.0/14"), + ip_network("107.178.192.0/18"), + ip_network("108.170.192.0/20"), + ip_network("108.170.208.0/21"), + ip_network("108.170.216.0/22"), + 
ip_network("108.170.220.0/23"), + ip_network("108.170.222.0/24"), + ip_network("108.59.80.0/20"), + ip_network("130.211.128.0/17"), + ip_network("130.211.16.0/20"), + ip_network("130.211.32.0/19"), + ip_network("130.211.4.0/22"), + ip_network("130.211.64.0/18"), + ip_network("130.211.8.0/21"), + ip_network("146.148.16.0/20"), + ip_network("146.148.2.0/23"), + ip_network("146.148.32.0/19"), + ip_network("146.148.4.0/22"), + ip_network("146.148.64.0/18"), + ip_network("146.148.8.0/21"), + ip_network("162.216.148.0/22"), + ip_network("162.222.176.0/21"), + ip_network("173.255.112.0/20"), + ip_network("192.158.28.0/22"), + ip_network("199.192.112.0/22"), + ip_network("199.223.232.0/22"), + ip_network("199.223.236.0/23"), + ip_network("208.68.108.0/23"), + ], +} + +""" +More inclusive list of available CDNs + +Format: CDNs[<regex matching a CDN domain name>] = <CDN name> +""" +CDNs = { + # ".*\.amazonaws\.com": "Amazon AWS", # Just because aws does not mean CDN + r".*cdn\.geeksforgeeks\.org": "GeeksForGeeksCDN", + r".*\.discordapp\.com": "Discord", + r".*\.airee\.international": "Airee", + r".*\.myracloud\.com": "Myra", + r".*\.msecnd\.net": "MicrosoftAzure", + r".*\.clever-cloud\.com": "Clever-cloud", + r".*\.turbobytes-cdn\.com": "Turbo Bytes", + r".*\.akadns\.net": "Akamai", + r".*\.anankecdn\.com\.br": "Ananke", + r".*\.belugacdn\.com": "BelugaCDN", + r".*\.cdnify\.io": "CDNify", + r".*\.clients\.turbobytes\.net": "Turbo Bytes", + r".*\.lambdacdn\.net": "LambdaCDN", + r".*\.akamai\.net": "Akamai", + r".*\.akamaized\.net": "Akamai", + r".*\.akamaiedge\.net": "Akamai", + r".*\.akamaihd\.net": "Akamai", + r".*\.edgesuite\.net": "Akamai", + r".*\.edgekey\.net": "Akamai", + r".*\.srip\.net": "Akamai", + r".*\.akamaitechnologies\.com": "Akamai", + r".*\.akamaitechnologies\.fr": "Akamai", + r".*\.tl88\.net": "AkamaiChinaCDN", + r".*\.llnwd\.net": "Limelight", + r".*\.lldns\.net": "Limelight", + r".*\.netdna-cdn\.com": "StackPath", + r".*\.netdna-ssl\.com": "StackPath", + r".*\.netdna\.com": "StackPath", + 
r".*\.gfx\.ms": "Limelight", + r".*\.adn\.": "EdgeCast", + r".*\.wac\.": "EdgeCast", + r".*\.wpc\.": "EdgeCast", + r".*\.fastly\.net": "Fastly", + r".*\.fastlylb\.net": "Fastly", + r".*edgecastcdn\.net": "EdgeCast", + r".*\.systemcdn\.net": "EdgeCast", + r".*\.transactcdn\.net": "EdgeCast", + r".*\.v1cdn\.net": "EdgeCast", + r".*\.v2cdn\.net": "EdgeCast", + r".*\.v3cdn\.net": "EdgeCast", + r".*\.v4cdn\.net": "EdgeCast", + r".*\.v5cdn\.net": "EdgeCast", + r".*hwcdn\.net": "Highwinds", + r".*\.simplecdn\.net": "SimpleCDN", + r".*\.instacontent\.net": "MirrorImage", + r".*\.cap-mii\.net": "MirrorImage", + r".*\.footprint\.net": "Level3", + r".*\.fpbns\.net": "Level3", + r".*\.ay1\.b\.yahoo\.com": "Yahoo", + r".*\.yimg\.": "Yahoo", + r".*\.yahooapis\.com": "Yahoo", + r".*\.google\.": "Google", + r".*googlesyndication\.": "Google", + r".*youtube\.": "Google", + r".*\.googleusercontent\.com": "Google", + r".*googlehosted\.com": "Google", + r".*\.insnw\.net": "InstartLogic", + r".*\.inscname\.net": "InstartLogic", + r".*\.internapcdn\.net": "Internap", + r".*\.cloudfront\.net": "Cloudfront", + r".*\.kxcdn\.com": "KeyCDN", + r".*\.cotcdn\.net": "CotendoCDN", + r".*\.cachefly\.net": "Cachefly", + r".*bo\.lt": r"BO\.LT", + r".*\.cloudflare\.net": "Cloudflare", + r".*\.cloudflare\.com": "Cloudflare", + r".*\.afxcdn\.net": r"afxcdn\.net", + r".*\.wscdns\.com": "ChinaNetCenter", + r".*\.wscloudcdn\.com": "ChinaNetCenter", + r".*\.ourwebpic\.com": "ChinaNetCenter", + r".*\.att-dsa\.net": "AT&T", + r".*\.vo\.msecnd\.net": "MicrosoftAzure", + r".*\.azureedge\.net": "MicrosoftAzure", + r".*\.voxcdn\.net": "VoxCDN", + r".*\.bluehatnetwork\.com": "BlueHatNetwork", + r".*\.swiftcdn1\.com": "SwiftCDN", + r".*\.swiftserve\.com": "SwiftServe", + r".*\.cdngc\.net": "CDNetworks", + r".*\.gccdn\.net": "CDNetworks", + r".*\.gccdn\.cn": "CDNetworks", + r".*\.panthercdn\.com": "CDNetworks", + r".*\.nocookie\.net": "Fastly", + r".*\.cdn\.bitgravity\.com": "Tata communications", + 
r".*\.cdn\.telefonica\.com": "Telefonica", + r".*\.gslb\.taobao\.com": "Taobao", + r".*\.gslb\.tbcache\.com": "Alimama", + r".*\.mirror-image\.net": "MirrorImage", + r".*\.yottaa\.net": "Yottaa", + r".*\.cubecdn\.net": "cubeCDN", + r".*\.cdn77\.net": "CDN77", + r".*\.cdn77\.org": "CDN77", + r".*x\.incapdns\.net": "Incapsula", + r".*\.bitgravity\.com": "BitGravity", + r".*\.r\.worldcdn\.net": "OnApp", + r".*\.r\.worldssl\.net": "OnApp", + r".*tbcdn\.cn": "Taobao", + r".*\.taobaocdn\.com": "Taobao", + r".*\.ngenix\.net": "NGENIX", + r".*\.pagerain\.net": "PageRain", + r".*\.ccgslb\.com": "ChinaCache", + r".*\.ccgslb\.net": "ChinaCache", + r".*\.c3cache\.net": "ChinaCache", + r".*\.chinacache\.net": "ChinaCache", + r".*\.c3cdn\.net": "ChinaCache", + r".*\.lxdns\.com": "ChinaNetCenter", + r".*\.speedcdns\.com": "QUANTIL/ChinaNetCenter", + r".*\.mwcloudcdn\.com": "QUANTIL/ChinaNetCenter", + r".*cdn\.sfr\.net": "SFR", + r".*\.azioncdn\.net": "Azion", + r".*\.azioncdn\.com": "Azion", + r".*\.azion\.net": "Azion", + r".*\.cdncloud\.net\.au": "MediaCloud", + r".*\.rncdn1\.com": "ReflectedNetworks", + r".*\.cdnsun\.net": "CDNsun", + r".*\.mncdn\.com": "Medianova", + r".*\.mncdn\.net": "Medianova", + r".*\.mncdn\.org": "Medianova", + r".*cdn\.jsdelivr\.net": "jsDelivr", + r".*\.nyiftw\.net": "NYIFTW", + r".*\.nyiftw\.com": "NYIFTW", + r".*\.resrc\.it": r"ReSRC\.it", + r".*\.zenedge\.net": "Zenedge", + r".*\.lswcdn\.net": "LeaseWebCDN", + r".*\.lswcdn\.eu": "LeaseWebCDN", + r".*\.revcn\.net": "RevSoftware", + r".*\.revdn\.net": "RevSoftware", + r".*\.caspowa\.com": "Caspowa", + r".*\.twimg\.com": "Twitter", + r".*\.facebook\.com": "Facebook", + r".*\.facebook\.net": "Facebook", + r".*\.fbcdn\.net": "Facebook", + r".*\.cdninstagram\.com": "Facebook", + r".*\.rlcdn\.com": "Reapleaf", + r".*\.wp\.com": "WordPress", + r".*\.aads1\.net": "Aryaka", + r".*\.aads-cn\.net": "Aryaka", + r".*\.aads-cng\.net": "Aryaka", + r".*\.squixa\.net": r"section\.io", + r".*\.bisongrid\.net": 
"BisonGrid", + r".*\.cdn\.gocache\.net": "GoCache", + r".*\.hiberniacdn\.com": "HiberniaCDN", + r".*\.cdntel\.net": "Telenor", + r".*\.raxcdn\.com": "Rackspace", + r".*\.unicorncdn\.net": "UnicornCDN", + r".*\.optimalcdn\.com": "OptimalCDN", + r".*\.kinxcdn\.com": "KINXCDN", + r".*\.kinxcdn\.net": "KINXCDN", + r".*\.stackpathdns\.com": "StackPath", + r".*\.hosting4cdn\.com": "Hosting4CDN", + r".*\.netlify\.com": "Netlify", + r".*\.b-cdn\.net": "BunnyCDN", + r".*\.gtimg": "Tencent", +} diff --git a/src/findcdn/cdnEngine/analyzers/__init__.py b/src/findcdn/cdnEngine/analyzers/__init__.py new file mode 100644 index 0000000..cc6ed12 --- /dev/null +++ b/src/findcdn/cdnEngine/analyzers/__init__.py @@ -0,0 +1,37 @@ +"""Initialize all analyzers in folder.""" + +# Standard Python Libraries +from importlib import util +from importlib.machinery import ModuleSpec +import os.path as path + +# Third-Party Libraries +from yaml import safe_load + +# Internal Libraries + +# Get path where the modules should be +PWD = path.dirname(path.realpath(__file__)) + +# Load in analyzers config file +with open(f"{PWD}/analyzers.yml", "r") as fp: + analyzers = safe_load(fp) + +ANALYZERS = {} +for analyzer, attribs in analyzers["analyzers"].items(): + spec = util.spec_from_file_location( + attribs["classname"], f"{PWD}/{attribs['filename']}" + ) + if spec is not None and type(spec) == ModuleSpec: + module = util.module_from_spec(spec) + if spec.loader is not None: + spec.loader.exec_module(module) + ANALYZERS[attribs["classname"]] = { + "class": getattr( + module, attribs["classname"] + )(), # instantiate the class here + "arg": attribs["argument"], + "prio": attribs["priority"], + } + +ARGS = "".join([v["arg"] for _, v in ANALYZERS.items()]) diff --git a/src/findcdn/cdnEngine/analyzers/analyzers.yml b/src/findcdn/cdnEngine/analyzers/analyzers.yml new file mode 100644 index 0000000..93b63c1 --- /dev/null +++ b/src/findcdn/cdnEngine/analyzers/analyzers.yml @@ -0,0 +1,39 @@ +--- +analyzers: + 
@dataclass
class Domain:
    """Domain record that is handed from analyzer to analyzer."""

    # Domain name exactly as supplied by the caller.
    domain: str
    # Addresses the domain resolved to (filled in by the IP analyzer).
    ips: List[Union[str, IPv4Address, IPv6Address]]
    # CNAME records collected for the domain.
    cnames: List[str]
    # Names of the CDNs detected so far.
    cdns: List[str]


class BaseAnalyzer(ABC):
    """Base class for all analyzers used for FindCDN.

    Subclasses implement ``get_data`` (collect raw evidence for a domain)
    and ``parse`` (turn that evidence into CDN names); ``run`` chains the
    two and normalises the result.
    """

    __NAME = "BaseAnalyzer"

    # NOTE(review): ``abstractclassmethod`` is deprecated and these are
    # instance methods; ``abc.abstractmethod`` is the right decorator, but
    # it is not imported by this module, so the decorator is left as-is.
    @abstractclassmethod
    def get_data(self, domain: Domain) -> Tuple[List, int]:
        """Collect the raw data needed to detect a CDN.

        Returns a ``(data, error_code)`` pair; a non-zero code aborts ``run``.
        """
        pass

    @abstractclassmethod
    def parse(self, data: List) -> Tuple[List, int]:
        """Parse the gathered data and return ``(cdn_names, error_code)``."""
        pass

    def _analyzer_name(self) -> str:
        """Return the display name of the concrete analyzer.

        ``__NAME`` is name-mangled per class, so ``self.__NAME`` written in
        this base class always resolves to ``"BaseAnalyzer"``. Look up the
        subclass' own mangled attribute instead and fall back to the class
        name when the subclass does not define one.
        """
        cls = type(self)
        mangled = f"_{cls.__name__.lstrip('_')}__NAME"
        return getattr(cls, mangled, cls.__name__)

    def run(
        self, domain: Domain, timeout: int = 10, verbose: bool = False
    ) -> Tuple[List[str], Domain, int]:
        """Kick off analysis and return ``(cdn_names, domain, error_code)``.

        ``timeout`` is stored on the instance so ``get_data`` can read it.
        On any non-zero error code from ``get_data`` or ``parse`` the result
        list is empty and that code is returned.
        """
        self.timeout = timeout
        name = self._analyzer_name()

        if verbose:
            logger.debug(f"[{name}] Obtaining data")
        data, err = self.get_data(domain)
        if err:
            return [], domain, err

        if verbose:
            logger.debug(f"[{name}] Parsing results ")
        results, err = self.parse(data)
        if err:
            return [], domain, err

        # De-duplicate while keeping first-seen order so output is stable.
        return list(dict.fromkeys(results)), domain, 0
class HTTPlyzer(BaseAnalyzer):
    """Reach out to host. Get headers.

    Detects CDNs from the HTTP response headers returned by the domain.
    """

    __NAME = "HTTPlyzer"

    def get_data(self, domain: Domain) -> Tuple[List, int]:
        """Fetch the domain over HTTP and HTTPS and collect header values.

        Returns ``(header_values, error_code)``. ``error_code`` is a bit
        field (1: connect timeout or off-domain redirect, 2: connection
        error, 4: read timeout). It is only reported when *no* data could
        be collected, so a site that answers on one scheme is not thrown
        away because the other scheme failed.
        """
        http_data: List[str] = []
        error_code = 0

        PROTOCOLS = ["http://", "https://"]
        INTERESTING_HEADERS = [
            "server",
            "via",
            "x-cache",
            "cf-cache-status",  # This is specific to cloudflare
        ]
        AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/37.0.2062.94 Chrome/37.0.2062.94 Safari/537.36"

        # Without a resolved address there is no host to talk to.
        if len(domain.ips) == 0:
            return http_data, error_code

        for proto in PROTOCOLS:
            # Get request with the intent a redirection will happen
            try:
                response = get(
                    f"{proto}{domain.domain}",
                    allow_redirects=True,
                    headers={"User-Agent": AGENT},
                    timeout=self.timeout,
                )
            except ConnectTimeout:
                error_code |= 1
                continue
            except ConnectionError:
                error_code |= 2
                continue
            except ReadTimeout:
                # Own bit: the previous value 3 was indistinguishable
                # from "connect timeout + connection error".
                error_code |= 4
                continue

            # Validate all redirects (if any) are from the same domain
            hops = list(response.history)
            valid = True
            for hop in hops:
                dom = search("https?://([A-Za-z_0-9.-]+).*", hop.url)
                if dom:
                    valid &= all(host == domain.domain for host in dom.groups())
            if not valid:
                error_code |= 1
                continue

            # Headers of every redirect hop plus the final response.
            headers = [hop.headers for hop in hops]
            headers.append(response.headers)

            # Keep only headers that are actually present; a missing
            # header must not show up as None in the collected data.
            for header in headers:
                for wanted in INTERESTING_HEADERS:
                    value = header.get(wanted)
                    if value is not None:
                        http_data.append(value)

            # Find out if we are using some sort of cache
            for header in headers:
                for header_name in dict(header):
                    lowered = header_name.lower()
                    if "-cache" in lowered and "drupal" not in lowered:
                        http_data.append("CDN_NOT_RECOGNIZED")

        return http_data, (0 if http_data else error_code)

    def parse(self, http_data: List) -> Tuple[List, int]:
        """Match collected header values against the known CDN table.

        A CDN is recorded when its hostname regex matches a value or when
        its display name occurs in the value. If nothing is recognised but
        a cache header was seen, ``"CDN_NOT_RECOGNIZED"`` is reported.
        Returns ``(cdn_names, error_code)``.
        """
        cdns: List[str] = []
        error_code = 0

        try:
            for data in http_data:
                if not data:
                    continue
                lowered = data.lower()
                for cdn_regex, cdn_name in CDNs.items():
                    matches = search(cdn_regex, lowered)
                    if matches and matches.group():
                        cdns.append(cdn_name)
                    if cdn_name.lower() in lowered:
                        cdns.append(cdn_name)

            # Only fall back to the generic marker when nothing matched.
            if not cdns and any(
                data and "CDN_NOT_RECOGNIZED" in data for data in http_data
            ):
                cdns.append("CDN_NOT_RECOGNIZED")
        except Exception as e:  # TODO fix exception usage
            print(e)
            error_code = 1

        return cdns, error_code
class WHOISlyzer(BaseAnalyzer):
    """Perform whois lookup on domain."""

    __NAME = "WHOISlyzer"

    def get_data(self, domain: Domain) -> Tuple[List, int]:
        """Collect organisation names from WHOIS and RDAP for each IP.

        Returns ``(org_names, error_code)``. Codes 1-8 keep their previous
        meaning; 9 reports a WHOIS rate limit. Unexpected exceptions are
        printed and leave the code unchanged, so data gathered before the
        failure is still returned.
        """
        whois_data: List[str] = []
        error_code = 0
        ip = None  # keeps the catch-all message safe before the first IP

        try:
            for ip in domain.ips:
                # The constructor only validates the address. The rate
                # limit is raised by the lookups below, so the former
                # unbounded ``while True`` retry around the constructor
                # could never help and is gone.
                response = IPWhois(ip)

                # These two should be where we can find substrings hinting CDN
                org = response.lookup_whois().get("asn_description")
                if org and org != "BAREFRUIT-ERRORHANDLING":
                    whois_data.append(org)

                network = response.lookup_rdap().get("network")
                org = network.get("name") if network else None
                if org and org != "BAREFRUIT-ERRORHANDLING":
                    whois_data.append(org)
        except NoAnswer:
            error_code = 1
        except NoNameservers:
            error_code = 2
        except NXDOMAIN:
            error_code = 3
        except Timeout:
            error_code = 4
        except HTTPLookupError:
            error_code = 5
        except IPDefinedError:
            error_code = 6
        except ASNRegistryError:
            error_code = 7
        except WhoisLookupError:
            error_code = 8
        except WhoisRateLimitError:
            error_code = 9
        except Exception as e:
            print(f"[{e}]: {domain.domain} for {ip}")

        return whois_data, error_code

    def parse(self, whois_data: List) -> Tuple[List, int]:
        """Match WHOIS organisation strings against the known CDN table.

        A CDN is recorded when its hostname regex matches the start of a
        value or when its display name occurs in the value.
        Returns ``(cdn_names, error_code)``.
        """
        cdns: List[str] = []
        error_code = 0

        try:
            for data in whois_data:
                lowered = data.lower()
                for cdn_regex, cdn_name in CDNs.items():
                    matches = match(cdn_regex, lowered)
                    if matches and matches.group():
                        cdns.append(cdn_name)
                    if cdn_name.lower() in lowered:
                        cdns.append(cdn_name)
        except Exception as e:  # TODO fix exception usage
            print(e)
            error_code = 1

        return cdns, error_code
class functime(ContextDecorator):
    """Context manager / decorator that measures elapsed wall time.

    After the ``with`` block exits, ``elapsed`` holds the duration in
    seconds (``perf_counter`` based).
    """

    def __enter__(self):
        """Start the timer and return the timer object."""
        self.start = perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Stop the timer and store the delta in ``elapsed``."""
        self.end = perf_counter()
        self.elapsed = self.end - self.start


def analyze_domain(
    domain: str, checks: str, timeout: int = 10, verbose: bool = False
) -> Dict[str, Dict[str, object]]:
    """Analyze a single domain.

    The IP analyzer always runs first. When it resolves addresses but
    finds no CDN, the remaining analyzers whose ``arg`` letter occurs in
    ``checks`` run in priority order until one reports a CDN.

    Returns ``{domain: {"cdn": [...], "ips": [...], "has_cdn": 0 | 1}}``.
    """
    error_code = 0
    dom = Domain(domain, [], [], [])
    results: List[str] = []

    # First identify if domain has valid IPs
    iplyzer = ANALYZERS["IPlyzer"]["class"]
    results, _, ec = iplyzer.run(dom, timeout)

    # If there are no results but there are IPs, that means
    # we must fallback to a different method for CDN detection.
    # (The condition used to be inverted, so the fallback never ran
    # in exactly the case it exists for.)
    if not results and len(dom.ips) > 0:
        # Sort analyzers based on priority (also filter out IPlyzer)
        candidates = sorted(
            (name for name in ANALYZERS if name != "IPlyzer"),
            key=lambda name: ANALYZERS[name]["prio"],
        )

        # Filter out any with their arg missing
        candidates = [
            name for name in candidates if ANALYZERS[name]["arg"] in checks
        ]

        # Get results
        for analyzer in candidates:
            runner = ANALYZERS[analyzer]["class"]
            if verbose:
                logger.debug(f"[ANALYZER]::[{analyzer}] Starting..")
            results, _, ec = runner.run(dom, timeout, verbose=verbose)
            # Merge before logging so the logged code is not stale.
            error_code |= ec
            if verbose:
                logger.debug(f"[ANALYZER]::[{analyzer}] RESULTS: {results} ERROR CODE: {error_code}")
            if len(results) > 0:
                if verbose:
                    logger.debug(f"[ANALYZER]::[{analyzer}] CDN Has been found: {len(results) > 0 = }")
                break  # CDN has been found
            if ec == -1:
                break  # Domain just flat don't exist probably

    # Organize and return as dict
    return {
        dom.domain: {
            "cdn": results,
            "ips": [str(ip) for ip in dom.ips],
            "has_cdn": 1 if len(results) > 0 else 0,
        }
    }


def analyze_domains(
    domains: List[str],
    checks: str,
    threads: int = 4,
    timeout: int = 10,
    verbose: bool = False,
    interactive: bool = False,
) -> Dict[str, Any]:
    """Perform analysis on multiple domains concurrently.

    Syntactically invalid names are not analyzed; they are listed under
    ``"invalid_domains"``. The returned report also carries the per-domain
    results (``"valid_domains"``), a timestamp, the runtime in seconds and
    summary counters.
    """
    pbar = None
    res = []

    try:
        # This is so we can time the total execution of the code
        with functime() as ft:
            # Split into valid and invalid domains in a single pass
            valid_domains: List[str] = []
            invalid_domains: List[str] = []
            for candidate in domains:
                if domain(candidate):
                    valid_domains.append(candidate)
                else:
                    invalid_domains.append(candidate)

            # Show loading bar if interactive. Only valid domains are
            # ever submitted, so they alone make up the total.
            if interactive:
                pbar = tqdm(total=len(valid_domains))

            # Thread pool executor for concurrent executions.
            with ThreadPoolExecutor(max_workers=threads) as executor:
                # Submit all the domains to be analyzed by a worker
                pending = [
                    executor.submit(analyze_domain, name, checks, timeout, verbose)
                    for name in valid_domains
                ]

                # Wait for workers to finish
                for task in as_completed(pending):
                    dom_res = task.result()
                    res.append(dom_res)

                    if verbose:
                        dom = list(dom_res.keys())[0]
                        found = dom_res[dom]['has_cdn']
                        logger.debug(f"{dom} {'has a cdn!' if found else 'has no cdn.'}")

                    # Update progress bar as a result completes
                    if pbar is not None:
                        pbar.update(1)
    finally:
        # Release the progress bar even when a worker raised.
        if pbar is not None:
            pbar.close()

    # Aggregate results
    results: Dict[str, Any] = {}
    results["valid_domains"] = {}
    for dom_res in res:
        results["valid_domains"].update(dom_res)

    results["invalid_domains"] = list(invalid_domains)
    results["date"] = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    results["runtime"] = ft.elapsed
    results["total_analyzed"] = len(valid_domains)
    results["count_with_cdn"] = sum(
        1 for entry in results["valid_domains"].values() if entry["has_cdn"]
    )

    return results
b/src/findcdn/cdnEngine/detectCDN/cdn_check.py deleted file mode 100644 index a85475b..0000000 --- a/src/findcdn/cdnEngine/detectCDN/cdn_check.py +++ /dev/null @@ -1,279 +0,0 @@ -""" -Summary: This is the main runner for detectCDn. - -Description: The detectCDN library is meant to show what CDNs a domain may be using -""" - -# Standard Python Libraries -from http.client import RemoteDisconnected -from ssl import CertificateError, SSLError -from typing import List -from urllib import request as request -from urllib.error import URLError - -# Third-Party Libraries -from dns.resolver import NXDOMAIN, NoAnswer, NoNameservers, Resolver, Timeout, query -from ipwhois import HTTPLookupError, IPDefinedError, IPWhois -from ipwhois.exceptions import ASNRegistryError - -# Internal Libraries -from .cdn_config import COMMON, CDNs, CDNs_rev -from .cdn_err import NoIPaddress - -# Global variables -LIFETIME = 10 - - -class Domain: - """Domain class allows for storage of metadata on domain.""" - - def __init__( - self, - url: str, - ip: List[str] = [], - cnames: List[str] = [], - cdns: List[str] = [], - cdns_by_name: List[str] = [], - namsrvs: List[str] = [], - headers: List[str] = [], - whois_data: List[str] = [], - ): - """Initialize object to store metadata on domain in url.""" - self.url = url - self.ip = ip - self.cnames = cnames - self.cdns = cdns - self.cdns_by_name = cdns_by_name - self.namesrvs = namsrvs - self.headers = headers - self.whois_data = whois_data - self.cdn_present = False - - -class cdnCheck: - """cdnCheck runs analysis and stores discovered data in Domain object.""" - - def __init__(self): - """Initialize the orchestrator of analysis.""" - self.running = False - - def ip(self, dom: Domain) -> List[int]: - """Determine IP addresses the domain resolves to.""" - dom_list: List[str] = [dom.url, "www." 
+ dom.url] - return_codes = [] - ip_list = [] - for domain in dom_list: - try: - # Query the domain - response = query(domain) - # Assign any found IP addresses to the object - for ip in response: - if str(ip.address) not in ip_list and str(ip.address) not in dom.ip: - ip_list.append(str(ip.address)) - except NoAnswer: - return_codes.append(1) - except NoNameservers: - return_codes.append(2) - except NXDOMAIN: - return_codes.append(3) - except Timeout: - return_codes.append(4) - - # Append all addresses into IP_list - for addr in ip_list: - dom.ip.append(addr) - # Return listing of error codes - return return_codes - - def cname(self, dom: Domain, timeout: int) -> List[int]: - """Collect CNAME records on domain.""" - # List of domains to check - dom_list = [dom.url, "www." + dom.url] - # Our codes to return - return_code = [] - # Seutp resolver and timeouts - resolver = Resolver() - resolver.timeout = timeout - resolver.lifetime = LIFETIME - cname_query = resolver.query - # Iterate through all domains in list - for domain in dom_list: - try: - response = cname_query(domain, "cname") - dom.cnames = [record.to_text() for record in response] - except NoAnswer: - return_code.append(1) - except NoNameservers: - return_code.append(2) - except NXDOMAIN: - return_code.append(3) - except Timeout: - return_code.append(4) - return return_code - - def https_lookup( - self, dom: Domain, timeout: int, agent: str, interactive: bool, verbose: bool - ) -> int: - """Read 'server' header for CDN hints.""" - # List of domains with different protocols to check. - PROTOCOLS = ["https://", "https://www."] - # Iterate through all protocols - for PROTOCOL in PROTOCOLS: - try: - # Some domains only respond when we have a User-Agent defined. - req = request.Request( - PROTOCOL + dom.url, - data=None, - headers={"User-Agent": agent}, - ) - # Making the timeout 50 as to not hang thread. 
- response = request.urlopen(req, timeout=timeout) # nosec - except URLError: - continue - except RemoteDisconnected: - continue - except CertificateError: - continue - except ConnectionResetError: - continue - except SSLError: - continue - except Exception as e: - # Define an exception just in case we missed one. - if interactive or verbose: - print(f"[{e}]: https://{dom.url}") - continue - # Define headers to check for the response - # to grab strings for later parsing. - HEADERS = ["server", "via"] - for value in HEADERS: - if ( - response.headers[value] is not None - and response.headers[value] not in dom.headers - ): - dom.headers.append(response.headers[value]) - return 0 - - def whois(self, dom: Domain, interactive: bool, verbose: bool) -> int: - """Scrape WHOIS data for the org or asn_description.""" - # Make sure we have Ip addresses to check - try: - if len(dom.ip) <= 0: - raise NoIPaddress - except NoIPaddress: - return 1 - # Define temp list to assign - whois_data = [] - # Iterate through all the IP addresses in object - for ip in dom.ip: - try: - response = IPWhois(ip) - # These two should be where we can find substrings hinting to CDN - try: - org = response.lookup_whois()["asn_description"] - if org != "BAREFRUIT-ERRORHANDLING": - whois_data.append(org) - except AttributeError: - pass - try: - org = response.lookup_rdap()["network"]["name"] - if org != "BAREFRUIT-ERRORHANDLING": - whois_data.append(org) - except AttributeError: - pass - except HTTPLookupError: - pass - except IPDefinedError: - pass - except ASNRegistryError: - pass - except Exception as e: - if interactive or verbose: - print(f"[{e}]: {dom.url} for {ip}") - for data in whois_data: - if data not in dom.whois_data: - dom.whois_data.append(data) - # Everything was successful - return 0 - - def CDNid(self, dom: Domain, data_blob: List): - """ - Identify any CDN name in list received. 
- - All of these will be doing some sort of substring analysis - on each string from any list passed to it. This will help - us identify the CDN which could be used. - """ - for data in data_blob: - # Make sure we do not try to analyze None type data - if data is None: - continue - # Check the CDNs standard list - for url in CDNs: - if ( - url.lower().replace(" ", "") in data.lower().replace(" ", "") - and url not in dom.cdns - ): - dom.cdns.append(url) - dom.cdns_by_name.append(CDNs[url]) - - # Check the CDNs reverse list - for name in CDNs_rev: - if name.lower() in data.lower() and CDNs_rev[name] not in dom.cdns: - dom.cdns.append(CDNs_rev[name]) - dom.cdns_by_name.append(name) - - # Check the CDNs Common list: - for name in COMMON.keys(): - if ( - name.lower().replace(" ", "") in data.lower().replace(" ", "") - and CDNs_rev[name] not in dom.cdns - ): - dom.cdns.append(CDNs_rev[name]) - dom.cdns_by_name.append(name) - - def data_digest(self, dom: Domain) -> int: - """Digest all data collected and assign to CDN list.""" - return_code = 1 - # Iterate through all attributes for substrings - if len(dom.cnames) > 0 and not None: - self.CDNid(dom, dom.cnames) - return_code = 0 - if len(dom.headers) > 0 and not None: - self.CDNid(dom, dom.headers) - return_code = 0 - if len(dom.namesrvs) > 0 and not None: - self.CDNid(dom, dom.namesrvs) - return_code = 0 - if len(dom.whois_data) > 0 and not None: - self.CDNid(dom, dom.whois_data) - return_code = 0 - return return_code - - def all_checks( - self, - dom: Domain, - timeout: int, - agent: str, - verbose: bool = False, - interactive: bool = False, - ) -> int: - """Option to run everything in this library then digest.""" - # Obtain each attributes data - self.ip(dom) - self.cname(dom, timeout) - self.https_lookup(dom, timeout, agent, interactive, verbose) - self.whois(dom, interactive, verbose) - - # Digest the data - return_code = self.data_digest(dom) - - # Extra case if we want verbosity for each domain check - if verbose: 
- if len(dom.cdns) > 0: - print(f"{dom.url} has the following CDNs:\n{dom.cdns}") - else: - print(f"{dom.url} does not use a CDN") - - # Return to calling function - return return_code diff --git a/src/findcdn/cdnEngine/detectCDN/cdn_config.py b/src/findcdn/cdnEngine/detectCDN/cdn_config.py deleted file mode 100644 index 1f6a646..0000000 --- a/src/findcdn/cdnEngine/detectCDN/cdn_config.py +++ /dev/null @@ -1,186 +0,0 @@ -"""Define public exports.""" -__all__ = ["COMMON", "CDNs", "CDNs_rev"] - -"""Top 14 CDNs most commonly used.""" -COMMON = { - "Cloudflare": "Cloudflare - https://www.cloudflare.com", - "Incapsula": "Incapsula - https://www.incapsula.com/", - "Cloudfront": "Cloudfront - https://aws.amazon.com/cloudfront/", - "Akamai": "Akamai - https://akamai.com", - "Airee": "Airee - https://airee.international", - "CacheFly": "CacheFly - https://www.cachefly.com/", - "EdgeCast": "EdgeCast - https://verizondigitalmedia.com", - "MaxCDN": "MaxCDN - https://www.maxcdn.com/", - "Beluga": "BelugaCDN - https://belugacdn.com", - "Limelight": "Limelight - https://www.limelight.com", - "Fastly": "Fastly - https://www.fastly.com/", - "Myracloud": "Myra - https://myracloud.com", - "msecnd.ne": "Microsoft Azure - https://azure.microsoft.com/en-us/services/cdn/", - "Clever-cloud": "Clever Cloud - https://www.clever-cloud.com/", -} - -""" -More inclusive list of available CDNs - -Format: CDNs[] = -""" -CDNs = { - ".amazonaws.com": "Amazon AWS", - "cdn.geeksforgeeks.org": "GeeksForGeeksCDN", - ".discordapp.com": "Discord", - ".airee.international": "Airee", - ".myracloud.com": "Myra", - ".msecnd.ne": "MicrosoftAzure", - ".clever-cloud.com": "Clever-cloud", - ".turbobytes-cdn.com": "Turbo Bytes", - ".akadns.net": "Akamai", - ".anankecdn.com.br": "Ananke", - ".belugacdn.com": "BelugaCDN", - ".cdnify.io": "CDNify", - ".clients.turbobytes.net": "Turbo Bytes", - ".lambdacdn.net": "LambdaCDN", - ".akamai.net": "Akamai", - ".akamaized.net": "Akamai", - ".akamaiedge.net": "Akamai", - 
".akamaihd.net": "Akamai", - ".edgesuite.net": "Akamai", - ".edgekey.net": "Akamai", - ".srip.net": "Akamai", - ".akamaitechnologies.com": "Akamai", - ".akamaitechnologies.fr": "Akamai", - ".tl88.net": "AkamaiChinaCDN", - ".llnwd.net": "Limelight", - ".lldns.net": "Limelight", - ".netdna-cdn.com": "StackPath", - ".netdna-ssl.com": "StackPath", - ".netdna.com": "StackPath", - ".gfx.ms": "Limelight", - ".adn.": "EdgeCast", - ".wac.": "EdgeCast", - ".wpc.": "EdgeCast", - ".fastly.net": "Fastly", - ".fastlylb.net": "Fastly", - "edgecastcdn.net": "EdgeCast", - ".systemcdn.net": "EdgeCast", - ".transactcdn.net": "EdgeCast", - ".v1cdn.net": "EdgeCast", - ".v2cdn.net": "EdgeCast", - ".v3cdn.net": "EdgeCast", - ".v4cdn.net": "EdgeCast", - ".v5cdn.net": "EdgeCast", - "hwcdn.net": "Highwinds", - ".simplecdn.net": "SimpleCDN", - ".instacontent.net": "MirrorImage", - ".cap-mii.net": "MirrorImage", - ".footprint.net": "Level3", - ".fpbns.net": "Level3", - ".ay1.b.yahoo.com": "Yahoo", - ".yimg.": "Yahoo", - ".yahooapis.com": "Yahoo", - ".google.": "Google", - "googlesyndication.": "Google", - "youtube.": "Google", - ".googleusercontent.com": "Google", - "googlehosted.com": "Google", - ".insnw.net": "InstartLogic", - ".inscname.net": "InstartLogic", - ".internapcdn.net": "Internap", - ".cloudfront.net": "Cloudfront", - ".kxcdn.com": "KeyCDN", - ".cotcdn.net": "CotendoCDN", - ".cachefly.net": "Cachefly", - "bo.lt": "BO.LT", - ".cloudflare.net": "Cloudflare", - ".cloudflare.com": "Cloudflare", - ".afxcdn.net": "afxcdn.net", - ".wscdns.com": "ChinaNetCenter", - ".wscloudcdn.com": "ChinaNetCenter", - ".ourwebpic.com": "ChinaNetCenter", - ".att-dsa.net": "AT&T", - ".vo.msecnd.net": "MicrosoftAzure", - ".azureedge.net": "MicrosoftAzure", - ".voxcdn.net": "VoxCDN", - ".bluehatnetwork.com": "BlueHatNetwork", - ".swiftcdn1.com": "SwiftCDN", - ".swiftserve.com": "SwiftServe", - ".cdngc.net": "CDNetworks", - ".gccdn.net": "CDNetworks", - ".gccdn.cn": "CDNetworks", - ".panthercdn.com": 
"CDNetworks", - ".nocookie.net": "Fastly", - ".cdn.bitgravity.com": "Tata communications", - ".cdn.telefonica.com": "Telefonica", - ".gslb.taobao.com": "Taobao", - ".gslb.tbcache.com": "Alimama", - ".mirror-image.net": "MirrorImage", - ".yottaa.net": "Yottaa", - ".cubecdn.net": "cubeCDN", - ".cdn77.net": "CDN77", - ".cdn77.org": "CDN77", - "x.incapdns.net": "Incapsula", - ".bitgravity.com": "BitGravity", - ".r.worldcdn.net": "OnApp", - ".r.worldssl.net": "OnApp", - "tbcdn.cn": "Taobao", - ".taobaocdn.com": "Taobao", - ".ngenix.net": "NGENIX", - ".pagerain.net": "PageRain", - ".ccgslb.com": "ChinaCache", - ".ccgslb.net": "ChinaCache", - ".c3cache.net": "ChinaCache", - ".chinacache.net": "ChinaCache", - ".c3cdn.net": "ChinaCache", - ".lxdns.com": "ChinaNetCenter", - ".speedcdns.com": "QUANTIL/ChinaNetCenter", - ".mwcloudcdn.com": "QUANTIL/ChinaNetCenter", - "cdn.sfr.net": "SFR", - ".azioncdn.net": "Azion", - ".azioncdn.com": "Azion", - ".azion.net": "Azion", - ".cdncloud.net.au": "MediaCloud", - ".rncdn1.com": "ReflectedNetworks", - ".cdnsun.net": "CDNsun", - ".mncdn.com": "Medianova", - ".mncdn.net": "Medianova", - ".mncdn.org": "Medianova", - "cdn.jsdelivr.net": "jsDelivr", - ".nyiftw.net": "NYIFTW", - ".nyiftw.com": "NYIFTW", - ".resrc.it": "ReSRC.it", - ".zenedge.net": "Zenedge", - ".lswcdn.net": "LeaseWebCDN", - ".lswcdn.eu": "LeaseWebCDN", - ".revcn.net": "RevSoftware", - ".revdn.net": "RevSoftware", - ".caspowa.com": "Caspowa", - ".twimg.com": "Twitter", - ".facebook.com": "Facebook", - ".facebook.net": "Facebook", - ".fbcdn.net": "Facebook", - ".cdninstagram.com": "Facebook", - ".rlcdn.com": "Reapleaf", - ".wp.com": "WordPress", - ".aads1.net": "Aryaka", - ".aads-cn.net": "Aryaka", - ".aads-cng.net": "Aryaka", - ".squixa.net": "section.io", - ".bisongrid.net": "BisonGrid", - ".cdn.gocache.net": "GoCache", - ".hiberniacdn.com": "HiberniaCDN", - ".cdntel.net": "Telenor", - ".raxcdn.com": "Rackspace", - ".unicorncdn.net": "UnicornCDN", - ".optimalcdn.com": 
"OptimalCDN", - ".kinxcdn.com": "KINXCDN", - ".kinxcdn.net": "KINXCDN", - ".stackpathdns.com": "StackPath", - ".hosting4cdn.com": "Hosting4CDN", - ".netlify.com": "Netlify", - ".b-cdn.net": "BunnyCDN", - ".gtimg": "Tencent", -} - -""" -Swap the keys with their respective value. Used for digesting results. -""" -CDNs_rev = {v: k for k, v in CDNs.items()} diff --git a/src/findcdn/cdnEngine/detectCDN/cdn_err.py b/src/findcdn/cdnEngine/detectCDN/cdn_err.py deleted file mode 100644 index f5f5b68..0000000 --- a/src/findcdn/cdnEngine/detectCDN/cdn_err.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Define public exports.""" -__all__ = ["NoIPaddress"] - - -class NoIPaddress(Exception): - """Raise when no IP addresses in domain class.""" - - def __init__(self, message="There are no IP addresses to check!"): - """Instantiate super class with passed message.""" - self.message = message - super().__init__(self.message) diff --git a/src/findcdn/findcdn.py b/src/findcdn/findcdn.py index 4bb4081..6734634 100644 --- a/src/findcdn/findcdn.py +++ b/src/findcdn/findcdn.py @@ -15,21 +15,21 @@ Options: -h --help Show this message. --version Show the current version. + --checks= Select detection types; possible values: + cname (c), HTTP headers (h),and whois (w). + [default: chw] -o FILE --output=FILE If specified, then the JSON output file will be created at the specified value. -v --verbose Includes additional print statements. --all Includes domains with and without a CDN in output. -d --double Run the checks twice to increase accuracy. - -t --threads= Number of threads, otherwise use default. + -t --threads= Number of threads, otherwise use default. [default: 4] --timeout= Max duration in seconds to wait for a domain to - conclude processing, otherwise use default. - --user_agent= Set the user agent to use, otherwise - use default. + conclude processing, otherwise use default. 
[default: 4] """ # Standard Python Libraries -import datetime import json import os import sys @@ -38,20 +38,18 @@ # Third-Party Libraries import docopt from schema import And, Or, Schema, SchemaError, Use -import validators # Internal Libraries from ._version import __version__ -from .cdnEngine import run_checks +from .cdnEngine import ARGS, analyze_domains from .findcdn_err import FileWriteError, InvalidDomain, NoDomains, OutputFileExists # Global Variables -USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" -TIMEOUT = 60 # Time in seconds -THREADS = 0 # If 0 then cdnEngine uses CPU count to set thread count +TIMEOUT = 10 # Time in seconds +THREADS = 10 # If 0 then cdnEngine uses CPU count to set thread count -def write_json(json_dump: str, output: str, verbose: bool, interactive: bool): +def write_json(json_dump: str, output: str): """Write dict as JSON to output file.""" try: with open(output, "x") as outfile: @@ -64,64 +62,32 @@ def write_json(json_dump: str, output: str, verbose: bool, interactive: bool): def main( domain_list: List[str], + checks: str, output_path: str = None, verbose: bool = False, - all_domains: bool = False, interactive: bool = False, - double_in: bool = False, threads: int = THREADS, timeout: int = TIMEOUT, - user_agent: str = USER_AGENT, + all: bool = False, ) -> str: """Take in a list of domains and determine the CDN for each return (JSON, number of successful jobs).""" - # Make sure the list passed is got something in it - if len(domain_list) <= 0: - raise NoDomains("error") - - # Validate domains in list - for item in domain_list: - if validators.domain(item) is not True: - raise InvalidDomain(item) - - # Show the validated domains if in verbose mode - if verbose: - print("%d Domains Validated" % len(domain_list)) - - # Define domain dict and counter for json - domain_dict = {} - CDN_count = 0 - # Check domain list - processed_list, cnt = run_checks( 
- domain_list, - threads, - timeout, - user_agent, - interactive, - verbose, - double_in, + results = analyze_domains( + domains=domain_list, + checks=checks, + threads=threads, + timeout=timeout, + interactive=interactive, + verbose=verbose, ) - # Parse the domain data - for domain in processed_list: - # Track the count of the domain has cdns - if len(domain.cdns) > 0: - CDN_count += 1 - - # Setup formatting for json output - if len(domain.cdns) > 0 or all_domains: - domain_dict[domain.url] = { - "IP": str(domain.ip)[1:-1], - "cdns": str(domain.cdns)[1:-1], - "cdns_by_names": str(domain.cdns_by_name)[1:-1], - } + if not all: + results["valid_domains"] = { + cdn: v for cdn, v in results["valid_domains"].items() if len(v["cdn"]) > 0 + } # Create JSON from the results and return (results, successful jobs) - json_dict = {} - json_dict["date"] = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S") - json_dict["cdn_count"] = str(CDN_count) - json_dict["domains"] = domain_dict # type: ignore - json_dump = json.dumps(json_dict, indent=4, sort_keys=False) + json_dump = json.dumps(results, indent=4, sort_keys=False) # Show the dump to stdout if verbose or interactive if (output_path is None and interactive) or verbose: @@ -129,14 +95,7 @@ def main( # Export to file if file provided if output_path is not None: - write_json(json_dump, output_path, verbose, interactive) - if interactive or verbose: - print( - "Domain processing completed.\n%d domains had CDN's out of %d." 
- % (CDN_count, len(domain_list)) - ) - if verbose: - print(f"{cnt} jobs completed!") + write_json(json_dump, output_path) # Return json dump to callee return json_dump @@ -147,17 +106,14 @@ def interactive() -> None: # Obtain arguments from docopt args: Dict[str, str] = docopt.docopt(__doc__, version=__version__) - # Check for None params then set default if found - if args["--user_agent"] is None: - args["--user_agent"] = USER_AGENT - if args["--threads"] is None: - args["--threads"] = THREADS - if args["--timeout"] is None: - args["--timeout"] = TIMEOUT - # Validate and convert arguments as needed with schema schema: Schema = Schema( { + "--checks": And( + Use(str), + lambda checks: all([c in ARGS for c in checks]), + error="Check strings must be valid opts.", + ), "--output": Or( None, And( @@ -184,10 +140,6 @@ def interactive() -> None: lambda timeout: timeout > 0, error="The timeout duration must be a number greater than 0", ), - "--user_agent": And( - str, - error="The user agent must be a string.", - ), "": And(list, error="Please format the domains as a list."), str: object, # Don't care about other keys, if any } @@ -214,15 +166,14 @@ def interactive() -> None: # Start main runner of program with supplied inputs. try: main( - domain_list, - validated_args["--output"], - validated_args["--verbose"], - validated_args["--all"], - True, # Launch in interactive mode. - validated_args["--double"], - validated_args["--threads"], - validated_args["--timeout"], - validated_args["--user_agent"], + domain_list=domain_list, + checks=validated_args["--checks"], + output_path=validated_args["--output"], + verbose=validated_args["--verbose"], + interactive=True, # Launch in interactive mode. + threads=validated_args["--threads"], + timeout=validated_args["--timeout"], + all=validated_args["--all"], ) # Check for all potential exceptions except OutputFileExists as ofe: