-
Notifications
You must be signed in to change notification settings - Fork 101
Adds automated IPv6 database build and integration #231
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,114 @@ | ||
| name: Build IPv6 Database Update | ||
| on: | ||
| push: | ||
| paths: | ||
| - 'input/**' | ||
| - '.github/workflows/build-list-ipv6.yml' | ||
| schedule: | ||
| - cron: '50 8 * * *' | ||
| workflow_dispatch: | ||
|
|
||
| jobs: | ||
| build_and_commit: | ||
| runs-on: ubuntu-latest | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| list: | ||
| - name: vpn | ||
| validate_low: 1 | ||
| validate_high: 5000000000000 | ||
| - name: datacenter | ||
| validate_low: 1 | ||
| validate_high: 500000000000000 | ||
| steps: | ||
| - uses: actions/checkout@v2 | ||
| - name: Dependencies | ||
| run: | | ||
| sudo apt-get update | ||
| sudo apt-get install python3 python3-pip | ||
| - name: Downloading IPv6 ASN database | ||
| run: | | ||
| curl https://iptoasn.com/data/ip2asn-v6.tsv.gz | gzip -d > /tmp/asndb-ipv6.tsv | ||
| - name: ASN -> IPv6 Range | ||
| run: | | ||
| rm -f /tmp/asn-processed-ipv6.txt | ||
| cat "$GITHUB_WORKSPACE/input/${{matrix.list.name}}/ASN.txt" | grep -v '^#' | awk '{print $1}' | grep '^AS' | while read -r asn; do | ||
| echo "Processing $asn for IPv6" | ||
| awk -v asn="${asn:2}" '$3 == asn {print $1"-"$2}' /tmp/asndb-ipv6.tsv | while read -r range; do | ||
| start=$(echo "$range" | cut -d'-' -f1) | ||
| end=$(echo "$range" | cut -d'-' -f2) | ||
| python3 -c "import ipaddress; import sys; start=ipaddress.IPv6Address('$start'); end=ipaddress.IPv6Address('$end'); nets=list(ipaddress.summarize_address_range(start, end)); [print(str(net)) for net in nets]" >> /tmp/asn-processed-ipv6.txt | ||
| done | ||
| done | ||
| if [[ "${{matrix.list.name}}" == "datacenter" ]]; then | ||
| cat "$GITHUB_WORKSPACE/input/vpn/ASN.txt" | grep -v '^#' | awk '{print $1}' | grep '^AS' | while read -r asn; do | ||
| echo "Processing $asn for IPv6 (datacenter includes VPN)" | ||
| awk -v asn="${asn:2}" '$3 == asn {print $1"-"$2}' /tmp/asndb-ipv6.tsv | while read -r range; do | ||
| start=$(echo "$range" | cut -d'-' -f1) | ||
| end=$(echo "$range" | cut -d'-' -f2) | ||
| python3 -c "import ipaddress; import sys; start=ipaddress.IPv6Address('$start'); end=ipaddress.IPv6Address('$end'); nets=list(ipaddress.summarize_address_range(start, end)); [print(str(net)) for net in nets]" >> /tmp/asn-processed-ipv6.txt | ||
| done | ||
| done | ||
| fi | ||
| - name: Merge IPv6 Lists | ||
| run: | | ||
| # Remove too small allocations from ASN list (anything less than a /64 for IPv6) | ||
| # IPv6 typically uses /64 as the smallest routable prefix for end sites | ||
| perl ./helpers/cleanup-ipv6.pl /tmp/asn-processed-ipv6.txt | grep -E '/([0-9]|[1-5][0-9]|6[0-4])$' > /tmp/asn-cleaned-ipv6.txt | ||
| # Combine lists - look for Manual-ipv6.txt files | ||
| find $GITHUB_WORKSPACE/input/${{matrix.list.name}}/ips/ -name "*ipv6*.txt" -o -name "*IPv6*.txt" | xargs cat | grep -v '^#' | awk '{print $1}' | sed '/^$/d' > /tmp/manual-processed-ipv6.txt | ||
| cat /tmp/asn-cleaned-ipv6.txt /tmp/manual-processed-ipv6.txt | sort > /tmp/ipv6.txt | ||
| # Final cleanup | ||
| perl ./helpers/cleanup-ipv6.pl /tmp/ipv6.txt > ipv6.txt | ||
| - name: Test generated IPv6 Lists | ||
| run: | | ||
| # Check for loopback addresses (::1/128) | ||
| if [[ $(grep -i "^::1" ipv6.txt | wc -l) != "0" ]]; then | ||
| echo "Lists can not contain IPv6 loopback addresses" | ||
| exit 1 | ||
| fi | ||
| # Check for link-local addresses (fe80::/10) | ||
| if [[ $(grep -i "^fe8" ipv6.txt | wc -l) != "0" ]]; then | ||
| echo "Lists can not contain IPv6 link-local addresses" | ||
| exit 1 | ||
| fi | ||
| # Estimate IPv6 addresses covered (simplified calculation) | ||
| ips_covered=$(python3 -c "total=0; [total:=total+min(2**(128-int(line.strip().split('/')[1])), 2**32) if '/' in line.strip() else total+1 for line in open('ipv6.txt') if line.strip() and not line.startswith('#')]; print(int(total))") | ||
| if [[ "$ips_covered" -lt ${{matrix.list.validate_low}} ]]; then | ||
| echo "Too few IPv6 addresses covered ($ips_covered)" | ||
| exit 1 | ||
| fi | ||
| if [[ "$ips_covered" -gt ${{matrix.list.validate_high}} ]]; then | ||
| echo "Too many IPv6 addresses covered ($ips_covered)" | ||
| exit 1 | ||
| fi | ||
| - name: Push Output of ipv6.txt to category folder | ||
| uses: X4BNet/copy_file_to_another_repo_action@main | ||
| env: | ||
| API_TOKEN_GITHUB: ${{ secrets.MY_GITHUB_TOKEN }} | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can also use the built-in "secrets.GITHUB_TOKEN".
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm aware it's now possible to grant appropriate permissions to the default token. This wasn't possible when this was first written. |
||
| with: | ||
| source_file: 'ipv6.txt' | ||
| destination_repo: '${{ github.repository }}' | ||
| destination_folder: '/output/${{matrix.list.name}}/' | ||
| user_email: '[email protected]' | ||
| user_name: 'listbuilder' | ||
| destination_branch: "main" | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps a better solution would be to dynamically set the branch based on where the workflow is being executed? Currently, if I want to test something on a different branch, the data is still uploaded to main.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Care needs to be taken to prevent both jobs from conflicting with each other. I would probably prefer a matrix build that does both IPv4 and IPv6 separately and then a dependent job to update the repository. This would also provide more CI re-use. |
||
| - name: Push Output of ipv6.txt to root (datacenter only) | ||
| uses: X4BNet/copy_file_to_another_repo_action@main | ||
| env: | ||
| API_TOKEN_GITHUB: ${{ secrets.MY_GITHUB_TOKEN }} | ||
| if: ${{ matrix.list.name == 'datacenter' }} | ||
| with: | ||
| source_file: 'ipv6.txt' | ||
| destination_repo: '${{ github.repository }}' | ||
| destination_folder: '/' | ||
| user_email: '[email protected]' | ||
| user_name: 'listbuilder' | ||
| destination_branch: "main" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -49,7 +49,7 @@ jobs: | |
| perl ./helpers/cleanup.pl /tmp/asn-processed.txt | grep -E '/(2[0-4]|1[0-9]|[0-9])$' > /tmp/asn-cleaned.txt | ||
|
|
||
| # Combine lists | ||
| cat $GITHUB_WORKSPACE/input/${{matrix.list.name}}/ips/*.txt | grep -v '^#' | awk '{print $1}' | sed '/^$/d' > /tmp/manual-processed.txt | ||
| find $GITHUB_WORKSPACE/input/${{matrix.list.name}}/ips/ -type f -name "*.txt" ! -iname "*ipv6.txt" ! -iname "*IPv6.txt" -exec cat {} + | grep -v '^#' | awk '{print $1}' | sed '/^$/d' > /tmp/manual-processed.txt | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the future, we can organize IPv4 and IPv6 into different folders; for now I made a small fix. |
||
| cat /tmp/asn-cleaned.txt /tmp/manual-processed.txt | sort -n > /tmp/ipv4.txt | ||
|
|
||
| # Final cleanup | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| #!/bin/bash | ||
| list="$1" | ||
|
|
||
| curl https://iptoasn.com/data/ip2asn-v6.tsv.gz | gzip -d >/tmp/asndb-ipv6.tsv | ||
|
|
||
| rm /tmp/asn-processed-ipv6.txt | ||
| cat "./input/$list/ASN.txt" | grep -v '^#' | awk '{print $1}' | grep '^AS' | while read -r asn; do | ||
| echo "Processing $asn" | ||
| awk -v asn="${asn:2}" '$3 == asn {print $1"-"$2}' /tmp/asndb-ipv6.tsv | while read -r range; do | ||
| start=$(echo "$range" | cut -d'-' -f1) | ||
| end=$(echo "$range" | cut -d'-' -f2) | ||
| python3 -c "import ipaddress; import sys; start=ipaddress.IPv6Address('$start'); end=ipaddress.IPv6Address('$end'); nets=list(ipaddress.summarize_address_range(start, end)); [print(str(net)) for net in nets]" >>/tmp/asn-processed-ipv6.txt | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ipcalc does not support IPv6. Python is also better because we don't need to install any additional software or packages; the default Python available on any normal operating system (💩 Windows) is enough. |
||
| done | ||
| done | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,10 +1,10 @@ | ||
| #!/bin/bash | ||
| list="$1" | ||
|
|
||
|
|
||
| curl https://iptoasn.com/data/ip2asn-v4.tsv.gz | gzip -d > /tmp/asndb.tsv | ||
| curl https://iptoasn.com/data/ip2asn-v4.tsv.gz | gzip -d >/tmp/asndb.tsv | ||
|
|
||
| rm /tmp/asn-processed.txt | ||
| cat "./input/$list/ASN.txt" | grep -v '^#' | awk '{print $1}' | grep '^AS' | while read asn; do | ||
| echo "Processing $asn" | ||
| awk '{if($3 == '${asn:2}') print "ipcalc -rn "$1"-"$2" | tail -n+2"}' /tmp/asndb.tsv | bash >> /tmp/asn-processed.txt | ||
| done | ||
| awk '{if($3 == '${asn:2}') print "ipcalc -rn "$1"-"$2" | tail -n+2"}' /tmp/asndb.tsv | bash >>/tmp/asn-processed.txt | ||
| done |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| #!/usr/bin/perl | ||
| use strict; | ||
| use warnings; | ||
|
|
||
| # Simple IPv6 cleanup script - removes duplicates and basic overlaps | ||
| my %seen; | ||
| my @networks; | ||
|
|
||
| while (my $line = <>) { | ||
| chomp $line; | ||
| next if $line =~ /^#/ || $line =~ /^\s*$/; | ||
|
|
||
| # Basic IPv6 CIDR validation | ||
| if ($line =~ /^([0-9a-f:]+)\/(\d+)$/i) { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This also prevents any IPv4 address from being added to the list. |
||
| my ($ip, $prefix) = ($1, $2); | ||
|
|
||
| # Validate prefix length | ||
| if ($prefix >= 0 && $prefix <= 128) { | ||
| # Normalize IPv6 address (basic normalization) | ||
| $ip = lc($ip); | ||
| my $normalized = "$ip/$prefix"; | ||
|
|
||
| # Skip duplicates | ||
| next if $seen{$normalized}; | ||
| $seen{$normalized} = 1; | ||
|
|
||
| push @networks, { | ||
| ip => $ip, | ||
| prefix => $prefix, | ||
| original => $line | ||
| }; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| # Sort by IP address (simple string sort for now) | ||
| @networks = sort { $a->{original} cmp $b->{original} } @networks; | ||
|
|
||
| # Output networks | ||
| for my $net (@networks) { | ||
| print $net->{original} . "\n"; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # Manually added IPv6 netblocks | ||
| # Comment description mandatory |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # Manually added IPv6 netblocks | ||
| # Comment description mandatory |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # IPv6 networks for datacenters and VPNs (legacy path) | ||
| # This file will be automatically generated |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # IPv6 networks for datacenters and VPNs | ||
| # This file will be automatically generated |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| # IPv6 networks for VPNs only | ||
| # This file will be automatically generated |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This value can still be adjusted.