Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Static-analysis workflow: mypy type check, pylint duplicate-code scan and
# radon complexity report. Each analysis command is followed by `|| echo ...`,
# so its findings are reported without failing the job.
name: Code Analysis

on:
  push:
    branches:
      - main
      - master
      - dev
  pull_request:
    branches:
      - main

jobs:
  analyze:
    runs-on: ubuntu-latest
    env:
      # Compatibility setting for some PyTorch/OpenMP environments
      KMP_DUPLICATE_LIB_OK: TRUE

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install Pixi
        # NOTE(review): this action reference looks mangled by e-mail
        # obfuscation in this copy; presumably it was
        # `prefix-dev/setup-pixi@<version>` - confirm against the repository.
        uses: prefix-dev/[email protected]
        with:
          pixi-version: latest

      - name: Setup environment from pixi.toml
        run: pixi install

      - name: Run type check with mypy
        run: |
          pixi run python -m pip install mypy
          pixi run python -m mypy \
            phishintention.py \
            configs.py \
            configs/ \
            modules/ \
            ocr_lib/ \
            utils/ \
            --exclude "test" --exclude "tests" --exclude "datasets" || echo "Mypy completed with warnings (non-fatal)"

      - name: Run code duplication check (non-fatal)
        run: |
          pixi run python -m pip install pylint
          pixi run python -m pylint \
            --disable=all \
            --enable=similarities \
            --min-similarity-lines=10 \
            phishintention.py \
            configs.py \
            configs/ \
            modules/ \
            ocr_lib/ \
            utils/ || echo "Code duplication check completed (non-fatal)"

      - name: Run code complexity check (radon)
        run: |
          pixi run python -m pip install radon
          pixi run python -m radon cc \
            phishintention.py \
            configs.py \
            configs/ \
            modules/ \
            ocr_lib/ \
            utils/ \
            -a -s --show-closures \
            --exclude "test*,tests*,datasets*" || echo "Radon complexity check completed (non-fatal)"

133 changes: 133 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# PhishIntention container
# Base image chosen for Python 3.10 compatibility with torch 1.13 and detectron2.
# The Debian release is pinned as well, so the apt packages installed further
# down resolve against the same distribution on every rebuild.
FROM python:3.10-slim-bookworm

# Build-time only: keeps apt/debconf non-interactive in the RUN steps of this
# stage without leaking the setting into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment: unbuffered Python output, and KMP_DUPLICATE_LIB_OK as
# an OpenMP compatibility switch.
ENV PYTHONUNBUFFERED=1 \
    KMP_DUPLICATE_LIB_OK=TRUE

# Point APT at the Tsinghua University (TUNA) mirror for faster package
# installs in China. If /etc/apt/sources.list.d/debian.sources exists it is
# rewritten; otherwise /etc/apt/sources.list is. Both the http and https forms
# of the Debian archive and security URLs are replaced. The same step then
# installs ca-certificates and curl and clears the apt lists.
RUN set -eux; \
    MIRROR="mirrors.tuna.tsinghua.edu.cn"; \
    SOURCES=/etc/apt/sources.list; \
    if [ -f /etc/apt/sources.list.d/debian.sources ]; then \
        SOURCES=/etc/apt/sources.list.d/debian.sources; \
    fi; \
    sed -i \
        -e "s|http://deb.debian.org/debian|https://$MIRROR/debian|g" \
        -e "s|https://deb.debian.org/debian|https://$MIRROR/debian|g" \
        -e "s|http://security.debian.org/debian-security|https://$MIRROR/debian-security|g" \
        -e "s|https://security.debian.org/debian-security|https://$MIRROR/debian-security|g" \
        "$SOURCES"; \
    apt-get update; \
    apt-get install -y --no-install-recommends ca-certificates curl; \
    rm -rf /var/lib/apt/lists/*

# Use Tsinghua University PyPI mirror for faster package installs in China.
# This writes pip's default index URL into its configuration; the torch install
# further down passes its own --index-url on the command line instead.
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple

# System packages: build toolchain and git (Detectron2 source build), shared
# libraries for OpenCV, and Chromium plus its driver and runtime libraries for
# Selenium. The list is kept in alphabetical order so additions diff cleanly;
# the apt lists are removed in the same layer.
RUN set -e; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        chromium \
        chromium-driver \
        curl \
        fonts-liberation \
        git \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libgl1 \
        libglib2.0-0 \
        libgtk-3-0 \
        libnspr4 \
        libnss3 \
        libsm6 \
        libwayland-client0 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
        libxrender1 \
        unzip \
        wget \
        xdg-utils; \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Dependency layers come before the project sources are copied in, so editing
# the code does not invalidate (and re-run) the pip installs below.

# Upgrade pip tooling
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# NumPy first, at the same pin that the core list repeats below
RUN pip install --no-cache-dir numpy==1.23.0

# Torch + torchvision (CPU builds) pinned for Detectron2 compatibility.
# --index-url selects the PyTorch CPU wheel index for this command only.
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch==1.13.1+cpu \
        torchvision==0.14.1+cpu

# Core Python deps.
# - `pathlib` is not listed: it is part of the Python 3 standard library and
#   the PyPI package of that name is an obsolete backport.
# - Only opencv-contrib-python is installed: the OpenCV wheels all ship the
#   same `cv2` package and upstream says to install exactly one of them; the
#   contrib build is a superset of opencv-python.
RUN pip install --no-cache-dir \
        numpy==1.23.0 \
        requests \
        scikit-learn \
        spacy \
        beautifulsoup4 \
        matplotlib \
        pandas \
        nltk \
        tqdm \
        unidecode \
        gdown \
        tldextract \
        scipy \
        fvcore \
        lxml \
        psutil \
        Pillow==8.4.0 \
        editdistance \
        cryptography==38.0.4 \
        httpcore==0.15.0 \
        h11 \
        h2 \
        blinker==1.7.0 \
        hyperframe \
        selenium-wire \
        helium \
        selenium \
        webdriver-manager \
        flask \
        flask-cors \
        pycocotools \
        opencv-contrib-python

# Detectron2 from source (CPU build). DETECTRON2_REF defaults to the upstream
# default branch; pass --build-arg DETECTRON2_REF=<tag-or-commit> for a
# reproducible build. --no-build-isolation lets the build use the torch
# installed above.
ARG DETECTRON2_REF=main
RUN pip install --no-cache-dir --no-build-isolation \
        "git+https://github.com/facebookresearch/detectron2.git@${DETECTRON2_REF}"

# Project sources last (see the caching note at the top of this section)
COPY . /app

# Place the system chromedriver at ./chromedriver-linux64/chromedriver.
# `install -D` creates the target directory when the build context does not
# contain it (a plain `cp` would fail there), replaces any driver file copied
# in with the sources, and sets the executable mode in the same step.
RUN install -D -m 0755 /usr/bin/chromedriver ./chromedriver-linux64/chromedriver

# Prepare models during build. Marking the script executable and running it
# form one logical step, so they share a single layer.
RUN chmod +x setup_in_docker.sh \
    && ./setup_in_docker.sh

# Default command can be overridden; keep shell for interactive runs
CMD ["bash"]
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,16 @@ phishintention">Website</a> •
|_ phishintention.py: main script
```

## Setup
## Setup with Docker

```bash
git clone https://github.com/lindsey98/PhishIntention.git
cd PhishIntention
docker build -t phishintention .
docker run -it phishintention
```

## Setup with pixi

### Step 1: Install dependencies:

Expand Down
69 changes: 69 additions & 0 deletions brand_mapping.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{
"Adobe Inc.": "Adobe",
"Adobe Inc": "Adobe",
"ADP, LLC": "ADP",
"ADP, LLC.": "ADP",
"Amazon.com Inc.": "Amazon",
"Amazon.com Inc": "Amazon",
"AOL Inc.": "AOL",
"AOL Inc": "AOL",
"Apple Inc.": "Apple",
"Apple Inc": "Apple",
"AT&T Inc.": "AT&T",
"AT&T Inc": "AT&T",
"Dropbox, Inc.": "Dropbox",
"Dropbox, Inc": "Dropbox",
"eBay Inc.": "eBay",
"eBay Inc": "eBay",
"Facebook, Inc.": "Facebook",
"Facebook, Inc": "Facebook",
"Google Inc.": "Google",
"Google Inc": "Google",
"Netflix Inc.": "Netflix",
"Netflix Inc": "Netflix",
"PayPal Inc.": "PayPal",
"PayPal Inc": "PayPal",
"Square, Inc.": "Square",
"Square, Inc": "Square",
"Yahoo! Inc": "Yahoo!",
"Yahoo! Inc.": "Yahoo!",
"Americanas.com S,A Comercio Electrnico": "Americanas.com S",
"Banco do Brasil S.A.": "Banco do Brasil S.A",
"Credit Agricole S.A.": "Credit Agricole S.A",
"DGI (French Tax Authority)": "DGI French Tax Authority",
"DHL Airways, Inc.": "DHL Airways",
"DHL Airways, Inc": "DHL Airways",
"DHL": "DHL Airways",
"Free (ISP)": "Free ISP",
"Mastercard International Incorporated": "Mastercard International",
"Royal KPN N.V.": "Royal KPN N.V",
"SF Express Co.": "SF Express Co",
"SNS Bank N.V.": "SNS Bank N.V",
"Webmail Providers": "Webmail Provider",
"Microsoft OneDrive": "Microsoft",
"Office365": "Microsoft",
"Outlook": "Microsoft",
"Global Sources (HK)": "Global Sources HK",
"T-Online": "Deutsche Telekom",
"Airbnb, Inc": "Airbnb, Inc.",
"azul": "Azul",
"Raiffeisen Bank S.A": "Raiffeisen Bank S.A.",
"Twitter, Inc": "Twitter, Inc.",
"Twitter": "Twitter, Inc.",
"capital_one": "Capital One Financial Corporation",
"la_banque_postale": "La Banque postale",
"db": "Deutsche Bank AG",
"Swiss Post": "PostFinance",
"PostFinance": "PostFinance",
"grupo_bancolombia": "Bancolombia",
"barclays": "Barclays Bank Plc",
"gov_uk": "Government of the United Kingdom",
"Aruba S.p.A": "Aruba S.p.A.",
"TSB Bank Plc": "TSB Bank Limited",
"strato": "Strato AG",
"cogeco": "Cogeco",
"Canada Revenue Agency": "Government of Canada",
"UniCredit Bulbank": "UniCredit Bank Aktiengesellschaft",
"ameli_fr": "French Health Insurance",
"Banco de Credito del Peru": "bcp"
}
Loading