diff --git a/.github/SECURITY.md b/.github/SECURITY.md
new file mode 100644
index 00000000..d63c05e3
--- /dev/null
+++ b/.github/SECURITY.md
@@ -0,0 +1,116 @@
+# Security Policy
+
+## Supported Versions
+
+We actively support the following versions of PyDoll:
+
+| Version | Supported |
+| ------- | ------------------ |
+| 2.0.x | :white_check_mark: |
+| 1.x.x | :x: |
+
+## Reporting a Vulnerability
+
+We take security vulnerabilities seriously. If you discover a security vulnerability, please report it to us privately.
+
+### Where to Report
+
+Please report security vulnerabilities by:
+
+1. **Email**: Send details to [security@example.com] (replace with your actual security email)
+2. **GitHub Security Advisories**: Use the "Security" tab in this repository
+3. **Private Disclosure**: Contact the maintainers directly through GitHub
+
+### What to Include
+
+When reporting a vulnerability, please include:
+
+- A clear description of the vulnerability
+- Steps to reproduce the issue
+- Potential impact assessment
+- Suggested fix (if available)
+- Your contact information for follow-up
+
+### Response Timeline
+
+- **Acknowledgment**: Within 48 hours
+- **Initial Assessment**: Within 1 week
+- **Fix Development**: Depends on severity (1-4 weeks)
+- **Disclosure**: After fix is deployed
+
+## Security Best Practices
+
+### For Contributors
+
+1. **Dependency Management**
+ - Keep dependencies up to date
+ - Use exact version pinning for security-critical dependencies
+ - Regularly audit dependencies for vulnerabilities
+
+2. **Code Security**
+ - Follow secure coding practices
+ - Validate all user inputs
+ - Use type hints and static analysis tools
+ - Implement proper error handling
+
+3. **Testing**
+ - Include security test cases
+ - Test for common web vulnerabilities
+ - Use automated security scanning tools
+
+### For Users
+
+1. **Installation**
+ - Always install from official sources (PyPI)
+ - Verify package signatures when available
+ - Use virtual environments
+
+2. **Usage**
+ - Keep PyDoll updated to the latest version
+ - Follow the principle of least privilege
+ - Validate all user inputs in your applications
+
+## Security Features
+
+### Browser Security
+
+- **Sandboxing**: PyDoll runs browsers in isolated environments
+- **Network Controls**: Configurable network restrictions
+- **File System Access**: Limited file system access controls
+
+### Connection Security
+
+- **TLS/SSL**: Secure connections to browser instances
+- **Authentication**: Proper authentication mechanisms
+- **Input Validation**: All protocol messages are validated
+
+## Known Security Considerations
+
+### Browser Security Context
+
+PyDoll controls browser instances which have inherent security implications:
+
+1. **Execution Context**: JavaScript code execution in controlled environments
+2. **Network Access**: Browsers can make network requests
+3. **File System**: Limited file system access through browser APIs
+
+### Mitigation Strategies
+
+1. **Isolated Environments**: Run in containers or virtual machines when possible
+2. **Network Policies**: Implement network restrictions
+3. **Resource Limits**: Set appropriate resource limits
+4. **Monitoring**: Monitor browser activities
+
+## Compliance
+
+This project follows:
+
+- **OWASP Guidelines**: Web application security best practices
+- **NIST Framework**: Cybersecurity framework guidelines
+- **Industry Standards**: Following established security standards
+
+## Updates
+
+This security policy is reviewed and updated regularly.
+
+For questions about this security policy, please contact the maintainers.
\ No newline at end of file
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..b5637515
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,71 @@
+version: 2
+updates:
+ # Python dependencies
+ - package-ecosystem: "pip"
+ directory: "/"
+ schedule:
+ interval: "weekly"
+ day: "monday"
+ time: "06:00"
+ open-pull-requests-limit: 10
+ reviewers:
+ - "autoscrape-labs"
+ assignees:
+ - "autoscrape-labs"
+ commit-message:
+ prefix: "deps"
+ prefix-development: "deps-dev"
+ include: "scope"
+ labels:
+ - "dependencies"
+ - "security"
+ # Group related updates
+ groups:
+ production-dependencies:
+ patterns:
+ - "websockets"
+ - "aiohttp"
+ - "aiofiles"
+ - "bs4"
+ development-dependencies:
+ patterns:
+ - "ruff"
+ - "pytest*"
+ - "mypy"
+ - "mkdocs*"
+ - "taskipy"
+ # Security updates
+ allow:
+ - dependency-type: "direct"
+ - dependency-type: "indirect"
+ # Ignore certain updates if needed
+ ignore:
+ - dependency-name: "*"
+ update-types: ["version-update:semver-major"]
+
+ # GitHub Actions dependencies
+ - package-ecosystem: "github-actions"
+ directory: "/"
+ schedule:
+ interval: "weekly"
+ day: "monday"
+ time: "06:00"
+ open-pull-requests-limit: 5
+ reviewers:
+ - "autoscrape-labs"
+ assignees:
+ - "autoscrape-labs"
+ commit-message:
+ prefix: "ci"
+ include: "scope"
+ labels:
+ - "github-actions"
+ - "security"
+ # Group GitHub Actions updates
+ groups:
+ github-actions:
+ patterns:
+ - "actions/*"
+ - "codecov/*"
+ - "softprops/*"
+ - "peaceiris/*"
\ No newline at end of file
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
new file mode 100644
index 00000000..2fb552ba
--- /dev/null
+++ b/.github/workflows/codeql.yml
@@ -0,0 +1,70 @@
+name: "CodeQL Security Scan"
+
+on:
+ push:
+ branches: [ "main", "master", "develop" ]
+ pull_request:
+ branches: [ "main", "master", "develop" ]
+ schedule:
+ - cron: '30 2 * * 1' # Weekly on Monday at 2:30 AM UTC
+
+jobs:
+ analyze:
+ name: Analyze
+ runs-on: ubuntu-latest
+ timeout-minutes: 360
+ permissions:
+ # Required for all workflows
+ security-events: write
+ # Required to fetch internal or private CodeQL packs
+ packages: read
+ # Required for workflows in private repositories
+ actions: read
+ contents: read
+
+ strategy:
+ fail-fast: false
+ matrix:
+ language: [ 'python' ]
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ # Initializes the CodeQL tools for scanning
+ - name: Initialize CodeQL
+ uses: github/codeql-action/init@v3
+ with:
+ languages: ${{ matrix.language }}
+ queries: +security-and-quality
+ # Override default language queries
+ config: |
+ paths-ignore:
+ - "tests/"
+ - "docs/"
+ - "examples/"
+ queries:
+ - uses: security-and-quality
+ - uses: security-experimental
+
+ # Set up Python
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+
+ # Install dependencies
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install poetry
+ poetry install
+
+ # Perform the CodeQL Analysis
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v3
+ with:
+ category: "/language:${{matrix.language}}"
+ upload: true
+ # Fail on high severity issues
+ # fail-on: error
\ No newline at end of file
diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml
index d65046f1..02c61b6c 100644
--- a/.github/workflows/deploy-docs.yml
+++ b/.github/workflows/deploy-docs.yml
@@ -1,36 +1,41 @@
-name: Deploy MkDocs to GitHub Pages
+name: Deploy Documentation
on:
push:
branches:
- main
+ workflow_dispatch:
jobs:
deploy:
runs-on: ubuntu-latest
+ # Explicit GITHUB_TOKEN scopes for this job; contents: write is what lets the deploy step publish ./site
+ permissions:
+ contents: write
+ pages: write
+ id-token: write
steps:
- - name: Code Checkout
- uses: actions/checkout@v3
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
- - name: Setup Python
- uses: actions/setup-python@v4
- with:
- python-version: '3.x'
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
- - name: Install Dependencies
- run: |
- python -m pip install --upgrade pip
- pip install mkdocs
- pip install mkdocs-material
- pip install pymdown-extensions
- pip install mkdocstrings[python]
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install poetry
+ poetry install
- - name: Build the documentation
- run: mkdocs build
+ - name: Build documentation
+ run: poetry run mkdocs build
- - name: Deploy to GitHub Pages
- uses: peaceiris/actions-gh-pages@v3
- with:
- github_token: ${{ secrets.GITHUB_TOKEN }}
- publish_dir: ./site
+ - name: Deploy to GitHub Pages
+ uses: peaceiris/actions-gh-pages@v3
+ with:
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ publish_dir: ./site
diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml
index 46d24faa..eb8ab9ab 100644
--- a/.github/workflows/mypy.yml
+++ b/.github/workflows/mypy.yml
@@ -10,28 +10,52 @@ on:
jobs:
build:
-
runs-on: ubuntu-latest
-
+ # Add security hardening
+ permissions:
+ contents: read
strategy:
max-parallel: 4
matrix:
- python-version: ["3.11"]
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- - name: Install Dependencies
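+ # Cache ~/.cache/pypoetry, keyed on OS, Python version and poetry.lock (NOTE(review): the in-project .venv configured below is not under this path - confirm the cache is effective)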
+ - name: Cache Poetry dependencies
+ uses: actions/cache@v4
+ with:
+ path: ~/.cache/pypoetry
+ key: poetry-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
+ restore-keys: |
+ poetry-${{ runner.os }}-${{ matrix.python-version }}-
+
+ - name: Install Poetry
run: |
python -m pip install --upgrade pip
- python -m pip install mypy
- python -m pip install -e .
- python -m mypy --install-types --non-interactive pydoll
+ python -m pip install poetry
+
+ - name: Configure Poetry
+ run: |
+ poetry config virtualenvs.create true
+ poetry config virtualenvs.in-project true
+
+ - name: Install dependencies
+ run: |
+ poetry install
+ poetry run pip install mypy
+
+ - name: Verify installation
+ run: |
+ python --version
+ poetry run mypy --version
+ poetry --version
- name: mypy
- run: python -m mypy .
+ run: poetry run mypy . --ignore-missing-imports
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index df39c887..c67f91af 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -5,13 +5,17 @@ on: workflow_dispatch
jobs:
deploy:
runs-on: ubuntu-latest
+ # Add security hardening
+ permissions:
+ contents: read
+ id-token: write # For trusted publishing
steps:
- name: Checkout code
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Set up Python
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: "3.10"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index fab76eff..77caa0dd 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -5,6 +5,8 @@ jobs:
version-cz:
runs-on: ubuntu-latest
name: "Version CZ"
+ permissions:
+ contents: write
outputs:
version: ${{ steps.cz.outputs.version }}
@@ -28,6 +30,8 @@ jobs:
runs-on: ubuntu-latest
name: "Version Pyproject"
needs: version-cz
+ permissions:
+ contents: write
outputs:
version: ${{ needs.version-cz.outputs.version }}
steps:
@@ -37,10 +41,15 @@ jobs:
fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+
- name: Install Poetry
run: |
- curl -sSL https://install.python-poetry.org | python3 -
- export PATH="$HOME/.local/bin:$PATH"
+ python -m pip install --upgrade pip
+ pip install poetry
- name: Update Poetry version in pyproject.toml
run: |
@@ -63,14 +72,15 @@ jobs:
git pull --rebase
git push
-
release:
name: Release
needs: version-pyproject
runs-on: ubuntu-latest
+ permissions:
+ contents: write
steps:
- name: Create Release
- uses: softprops/action-gh-release@v1
+ uses: softprops/action-gh-release@v2
with:
draft: false
prerelease: false
diff --git a/.github/workflows/ruff-ci.yml b/.github/workflows/ruff-ci.yml
index f4b38ac3..107aa32a 100644
--- a/.github/workflows/ruff-ci.yml
+++ b/.github/workflows/ruff-ci.yml
@@ -10,22 +10,32 @@ on:
jobs:
build:
-
runs-on: ubuntu-latest
-
+ # Add security hardening
+ permissions:
+ contents: read
strategy:
max-parallel: 4
matrix:
- python-version: ["3.11"]
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
+ # Add caching for better performance
+ - name: Cache pip dependencies
+ uses: actions/cache@v4
+ with:
+ path: ~/.cache/pip
+ key: pip-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/pyproject.toml') }}
+ restore-keys: |
+ pip-${{ runner.os }}-${{ matrix.python-version }}-
+
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
diff --git a/.github/workflows/security-scan.yml b/.github/workflows/security-scan.yml
new file mode 100644
index 00000000..45463c43
--- /dev/null
+++ b/.github/workflows/security-scan.yml
@@ -0,0 +1,109 @@
+name: Security Scan
+
+on:
+ push:
+ branches: [ main, master, develop ]
+ pull_request:
+ branches: [ main, master, develop ]
+ schedule:
+ - cron: '0 6 * * 1' # Weekly on Monday at 6 AM UTC
+
+jobs:
+ security-scan:
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ security-events: write
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+
+ - name: Cache pip dependencies
+ uses: actions/cache@v4
+ with:
+ path: ~/.cache/pip
+ key: pip-security-${{ runner.os }}-${{ hashFiles('**/pyproject.toml') }}
+ restore-keys: |
+ pip-security-${{ runner.os }}-
+
+ - name: Install Poetry and dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install poetry
+ poetry install
+
+ - name: Install security tools
+ run: |
+ poetry run pip install bandit[toml] safety semgrep
+
+ - name: Run Bandit Security Scan
+ run: |
+ poetry run bandit -r pydoll/ -f json -o bandit-report.json || true
+ poetry run bandit -r pydoll/ -f txt || true
+ continue-on-error: true
+
+ - name: Run Safety Security Scan
+ run: |
+ poetry run safety check --json --output safety-report.json || true
+ poetry run safety check || true
+ continue-on-error: true
+
+ - name: Run Semgrep Security Scan
+ run: |
+ poetry run semgrep --config=auto pydoll/ --json --output=semgrep-report.json || true
+ poetry run semgrep --config=auto pydoll/ || true
+ continue-on-error: true
+
+ - name: Convert Bandit to SARIF
+   run: |
+     if [ -f bandit-report.json ]; then
+       poetry run pip install sarif-om
+       poetry run python -c "
+     # Kept at the block scalar's base indent: YAML keeps these lines inside run:, and python -c receives them at column 0
+     import json
+     try:
+         with open('bandit-report.json', 'r') as f:
+             data = json.load(f)
+         print('Bandit scan completed')
+     except Exception as e:
+         print(f'Bandit report processing failed: {e}')
+     "
+     fi
+   continue-on-error: true
+
+ - name: Upload security scan results
+ uses: actions/upload-artifact@v4
+ if: always()
+ with:
+ name: security-scan-results
+ path: |
+ bandit-report.json
+ safety-report.json
+ semgrep-report.json
+ retention-days: 30
+
+ dependency-review:
+ runs-on: ubuntu-latest
+ if: github.event_name == 'pull_request'
+ permissions:
+ contents: read
+ pull-requests: write
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Dependency Review
+ uses: actions/dependency-review-action@v4
+ with:
+ fail-on-severity: moderate
+ allow-licenses: MIT, Apache-2.0, BSD-3-Clause, BSD-2-Clause, ISC
+ comment-summary-in-pr: always
\ No newline at end of file
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 0cec265c..e03ee1e6 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -7,20 +7,43 @@ on:
jobs:
tests:
runs-on: ubuntu-latest
+ # Add security hardening
+ permissions:
+ contents: read
strategy:
fail-fast: false
matrix:
- python-version: ["3.11", "3.12", "3.13"]
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
+
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- - name: Install dependencies
+
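+ # Cache ~/.cache/pypoetry, keyed on OS, Python version and poetry.lock (NOTE(review): the in-project .venv configured below is not under this path - confirm the cache is effective)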
+ - name: Cache Poetry dependencies
+ uses: actions/cache@v4
+ with:
+ path: ~/.cache/pypoetry
+ key: poetry-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
+ restore-keys: |
+ poetry-${{ runner.os }}-${{ matrix.python-version }}-
+
+ - name: Install Poetry
run: |
+ python -m pip install --upgrade pip
python -m pip install poetry
- poetry install
+
+ - name: Configure Poetry
+ run: |
+ poetry config virtualenvs.create true
+ poetry config virtualenvs.in-project true
+
+ - name: Install dependencies
+ run: poetry install
+
- name: Run tests with coverage
run: |
poetry run pytest -s -x --cov=pydoll -vv --cov-report=xml
diff --git a/.gitignore b/.gitignore
index 8e8be591..a375ceb4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -161,4 +161,10 @@ cython_debug/
#.idea/
.czrc
-.ruff_cache/
\ No newline at end of file
+.ruff_cache/
+
+# Docker files (local development only)
+Dockerfile
+docker-compose.yml
+.dockerignore
+docker-example.py
\ No newline at end of file
diff --git a/examples/shadow_dom_example.py b/examples/shadow_dom_example.py
new file mode 100644
index 00000000..76136a40
--- /dev/null
+++ b/examples/shadow_dom_example.py
@@ -0,0 +1,442 @@
+"""
+Shadow DOM Example - Secure Shadow DOM Automation with pydoll
+
+This example demonstrates how to securely interact with Shadow DOM elements
+using pydoll's enhanced Shadow DOM support. It covers best practices for
+security, error handling, and real-world usage patterns.
+
+Security Features Demonstrated:
+- Safe shadow root access with validation
+- Selector sanitization and injection prevention
+- Proper error handling for security edge cases
+- Respecting shadow DOM boundaries and encapsulation
+"""
+
+import asyncio
+import logging
+
+from pydoll.browser.chromium import Chrome
+from pydoll.exceptions import (
+ ElementNotFound,
+ InvalidShadowRoot,
+ NoShadowRootAttached,
+ ShadowRootAccessDenied,
+)
+
+# Configure logging for security and debugging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+async def demo_basic_shadow_dom_access():
+ """
+ Basic Shadow DOM access demonstration.
+
+ Shows the fundamental pattern for securely accessing shadow DOM content.
+ """
+ print('Basic Shadow DOM Access Demo')
+ print('=' * 40)
+
+ async with Chrome() as browser:
+ tab = await browser.start()
+
+ # Navigate to a page with Shadow DOM (example: a page with custom elements)
+ await tab.go_to(
+ 'data:text/html,<div id="host"></div>'
+ ''
+ ''
+ )
+
+ try:
+ # Find the shadow host element
+ host_element = await tab.find(id='host')
+ logger.info('Found shadow host element')
+
+ # Securely access the shadow root
+ shadow_root = await host_element.get_shadow_root()
+ logger.info(f'Accessed shadow root (mode: {shadow_root.mode})')
+
+ # Find elements within the shadow DOM
+ shadow_button = await shadow_root.find_element_in_shadow('button.shadow-btn')
+ logger.info('Found button within shadow DOM')
+
+ # Interact with shadow DOM elements safely
+ await shadow_button.click()
+ logger.info('Successfully clicked shadow DOM button')
+
+ except NoShadowRootAttached:
+ logger.error('Element does not have a shadow root attached')
+ except InvalidShadowRoot as e:
+ logger.error(f'Invalid shadow root: {e}')
+ except ElementNotFound as e:
+ logger.error(f'Element not found in shadow DOM: {e}')
+
+
+async def demo_closed_shadow_dom():
+ """
+ Demonstration of closed shadow DOM handling.
+
+ Shows how pydoll handles closed shadow roots and security boundaries.
+ """
+ print('\nClosed Shadow DOM Demo')
+ print('=' * 25)
+
+ async with Chrome() as browser:
+ tab = await browser.start()
+
+ # Create page with closed shadow DOM
+ await tab.go_to(
+ 'data:text/html,'
+ ''
+ ''
+ )
+
+ try:
+ host_element = await tab.find(id='closed-host')
+ shadow_root = await host_element.get_shadow_root()
+ logger.info(f'Accessed closed shadow root (mode: {shadow_root.mode})')
+
+ # Even for closed shadow roots, if we have access, we can find elements
+ secret_div = await shadow_root.find_element_in_shadow('.secret')
+ content = await secret_div.text
+ logger.info(f'Accessed closed shadow content: {content}')
+
+ except ShadowRootAccessDenied:
+ logger.warning('Access to closed shadow root was denied (expected)')
+ except Exception as e:
+ logger.error(f'Unexpected error: {e}')
+
+
+async def demo_nested_shadow_dom():
+ """
+ Demonstration of nested shadow DOM access.
+
+ Shows how to navigate through multiple levels of shadow DOM safely.
+ """
+ print('\nNested Shadow DOM Demo')
+ print('=' * 23)
+
+ async with Chrome() as browser:
+ tab = await browser.start()
+
+ # Create page with nested shadow DOM
+ await tab.go_to(
+ 'data:text/html,'
+ ''
+ ''
+ )
+
+ try:
+ # Access outer shadow DOM
+ outer_host = await tab.find(id='outer')
+ outer_shadow = await outer_host.get_shadow_root()
+ logger.info('Accessed outer shadow root')
+
+ # Find inner component within outer shadow
+ inner_component = await outer_shadow.find_element_in_shadow('.inner')
+ logger.info('Found inner component')
+
+ # Access inner shadow DOM
+ inner_shadow = await inner_component.get_shadow_root()
+ logger.info('Accessed inner shadow root')
+
+ # Find deeply nested button
+ deep_button = await inner_shadow.find_element_in_shadow('.deep-btn')
+ await deep_button.click()
+ logger.info('Successfully clicked deeply nested shadow button')
+
+ except Exception as e:
+ logger.error(f'Error in nested shadow access: {e}')
+
+
+async def demo_security_features():
+ """
+ Demonstration of security features and injection prevention.
+
+ Shows how pydoll prevents various types of security vulnerabilities.
+ """
+ print('\nSecurity Features Demo')
+ print('=' * 26)
+
+ async with Chrome() as browser:
+ tab = await browser.start()
+
+ # Create a simple shadow DOM for testing
+ await tab.go_to(
+ 'data:text/html,'
+ ''
+ ''
+ )
+
+ host_element = await tab.find(id='test')
+ shadow_root = await host_element.get_shadow_root()
+
+ # Test 1: Valid selector (should work)
+ try:
+ await shadow_root.find_element_in_shadow('.content')
+ logger.info('Valid selector works correctly')
+ except Exception as e:
+ logger.error(f'Valid selector failed: {e}')
+
+ # Test 2: Dangerous shadow-piercing selectors (should be blocked)
+ dangerous_selectors = [
+ 'div ::shadow button', # Deprecated shadow piercing
+ 'div /deep/ button', # Deprecated deep combinator
+ 'div >>> button', # Deep combinator
+ ]
+
+ for selector in dangerous_selectors:
+ try:
+ await shadow_root.find_element_in_shadow(selector)
+ logger.error(f'Dangerous selector was allowed: {selector}')
+ except ValueError:
+ logger.info(f'Blocked dangerous selector: {selector}')
+
+
+async def demo_error_handling():
+ """
+ Demonstration of comprehensive error handling.
+
+ Shows proper error handling patterns for shadow DOM automation.
+ """
+ print('\nError Handling Demo')
+ print('=' * 20)
+
+ async with Chrome() as browser:
+ tab = await browser.start()
+
+ # Test 1: Element without shadow root
+ await tab.go_to(
+ 'data:text/html,<div id="no-shadow">Regular div</div>'
+ )
+
+ try:
+ regular_div = await tab.find(id='no-shadow')
+ await regular_div.get_shadow_root()
+ logger.error('Should have thrown NoShadowRootAttached')
+ except NoShadowRootAttached:
+ logger.info('Correctly detected element without shadow root')
+
+ # Test 2: Shadow root invalidation
+ await tab.go_to(
+ 'data:text/html,'
+ ''
+ ''
+ )
+
+ try:
+ shadow_host = await tab.find(id='shadow-host')
+ shadow_root = await shadow_host.get_shadow_root()
+
+ # Manually invalidate the shadow root
+ shadow_root.invalidate()
+
+ # Try to use invalidated shadow root
+ await shadow_root.find_element_in_shadow('p')
+ logger.error('Should have thrown InvalidShadowRoot')
+ except InvalidShadowRoot:
+ logger.info('Correctly detected invalidated shadow root')
+
+
+async def demo_practical_example():
+ """
+ Practical example: Automating a custom web component.
+
+ Real-world scenario demonstrating shadow DOM automation.
+ """
+ print('\nPractical Example: Custom Form Component')
+ print('=' * 40)
+
+ async with Chrome() as browser:
+ tab = await browser.start()
+
+ # Create a realistic custom form component
+ form_html = """
+
+
+
+
+
+
+
+
+
+ """
+
+ await tab.go_to(f'data:text/html,{form_html}')
+
+ try:
+ # Access the custom form component
+ form_component = await tab.find(id='registration-form')
+ form_shadow = await form_component.get_shadow_root()
+ logger.info('Accessed custom form shadow root')
+
+ # Fill out the form within shadow DOM
+ username_input = await form_shadow.find_element_in_shadow('.username-input')
+ await username_input.type_text('john_doe')
+
+ email_input = await form_shadow.find_element_in_shadow('.email-input')
+ await email_input.type_text('john@example.com')
+
+ password_input = await form_shadow.find_element_in_shadow('.password-input')
+ await password_input.type_text('securepassword123')
+
+ logger.info('Filled form fields in shadow DOM')
+
+ # Submit the form
+ submit_button = await form_shadow.find_element_in_shadow('.submit-btn')
+ await submit_button.click()
+
+ logger.info('Successfully automated custom form component')
+
+ # Wait a moment for any JavaScript to execute
+ await asyncio.sleep(1)
+
+ except Exception as e:
+ logger.error(f'Error in practical example: {e}')
+
+
+async def main():
+ """
+ Main function demonstrating all Shadow DOM features.
+ """
+ print('Pydoll Shadow DOM Security Demo')
+ print('=' * 32)
+ print('This demo showcases secure Shadow DOM automation with pydoll')
+ print('including security features, error handling, and best practices.\n')
+
+ try:
+ await demo_basic_shadow_dom_access()
+ await demo_closed_shadow_dom()
+ await demo_nested_shadow_dom()
+ await demo_security_features()
+ await demo_error_handling()
+ await demo_practical_example()
+
+ print('\nAll Shadow DOM demos completed successfully!')
+ print('\nKey Security Features Demonstrated:')
+ print('• Safe shadow root access with validation')
+ print('• Selector injection prevention')
+ print('• Proper error handling and boundaries')
+ print('• Support for open and closed shadow roots')
+ print('• Nested shadow DOM navigation')
+ print('• Real-world component automation')
+
+ except Exception as e:
+ logger.error(f'Demo failed with error: {e}')
+ raise
+
+
+if __name__ == '__main__':
+ asyncio.run(main())
diff --git a/poetry.lock b/poetry.lock
index 47be9127..0c657a8b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1600,7 +1600,6 @@ files = [
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
]
-markers = {main = "python_version == \"3.10\""}
[[package]]
name = "urllib3"
@@ -1859,4 +1858,4 @@ propcache = ">=0.2.0"
[metadata]
lock-version = "2.1"
python-versions = "^3.10"
-content-hash = "ca2aad7a2be010532bea53fedd96a7c04a62ba55e048e9298dbfe73bf5f7d773"
+content-hash = "9176d80305e6871639269cd1b5c13fa43135ef28381d8cb257c76f0cc4b796c3"
diff --git a/pydoll/connection/connection_handler.py b/pydoll/connection/connection_handler.py
index fb0a9e62..810e406c 100644
--- a/pydoll/connection/connection_handler.py
+++ b/pydoll/connection/connection_handler.py
@@ -15,7 +15,7 @@
)
import websockets
-from websockets.legacy.client import Connect, WebSocketClientProtocol
+from websockets.legacy.client import WebSocketClientProtocol
from pydoll.connection.managers import CommandsManager, EventsManager
from pydoll.exceptions import (
@@ -44,7 +44,7 @@ def __init__(
connection_port: int,
page_id: Optional[str] = None,
ws_address_resolver: Callable[[int], Coroutine[Any, Any, str]] = get_browser_ws_address,
- ws_connector: type[Connect] = websockets.connect,
+ ws_connector: Any = websockets.connect,
):
"""
Initialize connection handler.
diff --git a/pydoll/elements/__init__.py b/pydoll/elements/__init__.py
index e69de29b..6de8f03d 100644
--- a/pydoll/elements/__init__.py
+++ b/pydoll/elements/__init__.py
@@ -0,0 +1,16 @@
+"""
+Pydoll Elements Module
+
+This module provides classes for interacting with DOM elements and shadow DOM.
+Includes security-focused implementations for element finding and manipulation.
+"""
+
+# ShadowRoot is imported first; it references WebElement only under TYPE_CHECKING,
+# so this ordering does not require WebElement to be loaded beforehand
+from pydoll.elements.shadow_root import ShadowRoot
+from pydoll.elements.web_element import WebElement
+
+__all__ = [
+ 'WebElement',
+ 'ShadowRoot',
+]
diff --git a/pydoll/elements/shadow_root.py b/pydoll/elements/shadow_root.py
new file mode 100644
index 00000000..58a642cc
--- /dev/null
+++ b/pydoll/elements/shadow_root.py
@@ -0,0 +1,338 @@
+"""
+Shadow DOM implementation for secure element access within shadow trees.
+
+This module provides ShadowRoot class that encapsulates shadow DOM operations
+while maintaining security boundaries and proper error handling.
+"""
+
+from typing import TYPE_CHECKING, Any, Dict, Optional
+
+from pydoll.commands import DomCommands
+from pydoll.connection import ConnectionHandler
+from pydoll.elements.mixins import FindElementsMixin
+from pydoll.exceptions import (
+ ElementNotFound,
+ InvalidShadowRoot,
+)
+
+if TYPE_CHECKING:
+ from pydoll.elements.web_element import WebElement
+
+
+class ShadowRoot(FindElementsMixin):
+    """
+    Represents a shadow root for secure shadow DOM traversal.
+
+    Provides element finding capabilities within shadow DOM boundaries
+    while respecting shadow DOM encapsulation and security models.
+
+    Safeguards actually enforced:
+    - Rejects any operation once invalidate() has been called
+    - Validates the object ID and mode ('open'/'closed') on construction
+    - Rejects selectors containing '::shadow', '/deep/' or '>>>'
+    - Records the mode, but does not restrict access to closed roots
+    """
+
+    def __init__(
+        self,
+        shadow_root_object_id: str,
+        connection_handler: ConnectionHandler,
+        mode: str = 'open',
+        host_element: Optional['WebElement'] = None,
+    ):
+        """
+        Initialize shadow root wrapper with security validation.
+
+        Args:
+            shadow_root_object_id: CDP object ID for the shadow root node
+            connection_handler: Browser connection for CDP commands
+            mode: Shadow root mode ("open" or "closed")
+            host_element: Optional reference to shadow host element
+
+        Raises:
+            InvalidShadowRoot: If shadow root configuration is invalid
+        """
+        self._validate_shadow_root_config(shadow_root_object_id, mode)
+
+        self._shadow_root_object_id = shadow_root_object_id
+        self._connection_handler = connection_handler
+        self._mode = mode
+        self._host_element = host_element
+        self._is_valid = True
+
+    @property
+    def mode(self) -> str:
+        """Shadow root mode ('open' or 'closed')."""
+        return self._mode
+
+    @property
+    def is_open(self) -> bool:
+        """Whether this shadow root is in open mode."""
+        return self._mode == 'open'
+
+    @property
+    def is_closed(self) -> bool:
+        """Whether this shadow root is in closed mode."""
+        return self._mode == 'closed'
+
+    @property
+    def host_element(self) -> Optional['WebElement']:
+        """Reference to the shadow host element, if available."""
+        return self._host_element
+
+    async def find_element_in_shadow(
+        self,
+        selector: str,
+        method: str = 'css',
+        timeout: int = 10,
+        raise_exc: bool = True,
+    ) -> Optional['WebElement']:
+        """
+        Find single element within this shadow root.
+
+        Args:
+            selector: Element selector (CSS or XPath)
+            method: Selection method ("css" or "xpath")
+            timeout: Accepted for API symmetry; currently unused (no waiting or retry)
+            raise_exc: Whether to raise exception if not found
+
+        Returns:
+            WebElement if found, None if not found and raise_exc=False
+
+        Raises:
+            InvalidShadowRoot: If the shadow root has been invalidated
+            ValueError: If selector is empty or contains a prohibited pattern
+            ElementNotFound: If element not found and raise_exc=True
+
+        Notes:
+            - Only method='css' is scoped to this root; 'xpath' currently never matches
+            - Any other lookup failure becomes ElementNotFound (None if raise_exc=False)
+        """
+        self._ensure_shadow_root_accessible()
+        safe_selector = self._sanitize_selector(selector, method)
+        try:
+            return await self._find_in_shadow_context(safe_selector, method, timeout, raise_exc)
+        except ElementNotFound:
+            # Already carries the final message; wrapping it again would duplicate it
+            raise
+        except Exception as e:
+            if raise_exc:
+                raise ElementNotFound(f"Element '{selector}' not found in shadow root: {e}") from e
+            return None
+
+    async def find_elements_in_shadow(
+        self,
+        selector: str,
+        method: str = 'css',
+        timeout: int = 10,
+    ) -> list['WebElement']:
+        """
+        Find multiple elements within this shadow root.
+
+        Args:
+            selector: Element selector (CSS or XPath)
+            method: Selection method ("css" or "xpath")
+            timeout: Accepted for API symmetry; currently unused
+
+        Returns:
+            List of WebElements; currently always empty (lookup not implemented)
+
+        Raises:
+            InvalidShadowRoot: If the shadow root has been invalidated
+        """
+        self._ensure_shadow_root_accessible()
+        safe_selector = self._sanitize_selector(selector, method)
+
+        return await self._find_multiple_in_shadow_context(safe_selector, method, timeout)
+
+    async def get_shadow_root_content(self) -> str:
+        """
+        Get HTML content of the shadow root.
+
+        Returns:
+            HTML string of shadow root content
+
+        Raises:
+            InvalidShadowRoot: If the shadow root has been invalidated
+
+        Note:
+            The outerHTML reported by the browser is returned unmodified;
+            no filtering or escaping is applied.
+        """
+        self._ensure_shadow_root_accessible()
+
+        command = DomCommands.get_outer_html(object_id=self._shadow_root_object_id)
+        response: Dict[str, Any] = await self._connection_handler.execute_command(command)
+        return response['result']['outerHTML']
+
+    def invalidate(self):
+        """
+        Mark this shadow root as invalid.
+
+        Intended for when the shadow root is no longer accessible,
+        such as when the host element is removed from DOM.
+
+        Security Note:
+            Prevents use of stale shadow root references which
+            could lead to unexpected behavior or security issues.
+        """
+        self._is_valid = False
+
+    def _ensure_shadow_root_accessible(self):
+        """
+        Ensure this shadow root has not been invalidated.
+
+        Raises:
+            InvalidShadowRoot: If invalidate() has been called on this root.
+                Closed-mode roots are currently not restricted any further.
+        """
+        if not self._is_valid:
+            raise InvalidShadowRoot('Shadow root has been invalidated')
+
+        # For closed shadow roots, access should be more restricted
+        # In practice, if we have the object_id, the root is accessible
+        # but we maintain the security boundary concept
+        if self.is_closed:
+            # In a real implementation, you might want additional
+            # access controls for closed shadow roots
+            pass
+
+    @staticmethod
+    def _validate_shadow_root_config(object_id: str, mode: str):
+        """
+        Validate shadow root configuration for security.
+
+        Args:
+            object_id: Shadow root object ID
+            mode: Shadow root mode
+
+        Raises:
+            InvalidShadowRoot: If configuration is invalid
+        """
+        if not object_id or not isinstance(object_id, str):
+            raise InvalidShadowRoot('Invalid shadow root object ID')
+
+        if mode not in {'open', 'closed'}:
+            raise InvalidShadowRoot(f'Invalid shadow root mode: {mode}')
+
+    @staticmethod
+    def _sanitize_selector(selector: str, method: str) -> str:
+        """
+        Sanitize selector input to prevent injection attacks.
+
+        Args:
+            selector: Raw selector string
+            method: Selection method
+
+        Returns:
+            Sanitized selector string
+
+        Raises:
+            ValueError: If selector is empty, not a str, or has a prohibited pattern
+        """
+        if not selector or not isinstance(selector, str):
+            raise ValueError('Selector must be a non-empty string')
+
+        # Only surrounding whitespace is stripped; nothing else is rewritten.
+        # Selectors with prohibited patterns are rejected below, not cleaned.
+        sanitized = selector.strip()
+
+        # Prevent attempts to escape shadow boundary
+        dangerous_patterns = [
+            '::shadow',  # Deprecated shadow piercing
+            '/deep/',  # Deprecated deep combinator
+            '>>>',  # Deep combinator
+        ]
+
+        for pattern in dangerous_patterns:
+            if pattern in sanitized.lower():
+                raise ValueError(f'Selector contains prohibited pattern: {pattern}')
+
+        return sanitized
+
+    async def _find_in_shadow_context(
+        self, selector: str, method: str, timeout: int, raise_exc: bool
+    ) -> Optional['WebElement']:
+        """
+        Internal method to find element within shadow root context.
+
+        Only method='css' is scoped to the shadow root (DOM.querySelector on its
+        node). 'xpath' issues a search whose result is never read; timeout is unused.
+        """
+        if method == 'css':
+            # First we need to get the node_id from the object_id
+            request_command = DomCommands.request_node(object_id=self._shadow_root_object_id)
+            request_response: Dict[str, Any] = await self._connection_handler.execute_command(
+                request_command
+            )
+            node_id = request_response['result']['nodeId']
+
+            # Use DOM.querySelector with shadow root as context
+            command = DomCommands.query_selector(node_id=node_id, selector=selector)
+        elif method == 'xpath':
+            # NOTE(review): performSearch takes no root node, so this is not shadow-scoped
+            command = DomCommands.perform_search(query=selector, include_user_agent_shadow_dom=True)
+        else:
+            raise ValueError(f'Unsupported selection method: {method}')
+
+        try:
+            response: Dict[str, Any] = await self._connection_handler.execute_command(command)
+
+            if method == 'css':
+                node_id = response['result'].get('nodeId')
+                if node_id:
+                    # Convert node_id to object_id for WebElement
+                    object_command = DomCommands.resolve_node(node_id=node_id)
+                    obj_response: Dict[str, Any] = await self._connection_handler.execute_command(
+                        object_command
+                    )
+                    object_id = obj_response['result']['object']['objectId']
+
+                    # Import here to avoid circular imports
+                    from pydoll.elements.web_element import WebElement  # noqa: PLC0415
+
+                    return WebElement(
+                        object_id=object_id,
+                        connection_handler=self._connection_handler,
+                        method=method,
+                        selector=selector,
+                    )
+                else:
+                    # No element found
+                    if raise_exc:
+                        raise ElementNotFound(f"Element '{selector}' not found in shadow root")
+                    return None
+
+            # Non-CSS methods: the search response above is never inspected
+            if raise_exc:
+                raise ElementNotFound(f"Element '{selector}' not found in shadow root")
+            return None
+
+        except ElementNotFound:
+            # Re-raise ElementNotFound as-is
+            raise
+        except Exception as e:
+            if raise_exc:
+                raise ElementNotFound(f"Element '{selector}' not found in shadow root: {e}") from e
+            return None
+
+    async def _find_multiple_in_shadow_context(  # noqa: PLR6301
+        self, selector: str, method: str, timeout: int
+    ) -> list['WebElement']:
+        """
+        Internal method to find multiple elements within shadow root context.
+        """
+        # NOTE(review): placeholder - no CDP command is issued and every call
+        # returns an empty list, so find_elements_in_shadow() never matches.
+        # A real implementation would mirror _find_in_shadow_context using a
+        # multi-element query such as querySelectorAll.
+        return []
+
+    def __repr__(self) -> str:
+        """String representation for debugging."""
+        status = 'valid' if self._is_valid else 'invalid'
+        return f'ShadowRoot(mode={self._mode}, status={status})'
+
+    def __str__(self) -> str:
+        """User-friendly string representation."""
+        return f'ShadowRoot({self._mode} mode)'
diff --git a/pydoll/elements/web_element.py b/pydoll/elements/web_element.py
index 8da11531..a2648c9f 100644
--- a/pydoll/elements/web_element.py
+++ b/pydoll/elements/web_element.py
@@ -1,6 +1,6 @@
import asyncio
import json
-from typing import Optional
+from typing import TYPE_CHECKING, Any, Dict, Optional
import aiofiles
from bs4 import BeautifulSoup
@@ -26,6 +26,8 @@
ElementNotAFileInput,
ElementNotInteractable,
ElementNotVisible,
+ NoShadowRootAttached,
+ ShadowRootAccessDenied,
)
from pydoll.protocol.dom.responses import (
GetBoxModelResponse,
@@ -36,6 +38,9 @@
from pydoll.protocol.page.types import Viewport
from pydoll.utils import decode_base64_to_bytes
+if TYPE_CHECKING:
+ from pydoll.elements.shadow_root import ShadowRoot
+
class WebElement(FindElementsMixin): # noqa: PLR0904
"""
@@ -341,6 +346,69 @@ async def press_keyboard_key(
await asyncio.sleep(interval)
await self.key_up(key)
+    async def get_shadow_root(self) -> Optional['ShadowRoot']:
+        """
+        Get the shadow root attached to this element if it exists.
+
+        Returns:
+            ShadowRoot for the first attached shadow root, or None if there is none
+
+        Raises:
+            NoShadowRootAttached: If the element has no object ID or CDP reports its node missing
+            ShadowRootAccessDenied: If the shadow root cannot be described or resolved
+        """
+        # Import here to avoid circular imports
+        from pydoll.elements.shadow_root import ShadowRoot  # noqa: PLC0415
+
+        if not self._object_id:
+            raise NoShadowRootAttached(
+                "Element must have a valid node_id to check for shadow root"
+            )
+
+        try:
+            # Describe this element (DOM.describeNode) to list its shadow roots
+            response: Dict[str, Any] = await self._connection_handler.execute_command(
+                DomCommands.describe_node(
+                    object_id=self._object_id,
+                    depth=1,
+                    pierce=False,  # Respect shadow boundaries
+                )
+            )
+
+            # DOM.describeNode returns its payload under 'node' ('root' is getDocument)
+            node_info = response['result']['node']
+            shadow_root_info = node_info.get('shadowRoots', [])
+
+            if not shadow_root_info:
+                return None
+
+            # Get the first shadow root (elements typically have only one)
+            shadow_root_data = shadow_root_info[0]
+            shadow_root_node_id = shadow_root_data.get('nodeId')
+
+            if not shadow_root_node_id:
+                raise ShadowRootAccessDenied("Shadow root found but no nodeId available")
+
+            # Resolve the shadow root to get its object ID
+            resolve_response: Dict[str, Any] = await self._connection_handler.execute_command(
+                DomCommands.resolve_node(node_id=shadow_root_node_id)
+            )
+            shadow_root_object_id = resolve_response['result']['object']['objectId']
+
+            # Create ShadowRoot instance with security validation
+            return ShadowRoot(
+                shadow_root_object_id=shadow_root_object_id,
+                connection_handler=self._connection_handler,
+                mode=shadow_root_data.get('shadowRootType', 'open'),
+                host_element=self,
+            )
+        except ShadowRootAccessDenied:
+            raise
+        except Exception as e:
+            if "No node with given id found" in str(e):
+                raise NoShadowRootAttached(f"Element node not found: {e}") from e
+            raise ShadowRootAccessDenied(f"Failed to access shadow root: {e}") from e
+
async def _click_option_tag(self):
"""Specialized method for clicking