Building a Proxy-Aware Web Testing Framework
Standard web testing frameworks test from a single location. A proxy-aware framework tests your website from multiple geographic locations, through different proxy types, and under various network conditions. This catches geo-restriction bugs, CDN configuration errors, localization issues, and proxy compatibility problems that single-location tests miss.
Use Cases
- Geo-restriction testing — verify content access rules by country
- Proxy compatibility — ensure your app works through corporate proxies
- CDN verification — confirm content is served from the correct edge
- Localization QA — verify language and currency detection
- Performance testing — measure load times from different regions
- Ad verification — confirm ads display correctly per market
- SEO testing — check search results and hreflang tags by location
Framework Design
import asyncio
import httpx
import json
import time
import os
from dataclasses import dataclass, field, asdict
from typing import List, Dict, Optional, Any, Callable
from enum import Enum
from datetime import datetime
import logging
logger = logging.getLogger(__name__)
class TestStatus(Enum):
    """Outcome of running one test case through one proxy.

    The string values are what the HTML report uses both as the CSS class
    name and (upper-cased) as the status label.
    """

    PASSED = "passed"    # request completed and every assertion held
    FAILED = "failed"    # request completed but at least one assertion failed
    SKIPPED = "skipped"  # counted by the suite; never assigned by the runner itself
    ERROR = "error"      # the request (or assertion evaluation) raised
@dataclass
class ProxyConfig:
    """Connection details and descriptive labels for a single proxy."""

    name: str  # human-readable label shown in logs and reports
    url: str  # proxy URL handed to the HTTP client
    country: str = ""  # country label; test filters compare against this
    proxy_type: str = "http"  # http, socks5, residential, datacenter
    # A default_factory gives every instance its own list (no shared state).
    tags: List[str] = field(default_factory=list)
@dataclass
class TestResult:
    """Result of one test case executed through one proxy."""

    test_name: str  # name the test was registered under
    proxy: str  # name of the proxy the request went through
    proxy_country: str  # country label copied from the proxy config
    status: TestStatus
    message: str = ""  # failure or error description; empty on success
    duration_ms: int = 0  # wall-clock time of the request in milliseconds
    response_code: int = 0  # HTTP status code; stays 0 if no response arrived
    # One dict per evaluated check, with 'name', 'type', 'passed', 'message'.
    assertions: List[Dict] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
@dataclass
class TestSuiteResult:
    """Aggregated counters and individual results for one suite run."""

    name: str  # suite title, used as the report heading
    total: int = 0  # number of recorded results
    passed: int = 0
    failed: int = 0
    errors: int = 0
    skipped: int = 0
    duration_seconds: float = 0  # wall-clock duration of the whole run
    results: List[TestResult] = field(default_factory=list)
    timestamp: str = ""  # ISO-8601 string set when the run starts
class Assertion:
    """Fluent builder that collects named checks to run against a response.

    Each builder method appends one check description to ``self.checks`` (a
    dict with a ``'type'`` key plus check-specific fields) and returns
    ``self`` so calls can be chained, e.g.
    ``Assertion("basic").status_code(200).response_time_under(5000)``.
    The builder only records checks; it does not evaluate them.
    """

    def __init__(self, name: str):
        self.name = name
        self.checks: List[Dict] = []

    def _add(self, check_type: str, **fields: Any) -> "Assertion":
        """Record one check and return ``self`` for chaining."""
        # 'type' goes first so the recorded dict keeps its original key order.
        self.checks.append({'type': check_type, **fields})
        return self

    def status_code(self, expected: int) -> "Assertion":
        """Require the response status code to equal ``expected``."""
        return self._add('status_code', expected=expected)

    def contains_text(self, text: str) -> "Assertion":
        """Require ``text`` to appear in the response body."""
        return self._add('contains_text', expected=text)

    def not_contains_text(self, text: str) -> "Assertion":
        """Require ``text`` to be absent from the response body."""
        return self._add('not_contains_text', expected=text)

    def header_equals(self, header: str, value: str) -> "Assertion":
        """Require response header ``header`` to equal ``value``."""
        return self._add('header_equals', header=header, expected=value)

    def header_exists(self, header: str) -> "Assertion":
        """Require response header ``header`` to be present."""
        return self._add('header_exists', header=header)

    def redirects_to(self, url_pattern: str) -> "Assertion":
        """Require the final URL to contain ``url_pattern``."""
        return self._add('redirects_to', expected=url_pattern)

    def response_time_under(self, ms: int) -> "Assertion":
        """Require the request to finish within ``ms`` milliseconds."""
        return self._add('response_time', max_ms=ms)

    def title_contains(self, text: str) -> "Assertion":
        """Require the HTML ``<title>`` to contain ``text``."""
        return self._add('title_contains', expected=text)

    def json_field(self, path: str, expected: Any) -> "Assertion":
        """Require the JSON value at dotted ``path`` to equal ``expected``."""
        return self._add('json_field', path=path, expected=expected)
class ProxyTestRunner:
    """Runs registered HTTP test cases through multiple proxies.

    Tests are registered with :meth:`add_test`; :meth:`run` then executes
    every (test, proxy) combination allowed by the test's ``proxy_filter``,
    at most ``concurrency`` at a time, and returns an aggregated
    ``TestSuiteResult``.
    """

    def __init__(
        self,
        proxies: List[ProxyConfig],
        timeout: int = 30,
        concurrency: int = 5,
    ):
        """Store configuration.

        Args:
            proxies: Proxies every test is run through (subject to filters).
            timeout: Timeout value passed to the HTTP client.
            concurrency: Maximum number of test runs in flight at once.
        """
        self.proxies = proxies
        self.timeout = timeout
        self.concurrency = concurrency
        self.tests: List[Dict] = []
        self.before_hooks: List[Callable] = []
        self.after_hooks: List[Callable] = []

    def add_test(
        self,
        name: str,
        url: str,
        method: str = "GET",
        headers: Optional[dict] = None,
        assertions: Optional[List[Assertion]] = None,
        proxy_filter: Optional[Callable] = None,
        data: Any = None,
    ) -> None:
        """Register a test case.

        Args:
            name: Label used in logs and results.
            url: URL to request.
            method: HTTP method.
            headers: Extra request headers; they override the default
                User-Agent on key collision.
            assertions: Assertion builders evaluated against the response.
            proxy_filter: Optional predicate taking a ``ProxyConfig``; the
                test runs only through proxies for which it returns truthy.
            data: JSON body sent with non-GET requests.
        """
        self.tests.append({
            'name': name,
            'url': url,
            'method': method,
            'headers': headers or {},
            'assertions': assertions or [],
            'proxy_filter': proxy_filter,
            'data': data,
        })

    def before_each(self, hook: Callable) -> None:
        """Register ``hook(test, proxy)`` to run before every test run."""
        self.before_hooks.append(hook)

    def after_each(self, hook: Callable) -> None:
        """Register ``hook(test, proxy, result)`` to run after every run."""
        self.after_hooks.append(hook)

    async def run(self, suite_name: str = "Test Suite") -> TestSuiteResult:
        """Execute all registered tests and return the aggregated result.

        Exceptions that escape a single run (for example a failing hook) are
        recorded as ``ERROR`` results for the corresponding test and proxy
        instead of being dropped, so ``total`` always equals
        ``len(results)``.
        """
        from datetime import timezone

        suite = TestSuiteResult(
            name=suite_name,
            # Timezone-aware UTC; datetime.utcnow() is deprecated and naive.
            timestamp=datetime.now(timezone.utc).isoformat(),
        )
        start = time.monotonic()
        semaphore = asyncio.Semaphore(self.concurrency)

        # (test, proxy) pairs, index-aligned with the gather() results below.
        jobs = []
        for test in self.tests:
            proxy_filter = test['proxy_filter']
            for proxy in self.proxies:
                if proxy_filter and not proxy_filter(proxy):
                    continue
                jobs.append((test, proxy))

        outcomes = await asyncio.gather(
            *(self._run_single(test, proxy, semaphore) for test, proxy in jobs),
            return_exceptions=True,
        )

        for (test, proxy), outcome in zip(jobs, outcomes):
            if isinstance(outcome, BaseException):
                # Keep the failure visible in the report rather than only
                # bumping a counter.
                outcome = TestResult(
                    test_name=test['name'],
                    proxy=proxy.name,
                    proxy_country=proxy.country,
                    status=TestStatus.ERROR,
                    message=f"{type(outcome).__name__}: {outcome}"[:200],
                )
            suite.results.append(outcome)
            suite.total += 1
            if outcome.status == TestStatus.PASSED:
                suite.passed += 1
            elif outcome.status == TestStatus.FAILED:
                suite.failed += 1
            elif outcome.status == TestStatus.ERROR:
                suite.errors += 1
            elif outcome.status == TestStatus.SKIPPED:
                suite.skipped += 1

        suite.duration_seconds = round(time.monotonic() - start, 1)
        return suite

    @staticmethod
    async def _call_hook(hook: Callable, *args: Any) -> None:
        """Invoke a hook, awaiting its result only if it is awaitable."""
        import inspect

        outcome = hook(*args)
        if inspect.isawaitable(outcome):
            await outcome

    async def _run_single(
        self,
        test: Dict,
        proxy: ProxyConfig,
        semaphore: asyncio.Semaphore,
    ) -> TestResult:
        """Run one test through one proxy and return its result.

        Request and assertion errors are captured as an ``ERROR`` result;
        exceptions raised by hooks propagate to the caller.
        """
        async with semaphore:
            result = TestResult(
                test_name=test['name'],
                proxy=proxy.name,
                proxy_country=proxy.country,
                status=TestStatus.PASSED,
            )

            # Run before hooks
            for hook in self.before_hooks:
                await self._call_hook(hook, test, proxy)

            start = time.monotonic()
            try:
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                    'AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36',
                }
                headers.update(test['headers'])

                request_kwargs: Dict[str, Any] = {'headers': headers}
                # GET never carries a body; every other method sends the
                # JSON payload when one was registered (previously only POST).
                if test['method'] != 'GET' and test.get('data') is not None:
                    request_kwargs['json'] = test['data']

                async with httpx.AsyncClient(
                    proxy=proxy.url,
                    timeout=self.timeout,
                    follow_redirects=True,
                ) as client:
                    response = await client.request(
                        test['method'], test['url'], **request_kwargs
                    )
                    result.duration_ms = int(
                        (time.monotonic() - start) * 1000
                    )

                result.response_code = response.status_code

                # Run assertions
                for assertion in test['assertions']:
                    assertion_results = self._check_assertions(
                        assertion, response, result.duration_ms
                    )
                    result.assertions.extend(assertion_results)
                    for ar in assertion_results:
                        if not ar['passed']:
                            result.status = TestStatus.FAILED
                            result.message = ar['message']
            except Exception as e:
                result.status = TestStatus.ERROR
                result.message = str(e)[:200]
                result.duration_ms = int(
                    (time.monotonic() - start) * 1000
                )

            # Run after hooks
            for hook in self.after_hooks:
                await self._call_hook(hook, test, proxy, result)

            # Log result
            status_icon = {
                TestStatus.PASSED: "PASS",
                TestStatus.FAILED: "FAIL",
                TestStatus.ERROR: "ERR ",
            }.get(result.status, "SKIP")
            print(
                f" [{status_icon}] {test['name']} "
                f"via {proxy.name} ({proxy.country}) "
                f"— {result.duration_ms}ms"
                + (f" — {result.message}" if result.message else "")
            )
            return result

    def _check_assertions(
        self,
        assertion: Assertion,
        response: httpx.Response,
        duration_ms: int,
    ) -> List[Dict]:
        """Evaluate every check of ``assertion`` against ``response``.

        Returns one dict per check with the keys ``'name'``, ``'type'``,
        ``'passed'`` and ``'message'`` (empty when the check passed).
        """
        results = []
        for check in assertion.checks:
            message = self._evaluate_check(check, response, duration_ms)
            results.append({
                'name': assertion.name,
                'type': check['type'],
                'passed': not message,
                'message': message,
            })
        return results

    @staticmethod
    def _evaluate_check(check: Dict, response: Any, duration_ms: int) -> str:
        """Return a failure message for ``check``, or ``''`` if it passes.

        Unknown check types are treated as passing.
        """
        kind = check['type']

        if kind == 'status_code':
            if response.status_code != check['expected']:
                return (
                    f"Expected status {check['expected']}, "
                    f"got {response.status_code}"
                )
        elif kind == 'contains_text':
            if check['expected'] not in response.text:
                return f"Text '{check['expected']}' not found in response"
        elif kind == 'not_contains_text':
            if check['expected'] in response.text:
                return f"Text '{check['expected']}' should not be in response"
        elif kind == 'header_equals':
            actual = response.headers.get(check['header'], '')
            if actual != check['expected']:
                return (
                    f"Header {check['header']}: "
                    f"expected '{check['expected']}', got '{actual}'"
                )
        elif kind == 'header_exists':
            if check['header'] not in response.headers:
                return f"Header {check['header']} not present"
        elif kind == 'redirects_to':
            final_url = str(response.url)
            if check['expected'] not in final_url:
                return (
                    f"Expected redirect to '{check['expected']}', "
                    f"got '{final_url}'"
                )
        elif kind == 'response_time':
            if duration_ms > check['max_ms']:
                return (
                    f"Response time {duration_ms}ms "
                    f"exceeds {check['max_ms']}ms limit"
                )
        elif kind == 'title_contains':
            # Imported lazily so the dependency is only needed for this check.
            from selectolax.parser import HTMLParser
            tree = HTMLParser(response.text)
            title = tree.css_first('title')
            title_text = title.text() if title else ""
            if check['expected'] not in title_text:
                return (
                    f"Title '{title_text[:50]}' does not contain "
                    f"'{check['expected']}'"
                )
        elif kind == 'json_field':
            try:
                value = response.json()
            except ValueError as e:
                return f"JSON parse error: {e}"
            try:
                for part in check['path'].split('.'):
                    # Numeric segments index into lists, e.g. "items.0.id".
                    if isinstance(value, list):
                        value = value[int(part)]
                    else:
                        value = value[part]
            except (KeyError, IndexError, TypeError, ValueError) as e:
                # A missing path is a lookup failure, not a parse error.
                return f"JSON path '{check['path']}' not found: {e!r}"
            if value != check['expected']:
                return (
                    f"JSON {check['path']}: "
                    f"expected '{check['expected']}', got '{value}'"
                )
        return ''
def generate_report(suite: "TestSuiteResult", filepath: Optional[str] = None):
    """Generate an HTML report from test results.

    Every interpolated text value is HTML-escaped: test names, proxy labels
    and failure messages can contain markup (messages quote the expected
    text and response-derived values), which would otherwise break or inject
    into the report.

    Args:
        suite: Suite result providing ``name``, the counters, ``timestamp``
            and ``results``.
        filepath: When given, the report is also written there as UTF-8.

    Returns:
        The complete HTML document as a string.
    """
    from html import escape

    suite_name = escape(str(suite.name))

    head = f"""<!DOCTYPE html>
<html>
<head>
<title>Test Report: {suite_name}</title>
<style>
body {{ font-family: monospace; background: #1a1a2e; color: #eee; padding: 20px; }}
.summary {{ background: #16213e; padding: 15px; border-radius: 6px; margin-bottom: 20px; }}
.passed {{ color: #4CAF50; }}
.failed {{ color: #f44336; }}
.error {{ color: #ff9800; }}
table {{ width: 100%; border-collapse: collapse; }}
th, td {{ padding: 8px 12px; text-align: left; border-bottom: 1px solid #333; }}
th {{ background: #0f3460; }}
</style>
</head>
<body>
<h1>Test Report: {suite_name}</h1>
<div class="summary">
<p>Total: {suite.total} |
<span class="passed">Passed: {suite.passed}</span> |
<span class="failed">Failed: {suite.failed}</span> |
<span class="error">Errors: {suite.errors}</span></p>
<p>Duration: {suite.duration_seconds}s | {escape(str(suite.timestamp))}</p>
</div>
<table>
<tr><th>Test</th><th>Proxy</th><th>Country</th><th>Status</th><th>Time</th><th>Message</th></tr>
"""

    rows = []
    for r in suite.results:
        status = str(r.status.value)  # doubles as the CSS class name
        rows.append(f""" <tr>
<td>{escape(str(r.test_name))}</td>
<td>{escape(str(r.proxy))}</td>
<td>{escape(str(r.proxy_country))}</td>
<td class="{escape(status)}">{escape(status.upper())}</td>
<td>{r.duration_ms}ms</td>
<td>{escape(str(r.message)[:80])}</td>
</tr>\n""")

    tail = """ </table>
</body>
</html>"""

    # Join once instead of growing the string row by row.
    document = head + "".join(rows) + tail

    if filepath:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(document)
        print(f"Report saved to {filepath}")
    return document


# Usage Example
async def main():
    """Run the example geo-restriction suite and export the results.

    Builds four example proxies, registers five tests, runs them, then
    writes ``test_report.html`` and ``test_results.json`` and prints a
    summary of failures.
    """
    proxies = [
        ProxyConfig("US Residential", "http://user:pass@us.proxy.com:8080", "US", "residential"),
        ProxyConfig("UK Residential", "http://user:pass@uk.proxy.com:8080", "GB", "residential"),
        ProxyConfig("DE Datacenter", "http://user:pass@de.proxy.com:8080", "DE", "datacenter"),
        ProxyConfig("JP Residential", "http://user:pass@jp.proxy.com:8080", "JP", "residential"),
    ]
    runner = ProxyTestRunner(proxies, concurrency=4)

    # Test 1: Homepage loads from all locations
    runner.add_test(
        name="Homepage loads",
        url="https://example.com",
        assertions=[
            Assertion("basic").status_code(200).response_time_under(5000),
            Assertion("content").title_contains("Example"),
        ],
    )

    # Test 2: Geo-redirect works correctly
    runner.add_test(
        name="Geo-redirect to local site",
        url="https://example.com",
        assertions=[
            Assertion("redirect").status_code(200),
        ],
        proxy_filter=lambda p: p.country in ("US", "GB"),
    )

    # Test 3: GDPR banner shows in EU
    runner.add_test(
        name="GDPR banner in EU",
        url="https://example.com",
        assertions=[
            Assertion("gdpr").contains_text("cookie"),
        ],
        proxy_filter=lambda p: p.country == "DE",
    )

    # Test 4: GDPR banner hidden in non-EU
    runner.add_test(
        name="No GDPR banner outside EU",
        url="https://example.com",
        assertions=[
            Assertion("no-gdpr").not_contains_text("cookie consent"),
        ],
        proxy_filter=lambda p: p.country in ("US", "JP"),
    )

    # Test 5: API responds from all locations
    runner.add_test(
        name="API health check",
        url="https://api.example.com/health",
        assertions=[
            Assertion("api").status_code(200).response_time_under(2000),
        ],
    )

    # Run all tests
    suite = await runner.run("Geo-Restriction Tests")

    # Generate report
    generate_report(suite, "test_report.html")

    # Export JSON results
    def _json_default(value):
        # asdict() leaves enum members (TestResult.status) untouched and the
        # json module cannot serialize them, so emit their string value.
        if isinstance(value, Enum):
            return value.value
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    with open("test_results.json", "w", encoding="utf-8") as f:
        json.dump(asdict(suite), f, indent=2, default=_json_default)

    # Summary
    print(f"\n{'='*60}")
    print(f"RESULTS: {suite.passed}/{suite.total} passed")
    if suite.failed > 0:
        print("FAILURES:")
        for r in suite.results:
            if r.status == TestStatus.FAILED:
                print(f" - {r.test_name} via {r.proxy}: {r.message}")


# Guarded so that importing this module does not start a network test run.
if __name__ == "__main__":
    asyncio.run(main())


# Integration with Playwright
For full browser testing through proxies, extend the framework with Playwright:
# Requires: pip install playwright
# Run: playwright install chromium
async def browser_test_through_proxy(
    url: str,
    proxy_url: str,
    screenshot_path: Optional[str] = None,
):
    """Load ``url`` in headless Chromium through a proxy and summarize it.

    Args:
        url: Page to open; navigation waits for the "networkidle" state.
        proxy_url: Proxy server passed to Chromium as ``{"server": ...}``.
        screenshot_path: When given, a full-page screenshot is saved there.

    Returns:
        A dict with the page ``title``, the ``content_length`` of the
        rendered HTML, and the ``screenshot`` path (or ``None``).
    """
    # Imported lazily so Playwright is only required for browser tests.
    from playwright.async_api import async_playwright

    # NOTE(review): credentials embedded in proxy_url may need to be passed
    # as separate "username"/"password" proxy settings — confirm against the
    # Playwright documentation.
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            proxy={"server": proxy_url},
        )
        try:
            page = await browser.new_page()
            await page.goto(url, wait_until="networkidle")
            title = await page.title()
            content = await page.content()
            if screenshot_path:
                await page.screenshot(path=screenshot_path, full_page=True)
        finally:
            # Close the browser even when navigation or the screenshot fails.
            await browser.close()

    return {
        "title": title,
        "content_length": len(content),
        "screenshot": screenshot_path,
    }


# Internal Links
- Building a Proxy Checker Tool — validate proxies before testing
- Building a Geo-Targeted Content Checker — content variation analysis
- Creating a Proxy Benchmarking Suite — performance benchmarks
- Anti-Detect Browser Proxy Guides — browser fingerprinting
- Website Testing with Proxies — testing best practices
FAQ
How is this different from standard e2e testing?
Standard e2e tests run from one location — your CI server. A proxy-aware framework tests from multiple geographic locations simultaneously. This catches issues like broken geo-redirects, missing translations, CDN misconfigurations, and geo-restricted content that single-location tests miss.
Which proxy type should I use for testing?
Use residential proxies for the most realistic testing. Datacenter proxies may trigger different behavior from CDNs and security services. For internal testing (corporate proxy compatibility), use the actual proxy infrastructure your users will go through.
Can I integrate this with CI/CD pipelines?
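Yes. Run the test suite in your CI pipeline and fail the build if geo-restriction tests fail (for example, exit with a non-zero status when the suite reports failures or errors). The framework above exports HTML and JSON; to integrate with Jenkins, GitHub Actions, or GitLab CI test reporting, add an exporter that writes the results as JUnit XML. Schedule nightly runs across all target markets.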
How do I test JavaScript-heavy applications?
Use the Playwright integration for full browser testing. Launch a Chromium instance through each proxy, navigate to the URL, and test the fully rendered page. This catches issues with JavaScript-based geo-detection and dynamic content loading.
How many proxy locations should I test from?
Test from every country where you have users or serve content. At minimum, test from your top 3-5 markets plus one “rest of world” proxy. Add countries as you expand. For compliance testing (GDPR, CCPA), test from each jurisdiction where regulations apply.
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a Proxy Rotator in Python: Complete Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Bandwidth Optimization for Proxies: Reduce Costs & Increase Speed
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a Proxy Rotator in Python: Complete Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Bandwidth Optimization for Proxies: Reduce Costs & Increase Speed
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a News Crawler in Python: Step-by-Step Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Azure Functions for Serverless Web Scraping: the Complete Guide
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
Related Reading
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a News Crawler in Python: Step-by-Step Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Azure Functions for Serverless Web Scraping: the Complete Guide
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)