Building a Geo-Targeted Content Checker
Websites serve different content based on visitor location. Prices change, products appear or disappear, promotions vary, and entire page layouts shift by country. A geo-targeted content checker loads the same URL through proxies in different countries and compares the results — essential for competitive intelligence, ad verification, and compliance testing.
Use Cases
- Price monitoring — detect geographic price discrimination
- Ad verification — confirm ads display correctly in target markets
- SEO auditing — check localized SERP results and hreflang tags
- Compliance — verify content restrictions by jurisdiction (GDPR banners, age gates)
- Competitor analysis — see competitors’ offerings in different markets
- Content localization QA — verify translations and local adaptations
Implementation
import asyncio
import difflib
import hashlib
import json
import re
import time
from dataclasses import asdict, dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional

import httpx
from selectolax.parser import HTMLParser
@dataclass
class GeoProxy:
    """A proxy endpoint whose exit node is pinned to a specific country."""

    country: str       # Human-readable name, e.g. "United States"
    country_code: str  # ISO 3166-1 alpha-2 code, e.g. "US"
    proxy_url: str     # Full proxy URL including credentials, e.g. "http://user:pass@host:port"
    city: str = ""     # Optional city label; informational only in this file
@dataclass
class ContentSnapshot:
    """Everything captured from one URL as seen from one country.

    Populated by GeoContentChecker._fetch_through_proxy(); on failure only
    the identifying fields and `error` are filled in.
    """

    country: str       # Country name copied from the GeoProxy used
    country_code: str  # ISO country code copied from the GeoProxy used
    url: str           # The URL that was requested (pre-redirect)
    status_code: int = 0             # HTTP status; 0 means the request never completed
    title: str = ""                  # <title> text, stripped
    language: str = ""               # lang attribute of the <html> tag, if any
    currency: str = ""               # Best-guess currency code, or "unknown"
    prices: List[str] = field(default_factory=list)        # Price-like strings found in the HTML
    meta_tags: Dict[str, str] = field(default_factory=dict)  # name/property -> content
    visible_text_hash: str = ""      # MD5 of <body> text (scripts/styles/nav/footer removed)
    visible_text_length: int = 0     # Length of that same visible text
    redirected_url: str = ""         # Final URL after following redirects
    headers: Dict[str, str] = field(default_factory=dict)  # Selected geo-relevant response headers
    cookies: Dict[str, str] = field(default_factory=dict)  # Response cookies
    html_length: int = 0             # Raw HTML length in characters
    load_time_ms: int = 0            # Wall-clock request duration in milliseconds
    error: str = ""                  # Truncated exception text when the fetch failed
    custom_data: Dict[str, Any] = field(default_factory=dict)  # Spare slot for caller extensions
@dataclass
class ContentDiff:
    """Pairwise differences between two countries' snapshots of the same URL."""

    country_a: str  # Country name of the first snapshot in the pair
    country_b: str  # Country name of the second snapshot in the pair
    title_differs: bool = False     # <title> text differs between the two
    price_differs: bool = False     # Extracted price lists differ
    language_differs: bool = False  # <html lang="..."> values differ
    redirect_differs: bool = False  # Final (post-redirect) URLs differ
    # 100.0 for byte-identical visible text, otherwise a text-length ratio.
    # Fix: default was the int literal 0 on a float-annotated field; use 0.0
    # so the field's runtime type matches its annotation from construction.
    content_similarity: float = 0.0
    details: List[str] = field(default_factory=list)  # Human-readable diff lines
class GeoContentChecker:
    """Fetch one URL through geo-located proxies and compare the responses.

    Each GeoProxy is assumed to exit in the country it declares. The checker
    sends a country-appropriate Accept-Language header, distills every
    response into a ContentSnapshot, and offers pairwise comparison plus a
    plain-text report.
    """

    # Price-like substrings: symbol-prefixed, currency-code-suffixed, or
    # "Price:"-labeled amounts. Compiled once at class creation instead of
    # on every _extract_prices() call.
    _PRICE_PATTERNS = [
        re.compile(r'[\$\€\£\¥]\s*[\d,]+\.?\d*'),
        re.compile(r'[\d,]+\.?\d*\s*(?:USD|EUR|GBP|JPY|AUD|CAD)'),
        re.compile(r'(?:Price|price|cost)[\s:]*[\$\€\£]?\s*[\d,]+\.?\d*'),
    ]

    # Currency evidence patterns, checked in this order (ties in the counts
    # resolve to the earliest-inserted currency, matching the original dict).
    # Bug fix: a bare "$" now only counts toward USD when it is not part of
    # "A$" or "C$", so AUD/CAD pages are no longer double-counted as USD.
    _CURRENCY_PATTERNS = {
        'USD': [re.compile(p) for p in (r'(?<![AC])\$', r'USD', r'US\s*Dollar')],
        'EUR': [re.compile(p) for p in (r'€', r'EUR', r'Euro')],
        'GBP': [re.compile(p) for p in (r'£', r'GBP', r'Pound')],
        'JPY': [re.compile(p) for p in (r'¥', r'JPY', r'Yen')],
        'AUD': [re.compile(p) for p in (r'A\$', r'AUD')],
        'CAD': [re.compile(p) for p in (r'C\$', r'CAD')],
    }

    # Accept-Language header per country code; hoisted from the method so the
    # dict is built once, not per request.
    _ACCEPT_LANGUAGE = {
        'US': 'en-US,en;q=0.9',
        'GB': 'en-GB,en;q=0.9',
        'DE': 'de-DE,de;q=0.9,en;q=0.5',
        'FR': 'fr-FR,fr;q=0.9,en;q=0.5',
        'JP': 'ja-JP,ja;q=0.9,en;q=0.5',
        'BR': 'pt-BR,pt;q=0.9,en;q=0.5',
        'IN': 'en-IN,hi;q=0.9,en;q=0.8',
        'AU': 'en-AU,en;q=0.9',
        'CA': 'en-CA,en;q=0.9,fr;q=0.5',
    }

    def __init__(
        self,
        proxies: "List[GeoProxy]",
        timeout: int = 30,
    ):
        """
        Args:
            proxies: One GeoProxy per country to check.
            timeout: Per-request timeout in seconds.
        """
        self.proxies = proxies
        self.timeout = timeout

    async def check_url(
        self,
        url: str,
        extract_prices: bool = True,
    ) -> "List[ContentSnapshot]":
        """Fetch ``url`` through every configured proxy concurrently.

        Returns one ContentSnapshot per proxy, in ``self.proxies`` order.
        Per-proxy failures are recorded in ``snapshot.error`` rather than
        raised, so one bad proxy cannot sink the whole run.
        (Fix: removed a dead ``snapshots = []`` assignment that the
        ``gather`` result immediately overwrote.)
        """
        tasks = [
            self._fetch_through_proxy(url, proxy, extract_prices)
            for proxy in self.proxies
        ]
        return list(await asyncio.gather(*tasks))

    async def _fetch_through_proxy(
        self,
        url: str,
        proxy: "GeoProxy",
        extract_prices: bool,
    ) -> "ContentSnapshot":
        """Fetch ``url`` via one proxy and distill the response into a snapshot.

        Never raises: any network or parsing failure is truncated to 200
        characters and stored in ``snapshot.error``.
        """
        snapshot = ContentSnapshot(
            country=proxy.country,
            country_code=proxy.country_code,
            url=url,
        )
        try:
            start = time.monotonic()
            async with httpx.AsyncClient(
                proxy=proxy.proxy_url,
                timeout=self.timeout,
                follow_redirects=True,
            ) as client:
                response = await client.get(url, headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                                  'AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36',
                    'Accept-Language': self._get_accept_language(
                        proxy.country_code
                    ),
                })
                snapshot.load_time_ms = int(
                    (time.monotonic() - start) * 1000
                )
                snapshot.status_code = response.status_code
                snapshot.redirected_url = str(response.url)
                snapshot.html_length = len(response.text)
                # Keep only headers that hint at geo-routing / CDN behavior.
                snapshot.headers = {
                    k: v for k, v in response.headers.items()
                    if k.lower() in (
                        'content-language', 'x-country',
                        'cf-ray', 'server', 'vary'
                    )
                }
                snapshot.cookies = dict(response.cookies)

                tree = HTMLParser(response.text)

                title_tag = tree.css_first('title')
                snapshot.title = title_tag.text(strip=True) if title_tag else ""

                html_tag = tree.css_first('html')
                if html_tag:
                    snapshot.language = html_tag.attributes.get('lang', '')

                # Collect <meta> by name=, falling back to property= (OpenGraph).
                for meta in tree.css('meta'):
                    name = meta.attributes.get(
                        'name', meta.attributes.get('property', '')
                    )
                    content = meta.attributes.get('content', '')
                    if name and content:
                        snapshot.meta_tags[name] = content

                if extract_prices:
                    snapshot.prices = self._extract_prices(response.text)
                    snapshot.currency = self._detect_currency(response.text)

                # Fingerprint the visible text with chrome (scripts, styles,
                # nav, footer) stripped, so compare() can detect identical
                # bodies cheaply. MD5 is used as a fast non-cryptographic hash.
                body = tree.css_first('body')
                if body:
                    for tag in body.css('script, style, nav, footer'):
                        tag.decompose()
                    text = body.text(separator=' ', strip=True)
                    snapshot.visible_text_hash = hashlib.md5(
                        text.encode()
                    ).hexdigest()
                    snapshot.visible_text_length = len(text)
        except Exception as e:
            snapshot.error = str(e)[:200]
        return snapshot

    def _extract_prices(self, html: str) -> List[str]:
        """Return a sorted, de-duplicated list of price-like strings in ``html``.

        Only the first 10 matches per pattern are kept to limit noise on
        listing-heavy pages.
        """
        prices = set()
        for pattern in self._PRICE_PATTERNS:
            prices.update(pattern.findall(html)[:10])
        return sorted(prices)

    def _detect_currency(self, html: str) -> str:
        """Guess the dominant currency by counting indicator occurrences.

        Returns the currency code with the most matches, or "unknown" when
        no indicator appears at all.
        """
        counts: Dict[str, int] = {}
        for currency, patterns in self._CURRENCY_PATTERNS.items():
            total = sum(len(p.findall(html)) for p in patterns)
            if total > 0:
                counts[currency] = total
        return max(counts, key=counts.get) if counts else "unknown"

    def _get_accept_language(self, country_code: str) -> str:
        """Accept-Language header value for ``country_code``; US English fallback."""
        return self._ACCEPT_LANGUAGE.get(country_code, 'en-US,en;q=0.9')

    def compare(
        self, snapshots: "List[ContentSnapshot]"
    ) -> "List[ContentDiff]":
        """Build a ContentDiff for every unordered pair of snapshots.

        Returns len(snapshots) * (len(snapshots) - 1) / 2 diffs. A currency
        mismatch is recorded in ``details`` only (it has no dedicated flag).
        """
        diffs = []
        for i in range(len(snapshots)):
            for j in range(i + 1, len(snapshots)):
                a = snapshots[i]
                b = snapshots[j]
                diff = ContentDiff(
                    country_a=a.country,
                    country_b=b.country,
                )
                if a.title != b.title:
                    diff.title_differs = True
                    diff.details.append(
                        f"Title: '{a.title[:50]}' vs '{b.title[:50]}'"
                    )
                if a.prices != b.prices:
                    diff.price_differs = True
                    diff.details.append(
                        f"Prices: {a.prices[:3]} vs {b.prices[:3]}"
                    )
                if a.currency != b.currency:
                    diff.details.append(
                        f"Currency: {a.currency} vs {b.currency}"
                    )
                if a.language != b.language:
                    diff.language_differs = True
                    diff.details.append(
                        f"Language: {a.language} vs {b.language}"
                    )
                if a.redirected_url != b.redirected_url:
                    diff.redirect_differs = True
                    diff.details.append(
                        f"Redirect: {a.redirected_url} vs {b.redirected_url}"
                    )
                # Similarity: 100 for identical hashes, otherwise approximate
                # by the ratio of visible-text lengths (cheap proxy metric —
                # equal-length pages with different words still score 100).
                if a.visible_text_hash and b.visible_text_hash:
                    if a.visible_text_hash == b.visible_text_hash:
                        diff.content_similarity = 100.0
                    else:
                        longer = max(a.visible_text_length,
                                     b.visible_text_length)
                        len_ratio = (
                            min(a.visible_text_length, b.visible_text_length)
                            / longer * 100 if longer > 0 else 0
                        )
                        diff.content_similarity = round(len_ratio, 1)
                diffs.append(diff)
        return diffs

    def print_report(
        self,
        snapshots: "List[ContentSnapshot]",
        diffs: "List[ContentDiff]",
    ):
        """Print a human-readable summary table plus price/redirect sections."""
        print("\n" + "=" * 70)
        print("GEO-TARGETED CONTENT REPORT")
        print("=" * 70)
        print(f"\nURL: {snapshots[0].url if snapshots else 'N/A'}")
        print(f"Countries checked: {len(snapshots)}")
        print(f"\n{'Country':<15} {'Status':<8} {'Title':<30} {'Currency':<8} {'Lang':<6}")
        print("-" * 70)
        for s in snapshots:
            if s.error:
                print(f"{s.country:<15} ERROR {s.error[:50]}")
            else:
                print(
                    f"{s.country:<15} {s.status_code:<8} "
                    f"{s.title[:28]:<30} {s.currency:<8} {s.language:<6}"
                )
        if any(d.price_differs for d in diffs):
            print(f"\n--- Price Differences ---")
            for s in snapshots:
                if s.prices:
                    print(f"  {s.country}: {', '.join(s.prices[:5])}")
        if any(d.redirect_differs for d in diffs):
            print(f"\n--- Redirect Differences ---")
            for s in snapshots:
                if s.redirected_url != s.url:
                    print(f"  {s.country}: → {s.redirected_url}")
# Usage
async def main():
    """Example run: check one product URL from five countries, print a report,
    and export the raw snapshots as JSON for later diffing or scheduled runs.
    """
    proxies = [
        GeoProxy("United States", "US", "http://user:pass@us-proxy.example.com:8080"),
        GeoProxy("United Kingdom", "GB", "http://user:pass@uk-proxy.example.com:8080"),
        GeoProxy("Germany", "DE", "http://user:pass@de-proxy.example.com:8080"),
        GeoProxy("Japan", "JP", "http://user:pass@jp-proxy.example.com:8080"),
        GeoProxy("Brazil", "BR", "http://user:pass@br-proxy.example.com:8080"),
    ]
    checker = GeoContentChecker(proxies)
    url = "https://www.amazon.com/dp/B0EXAMPLE"

    snapshots = await checker.check_url(url)
    diffs = checker.compare(snapshots)
    checker.print_report(snapshots, diffs)

    # Export. Fixes: explicit UTF-8 (platform default encoding can mangle
    # localized titles) and ensure_ascii=False so non-ASCII text is readable;
    # asdict is now imported at the top of the file instead of per call.
    with open("geo_check_results.json", "w", encoding="utf-8") as f:
        json.dump(
            [asdict(s) for s in snapshots],
            f, indent=2, ensure_ascii=False,
        )
asyncio.run(main())

Internal Links
- Building a Proxy Checker Tool — verify proxy locations
- Building a Price Monitoring Bot — track prices across regions
- Geo-Specific Proxy Guides — country proxy setup
- Ad Verification with Proxies — verify ad placements
- Best Residential Proxies 2026 — geo-targeted proxy providers
FAQ
How many countries should I check?
Check your primary target markets plus 2-3 comparison countries. For global e-commerce, test US, UK, Germany, Japan, and Brazil as they represent major markets with different currencies and regulations. Add more countries as needed for specific campaigns.
How do I verify proxy geographic accuracy?
Before running content checks, verify each proxy’s actual location using ipinfo.io or similar services. If a “US proxy” resolves to Canada, your content checks will be inaccurate. The checker can include a geo-verification step as a precondition.
Can I detect dynamic pricing with this tool?
Yes, but you need to run checks multiple times. Dynamic pricing changes based on browsing history, time of day, and demand. Run the checker on a schedule (hourly or daily) and compare results over time to identify pricing patterns.
How do I handle sites that block proxy traffic?
Use residential proxies for geo-content checking. Datacenter proxies are more likely to be blocked or served different content (CAPTCHA pages instead of the real site). Residential proxies from the target country provide the most authentic results.
What about JavaScript-rendered content?
This tool fetches static HTML. For JavaScript-heavy sites (SPAs, dynamic pricing loaded via API), integrate a headless browser (Playwright) with proxy support. Route the browser through country-specific proxies to see the fully rendered page as a local user would.
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a Proxy Rotator in Python: Complete Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Bandwidth Optimization for Proxies: Reduce Costs & Increase Speed
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a Proxy Rotator in Python: Complete Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Bandwidth Optimization for Proxies: Reduce Costs & Increase Speed
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a News Crawler in Python: Step-by-Step Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Azure Functions for Serverless Web Scraping: the Complete Guide
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
Related Reading
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a News Crawler in Python: Step-by-Step Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Azure Functions for Serverless Web Scraping: the Complete Guide
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)