Build a Proxy Rotator in Python: Complete Tutorial
A proxy rotator automatically cycles through a pool of proxy servers for each request, distributing traffic across multiple IPs to avoid detection and rate limiting. While commercial proxy providers offer built-in rotation through backconnect gateways, building your own rotator gives you full control over rotation logic, health monitoring, and failover behavior.
In this tutorial, we build a production-ready proxy rotator with multiple rotation strategies, health checking, and async support.
Project Architecture
proxy-rotator/
├── rotator.py # Core rotation logic
├── health_checker.py # Proxy health monitoring
├── strategies.py # Rotation strategies
├── proxy_pool.py # Pool management
├── server.py # HTTP proxy server (optional)
├── config.yaml # Proxy list and settings
└── requirements.txt # Dependencies

Core Components
The Proxy Data Model
# proxy_pool.py
import time
from dataclasses import dataclass, field
from typing import Optional
from enum import Enum
class ProxyStatus(Enum):
HEALTHY = "healthy"
DEGRADED = "degraded"
DEAD = "dead"
COOLDOWN = "cooldown"
@dataclass
class Proxy:
url: str
host: str
port: int
username: str = ""
password: str = ""
protocol: str = "http"
country: str = ""
# Health metrics
status: ProxyStatus = ProxyStatus.HEALTHY
total_requests: int = 0
successful_requests: int = 0
failed_requests: int = 0
total_latency: float = 0
last_used: float = 0
last_checked: float = 0
consecutive_failures: int = 0
@property
def success_rate(self) -> float:
if self.total_requests == 0:
return 1.0
return self.successful_requests / self.total_requests
@property
def avg_latency(self) -> float:
if self.successful_requests == 0:
return float('inf')
return self.total_latency / self.successful_requests
def record_success(self, latency: float):
self.total_requests += 1
self.successful_requests += 1
self.total_latency += latency
self.last_used = time.time()
self.consecutive_failures = 0
if self.status == ProxyStatus.DEGRADED:
self.status = ProxyStatus.HEALTHY
def record_failure(self):
self.total_requests += 1
self.failed_requests += 1
self.last_used = time.time()
self.consecutive_failures += 1
if self.consecutive_failures >= 3:
self.status = ProxyStatus.DEGRADED
if self.consecutive_failures >= 5:
self.status = ProxyStatus.DEAD
@classmethod
def from_url(cls, url: str, country: str = "") -> "Proxy":
from urllib.parse import urlparse
parsed = urlparse(url)
return cls(
url=url,
host=parsed.hostname or "",
port=parsed.port or 8080,
username=parsed.username or "",
password=parsed.password or "",
protocol=parsed.scheme or "http",
country=country,
)Rotation Strategies
# strategies.py
import random
import time
from abc import ABC, abstractmethod
from typing import List, Optional
from proxy_pool import Proxy, ProxyStatus
class RotationStrategy(ABC):
    """Interface for proxy-selection policies."""

    @abstractmethod
    def select(self, proxies: List[Proxy], **kwargs) -> Optional[Proxy]:
        """Return the next proxy to use, or None when none is available."""
class RoundRobinStrategy(RotationStrategy):
    """Hand out non-dead proxies in a fixed cyclic order."""

    def __init__(self):
        self.index = 0

    def select(self, proxies: List[Proxy], **kwargs) -> Optional[Proxy]:
        alive = [p for p in proxies if p.status != ProxyStatus.DEAD]
        if not alive:
            return None
        # Modulo keeps the cursor valid even as pool health changes size.
        chosen = alive[self.index % len(alive)]
        self.index += 1
        return chosen
class RandomStrategy(RotationStrategy):
    """Pick any non-dead proxy uniformly at random."""

    def select(self, proxies: List[Proxy], **kwargs) -> Optional[Proxy]:
        candidates = [p for p in proxies if p.status != ProxyStatus.DEAD]
        return random.choice(candidates) if candidates else None
class WeightedStrategy(RotationStrategy):
    """Select proxies at random, weighted by historical success rate."""

    def select(self, proxies: List[Proxy], **kwargs) -> Optional[Proxy]:
        pool = [p for p in proxies if p.status != ProxyStatus.DEAD]
        if not pool:
            return None
        # The +0.1 floor keeps zero-success proxies selectable so they can recover.
        return random.choices(pool, weights=[p.success_rate + 0.1 for p in pool], k=1)[0]
class LatencyBasedStrategy(RotationStrategy):
    """Prefer lower-latency proxies, with occasional random exploration."""

    def select(self, proxies: List[Proxy], **kwargs) -> Optional[Proxy]:
        pool = [p for p in proxies if p.status != ProxyStatus.DEAD]
        if not pool:
            return None
        # Explore ~20% of the time, and always while latency data is sparse.
        explore = random.random() < 0.2
        sparse_data = all(p.total_requests < 5 for p in pool)
        if explore or sparse_data:
            return random.choice(pool)
        # Exploit: route through the fastest proxy observed so far.
        return min(pool, key=lambda p: p.avg_latency)
class GeoTargetedStrategy(RotationStrategy):
    """Prefer proxies located in the requested country (kwarg: country)."""

    def select(self, proxies: List[Proxy], **kwargs) -> Optional[Proxy]:
        target = kwargs.get("country", "")
        pool = [p for p in proxies if p.status != ProxyStatus.DEAD]
        if target:
            # Fall back to the whole pool when no proxy matches the country.
            matches = [p for p in pool if p.country.upper() == target.upper()]
            pool = matches or pool
        return random.choice(pool) if pool else None
class CooldownStrategy(RotationStrategy):
"""Enforce minimum time between proxy reuse."""
def __init__(self, cooldown_seconds: float = 5.0):
self.cooldown = cooldown_seconds
def select(self, proxies: List[Proxy], **kwargs) -> Optional[Proxy]:
now = time.time()
available = [
p for p in proxies
if p.status != ProxyStatus.DEAD
and (now - p.last_used) >= self.cooldown
]
if not available:
available = [p for p in proxies if p.status != ProxyStatus.DEAD]
if not available:
return None
return min(available, key=lambda p: p.last_used)The Rotator
# rotator.py
import time
import logging
from typing import List, Optional, Dict
from proxy_pool import Proxy, ProxyStatus
from strategies import (
    RotationStrategy, RoundRobinStrategy, RandomStrategy,
    WeightedStrategy, LatencyBasedStrategy, GeoTargetedStrategy,
    CooldownStrategy,
)
logger = logging.getLogger(__name__)
class ProxyRotator:
STRATEGIES = {
"round_robin": RoundRobinStrategy,
"random": RandomStrategy,
"weighted": WeightedStrategy,
"latency": LatencyBasedStrategy,
"cooldown": CooldownStrategy,
}
def __init__(
self,
proxies: List[str],
strategy: str = "weighted",
max_retries: int = 3,
**strategy_kwargs,
):
self.proxies = [Proxy.from_url(url) for url in proxies]
self.strategy = self.STRATEGIES[strategy](**strategy_kwargs)
self.max_retries = max_retries
self.stats = {"total": 0, "success": 0, "failed": 0}
def get_proxy(self, **kwargs) -> Optional[Proxy]:
"""Get the next proxy based on rotation strategy."""
proxy = self.strategy.select(self.proxies, **kwargs)
if proxy is None:
logger.warning("No healthy proxies available, resetting pool")
self._reset_dead_proxies()
proxy = self.strategy.select(self.proxies, **kwargs)
return proxy
def report_result(self, proxy: Proxy, success: bool, latency: float = 0):
"""Report the result of using a proxy."""
self.stats["total"] += 1
if success:
proxy.record_success(latency)
self.stats["success"] += 1
else:
proxy.record_failure()
self.stats["failed"] += 1
def get_proxy_dict(self, **kwargs) -> Optional[Dict[str, str]]:
"""Get proxy in requests-compatible format."""
proxy = self.get_proxy(**kwargs)
if proxy is None:
return None
return {"http": proxy.url, "https": proxy.url}
def _reset_dead_proxies(self):
"""Reset dead proxies for re-evaluation."""
for proxy in self.proxies:
if proxy.status == ProxyStatus.DEAD:
proxy.status = ProxyStatus.DEGRADED
proxy.consecutive_failures = 0
def get_stats(self) -> dict:
healthy = sum(1 for p in self.proxies if p.status == ProxyStatus.HEALTHY)
degraded = sum(1 for p in self.proxies if p.status == ProxyStatus.DEGRADED)
dead = sum(1 for p in self.proxies if p.status == ProxyStatus.DEAD)
return {
**self.stats,
"success_rate": self.stats["success"] / max(self.stats["total"], 1),
"pool_size": len(self.proxies),
"healthy": healthy,
"degraded": degraded,
"dead": dead,
}Usage with requests
import requests
import time
from rotator import ProxyRotator
# Initialize with your proxy list
rotator = ProxyRotator(
proxies=[
"http://user:pass@proxy1.example.com:8080",
"http://user:pass@proxy2.example.com:8080",
"http://user:pass@proxy3.example.com:8080",
],
strategy="weighted",
)
# Scrape with automatic rotation
urls = ["https://example.com/page/{}".format(i) for i in range(100)]
for url in urls:
proxy = rotator.get_proxy()
if proxy is None:
print("No proxies available!")
break
start = time.time()
try:
response = requests.get(
url,
proxies={"http": proxy.url, "https": proxy.url},
timeout=15,
)
latency = time.time() - start
rotator.report_result(proxy, success=True, latency=latency)
print(f"OK: {url} via {proxy.host} ({latency:.2f}s)")
except Exception as e:
rotator.report_result(proxy, success=False)
print(f"FAIL: {url} via {proxy.host}: {e}")
# Print stats
print(rotator.get_stats())Async Support
import asyncio
import aiohttp
import time
from rotator import ProxyRotator
async def async_scrape(rotator: ProxyRotator, urls: list, concurrency: int = 10):
    """Scrape *urls* concurrently through rotating proxies.

    Args:
        rotator: Pool to draw proxies from; results are reported back to it.
        urls: URLs to fetch.
        concurrency: Maximum in-flight requests (enforced by a semaphore).

    Returns:
        One dict per URL, in input order: {"url", "status"} plus "length"
        on success, "error" on failure; "status" is "no_proxy" when the
        pool is exhausted.
    """
    semaphore = asyncio.Semaphore(concurrency)

    # One shared session for the whole batch: the original code opened a new
    # ClientSession per request, which defeats connection pooling and churns
    # sockets under load. The timeout set here applies to every request.
    timeout = aiohttp.ClientTimeout(total=15)
    async with aiohttp.ClientSession(timeout=timeout) as session:

        async def fetch(url):
            async with semaphore:
                proxy = rotator.get_proxy()
                if not proxy:
                    return {"url": url, "status": "no_proxy"}
                start = time.time()
                try:
                    async with session.get(url, proxy=proxy.url) as response:
                        latency = time.time() - start
                        rotator.report_result(proxy, True, latency)
                        text = await response.text()
                        return {"url": url, "status": response.status, "length": len(text)}
                except Exception as e:
                    rotator.report_result(proxy, False)
                    return {"url": url, "status": "error", "error": str(e)}

        return await asyncio.gather(*(fetch(url) for url in urls))
# Usage
# NOTE: replace [...] with real proxy URLs before running.
rotator = ProxyRotator(proxies=[...], strategy="cooldown", cooldown_seconds=3)
urls = [f"https://example.com/page/{i}" for i in range(1000)]
# asyncio.run drives the whole batch with at most 20 requests in flight.
results = asyncio.run(async_scrape(rotator, urls, concurrency=20))

Health Checker
# health_checker.py
import asyncio
import aiohttp
import time
from proxy_pool import Proxy, ProxyStatus
class HealthChecker:
    """Verifies proxies by fetching a known endpoint through each of them.

    Results are recorded directly on each Proxy via record_success() /
    record_failure(), so checks affect the same health metrics as real traffic.
    """

    def __init__(self, check_url="https://httpbin.org/ip", interval=60):
        self.check_url = check_url
        self.interval = interval

    async def check_proxy(self, proxy: Proxy) -> bool:
        """Probe a single proxy; returns True iff it answered with HTTP 200."""
        try:
            async with aiohttp.ClientSession() as session:
                start = time.time()
                async with session.get(
                    self.check_url,
                    proxy=proxy.url,
                    timeout=aiohttp.ClientTimeout(total=10),
                ) as response:
                    elapsed = time.time() - start
                    proxy.last_checked = time.time()
                    if response.status != 200:
                        proxy.record_failure()
                        return False
                    proxy.record_success(elapsed)
                    return True
        except Exception:
            # Timeouts, connect errors, bad proxy replies all count as failures.
            proxy.record_failure()
            proxy.last_checked = time.time()
            return False

    async def check_all(self, proxies: list):
        """Probe every proxy concurrently and print a one-line summary."""
        outcomes = await asyncio.gather(*(self.check_proxy(p) for p in proxies))
        healthy = sum(outcomes)
        print(f"Health check: {healthy}/{len(proxies)} proxies healthy")
        return outcomes

    async def run_continuous(self, proxies: list):
        """Loop forever, re-checking the pool every self.interval seconds."""
        while True:
            await self.check_all(proxies)
            await asyncio.sleep(self.interval)

Configuration
# config.yaml
proxies:
- url: "http://user:pass@proxy1.example.com:8080"
country: "US"
- url: "http://user:pass@proxy2.example.com:8080"
country: "UK"
- url: "http://user:pass@proxy3.example.com:8080"
country: "DE"
settings:
strategy: "weighted"
max_retries: 3
health_check_interval: 60
cooldown_seconds: 5

FAQ
Which rotation strategy should I use?
For most web scraping use cases, the weighted strategy works best — it naturally favors reliable proxies while still testing underperforming ones. Use cooldown when you need to prevent rapid reuse of the same IP.
How many proxies do I need in my pool?
For light scraping (< 100 requests/minute), 10-20 proxies suffice. For heavy scraping (1,000+ requests/minute), you need 100+ proxies. The rule of thumb is: pool size >= requests per minute / acceptable requests per proxy per minute.
Can I use this with commercial proxy services?
Yes, but commercial services like Bright Data already provide rotation through their backconnect gateways. This rotator is most useful with static proxy lists or self-hosted proxies. For details on how commercial rotation works, see our proxy rotation guide.
How do I handle proxy authentication?
Include credentials in the proxy URL: http://username:password@host:port. For IP whitelisting, omit the credentials and ensure your server IP is whitelisted with the provider.
Should I build my own rotator or use a library?
For production use, consider established libraries like scrapy-rotating-proxies or proxy middleware for your scraping framework. Build your own when you need custom rotation logic, specific health checking rules, or integration with proprietary proxy infrastructure.
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a SERP Tracker: Monitor Search Rankings
- AJAX Request Interception: Scraping API Calls Directly
- Bandwidth Optimization for Proxies: Reduce Costs & Increase Speed
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a SERP Tracker: Monitor Search Rankings
- AJAX Request Interception: Scraping API Calls Directly
- Bandwidth Optimization for Proxies: Reduce Costs & Increase Speed
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a News Crawler in Python: Step-by-Step Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Azure Functions for Serverless Web Scraping: the Complete Guide
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
Related Reading
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a News Crawler in Python: Step-by-Step Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Azure Functions for Serverless Web Scraping: the Complete Guide
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)