Building a Proxy Rotation Library in Python
Proxy rotation is the backbone of any serious web scraping operation. Instead of relying on a single proxy until it gets blocked, a rotation library distributes requests across multiple proxies, tracks their health, and removes failing ones automatically.
This tutorial walks through building a reusable proxy rotation library in Python that you can drop into any scraping project.
Why Build Your Own Rotation Library
Commercial proxy rotation services handle rotation for you, but building your own library gives you control over rotation strategy, health checking frequency, session persistence, and failover behavior. If you use datacenter proxies or maintain your own proxy pool, a custom rotation library saves money and improves reliability.
Architecture Overview
The library has four main components:
- ProxyPool — stores and manages the list of available proxies
- RotationStrategy — determines which proxy to use next (round-robin, random, weighted)
- HealthChecker — periodically tests proxies and removes dead ones
- SessionManager — maintains sticky sessions for sites that require consistent IPs
Core Implementation
import asyncio
import random
import time
import httpx
from dataclasses import dataclass
from typing import List, Optional, Dict
from enum import Enum
import threading
import logging
logger = logging.getLogger(__name__)
class RotationType(Enum):
ROUND_ROBIN = "round_robin"
RANDOM = "random"
WEIGHTED = "weighted"
LEAST_USED = "least_used"
@dataclass
class ProxyEntry:
url: str
weight: float = 1.0
alive: bool = True
total_requests: int = 0
failed_requests: int = 0
avg_latency_ms: float = 0
last_used: float = 0
last_checked: float = 0
consecutive_failures: int = 0
cooldown_until: float = 0
@property
def success_rate(self) -> float:
if self.total_requests == 0:
return 1.0
return 1 - (self.failed_requests / self.total_requests)
@property
def effective_weight(self) -> float:
return self.weight * self.success_rate
class ProxyPool:
def __init__(
self,
proxies: List[str],
rotation: RotationType = RotationType.ROUND_ROBIN,
max_consecutive_failures: int = 5,
cooldown_seconds: int = 300,
):
self.entries: Dict[str, ProxyEntry] = {}
for p in proxies:
self.entries[p] = ProxyEntry(url=p)
self.rotation = rotation
self.max_consecutive_failures = max_consecutive_failures
self.cooldown_seconds = cooldown_seconds
self._rr_index = 0
self._lock = threading.Lock()
def get_proxy(self, session_key: Optional[str] = None) -> Optional[ProxyEntry]:
with self._lock:
available = self._get_available()
if not available:
# Try reviving cooldown proxies
self._revive_cooldown_proxies()
available = self._get_available()
if not available:
return None
if self.rotation == RotationType.ROUND_ROBIN:
proxy = self._round_robin(available)
elif self.rotation == RotationType.RANDOM:
proxy = random.choice(available)
elif self.rotation == RotationType.WEIGHTED:
proxy = self._weighted_select(available)
elif self.rotation == RotationType.LEAST_USED:
proxy = min(available, key=lambda p: p.total_requests)
else:
proxy = available[0]
proxy.last_used = time.time()
proxy.total_requests += 1
return proxy
def report_success(self, proxy_url: str, latency_ms: float):
with self._lock:
entry = self.entries.get(proxy_url)
if entry:
entry.consecutive_failures = 0
                n = max(entry.total_requests, 1)  # guard against division by zero
                # Running average; failed requests also count toward
                # total_requests, so this is a slight approximation.
                entry.avg_latency_ms = (
                    (entry.avg_latency_ms * (n - 1) + latency_ms) / n
                )
def report_failure(self, proxy_url: str):
with self._lock:
entry = self.entries.get(proxy_url)
if entry:
entry.failed_requests += 1
entry.consecutive_failures += 1
if entry.consecutive_failures >= self.max_consecutive_failures:
entry.alive = False
entry.cooldown_until = time.time() + self.cooldown_seconds
logger.warning(f"Proxy {proxy_url} moved to cooldown")
def add_proxy(self, proxy_url: str, weight: float = 1.0):
with self._lock:
self.entries[proxy_url] = ProxyEntry(url=proxy_url, weight=weight)
def remove_proxy(self, proxy_url: str):
with self._lock:
self.entries.pop(proxy_url, None)
def _get_available(self) -> List[ProxyEntry]:
now = time.time()
return [
e for e in self.entries.values()
if e.alive and now >= e.cooldown_until
]
def _round_robin(self, available: List[ProxyEntry]) -> ProxyEntry:
self._rr_index = self._rr_index % len(available)
proxy = available[self._rr_index]
self._rr_index += 1
return proxy
def _weighted_select(self, available: List[ProxyEntry]) -> ProxyEntry:
weights = [p.effective_weight for p in available]
total = sum(weights)
if total == 0:
return random.choice(available)
return random.choices(available, weights=weights, k=1)[0]
def _revive_cooldown_proxies(self):
now = time.time()
for entry in self.entries.values():
if not entry.alive and now >= entry.cooldown_until:
entry.alive = True
entry.consecutive_failures = 0
logger.info(f"Proxy {entry.url} revived from cooldown")
@property
def stats(self) -> dict:
entries = list(self.entries.values())
alive = [e for e in entries if e.alive]
return {
"total": len(entries),
"alive": len(alive),
"dead": len(entries) - len(alive),
"avg_latency_ms": (
sum(e.avg_latency_ms for e in alive) / len(alive)
if alive else 0
),
"total_requests": sum(e.total_requests for e in entries),
        }
Sticky Session Support
Some websites require the same IP across multiple requests (login flows, multi-page checkout). The session manager maps a session key to a specific proxy.
class SessionManager:
def __init__(self, pool: ProxyPool, ttl_seconds: int = 600):
self.pool = pool
self.ttl = ttl_seconds
self._sessions: Dict[str, tuple] = {} # key -> (proxy_url, expiry)
def get_proxy(self, session_key: str) -> Optional[ProxyEntry]:
now = time.time()
if session_key in self._sessions:
proxy_url, expiry = self._sessions[session_key]
if now < expiry:
entry = self.pool.entries.get(proxy_url)
if entry and entry.alive:
entry.last_used = now
entry.total_requests += 1
return entry
del self._sessions[session_key]
proxy = self.pool.get_proxy()
if proxy:
self._sessions[session_key] = (proxy.url, now + self.ttl)
return proxy
def release_session(self, session_key: str):
self._sessions.pop(session_key, None)
def cleanup_expired(self):
now = time.time()
expired = [k for k, (_, exp) in self._sessions.items() if now >= exp]
for k in expired:
            del self._sessions[k]
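Nothing calls cleanup_expired automatically. A minimal sketch, assuming you already run an asyncio event loop for the health checker below, is to evict expired sessions in a small background task (session_cleanup_loop is a hypothetical helper, not part of the class above):
async def session_cleanup_loop(sessions: SessionManager, interval: int = 60):
    # Hypothetical helper: periodically drop expired sticky sessions
    # so the internal session map does not grow without bound.
    while True:
        sessions.cleanup_expired()
        await asyncio.sleep(interval)
Health Checker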
The health checker runs in the background and tests proxies periodically. Dead proxies are automatically removed from rotation.
class HealthChecker:
def __init__(
self,
pool: ProxyPool,
check_interval: int = 60,
test_url: str = "https://httpbin.org/ip",
timeout: int = 10,
):
self.pool = pool
self.check_interval = check_interval
self.test_url = test_url
self.timeout = timeout
self._running = False
async def check_proxy(self, entry: ProxyEntry) -> bool:
try:
start = time.monotonic()
async with httpx.AsyncClient(
proxy=entry.url, timeout=self.timeout
) as client:
response = await client.get(self.test_url)
latency = (time.monotonic() - start) * 1000
if response.status_code == 200:
entry.alive = True
entry.avg_latency_ms = latency
entry.last_checked = time.time()
entry.consecutive_failures = 0
return True
except Exception:
pass
entry.consecutive_failures += 1
if entry.consecutive_failures >= self.pool.max_consecutive_failures:
entry.alive = False
entry.last_checked = time.time()
return False
async def run_check(self):
tasks = [
self.check_proxy(entry)
for entry in self.pool.entries.values()
]
results = await asyncio.gather(*tasks)
alive = sum(1 for r in results if r)
logger.info(f"Health check: {alive}/{len(results)} proxies alive")
async def start(self):
self._running = True
while self._running:
await self.run_check()
await asyncio.sleep(self.check_interval)
def stop(self):
        self._running = False
HTTP Client Integration
Wrap the pool in an HTTP client that automatically rotates proxies and retries on failure.
class RotatingClient:
def __init__(self, pool: ProxyPool, max_retries: int = 3, timeout: int = 30):
self.pool = pool
self.max_retries = max_retries
self.timeout = timeout
async def get(self, url: str, **kwargs) -> Optional[httpx.Response]:
for attempt in range(self.max_retries):
proxy = self.pool.get_proxy()
if not proxy:
raise RuntimeError("No available proxies")
try:
start = time.monotonic()
async with httpx.AsyncClient(
proxy=proxy.url, timeout=self.timeout
) as client:
response = await client.get(url, **kwargs)
latency = (time.monotonic() - start) * 1000
self.pool.report_success(proxy.url, latency)
return response
except Exception as e:
self.pool.report_failure(proxy.url)
logger.warning(f"Attempt {attempt+1} failed with {proxy.url}: {e}")
raise RuntimeError(f"All {self.max_retries} attempts failed for {url}")
async def get_many(self, urls: List[str], concurrency: int = 10):
semaphore = asyncio.Semaphore(concurrency)
async def fetch(url):
async with semaphore:
return url, await self.get(url)
tasks = [fetch(url) for url in urls]
        return await asyncio.gather(*tasks, return_exceptions=True)
Usage Example
async def main():
proxies = [
"http://user:pass@proxy1.example.com:8080",
"http://user:pass@proxy2.example.com:8080",
"http://user:pass@proxy3.example.com:8080",
"socks5://user:pass@proxy4.example.com:1080",
]
pool = ProxyPool(
proxies,
rotation=RotationType.WEIGHTED,
max_consecutive_failures=3,
cooldown_seconds=120,
)
# Start health checker in the background
checker = HealthChecker(pool, check_interval=60)
checker_task = asyncio.create_task(checker.start())
# Use the rotating client
client = RotatingClient(pool, max_retries=3)
response = await client.get("https://example.com")
print(response.status_code)
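    # get_many fans requests out across the pool; each fetch grabs its
    # own proxy, with concurrency capped by the semaphore. The URLs here
    # are placeholders for illustration.
    results = await client.get_many(
        ["https://example.com/a", "https://example.com/b"],
        concurrency=5,
    )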
# Sticky session example
sessions = SessionManager(pool, ttl_seconds=300)
proxy = sessions.get_proxy("user_123")
print(f"Session proxy: {proxy.url}")
# Print pool statistics
print(pool.stats)
    checker.stop()
    checker_task.cancel()  # the checker may be mid-sleep; cancel so main() exits cleanly
asyncio.run(main())
Packaging as a Library
Structure the project for distribution via pip:
proxy-rotator/
├── proxy_rotator/
│ ├── __init__.py
│ ├── pool.py
│ ├── strategies.py
│ ├── health.py
│ ├── sessions.py
│ └── client.py
├── tests/
│ ├── test_pool.py
│ └── test_strategies.py
├── pyproject.toml
└── README.md
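A minimal proxy_rotator/__init__.py can re-export the public API so callers import from the package root (the module names follow the layout above; rename them if you split the code differently):
from proxy_rotator.pool import ProxyPool, ProxyEntry, RotationType
from proxy_rotator.health import HealthChecker
from proxy_rotator.sessions import SessionManager
from proxy_rotator.client import RotatingClient

__all__ = [
    "ProxyPool", "ProxyEntry", "RotationType",
    "HealthChecker", "SessionManager", "RotatingClient",
]
In pyproject.toml: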
[project]
name = "proxy-rotator"
version = "0.1.0"
dependencies = ["httpx[socks]>=0.27"]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
Internal Links
- Building a Proxy Checker Tool — check proxies before adding to rotation
- Building Your Own Rotating Proxy Pool — infrastructure for proxy pools
- Proxy Pool Manager: Open Source Guide — manage large proxy pools
- Best Rotating Proxies 2026 — compare with commercial solutions
- Web Scraping with Python — use the library in scraping projects
FAQ
What rotation strategy should I use?
Weighted rotation is best for production use. It automatically favors faster, more reliable proxies while still distributing load. For example, with the effective_weight property above, a proxy with base weight 1.0 and a 90% success rate is selected with weight 0.9, so unreliable proxies gradually receive less traffic. Round-robin works well when all proxies have similar performance. Random rotation is simplest but does not adapt to proxy quality.
How many proxies should be in the rotation pool?
For small projects, 10-20 proxies are sufficient. For large-scale scraping, maintain at least 50-100 proxies with health checking enabled. The more proxies you have, the lower the request rate per IP, which reduces the chance of getting blocked.
How do I handle proxy authentication?
Include credentials in the proxy URL: http://username:password@host:port. The httpx library handles Basic authentication automatically. For more complex authentication (IP whitelisting, API keys), implement a custom authentication handler in the client wrapper.
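For example, a hypothetical AuthenticatedClient subclass (not part of the library above) can inject a bearer token on every request while still rotating proxies; the api_key parameter and header scheme are assumptions to adapt to your target API:
class AuthenticatedClient(RotatingClient):
    # Hypothetical wrapper: adds an Authorization header to each request
    # on top of the proxy credentials already embedded in the proxy URL.
    def __init__(self, pool: ProxyPool, api_key: str, **kwargs):
        super().__init__(pool, **kwargs)
        self.api_key = api_key

    async def get(self, url: str, **kwargs) -> Optional[httpx.Response]:
        headers = kwargs.pop("headers", {})
        headers["Authorization"] = f"Bearer {self.api_key}"
        return await super().get(url, headers=headers, **kwargs)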
Should I use sticky sessions or rotate every request?
Rotate every request by default. Use sticky sessions only when the target site requires session consistency — login flows, shopping carts, or multi-page forms. Sticky sessions increase the risk of IP-based blocking because the same IP makes multiple sequential requests.
How do I add proxies from a provider API?
Most proxy providers offer APIs to fetch proxy lists. Write a loader function that calls the provider API and feeds proxies into pool.add_proxy(). Schedule the loader to run periodically so new proxies are added and expired ones are replaced automatically.
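As a sketch, a loader might look like this; the endpoint URL, auth header, and JSON shape are hypothetical, so adapt them to your provider's documented API:
async def refresh_proxies(pool: ProxyPool, api_key: str, interval: int = 900):
    # Hypothetical loader: pull the current proxy list from a provider
    # API every `interval` seconds and reconcile it with the pool.
    while True:
        try:
            async with httpx.AsyncClient(timeout=15) as client:
                resp = await client.get(
                    "https://api.example-provider.com/v1/proxies",  # hypothetical endpoint
                    headers={"Authorization": f"Bearer {api_key}"},
                )
                resp.raise_for_status()
                fresh = {p["url"] for p in resp.json()["proxies"]}  # assumed response shape
            for url in fresh - set(pool.entries):
                pool.add_proxy(url)
            for url in set(pool.entries) - fresh:
                pool.remove_proxy(url)
        except Exception as e:
            logger.warning(f"Proxy refresh failed: {e}")
        await asyncio.sleep(interval)
Schedule it alongside the health checker with asyncio.create_task(refresh_proxies(pool, api_key)) so stale proxies are swapped out without restarting the scraper.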
Related Reading
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a News Crawler in Python: Step-by-Step Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Azure Functions for Serverless Web Scraping: the Complete Guide
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)