Building a Captcha-Solving Integration Service
CAPTCHAs are the biggest obstacle in web scraping after IP bans. Rather than hardcoding one solving service into every scraper, build a unified integration service that abstracts multiple solvers behind a single API. Your scrapers call one endpoint regardless of whether the CAPTCHA goes to 2Captcha, Anti-Captcha, CapSolver, or a local ML model.
Why an Integration Layer
- Failover — if one solver goes down, traffic routes to another
- Cost optimization — route different CAPTCHA types to the cheapest solver
- Unified API — scrapers do not need to know which solver handles the request
- Analytics — track solve rates, costs, and latency across providers
- Queue management — buffer requests during solver outages
Supported CAPTCHA Types
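The task model defines the CAPTCHA types below. Each solver adapter supports only a subset of them — for example, the Anti-Captcha adapter shown later handles reCAPTCHA v2, hCaptcha, and image CAPTCHAs: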
- reCAPTCHA v2 (checkbox and invisible)
- reCAPTCHA v3 (score-based)
- hCaptcha
- Image CAPTCHAs (text recognition)
- FunCaptcha / Arkose Labs
- Turnstile (Cloudflare)
Implementation
import asyncio
import httpx
import time
import base64
import hashlib
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Optional, Dict, List
from enum import Enum
import logging
logger = logging.getLogger(__name__)
class CaptchaType(Enum):
    """Kinds of CAPTCHA challenge that a task can describe.

    Every member's value is the lowercase string identifier of that type,
    so ``CaptchaType("hcaptcha")`` looks a member up by its identifier.
    """

    # Google reCAPTCHA, checkbox/invisible (v2) and score-based (v3).
    RECAPTCHA_V2 = "recaptcha_v2"
    RECAPTCHA_V3 = "recaptcha_v3"

    HCAPTCHA = "hcaptcha"

    # Plain image challenge (text recognition).
    IMAGE = "image"

    FUNCAPTCHA = "funcaptcha"
    TURNSTILE = "turnstile"
@dataclass
class CaptchaTask:
    """Provider-independent description of one CAPTCHA to be solved.

    Only ``captcha_type`` and ``site_url`` are required; the remaining
    fields default to empty values and are filled in as the CAPTCHA type
    requires.
    """

    # Which kind of challenge this is.
    captcha_type: CaptchaType
    # URL of the page that shows the challenge.
    site_url: str
    # Site key of the challenge widget (token-based CAPTCHA types).
    site_key: str = ""
    # Base64-encoded image, used for image CAPTCHAs.
    image_base64: str = ""
    min_score: float = 0.7  # For reCAPTCHA v3
    # Action name; the 2Captcha solver sends it with reCAPTCHA v3 tasks.
    action: str = ""
    # Free-form extras. A fresh dict per instance; none of the solvers in
    # this file reads it.
    data: dict = field(default_factory=dict)
@dataclass
class CaptchaResult:
    """Outcome of one solve attempt by one solver.

    A default-constructed result represents a failure with no details;
    solvers fill the fields in as the attempt progresses.
    """

    # True only when a solution was obtained.
    success: bool = False
    # The solved token or recognised text; empty on failure.
    solution: str = ""
    # Cost of the solve; stays 0 unless the solver sets it.
    cost: float = 0
    # Wall-clock duration of the attempt in milliseconds.
    solve_time_ms: int = 0
    # Name of the solver that produced this result.
    solver: str = ""
    # Error text on failure; empty on success.
    error: str = ""
    # Provider-side task identifier, as a string.
    task_id: str = ""
class CaptchaSolver(ABC):
    """Base class for CAPTCHA solver integrations.

    Concrete solvers store their credentials via ``__init__`` and must
    implement both ``solve`` and ``get_balance``.
    """

    def __init__(self, api_key: str, name: str):
        # ``name`` identifies the solver in results, stats and routing rules.
        self.name = name
        self.api_key = api_key

    @abstractmethod
    async def solve(self, task: CaptchaTask) -> CaptchaResult:
        """Attempt to solve ``task`` and return the outcome."""

    @abstractmethod
    async def get_balance(self) -> float:
        """Return the remaining account balance at the provider."""
class TwoCaptchaSolver(CaptchaSolver):
    """Solver that talks to the 2Captcha HTTP API.

    A task is submitted to ``/in.php``; the answer is then polled from
    ``/res.php`` until it is ready, the API reports an error, or the
    polling budget is used up.
    """

    BASE_URL = "https://2captcha.com"
    POLL_ATTEMPTS = 60    # maximum number of result checks per task
    POLL_INTERVAL_S = 5   # seconds slept before each result check

    def __init__(self, api_key: str):
        super().__init__(api_key, "2captcha")

    def _build_params(self, task: CaptchaTask) -> Optional[dict]:
        """Return the ``in.php`` form fields for ``task``.

        Returns None when this adapter has no mapping for the task's
        CAPTCHA type, so the caller can reject it instead of submitting a
        request without a ``method`` field.
        """
        params = {"key": self.api_key, "json": 1}
        kind = task.captcha_type
        if kind == CaptchaType.RECAPTCHA_V2:
            params.update({
                "method": "userrecaptcha",
                "googlekey": task.site_key,
                "pageurl": task.site_url,
            })
        elif kind == CaptchaType.RECAPTCHA_V3:
            params.update({
                "method": "userrecaptcha",
                "version": "v3",
                "googlekey": task.site_key,
                "pageurl": task.site_url,
                "min_score": task.min_score,
                "action": task.action,
            })
        elif kind == CaptchaType.HCAPTCHA:
            params.update({
                "method": "hcaptcha",
                "sitekey": task.site_key,
                "pageurl": task.site_url,
            })
        elif kind == CaptchaType.IMAGE:
            params.update({
                "method": "base64",
                "body": task.image_base64,
            })
        elif kind == CaptchaType.TURNSTILE:
            params.update({
                "method": "turnstile",
                "sitekey": task.site_key,
                "pageurl": task.site_url,
            })
        else:
            return None
        return params

    async def solve(self, task: CaptchaTask) -> CaptchaResult:
        """Submit ``task`` to 2Captcha and poll until it is resolved.

        Returns:
            A ``CaptchaResult`` with ``solver`` set to this solver's name.
            On success ``solution`` holds the answer. On failure ``error``
            holds the API's error code, an "Unsupported type" message, a
            timeout message, or the first 200 characters of a raised
            exception. ``solve_time_ms`` is recorded on every path.

        Exceptions derived from ``Exception`` are caught and reported via
        ``error`` rather than propagated.
        """
        result = CaptchaResult(solver=self.name)
        start = time.monotonic()
        try:
            params = self._build_params(task)
            if params is None:
                result.error = f"Unsupported type: {task.captcha_type}"
                return result

            async with httpx.AsyncClient(timeout=120) as client:
                # Submit task
                resp = await client.post(
                    f"{self.BASE_URL}/in.php", data=params
                )
                data = resp.json()
                if data.get("status") != 1:
                    result.error = data.get("request", "Submit failed")
                    return result

                task_id = data["request"]
                result.task_id = task_id

                # Poll for result
                for _ in range(self.POLL_ATTEMPTS):
                    await asyncio.sleep(self.POLL_INTERVAL_S)
                    check = await client.get(
                        f"{self.BASE_URL}/res.php",
                        params={
                            "key": self.api_key,
                            "action": "get",
                            "id": task_id,
                            "json": 1,
                        },
                    )
                    check_data = check.json()
                    if check_data.get("status") == 1:
                        result.success = True
                        result.solution = check_data["request"]
                        break
                    # "CAPCHA_NOT_READY" (sic) is the API's own spelling of
                    # its still-working code; anything else is a real error.
                    if check_data.get("request") != "CAPCHA_NOT_READY":
                        result.error = check_data.get("request", "Unknown error")
                        break
                else:
                    # Loop ended without a break: the task never finished.
                    result.error = "Timed out waiting for solution"
        except Exception as e:
            result.error = str(e)[:200]
        finally:
            # Record the duration on early returns too, so failed submits
            # are not counted as 0 ms.
            result.solve_time_ms = int((time.monotonic() - start) * 1000)
        return result

    async def get_balance(self) -> float:
        """Return the account balance reported by ``res.php``.

        Raises:
            ValueError: if the ``request`` field of the response is not
                numeric (presumably an API error code — confirm against
                the provider's documentation).
        """
        async with httpx.AsyncClient() as client:
            resp = await client.get(
                f"{self.BASE_URL}/res.php",
                params={"key": self.api_key, "action": "getbalance", "json": 1},
            )
            return float(resp.json().get("request", 0))
class AntiCaptchaSolver(CaptchaSolver):
    """Solver that talks to the Anti-Captcha JSON API.

    A task is created via ``/createTask`` and its outcome is polled from
    ``/getTaskResult``. Only reCAPTCHA v2, hCaptcha and image CAPTCHAs are
    mapped; every other type is rejected as unsupported.
    """

    BASE_URL = "https://api.anti-captcha.com"
    POLL_ATTEMPTS = 60    # maximum number of result checks per task
    POLL_INTERVAL_S = 5   # seconds slept before each result check

    def __init__(self, api_key: str):
        super().__init__(api_key, "anti-captcha")

    @staticmethod
    def _build_task(task: CaptchaTask) -> Optional[dict]:
        """Return the ``task`` object for ``/createTask``, or None if unsupported."""
        kind = task.captcha_type
        if kind == CaptchaType.RECAPTCHA_V2:
            return {
                "type": "RecaptchaV2TaskProxyless",
                "websiteURL": task.site_url,
                "websiteKey": task.site_key,
            }
        if kind == CaptchaType.HCAPTCHA:
            return {
                "type": "HCaptchaTaskProxyless",
                "websiteURL": task.site_url,
                "websiteKey": task.site_key,
            }
        if kind == CaptchaType.IMAGE:
            return {
                "type": "ImageToTextTask",
                "body": task.image_base64,
            }
        return None

    @staticmethod
    def _parse_cost(raw) -> float:
        """Convert the API's ``cost`` field to a float, or 0.0 if unusable.

        The field may arrive as a string; storing it unconverted would
        break numeric accumulation of costs downstream.
        """
        try:
            return float(raw or 0)
        except (TypeError, ValueError):
            return 0.0

    async def solve(self, task: CaptchaTask) -> CaptchaResult:
        """Create an Anti-Captcha task for ``task`` and poll until resolved.

        Returns:
            A ``CaptchaResult`` with ``solver`` set to this solver's name.
            On success ``solution`` holds the first non-empty of the
            ``gRecaptchaResponse``, ``text`` and ``token`` solution fields
            and ``cost`` holds the reported cost as a float. On failure
            ``error`` holds the API's error description, an "Unsupported
            type" message, a timeout message, or the first 200 characters
            of a raised exception. ``solve_time_ms`` is recorded on every
            path.

        Exceptions derived from ``Exception`` are caught and reported via
        ``error`` rather than propagated.
        """
        result = CaptchaResult(solver=self.name)
        start = time.monotonic()
        try:
            # Build task object
            task_obj = self._build_task(task)
            if task_obj is None:
                result.error = f"Unsupported type: {task.captcha_type}"
                return result

            async with httpx.AsyncClient(timeout=120) as client:
                resp = await client.post(
                    f"{self.BASE_URL}/createTask",
                    json={"clientKey": self.api_key, "task": task_obj},
                )
                data = resp.json()
                if data.get("errorId", 0) != 0:
                    result.error = data.get("errorDescription", "Create failed")
                    return result

                task_id = data["taskId"]
                result.task_id = str(task_id)

                # Poll for result
                for _ in range(self.POLL_ATTEMPTS):
                    await asyncio.sleep(self.POLL_INTERVAL_S)
                    check = await client.post(
                        f"{self.BASE_URL}/getTaskResult",
                        json={
                            "clientKey": self.api_key,
                            "taskId": task_id,
                        },
                    )
                    check_data = check.json()
                    if check_data.get("status") == "ready":
                        solution = check_data.get("solution", {})
                        result.solution = (
                            solution.get("gRecaptchaResponse") or
                            solution.get("text") or
                            solution.get("token", "")
                        )
                        result.cost = self._parse_cost(check_data.get("cost", 0))
                        result.success = True
                        break
                    if check_data.get("errorId", 0) != 0:
                        result.error = check_data.get("errorDescription", "")
                        break
                else:
                    # Loop ended without a break: the task never finished.
                    result.error = "Timed out waiting for solution"
        except Exception as e:
            result.error = str(e)[:200]
        finally:
            # Record the duration on early returns too, so rejected tasks
            # are not counted as 0 ms.
            result.solve_time_ms = int((time.monotonic() - start) * 1000)
        return result

    async def get_balance(self) -> float:
        """Return the ``balance`` field of ``/getBalance`` as a float.

        A missing or empty ``balance`` yields 0.0.
        """
        async with httpx.AsyncClient() as client:
            resp = await client.post(
                f"{self.BASE_URL}/getBalance",
                json={"clientKey": self.api_key},
            )
            return float(resp.json().get("balance", 0) or 0)


# Unified Service
class CaptchaService:
    """Routes CAPTCHA tasks to the best available solver.

    Solvers are tried in registration order unless a routing rule names a
    preferred solver for the task's CAPTCHA type. Per-solver counters are
    kept in ``stats`` and updated after every attempt.
    """

    def __init__(self):
        self.solvers: List[CaptchaSolver] = []
        self.stats: Dict[str, dict] = {}
        self.routing_rules: Dict[CaptchaType, str] = {}

    def add_solver(self, solver: CaptchaSolver):
        """Register ``solver`` and start its statistics from zero."""
        self.solvers.append(solver)
        self.stats[solver.name] = {
            "total": 0, "success": 0, "failed": 0,
            "total_cost": 0, "avg_time_ms": 0,
        }

    def set_routing(self, captcha_type: CaptchaType, solver_name: str):
        """Prefer the solver called ``solver_name`` for ``captcha_type``."""
        self.routing_rules[captcha_type] = solver_name

    def _get_solver(self, captcha_type: CaptchaType) -> CaptchaSolver:
        """Return the primary solver for ``captcha_type``.

        Raises:
            RuntimeError: if no solver has been registered.
        """
        # Check routing rules first
        preferred = self.routing_rules.get(captcha_type)
        if preferred:
            for candidate in self.solvers:
                if candidate.name == preferred:
                    return candidate
            # The rule names a solver that was never registered; make the
            # misconfiguration visible instead of ignoring it silently.
            logger.warning(
                "Routing rule for %s names unknown solver %r; "
                "using the default solver",
                captcha_type, preferred,
            )
        # Default: first available solver
        if self.solvers:
            return self.solvers[0]
        raise RuntimeError("No solvers configured")

    async def solve(
        self,
        task: CaptchaTask,
        fallback: bool = True,
    ) -> CaptchaResult:
        """Solve ``task`` with the primary solver, optionally falling back.

        Args:
            task: The CAPTCHA to solve.
            fallback: When True, every other registered solver is tried in
                registration order after the primary one fails.

        Returns:
            The first successful result, otherwise the result of the last
            solver that was tried.

        Raises:
            RuntimeError: if no solver has been registered.
        """
        primary = self._get_solver(task.captcha_type)
        result = await primary.solve(task)
        self._update_stats(primary.name, result)
        if result.success or not fallback:
            return result

        # Fallback to other solvers
        for solver in self.solvers:
            if solver.name == primary.name:
                continue
            logger.info(
                "Falling back to %s after %s failed",
                solver.name, primary.name,
            )
            result = await solver.solve(task)
            self._update_stats(solver.name, result)
            if result.success:
                break
        return result

    def _update_stats(self, solver_name: str, result: CaptchaResult):
        """Fold ``result`` into the counters kept for ``solver_name``."""
        s = self.stats[solver_name]
        s["total"] += 1
        if result.success:
            s["success"] += 1
        else:
            s["failed"] += 1
        s["total_cost"] += result.cost
        # Incremental mean: fold the new sample into the previous average.
        n = s["total"]
        s["avg_time_ms"] = (
            (s["avg_time_ms"] * (n - 1) + result.solve_time_ms) / n
        )

    async def get_balances(self) -> Dict[str, float]:
        """Return each solver's balance keyed by solver name.

        The balance requests run concurrently. A solver whose request
        raises is reported with the sentinel value -1.
        """
        async def balance_or_sentinel(solver: CaptchaSolver) -> float:
            try:
                return await solver.get_balance()
            except Exception:
                logger.warning(
                    "Balance check failed for %s", solver.name, exc_info=True
                )
                return -1

        solvers = list(self.solvers)
        values = await asyncio.gather(
            *(balance_or_sentinel(solver) for solver in solvers)
        )
        return {solver.name: value for solver, value in zip(solvers, values)}

    def get_stats(self) -> dict:
        """Return the live per-solver statistics mapping (not a copy)."""
        return self.stats
# Usage
async def main():
    """Demo: register two solvers, solve one reCAPTCHA v2 task, print a report."""
    service = CaptchaService()
    for solver in (
        TwoCaptchaSolver("YOUR_2CAPTCHA_KEY"),
        AntiCaptchaSolver("YOUR_ANTICAPTCHA_KEY"),
    ):
        service.add_solver(solver)

    # Route reCAPTCHA to 2Captcha, hCaptcha to Anti-Captcha
    routing = (
        (CaptchaType.RECAPTCHA_V2, "2captcha"),
        (CaptchaType.HCAPTCHA, "anti-captcha"),
    )
    for captcha_type, solver_name in routing:
        service.set_routing(captcha_type, solver_name)

    result = await service.solve(
        CaptchaTask(
            captcha_type=CaptchaType.RECAPTCHA_V2,
            site_url="https://example.com/login",
            site_key="6LcR_RsTAAAAANkB...",
        )
    )

    if not result.success:
        print(f"Failed: {result.error}")
    else:
        print(f"Solved in {result.solve_time_ms}ms by {result.solver}")
        print(f"Token: {result.solution[:50]}...")

    print(f"Balances: {await service.get_balances()}")
    print(f"Stats: {service.get_stats()}")
asyncio.run(main())
Integrating with Scrapers
async def scrape_with_captcha(
    url: str,
    service: CaptchaService,
    proxy: Optional[str] = "http://proxy:8080",
):
    """Fetch ``url`` and, if the page shows a reCAPTCHA, solve and resubmit.

    Args:
        url: Page to fetch; also the target of the follow-up POST.
        service: Solver service used to obtain the reCAPTCHA token.
        proxy: Proxy URL handed to the HTTP client, or None for a direct
            connection. Defaults to the previously hard-coded proxy.

    Returns:
        The response of the POST carrying the token when a reCAPTCHA was
        found and solved; otherwise the response of the initial GET.
    """
    # Function-scope import: ``re`` is not among the file's top-level imports.
    import re

    async with httpx.AsyncClient(proxy=proxy) as client:
        response = await client.get(url)
        if "g-recaptcha" not in response.text:
            return response

        # Extract site key from page; the attribute may use either quote style.
        match = re.search(r'data-sitekey=["\']([^"\']+)["\']', response.text)
        if not match:
            return response

        result = await service.solve(CaptchaTask(
            captcha_type=CaptchaType.RECAPTCHA_V2,
            site_url=url,
            site_key=match.group(1),
        ))
        if not result.success:
            logger.warning(
                "CAPTCHA on %s was not solved: %s", url, result.error
            )
            return response

        # Submit form with CAPTCHA token
        return await client.post(url, data={
            "g-recaptcha-response": result.solution,
        })


# Internal Links
- Building a Proxy Checker Tool — test proxies for CAPTCHA sites
- Building a Rate-Limited Scraper — reduce CAPTCHA triggers
- How to Avoid Getting Blocked — anti-detection strategies
- Anti-Detect Browser Guides — browser fingerprinting
- Best CAPTCHA Solving Services 2026 — compare solver providers
FAQ
Which CAPTCHA solver is cheapest?
Pricing varies by CAPTCHA type. For reCAPTCHA v2, rates range from $1 to $3 per 1,000 solves. hCaptcha is similar. Image CAPTCHAs are cheapest at $0.50-$1 per 1,000. Compare current rates across providers before choosing a default.
How long does CAPTCHA solving take?
Image CAPTCHAs solve in 5-15 seconds. reCAPTCHA v2 takes 15-45 seconds. reCAPTCHA v3 takes 10-20 seconds. hCaptcha is similar to reCAPTCHA v2. These times include submission, solving, and polling delays.
Can I solve CAPTCHAs locally without a service?
For image CAPTCHAs, yes — open-source OCR models handle simple text recognition. For reCAPTCHA and hCaptcha, local solving is impractical because these systems require browser interaction and behavioral analysis that automated solvers handle on their end.
How do I reduce CAPTCHA encounter rate?
Use residential proxies instead of datacenter proxies. Rotate user agents and browser fingerprints. Add realistic delays between requests. Maintain cookies and session state. These techniques reduce CAPTCHA challenges by 70-90% compared to bare scraping.
Should I retry if a CAPTCHA solution is rejected?
Yes, but limit retries to 2-3 attempts. If the same CAPTCHA is consistently rejected, the site may have changed its verification method. Report incorrect solutions to the solver service for refunds — most providers offer credits for failed solves.
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a Proxy Rotator in Python: Complete Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Bandwidth Optimization for Proxies: Reduce Costs & Increase Speed
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a Proxy Rotator in Python: Complete Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Bandwidth Optimization for Proxies: Reduce Costs & Increase Speed
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a News Crawler in Python: Step-by-Step Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Azure Functions for Serverless Web Scraping: the Complete Guide
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)
Related Reading
- Build an Anti-Detection Test Suite: Verify Browser Stealth
- Build a News Crawler in Python: Step-by-Step Tutorial
- AJAX Request Interception: Scraping API Calls Directly
- Azure Functions for Serverless Web Scraping: the Complete Guide
- How to Configure Proxies on iPhone and Android
- How to Use Proxies in Node.js (Axios, Fetch, Puppeteer)