Proxies for Real Estate Lead Generation (Zillow, Redfin, Realtor.com)
The real estate industry is an enormous B2B opportunity: by industry estimates, US real estate agents spend roughly $12 billion a year on technology, marketing, and lead generation tools. Mortgage brokers, title companies, home inspectors, photographers, staging companies, and PropTech startups all need to reach agents and brokerages as potential clients.
Scraping real estate platforms like Zillow, Redfin, and Realtor.com with mobile proxies provides access to agent directories, listing activity, and market data that enables targeted B2B outreach to the right professionals at the right time.
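The snippets in this guide pass a `proxy_config` dict to Playwright. Assuming a mobile proxy with a hostname, port, and credentials (all placeholders below), the dict Playwright's `chromium.launch(proxy=...)` expects looks like this:

```python
# Playwright accepts proxy settings at browser launch.
# The host, port, and credentials below are placeholders for your provider.
def build_proxy_config(host: str, port: int, username: str, password: str) -> dict:
    """Build the proxy dict that Playwright's chromium.launch(proxy=...) expects."""
    return {
        "server": f"http://{host}:{port}",
        "username": username,
        "password": password,
    }

proxy_config = build_proxy_config("mobile.example-proxy.com", 8000, "user", "pass")
```

All later examples assume a `proxy_config` built this way.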
Real Estate Data Sources
Primary Platforms
| Platform | Agent Data | Listing Data | Anti-Bot Level |
|---|---|---|---|
| Zillow | Agent profiles, reviews, sales history | Active/sold listings, Zestimates | High |
| Redfin | Agent profiles, transaction history | Listings, market data | Medium-High |
| Realtor.com | Agent directory, certifications | Listings, open houses | Medium |
| Homes.com | Agent profiles | Listings | Low |
| MLS (via IDX) | Full agent/listing data | Comprehensive | Varies |
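One practical consequence of the anti-bot column: request pacing should vary by platform. A sketch of per-platform pacing follows; the delay ranges are assumptions chosen to match the table, not published rate limits.

```python
import random

# Illustrative pacing keyed to the anti-bot levels in the table above.
PACING = {
    "zillow":      {"level": "high",        "delay_s": (6, 15)},
    "redfin":      {"level": "medium-high", "delay_s": (4, 10)},
    "realtor.com": {"level": "medium",      "delay_s": (3, 8)},
    "homes.com":   {"level": "low",         "delay_s": (2, 5)},
}

def next_delay(platform: str) -> float:
    """Pick a randomized delay (seconds) within the platform's configured range."""
    lo, hi = PACING[platform]["delay_s"]
    return random.uniform(lo, hi)
```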
B2B Use Cases
| If You Sell | Target These Agents | Data You Need |
|---|---|---|
| CRM/Tech tools | Top producers (20+ deals/year) | Transaction volume, current tech |
| Photography | Agents with low-quality listing photos | Listing photos, agent contact |
| Marketing services | New agents or declining agents | Sales trends, review counts |
| Mortgage services | High-volume agents | Average deal size, market area |
| Training/coaching | New agents (< 2 years) | License date, transaction count |
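The table above can also be codified as a coarse first-pass filter applied before full lead scoring. The thresholds below mirror the table and should be treated as starting points, not rules:

```python
# Minimum/maximum criteria per product type, taken from the table above.
TARGET_CRITERIA = {
    "crm_tech":  {"min_recent_sales": 20},
    "marketing": {"max_recent_sales": 10},
    "mortgage":  {"min_recent_sales": 30},
    "training":  {"max_years_licensed": 2},
}

def matches_target(agent: dict, product_type: str) -> bool:
    """Coarse first-pass filter before full lead scoring."""
    c = TARGET_CRITERIA.get(product_type, {})
    if "min_recent_sales" in c and agent.get("recent_sales", 0) < c["min_recent_sales"]:
        return False
    if "max_recent_sales" in c and agent.get("recent_sales", 0) >= c["max_recent_sales"]:
        return False
    if "max_years_licensed" in c and agent.get("years_licensed", 99) > c["max_years_licensed"]:
        return False
    return True
```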
Scraping Zillow Agent Data
Zillow has the most comprehensive agent database but also the strongest anti-bot protection:
```python
from playwright.async_api import async_playwright
import asyncio
import random
import re


async def scrape_zillow_agents(location, proxy_config, max_pages=10):
    """Scrape Zillow agent directory for a location"""
    agents = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            proxy=proxy_config,
            headless=False,
        )
        context = await browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            locale="en-US",
        )
        page = await context.new_page()
        for page_num in range(1, max_pages + 1):
            url = f"https://www.zillow.com/professionals/real-estate-agent-reviews/{location}/?page={page_num}"
            await page.goto(url, wait_until="networkidle")
            await page.wait_for_timeout(random.randint(4000, 8000))
            # Check for blocks
            if await page.query_selector('text="Please verify you are a human"'):
                print("CAPTCHA detected - pausing")
                await page.wait_for_timeout(120000)
                continue
            # Extract agent cards
            cards = await page.query_selector_all('[class*="agent-card"], [data-test="professional-card"]')
            if not cards:
                break
            for card in cards:
                agent = await extract_zillow_agent(card)
                if agent:
                    agents.append(agent)
            print(f"Page {page_num}: {len(cards)} agents found")
            await page.wait_for_timeout(random.randint(6000, 15000))
        await browser.close()
    return agents
```
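The fixed 120-second pause on CAPTCHA detection above is deliberately simple. If blocks recur, an exponential backoff with jitter adapts the wait instead; the base and cap values below are assumptions, not tuned numbers:

```python
import random

def captcha_backoff(attempt: int, base_s: float = 60.0, cap_s: float = 900.0) -> float:
    """Exponential backoff with jitter for repeated CAPTCHA hits.

    Attempt 0 waits roughly base_s seconds; each subsequent hit doubles
    the wait, capped at cap_s. Jitter avoids synchronized retry patterns
    across concurrent scrapers.
    """
    wait = min(base_s * (2 ** attempt), cap_s)
    return wait * random.uniform(0.8, 1.2)
```

Replace the flat `wait_for_timeout(120000)` with `await page.wait_for_timeout(int(captcha_backoff(attempt) * 1000))` and track `attempt` per session.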
```python
async def extract_zillow_agent(card):
    """Extract agent data from a Zillow agent card"""
    agent = {}
    name_el = await card.query_selector('[class*="name"], h2, h3')
    if name_el:
        agent['name'] = (await name_el.inner_text()).strip()
    # Company/brokerage
    company_el = await card.query_selector('[class*="company"], [class*="brokerage"]')
    if company_el:
        agent['brokerage'] = (await company_el.inner_text()).strip()
    # Phone
    phone_el = await card.query_selector('a[href^="tel:"]')
    if phone_el:
        agent['phone'] = (await phone_el.inner_text()).strip()
    # Sales count
    sales_el = await card.query_selector('[class*="sales"], [class*="transactions"]')
    if sales_el:
        sales_text = (await sales_el.inner_text()).strip()
        match = re.search(r'(\d+)', sales_text)
        if match:
            agent['recent_sales'] = int(match.group(1))
    # Rating
    rating_el = await card.query_selector('[class*="rating"]')
    if rating_el:
        rating_text = (await rating_el.inner_text()).strip()
        match = re.search(r'([\d.]+)', rating_text)
        if match:
            agent['rating'] = float(match.group(1))
    # Review count
    review_el = await card.query_selector('[class*="review"]')
    if review_el:
        review_text = (await review_el.inner_text()).strip()
        match = re.search(r'(\d+)', review_text)
        if match:
            agent['review_count'] = int(match.group(1))
    # Profile URL (guard against a missing href attribute)
    link_el = await card.query_selector('a[href*="/profile/"]')
    if link_el:
        href = await link_el.get_attribute('href')
        if href:
            agent['zillow_profile'] = href if href.startswith('http') else f"https://www.zillow.com{href}"
    return agent if agent.get('name') else None
```

Agent Detail Page Scraping
```python
async def scrape_agent_details(page, profile_url):
    """Scrape detailed agent profile from Zillow"""
    await page.goto(profile_url, wait_until="networkidle")
    await page.wait_for_timeout(random.randint(3000, 7000))
    details = {}
    # Specializations
    spec_els = await page.query_selector_all('[class*="specialty"], [class*="specialization"]')
    details['specializations'] = [(await el.inner_text()).strip() for el in spec_els]
    # Service areas
    area_els = await page.query_selector_all('[class*="service-area"], [class*="area"]')
    details['service_areas'] = [(await el.inner_text()).strip() for el in area_els]
    # Active listings count
    listings_el = await page.query_selector('[class*="active-listing"]')
    if listings_el:
        text = (await listings_el.inner_text()).strip()
        match = re.search(r'(\d+)', text)
        if match:
            details['active_listings'] = int(match.group(1))
    # Past sales data
    sales_section = await page.query_selector('[class*="past-sales"]')
    if sales_section:
        sales_items = await sales_section.query_selector_all('[class*="sale-item"]')
        details['past_sales'] = len(sales_items)
    # Website
    website_el = await page.query_selector('a[class*="website"]')
    if website_el:
        details['personal_website'] = await website_el.get_attribute('href')
    return details
```

Scraping Redfin Agent Data
Redfin provides detailed transaction history for agents:
```python
async def scrape_redfin_agents(city, state, proxy_config):
    """Scrape Redfin real estate agent directory"""
    async with async_playwright() as p:
        browser = await p.chromium.launch(proxy=proxy_config, headless=False)
        page = await browser.new_page()
        url = f"https://www.redfin.com/real-estate-agents/{city}-{state}"
        await page.goto(url, wait_until="networkidle")
        await page.wait_for_timeout(random.randint(4000, 8000))
        agents = []
        cards = await page.query_selector_all('[class*="agent-card"]')
        for card in cards:
            agent = {}
            name_el = await card.query_selector('[class*="agent-name"]')
            if name_el:
                agent['name'] = (await name_el.inner_text()).strip()
            # Redfin shows detailed stats
            stats = await card.query_selector_all('[class*="stat"]')
            for stat in stats:
                stat_text = (await stat.inner_text()).strip().lower()
                if 'deal' in stat_text or 'sale' in stat_text:
                    match = re.search(r'(\d+)', stat_text)
                    if match:
                        agent['deals_last_year'] = int(match.group(1))
                elif 'list price' in stat_text:
                    match = re.search(r'\$([\d,]+)', stat_text)
                    if match:
                        agent['avg_list_price'] = int(match.group(1).replace(',', ''))
            phone_el = await card.query_selector('a[href^="tel:"]')
            if phone_el:
                agent['phone'] = (await phone_el.inner_text()).strip()
            if agent.get('name'):
                agents.append(agent)
        await browser.close()
    return agents
```

Property Listing Data for Market Intelligence
Property listings provide context for agent outreach. Understanding the proxy infrastructure behind large-scale data collection is covered in our proxy glossary.
```python
async def scrape_market_data(zip_code, proxy_config):
    """Scrape market statistics for a zip code"""
    async with async_playwright() as p:
        browser = await p.chromium.launch(proxy=proxy_config)
        page = await browser.new_page()
        # Zillow market overview
        url = f"https://www.zillow.com/home-values/{zip_code}/"
        await page.goto(url, wait_until="networkidle")
        await page.wait_for_timeout(random.randint(3000, 6000))
        market_data = {"zip_code": zip_code}
        # Median home value
        value_el = await page.query_selector('[class*="median-value"], [class*="zhvi"]')
        if value_el:
            value_text = (await value_el.inner_text()).strip()
            match = re.search(r'\$([\d,]+)', value_text)
            if match:
                market_data['median_home_value'] = int(match.group(1).replace(',', ''))
        # Year-over-year change
        yoy_el = await page.query_selector('[class*="yoy"], [class*="change"]')
        if yoy_el:
            yoy_text = (await yoy_el.inner_text()).strip()
            match = re.search(r'([+-]?[\d.]+)%', yoy_text)
            if match:
                market_data['yoy_change_pct'] = float(match.group(1))
        await browser.close()
    return market_data
```

Lead Qualification for Real Estate
Score real estate agent leads based on your product fit:
```python
def qualify_real_estate_lead(agent, product_type):
    """Qualify a real estate agent as a B2B lead"""
    score = 0
    reasons = []
    if product_type == "crm_tech":
        # Target top producers who need better tools
        if agent.get('recent_sales', 0) >= 20:
            score += 30
            reasons.append("High-volume producer")
        if agent.get('review_count', 0) < 5:
            score += 20
            reasons.append("Low online presence - may need tech help")
        if not agent.get('personal_website'):
            score += 15
            reasons.append("No personal website")
    elif product_type == "photography":
        # Target active agents without professional photos
        if agent.get('active_listings', 0) >= 3:
            score += 25
            reasons.append("Multiple active listings")
        if agent.get('avg_list_price', 0) > 500000:
            score += 20
            reasons.append("Luxury market - values professional presentation")
    elif product_type == "marketing":
        # Target newer agents or those with declining reviews
        if agent.get('recent_sales', 0) < 10:
            score += 25
            reasons.append("Lower volume - needs marketing boost")
        if agent.get('rating', 5) < 4.5:
            score += 15
            reasons.append("Room for reputation improvement")
    elif product_type == "mortgage":
        # Target high-volume agents for referral partnerships
        if agent.get('recent_sales', 0) >= 30:
            score += 35
            reasons.append("Very high volume - strong referral potential")
        if agent.get('avg_list_price', 0) > 300000:
            score += 15
            reasons.append("Good deal sizes")
    # Universal scoring factors
    if agent.get('phone'):
        score += 10
        reasons.append("Phone available for outreach")
    if agent.get('brokerage'):
        score += 5
    agent['lead_score'] = score
    agent['qualification_reasons'] = reasons
    return agent
```

Scaling Across Markets
Scrape agent data across multiple markets using geo-targeted proxies. For teams conducting ecommerce scraping alongside real estate data, the proxy rotation patterns are similar.
```python
US_METRO_MARKETS = [
    {"city": "new-york", "state": "ny", "proxy_geo": "US-NY"},
    {"city": "los-angeles", "state": "ca", "proxy_geo": "US-CA"},
    {"city": "chicago", "state": "il", "proxy_geo": "US-IL"},
    {"city": "houston", "state": "tx", "proxy_geo": "US-TX"},
    {"city": "miami", "state": "fl", "proxy_geo": "US-FL"},
    {"city": "seattle", "state": "wa", "proxy_geo": "US-WA"},
    {"city": "denver", "state": "co", "proxy_geo": "US-CO"},
    {"city": "atlanta", "state": "ga", "proxy_geo": "US-GA"},
    {"city": "dallas", "state": "tx", "proxy_geo": "US-TX"},
    {"city": "phoenix", "state": "az", "proxy_geo": "US-AZ"},
]
```
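`national_agent_scrape` below calls `proxy_pool.get_proxy(geo=...)`, but that object isn't defined anywhere in this guide. A minimal geo-aware pool might look like the following sketch; the entries and round-robin rotation strategy are illustrative, and a production pool would add health checks and IP-rotation hooks:

```python
import itertools
from collections import defaultdict

class ProxyPool:
    """Minimal geo-aware pool matching the get_proxy(geo=...) call used below."""

    def __init__(self, proxies: list[dict]):
        # Group proxy entries by geo, then round-robin within each group.
        by_geo = defaultdict(list)
        for p in proxies:
            by_geo[p["geo"]].append(p)
        self._cycles = {geo: itertools.cycle(plist) for geo, plist in by_geo.items()}

    def get_proxy(self, geo: str) -> dict:
        if geo not in self._cycles:
            raise KeyError(f"No proxies configured for geo {geo}")
        return next(self._cycles[geo])
```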
```python
async def national_agent_scrape(proxy_pool, product_type):
    """Scrape agent data across major US markets"""
    all_leads = []
    for market in US_METRO_MARKETS:
        proxy = proxy_pool.get_proxy(geo=market["proxy_geo"])
        proxy_config = {
            "server": proxy["url"],
            "username": proxy["username"],
            "password": proxy["password"],
        }
        # Scrape from multiple platforms per market
        zillow_agents = await scrape_zillow_agents(
            f"{market['city']}-{market['state']}",
            proxy_config,
            max_pages=5,
        )
        redfin_agents = await scrape_redfin_agents(
            market["city"],
            market["state"],
            proxy_config,
        )
        # Merge and deduplicate
        combined = merge_agent_lists(zillow_agents, redfin_agents)
        # Qualify leads ("new-york" -> "New York, NY")
        market_label = f"{market['city'].replace('-', ' ').title()}, {market['state'].upper()}"
        for agent in combined:
            agent['market'] = market_label
            qualify_real_estate_lead(agent, product_type)
        all_leads.extend(combined)
        print(f"{market_label}: {len(combined)} agents scraped")
        await asyncio.sleep(random.uniform(30, 90))
    return all_leads


def merge_agent_lists(*agent_lists):
    """Merge agent lists from multiple platforms and deduplicate"""
    seen = {}
    for agents in agent_lists:
        for agent in agents:
            # Deduplicate by name + brokerage
            key = f"{agent.get('name', '').lower()}_{agent.get('brokerage', '').lower()}"
            if key in seen:
                # Merge data (keep non-null values)
                for k, v in agent.items():
                    if v and not seen[key].get(k):
                        seen[key][k] = v
            else:
                seen[key] = agent
    return list(seen.values())
```

Export and CRM Integration
```python
import csv


def export_real_estate_leads(agents, output_file="real_estate_leads.csv"):
    """Export qualified real estate leads"""
    fieldnames = [
        'name', 'brokerage', 'phone', 'market',
        'recent_sales', 'active_listings', 'avg_list_price',
        'rating', 'review_count', 'specializations',
        'personal_website', 'zillow_profile',
        'lead_score', 'qualification_reasons',
    ]
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        sorted_agents = sorted(agents, key=lambda x: x.get('lead_score', 0), reverse=True)
        for agent in sorted_agents:
            row = agent.copy()
            row['specializations'] = '; '.join(agent.get('specializations', []))
            row['qualification_reasons'] = '; '.join(agent.get('qualification_reasons', []))
            writer.writerow(row)
    print(f"Exported {len(agents)} real estate leads to {output_file}")
```

Conclusion
Real estate platforms contain rich, structured data about agents and their activity that enables precise B2B targeting. By scraping Zillow, Redfin, and Realtor.com with mobile proxies, you can build comprehensive agent databases segmented by market, production volume, specialization, and online presence. The key insight is that different B2B products require different agent profiles — CRM tools target top producers, marketing services target newer agents, and mortgage services target high-volume closers. Automate your scraping across major metro markets, qualify leads against your specific product fit criteria, and deliver personalized outreach that references their actual market activity.
Related Reading
- How to Build an Automated Lead Scraping Pipeline with Proxies
- Building a B2B Contact Enrichment Pipeline with Mobile Proxies
- How to Scrape Job Listings at Scale with Rotating Proxies
- Proxies for HR Tech: Salary Benchmarking & Talent Intelligence
- aiohttp + BeautifulSoup: Async Python Scraping
- How to Scrape AliExpress Product Data Without Getting Blocked
last updated: April 3, 2026