How to Scrape LinkedIn Jobs Data in 2026

How to Scrape LinkedIn Jobs Data in 2026

LinkedIn is the world’s largest professional network with over 1 billion members and millions of active job postings. For HR analytics teams, recruitment agencies, salary benchmarking services, and labor market researchers, scraping LinkedIn job data provides critical insights into hiring trends, compensation ranges, skill demand, and employer activity.

This guide covers how to scrape LinkedIn job postings using Python, navigate their anti-scraping defenses, and use proxies for reliable data extraction.

What Data Can You Extract from LinkedIn Jobs?

LinkedIn job postings contain valuable data:

  • Job titles and descriptions
  • Company information (name, size, industry)
  • Location (on-site, remote, hybrid)
  • Salary range (when provided)
  • Required skills and qualifications
  • Experience level (entry, mid, senior, executive)
  • Employment type (full-time, part-time, contract)
  • Posted date and application count
  • Benefits information

Example JSON Output

{
  "job_id": "3845678901",
  "title": "Senior Data Engineer",
  "company": "Tech Corp",
  "location": "San Francisco, CA (Hybrid)",
  "salary_range": "$150,000 - $200,000/yr",
  "posted": "2 days ago",
  "applicants": 145,
  "experience_level": "Mid-Senior level",
  "employment_type": "Full-time",
  "description": "We are looking for a Senior Data Engineer...",
  "skills": ["Python", "Apache Spark", "SQL", "AWS", "Airflow"],
  "url": "https://www.linkedin.com/jobs/view/3845678901"
}

Prerequisites

pip install requests beautifulsoup4 lxml fake-useragent selenium

LinkedIn has extremely aggressive anti-bot protections. Residential proxies are mandatory.

Method 1: Scraping LinkedIn’s Public Job Search

LinkedIn’s job search pages are partially accessible without authentication:

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import json
import time
import random

class LinkedInJobScraper:
    """Scraper for LinkedIn's public job-search pages (no login required).

    A persistent ``requests.Session`` is reused across calls; every request
    gets a freshly rotated User-Agent and, when configured, is routed
    through a single proxy for both HTTP and HTTPS traffic.
    """

    def __init__(self, proxy_url=None):
        """
        Args:
            proxy_url: Optional proxy URL such as "http://user:pass@host:port",
                applied to both http and https traffic.
        """
        self.session = requests.Session()
        self.ua = UserAgent()
        self.proxy_url = proxy_url

    def _get_headers(self):
        """Build browser-like request headers with a random User-Agent."""
        return {
            "User-Agent": self.ua.random,
            "Accept": "text/html,application/xhtml+xml",
            "Accept-Language": "en-US,en;q=0.9",
            "Referer": "https://www.linkedin.com/",
            "Connection": "keep-alive",
        }

    def _get_proxies(self):
        """Return a requests-style proxies mapping, or None when no proxy is set."""
        if self.proxy_url:
            return {"http": self.proxy_url, "https": self.proxy_url}
        return None

    def search_jobs(self, keywords, location="", max_pages=5):
        """Search LinkedIn jobs (public, no auth required).

        Args:
            keywords: Search terms; spaces and special characters are fine
                because the query string is built via ``params``.
            location: Optional location filter.
            max_pages: Result pages to fetch; LinkedIn returns 25 jobs per page.

        Returns:
            list[dict]: One dict per job card with ``title``, ``company``,
            ``location``, ``url`` and ``posted`` keys (values may be None).
        """
        all_jobs = []

        for page in range(max_pages):
            start = page * 25  # LinkedIn paginates in increments of 25

            try:
                # Fix: build the query string with `params` so keywords and
                # locations containing spaces or "&" are properly URL-encoded
                # (the original interpolated them raw into the URL).
                response = self.session.get(
                    "https://www.linkedin.com/jobs/search/",
                    params={"keywords": keywords, "location": location, "start": start},
                    headers=self._get_headers(),
                    proxies=self._get_proxies(),
                    timeout=30
                )
                response.raise_for_status()
                soup = BeautifulSoup(response.text, "lxml")

                job_cards = soup.select("div.base-card, li.result-card")
                for card in job_cards:
                    try:
                        job = {}
                        title = card.select_one("h3.base-search-card__title, h3")
                        job["title"] = title.get_text(strip=True) if title else None

                        company = card.select_one("h4.base-search-card__subtitle a, a.hidden-nested-link")
                        job["company"] = company.get_text(strip=True) if company else None

                        location_elem = card.select_one("span.job-search-card__location")
                        job["location"] = location_elem.get_text(strip=True) if location_elem else None

                        link = card.select_one("a.base-card__full-link, a")
                        # Strip tracking query parameters from the job link.
                        job["url"] = link["href"].split("?")[0] if link and link.get("href") else None

                        date = card.select_one("time")
                        job["posted"] = date.get("datetime") if date else None

                        # Cards with no title are ads/placeholders; skip them.
                        if job.get("title"):
                            all_jobs.append(job)
                    except Exception:
                        continue  # one malformed card must not abort the page

                print(f"Page {page + 1}: Found {len(job_cards)} jobs")
                time.sleep(random.uniform(3, 7))  # human-like inter-page delay

            except requests.RequestException as e:
                print(f"Error on page {page + 1}: {e}")
                continue

        return all_jobs

    def scrape_job_detail(self, job_url):
        """Scrape detailed job description from a job posting.

        Prefers the JSON-LD ``JobPosting`` block embedded in the page and
        falls back to HTML selectors when structured data is absent.

        Returns:
            dict of job fields, or None on a request error.
        """
        try:
            response = self.session.get(
                job_url,
                headers=self._get_headers(),
                proxies=self._get_proxies(),
                timeout=30
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "lxml")

            job = {}

            # JSON-LD structured data (schema.org JobPosting)
            scripts = soup.find_all("script", type="application/ld+json")
            for script in scripts:
                try:
                    # Fix: script.string can be None, which made json.loads
                    # raise TypeError (not JSONDecodeError) and escape the
                    # except clause below.
                    data = json.loads(script.string or "")
                    # Fix: some pages embed a JSON-LD *list*; guard so
                    # .get() is only called on dicts.
                    if isinstance(data, dict) and data.get("@type") == "JobPosting":
                        # Fix: an empty jobLocation list previously raised an
                        # uncaught IndexError via [0].
                        locations = data.get("jobLocation")
                        if isinstance(locations, list) and locations:
                            locality = locations[0].get("address", {}).get("addressLocality")
                        else:
                            locality = None
                        job = {
                            "title": data.get("title"),
                            "description": data.get("description"),
                            "company": data.get("hiringOrganization", {}).get("name"),
                            "location": locality,
                            "salary": data.get("baseSalary"),
                            "employment_type": data.get("employmentType"),
                            "date_posted": data.get("datePosted"),
                            "valid_through": data.get("validThrough"),
                            "industry": data.get("industry"),
                        }
                        break
                except json.JSONDecodeError:
                    continue

            # Fallback to HTML when no structured data was found
            if not job.get("title"):
                title = soup.select_one("h1, h2.top-card-layout__title")
                job["title"] = title.get_text(strip=True) if title else None

                desc = soup.select_one("div.description__text, div.show-more-less-html__markup")
                job["description"] = desc.get_text(strip=True) if desc else None

            return job

        except requests.RequestException as e:
            print(f"Error: {e}")
            return None


# Usage
scraper = LinkedInJobScraper(proxy_url="http://user:pass@proxy:port")

# Search jobs across the first three result pages
jobs = scraper.search_jobs("data engineer", "San Francisco", max_pages=3)
print(f"Found {len(jobs)} jobs")

# Fetch the detail page for the first result, if it carries a URL
first = jobs[0] if jobs else {}
if first.get("url"):
    print(json.dumps(scraper.scrape_job_detail(first["url"]), indent=2))

Handling LinkedIn Anti-Bot Protections

1. Authentication Walls

LinkedIn shows limited data without login. Use public job search URLs which are more accessible than profile-gated content.

2. Rate Limiting

LinkedIn blocks IPs aggressively. Use 5-10 second delays and rotate proxies every 3-5 requests.

3. Bot Detection

LinkedIn uses advanced fingerprinting. Use residential proxies and rotate user agents on every request.

4. Legal Action History

LinkedIn has actively pursued legal action against scrapers (hiQ Labs v. LinkedIn). Exercise caution and consult legal counsel.

Proxy Recommendations

| Proxy Type  | Success Rate | Best For             |
|-------------|--------------|----------------------|
| Residential | 60-75%       | Public job search    |
| Mobile      | 75-85%       | Job detail scraping  |
| ISP         | 55-65%       | Session-based access |
| Datacenter  | 5-15%        | Not recommended      |

Residential proxies are essential for LinkedIn. The platform aggressively blocks datacenter IPs.

Legal Considerations

  1. Terms of Service: LinkedIn explicitly prohibits scraping in their User Agreement.
  2. hiQ Labs v. LinkedIn: This landmark case established that scraping public data is not a CFAA violation, but LinkedIn still pursues legal action.
  3. GDPR/CCPA: Job posting data may contain PII. Comply with applicable privacy regulations.
  4. Commercial Use: Significant legal risk for commercial scraping of LinkedIn data.

See our web scraping compliance guide for details.

Frequently Asked Questions

Can I scrape LinkedIn jobs without logging in?

Yes. LinkedIn’s public job search pages are accessible without authentication and contain job titles, companies, locations, and links to full postings. Detailed job descriptions are partially accessible on public pages.

Is LinkedIn job scraping legal?

The legal landscape is complex. The hiQ Labs v. LinkedIn case provides some precedent for scraping public data, but LinkedIn continues to pursue scrapers. Consult a lawyer for your specific use case.

How many job listings can I scrape per day?

With rotating residential proxies and careful rate limiting, expect 2,000-5,000 job listings per day. LinkedIn is among the most aggressive platforms at blocking scrapers.

Are there LinkedIn API alternatives for job data?

LinkedIn offers APIs for authorized partners, but access is restricted. The public job search pages provide the most accessible data for research purposes.

Method 2: Using Selenium for Full Job Details

For richer job data including full descriptions, required skills, and company details, use Selenium to render the JavaScript-heavy job detail pages:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import json
import time
import random

class LinkedInSeleniumScraper:
    """Scrape LinkedIn job-detail pages with headless Chrome.

    Job-detail pages are JavaScript-heavy, so a real browser loads the page,
    expands the truncated description, and extracts fields from the rendered
    DOM (plus any JSON-LD JobPosting block) via injected JavaScript.
    """

    def __init__(self, proxy=None):
        """Start a headless Chrome instance.

        Args:
            proxy: Optional proxy address passed to Chrome via --proxy-server.
        """
        options = Options()
        options.add_argument("--headless=new")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--window-size=1920,1080")
        if proxy:
            options.add_argument(f"--proxy-server={proxy}")
        self.driver = webdriver.Chrome(options=options)

    def scrape_job_listing(self, job_url):
        """Load *job_url* and return a dict of extracted job fields.

        Returns None when no heading element renders within 15 seconds
        (presumably an auth wall or block page — verify against live pages).
        """
        self.driver.get(job_url)
        # Randomized settle delay before probing the DOM.
        time.sleep(random.uniform(3, 6))

        try:
            WebDriverWait(self.driver, 15).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "h1, h2"))
            )
        except Exception:
            return None

        # Click "Show more" to expand description
        try:
            show_more = self.driver.find_element(By.CSS_SELECTOR, "button.show-more-less-html__button")
            show_more.click()
            time.sleep(1)
        except Exception:
            # Button absent (short description) or not clickable — either way,
            # proceed with whatever description is already visible.
            pass

        # All extraction happens in-page: top-card selectors, the criteria
        # list (seniority, employment type, ...), and a JSON-LD JobPosting
        # block for salary/date fields when present.
        data = self.driver.execute_script('''
            const result = {};
            const title = document.querySelector("h1, h2.top-card-layout__title");
            result.title = title ? title.innerText.trim() : null;

            const company = document.querySelector("a.topcard__org-name-link, span.topcard__flavor");
            result.company = company ? company.innerText.trim() : null;

            const location = document.querySelector("span.topcard__flavor--bullet");
            result.location = location ? location.innerText.trim() : null;

            const description = document.querySelector("div.show-more-less-html__markup, div.description__text");
            result.description = description ? description.innerText.trim() : null;

            // Extract criteria (seniority, employment type, etc.)
            const criteria = document.querySelectorAll("li.description__job-criteria-item");
            criteria.forEach(item => {
                const label = item.querySelector("h3");
                const value = item.querySelector("span");
                if (label && value) {
                    const key = label.innerText.trim().toLowerCase().replace(/\\s+/g, "_");
                    result[key] = value.innerText.trim();
                }
            });

            // JSON-LD structured data
            const scripts = document.querySelectorAll('script[type="application/ld+json"]');
            for (const script of scripts) {
                try {
                    const json = JSON.parse(script.textContent);
                    if (json["@type"] === "JobPosting") {
                        result.salary = json.baseSalary || null;
                        result.date_posted = json.datePosted || null;
                        result.valid_through = json.validThrough || null;
                        result.employment_type = json.employmentType || result.employment_type || null;
                    }
                } catch {}
            }

            return result;
        ''')

        return data

    def close(self):
        """Quit the browser and release its resources."""
        self.driver.quit()

Data Storage and Analysis

For labor market research and salary benchmarking, store scraped job data in a structured database:

import sqlite3
import json
from datetime import datetime

class JobDataStore:
    """SQLite-backed store for scraped LinkedIn job postings.

    Rows are upserted on ``job_id``, so re-scraping the same posting
    refreshes it instead of duplicating it. Aggregate helpers support
    salary benchmarking and employer-activity analysis.
    """

    def __init__(self, db_path="linkedin_jobs.db"):
        """Open (or create) the database and ensure the jobs table exists.

        Args:
            db_path: Path to the SQLite file; ":memory:" gives an ephemeral DB.
        """
        self.conn = sqlite3.connect(db_path)
        self.conn.execute('''CREATE TABLE IF NOT EXISTS jobs
            (job_id TEXT PRIMARY KEY, title TEXT, company TEXT,
             location TEXT, salary_range TEXT, employment_type TEXT,
             experience_level TEXT, description TEXT, skills TEXT,
             posted_date TEXT, url TEXT,
             scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')

    def store_job(self, job):
        """Insert or refresh one job record (upsert keyed on job_id).

        Args:
            job: Mapping of job fields; missing keys are stored as NULL and
                the "skills" list is serialized to a JSON string.
        """
        self.conn.execute(
            """INSERT OR REPLACE INTO jobs
            (job_id, title, company, location, salary_range, employment_type,
             experience_level, description, skills, posted_date, url)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (job.get("job_id"), job.get("title"), job.get("company"),
             job.get("location"), job.get("salary_range"), job.get("employment_type"),
             job.get("experience_level"), job.get("description"),
             json.dumps(job.get("skills", [])), job.get("posted"), job.get("url"))
        )
        self.conn.commit()

    def get_salary_stats(self, keyword):
        """Return (salary_range, count) rows for titles matching *keyword*."""
        cursor = self.conn.execute(
            "SELECT salary_range, COUNT(*) FROM jobs WHERE title LIKE ? AND salary_range IS NOT NULL GROUP BY salary_range",
            (f"%{keyword}%",)
        )
        return cursor.fetchall()

    def get_top_companies(self, keyword, limit=20):
        """Return up to *limit* (company, count) rows, most postings first,
        for titles matching *keyword*."""
        cursor = self.conn.execute(
            "SELECT company, COUNT(*) as count FROM jobs WHERE title LIKE ? GROUP BY company ORDER BY count DESC LIMIT ?",
            (f"%{keyword}%", limit)
        )
        return cursor.fetchall()

    def close(self):
        """Close the underlying connection.

        Fix: the original never released the connection, leaking the file
        handle for the lifetime of the process.
        """
        self.conn.close()

    def __enter__(self):
        """Support ``with JobDataStore(...) as store:`` usage."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Always close on context exit; never swallow exceptions."""
        self.close()
        return False

Conclusion

LinkedIn job scraping requires careful handling due to aggressive anti-bot systems and legal considerations. Public job search pages provide good basic data without authentication, while Selenium enables extraction of full job descriptions and structured data from detail pages. Use residential proxies with conservative rate limiting, store results in a database for longitudinal analysis, and always consider the legal implications before scraping at scale.

For more job data scraping guides, visit our web scraping proxy guide and proxy provider comparisons.


Related Reading

Scroll to Top