← Back to Notes

Playwright V2: Bypassed Every Anti-Bot System

Basic stealth mode isn't enough anymore. Here's how I get past Cloudflare Turnstile, Akamai, DataDome, and other major anti-bot systems, including real CAPTCHA solving.

What's Different From V1

Ultimate Stealth Setup

import asyncio
from playwright.async_api import async_playwright
import random
import fingerprint_generator

class StealthBrowser:
    """
    Playwright Chromium wrapper that applies a generated fingerprint.

    One fingerprint is generated per instance and applied to the browser
    context. The fingerprint mapping is expected to provide the keys read in
    ``start()``: ``viewport``, ``user_agent``, ``locale``, ``timezone``,
    ``geolocation``, ``color_scheme`` and ``dpr``.

    Call ``start()`` to launch the browser and ``close()`` to release it.
    """

    def __init__(self):
        self.fingerprint = fingerprint_generator.generate()
        # Populated by start(); kept as None so close() is always safe to call.
        self.playwright = None
        self.browser = None
        self.context = None
        self.page = None

    async def start(self):
        """
        Launch the browser, create the context and open a page.

        Returns:
            The newly opened page (also stored on ``self.page``).
        """
        self.playwright = await async_playwright().start()

        # Launch with stealth options
        self.browser = await self.playwright.chromium.launch(
            headless=False,  # Headless gets detected more
            args=self._get_stealth_args(),
            channel=self._get_browser_channel()  # Use installed Chrome
        )

        # Create context with spoofed fingerprint
        self.context = await self.browser.new_context(
            viewport=self.fingerprint['viewport'],
            user_agent=self.fingerprint['user_agent'],
            locale=self.fingerprint['locale'],
            timezone_id=self.fingerprint['timezone'],
            geolocation=self.fingerprint['geolocation'],
            permissions=['geolocation'],
            color_scheme=self.fingerprint['color_scheme'],
            device_scale_factor=self.fingerprint['dpr'],
        )

        # Inject stealth scripts. The permissions override binds the native
        # query() to its owner object: calling the detached native function
        # fails with "Illegal invocation".
        await self.context.add_init_script("""
            // Override navigator properties
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            });

            // Override Chrome detection
            window.chrome = {
                runtime: {},
                loadTimes: function() {},
                csi: function() {},
                app: {}
            };

            // Override permissions API
            const originalQuery = window.navigator.permissions.query.bind(window.navigator.permissions);
            window.navigator.permissions.query = (parameters) => (
                parameters.name === 'notifications' ?
                    Promise.resolve({ state: Notification.permission }) :
                    originalQuery(parameters)
            );

            // WebGL spoofing
            const getParameter = WebGLRenderingContext.prototype.getParameter;
            WebGLRenderingContext.prototype.getParameter = function(parameter) {
                if (parameter === 37445) {
                    return 'Intel Inc.';
                }
                if (parameter === 37446) {
                    return 'Intel Iris OpenGL Engine';
                }
                return getParameter.call(this, parameter);
            };

            // Canvas fingerprint randomization
            const originalToDataURL = HTMLCanvasElement.prototype.toDataURL;
            HTMLCanvasElement.prototype.toDataURL = function(type) {
                const context = this.getContext('2d');
                if (context) {
                    const imageData = context.getImageData(0, 0, this.width, this.height);
                    for (let i = 0; i < imageData.data.length; i += 4) {
                        imageData.data[i] = imageData.data[i] + Math.floor(Math.random() * 3) - 1;
                    }
                    context.putImageData(imageData, 0, 0);
                }
                return originalToDataURL.apply(this, arguments);
            };

            // AudioContext spoofing
            const audioContext = window.AudioContext || window.webkitAudioContext;
            if (audioContext) {
                const originalCreateAnalyser = audioContext.prototype.createAnalyser;
                audioContext.prototype.createAnalyser = function() {
                    const analyser = originalCreateAnalyser.call(this);
                    const originalGetFloatFrequencyData = analyser.getFloatFrequencyData;
                    analyser.getFloatFrequencyData = function(array) {
                        originalGetFloatFrequencyData.call(this, array);
                        for (let i = 0; i < array.length; i++) {
                            array[i] = array[i] + Math.random() * 0.0001;
                        }
                    };
                    return analyser;
                };
            }
        """)

        self.page = await self.context.new_page()

        return self.page

    async def close(self):
        """
        Close the browser and stop the Playwright driver.

        Safe to call more than once, and safe to call if ``start()`` was never
        run or failed part-way.
        """
        if self.browser is not None:
            await self.browser.close()
            self.browser = None
        if self.playwright is not None:
            await self.playwright.stop()
            self.playwright = None
        self.context = None
        self.page = None

    def _get_stealth_args(self):
        """
        Return the Chromium command-line switches used at launch.

        WARNING: several of these switches (``--disable-web-security``,
        ``--no-sandbox``, ``--disable-setuid-sandbox``, the certificate
        switches) weaken the browser's own security. Do not browse untrusted
        content or sign in to personal accounts with a browser launched this
        way.
        """
        return [
            '--disable-blink-features=AutomationControlled',
            '--disable-dev-shm-usage',
            '--disable-background-timer-throttling',
            '--disable-backgrounding-occluded-windows',
            '--disable-renderer-backgrounding',
            # A repeated switch overrides the earlier occurrence, so all
            # disabled features must be listed in a single switch.
            '--disable-features=IsolateOrigins,site-per-process,VizDisplayCompositor',
            '--disable-web-security',
            '--start-maximized',
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-infobars',
            '--window-position=0,0',
            '--ignore-certificate-errors',
            '--ignore-ssl-errors',
            '--ignore-certificate-errors-spki-list',
        ]

    def _get_browser_channel(self):
        """
        Use installed Chrome instead of bundled Chromium.

        Returns:
            ``'chrome'`` when a Chrome binary exists at one of the well-known
            install paths, otherwise ``None`` (bundled Chromium is used).
        """
        import os
        chrome_paths = (
            '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',  # macOS
            'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',  # Windows
            '/usr/bin/google-chrome',  # Linux
        )
        if any(os.path.exists(path) for path in chrome_paths):
            return 'chrome'
        return None  # Fall back to chromium

# Usage
async def main():
    """Start a StealthBrowser, run one navigation, then keep it open for debugging."""
    browser = StealthBrowser()
    page = await browser.start()

    try:
        # Navigate with human-like behavior
        await human_like_navigate(page, 'https://example.com')

        await asyncio.sleep(300)  # Keep alive for debugging
    finally:
        # Always release the browser process and the Playwright driver,
        # even when navigation fails or the run is interrupted.
        await browser.browser.close()
        await browser.playwright.stop()


# Guarded so importing this module does not launch a browser.
if __name__ == "__main__":
    asyncio.run(main())

Cloudflare Turnstile Bypass

import requests
import base64

async def bypass_cloudflare_turnstile(page, url):
    """
    Bypass Cloudflare Turnstile CAPTCHA using 2Captcha

    Args:
        page: Playwright page used for navigation and token injection.
        url: Address of the page to load; also sent to the solver as ``pageurl``.

    Returns:
        True when no Turnstile widget is present or a token was injected,
        False when a widget is present but no site key could be read.

    Raises:
        Exception: if the solver rejects the task, reports a terminal error,
            or no token arrives within the polling window.
    """
    # Navigate to page
    await page.goto(url, wait_until='networkidle')

    # Check if Turnstile is present
    turnstile_present = await page.query_selector('.cf-turnstile')
    if not turnstile_present:
        return True  # No CAPTCHA needed

    # Get site key
    site_key = await page.evaluate('''() => {
        const elem = document.querySelector('[data-sitekey]');
        return elem ? elem.getAttribute('data-sitekey') : null;
    }''')

    if not site_key:
        return False

    # Solve CAPTCHA using 2Captcha
    api_key = 'YOUR_2CAPTCHA_API_KEY'

    # requests is blocking; run it in the default executor so the event loop
    # (and the browser connection it drives) stays responsive.
    loop = asyncio.get_running_loop()

    # Submit CAPTCHA (HTTPS, so the API key is not sent in cleartext)
    submit_url = 'https://2captcha.com/in.php'
    submit_data = {
        'key': api_key,
        'method': 'turnstile',
        'sitekey': site_key,
        'pageurl': url,
        'json': 1
    }

    response = await loop.run_in_executor(
        None, lambda: requests.post(submit_url, data=submit_data, timeout=30)
    )
    result = response.json()

    if result['status'] != 1:
        raise Exception(f"Failed to submit CAPTCHA: {result}")

    captcha_id = result['request']

    # Wait for solution: 60 polls, 3 seconds apart (about 3 minutes in total)
    result_url = 'https://2captcha.com/res.php'
    result_params = {
        'key': api_key,
        'action': 'get',
        'id': captcha_id,
        'json': 1
    }
    for _ in range(60):
        await asyncio.sleep(3)

        response = await loop.run_in_executor(
            None, lambda: requests.get(result_url, params=result_params, timeout=30)
        )
        result = response.json()

        if result['status'] != 1:
            # 'CAPCHA_NOT_READY' (sic) means "still working"; any other
            # value is a terminal error, so stop polling immediately.
            if result.get('request') != 'CAPCHA_NOT_READY':
                raise Exception(f"CAPTCHA solving failed: {result}")
            continue

        token = result['request']

        # Inject token
        await page.evaluate('''(token) => {
                const textarea = document.querySelector('[name="cf-turnstile-response"]');
                if (textarea) {
                    textarea.value = token;
                    // Trigger change event
                    textarea.dispatchEvent(new Event('change', { bubbles: true }));
                }
            }''', token)

        # Submit form
        submit_button = await page.query_selector('button[type="submit"], input[type="submit"]')
        if submit_button:
            await submit_button.click()

        return True

    raise Exception("CAPTCHA solving timeout")

# Alternative: Use YesCAPTCHA (faster, more expensive)
async def solve_with_yescaptcha(page, url, max_polls=60):
    """
    YesCAPTCHA has better success rate for Cloudflare

    Args:
        page: Playwright page that is already showing the challenge.
        url: Address of that page, sent to the solver as ``websiteURL``.
        max_polls: Maximum number of result polls (2 seconds apart) before
            giving up.

    Returns:
        True after the token was injected and the form submitted, False when
        no site key could be read from the page.

    Raises:
        Exception: if the service reports an error or no solution arrives
            within ``max_polls`` polls.
    """
    api_key = 'YESCAPTCHA_API_KEY'

    # Get challenge
    site_key = await page.evaluate('''() => {
        const elem = document.querySelector('[data-sitekey]');
        return elem ? elem.getAttribute('data-sitekey') : null;
    }''')

    if not site_key:
        return False

    # requests is blocking; keep the event loop free while it waits.
    loop = asyncio.get_running_loop()

    # Submit to YesCAPTCHA
    submit_data = {
        'clientKey': api_key,
        'task': {
            'type': 'TurnstileTaskProxyless',
            'websiteURL': url,
            'websiteKey': site_key,
        }
    }

    response = await loop.run_in_executor(
        None,
        lambda: requests.post(
            'https://api.yescaptcha.com/createTask',
            json=submit_data,
            timeout=30,
        ),
    )
    result = response.json()

    # A rejected task has no taskId; surface the service's error instead of
    # failing with a KeyError.
    if result.get('errorId') or 'taskId' not in result:
        raise Exception(f"Failed to submit CAPTCHA: {result}")

    task_id = result['taskId']
    poll_data = {
        'clientKey': api_key,
        'taskId': task_id
    }

    # Poll for result, bounded so a stuck task cannot hang the caller forever
    for _ in range(max_polls):
        await asyncio.sleep(2)

        response = await loop.run_in_executor(
            None,
            lambda: requests.post(
                'https://api.yescaptcha.com/getTaskResult',
                json=poll_data,
                timeout=30,
            ),
        )
        result = response.json()

        if result.get('errorId'):
            raise Exception(f"CAPTCHA solving failed: {result}")

        if result.get('status') == 'ready':
            token = result['solution']['token']

            # Inject and submit
            await page.evaluate('''(token) => {
                document.querySelector('[name="cf-turnstile-response"]').value = token;
            }''', token)

            await page.click('button[type="submit"]')
            return True

    raise Exception("CAPTCHA solving timeout")

Human-Like Behavior Simulation

import random
import asyncio
from scipy.stats import truncnorm

async def human_like_type(page, selector, text, delay_range=(50, 200)):
    """
    Type text like a human: variable delays, typos, corrections

    Args:
        page: Playwright page containing the input.
        selector: Selector of the element to type into.
        text: Text to enter.
        delay_range: ``(min_ms, max_ms)`` bounds for the pause before each
            keystroke. Delays are drawn from a normal distribution centred
            on the middle of the range and clamped to it.

    Raises:
        ValueError: if no element matches ``selector``.
    """
    element = await page.query_selector(selector)
    if element is None:
        # query_selector returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on None.click().
        raise ValueError(f"No element matches selector: {selector!r}")
    await element.click()

    low, high = sorted(delay_range)
    mean = (low + high) / 2
    # About 99.7% of samples fall inside the range before clamping.
    sigma = (high - low) / 6

    for char in text:
        # Variable delay between keystrokes
        delay = random.gauss(mean, sigma)
        delay = max(low, min(delay, high))  # Clamp to the requested range
        await asyncio.sleep(delay / 1000)

        # Occasional typo (5% chance)
        if random.random() < 0.05:
            wrong_char = random.choice('abcdefghijklmnopqrstuvwxyz')
            await element.type(wrong_char)
            await asyncio.sleep(random.uniform(0.1, 0.3))
            # Backspace
            await element.press('Backspace')
            await asyncio.sleep(random.uniform(0.1, 0.2))

        await element.type(char)

async def human_like_mouse_move(page, target_element, start=(0, 0), steps=50):
    """
    Move mouse with natural Bezier curves

    Playwright's mouse object exposes no way to read the current pointer
    location, so the starting point is supplied by the caller.

    Args:
        page: Playwright page whose mouse is moved.
        target_element: Element handle to move to; the pointer ends at the
            centre of its bounding box.
        start: ``(x, y)`` the pointer is assumed to be at before the move.
        steps: Number of segments along the curve (10 ms pause per segment).

    Returns:
        None. Returns without moving when the element has no bounding box
        (for example, when it is not visible).
    """
    box = await target_element.bounding_box()
    if box is None:
        return

    start_x, start_y = start
    end_x = box['x'] + box['width'] / 2
    end_y = box['y'] + box['height'] / 2

    # Bezier curve control points
    cp1_x = start_x + random.uniform(-100, 100)
    cp1_y = start_y + random.uniform(-100, 100)
    cp2_x = end_x + random.uniform(-50, 50)
    cp2_y = end_y + random.uniform(-50, 50)

    # Move through Bezier curve
    steps = max(1, int(steps))
    for i in range(steps + 1):
        t = i / steps
        inv = 1 - t
        # Cubic Bezier formula
        x = (inv**3 * start_x
             + 3 * inv**2 * t * cp1_x
             + 3 * inv * t**2 * cp2_x
             + t**3 * end_x)
        y = (inv**3 * start_y
             + 3 * inv**2 * t * cp1_y
             + 3 * inv * t**2 * cp2_y
             + t**3 * end_y)

        await page.mouse.move(x, y)
        await asyncio.sleep(0.01)  # 10ms per step

async def human_like_scroll(page, target_position):
    """
    Scroll naturally with momentum and overshoot

    Scrolls in random bursts until the vertical offset is within 50 px of
    ``target_position``, or until the page cannot scroll any further in that
    direction (for example, the document is shorter than the target).

    Args:
        page: Playwright page to scroll.
        target_position: Desired ``window.pageYOffset`` in pixels.
    """
    current_position = await page.evaluate('window.pageYOffset')

    # Scroll in bursts
    while abs(current_position - target_position) > 50:
        # Random scroll distance
        scroll_amount = random.randint(100, 300)
        if target_position < current_position:
            scroll_amount = -scroll_amount

        # Scroll
        await page.evaluate(f'window.scrollBy(0, {scroll_amount})')

        # If the offset did not change, the page has hit its scroll limit and
        # the target is unreachable; stop instead of looping forever.
        position_after_scroll = await page.evaluate('window.pageYOffset')
        if position_after_scroll == current_position:
            break

        # Pause
        await asyncio.sleep(random.uniform(0.3, 0.8))

        # Occasional overshoot correction
        if random.random() < 0.2:
            await page.evaluate(f'window.scrollBy(0, {-scroll_amount * 0.1})')
            await asyncio.sleep(0.2)

        current_position = await page.evaluate('window.pageYOffset')

async def human_like_navigate(page, url):
    """
    Open ``url`` and act out a short browsing session on it.

    Sequence: load the page, pause, scroll to a random offset, pause again,
    sometimes (30% of runs) move the pointer to a random link or button,
    then scroll to a second random offset.
    """
    await page.goto(url, wait_until='domcontentloaded')

    # Initial "reading" pause
    initial_pause = random.uniform(1, 3)
    await asyncio.sleep(initial_pause)

    # First scroll
    first_offset = random.randint(500, 1500)
    await human_like_scroll(page, first_offset)

    # Longer "reading" pause
    reading_pause = random.uniform(2, 5)
    await asyncio.sleep(reading_pause)

    # Occasionally point at something clickable
    should_hover = random.random() < 0.3
    if should_hover:
        clickable = await page.query_selector_all('a, button')
        if clickable:
            chosen = random.choice(clickable)
            await human_like_mouse_move(page, chosen)

    # Second scroll
    second_offset = random.randint(1000, 3000)
    await human_like_scroll(page, second_offset)

Common Problems & Solutions

Problem: Even with all stealth plugins, Cloudflare blocks headless Chrome every time. Headful works fine.

What I Tried: puppeteer-extra, stealth plugins, different user agents - still detected.

Actual Fix: Cloudflare detects headless via multiple methods. Need to spoof all of them:

# Solution: Don't use headless, or use Xvfb virtual display
# Option 1: Run headful on server with Xvfb

# Install Xvfb
# Ubuntu: sudo apt-get install xvfb
# macOS: XQuartz

# Run with virtual display
from xvfbwrapper import Xvfb

# NOTE(review): `playwright` is not defined in this snippet; it is presumably a
# started async Playwright instance, and the `await` must run inside a coroutine.
# The browser is launched while the Xvfb context manager is active; keep all
# browser work inside this `with` block.
with Xvfb(width=1920, height=1080):
    browser = await playwright.chromium.launch(
        headless=False,  # Actually headful but on virtual display
        args=['--start-maximized']
    )
    # Works like headless but appears headful to detection

# Option 2: Use puppeteer-stealth equivalent for Playwright
# Install playwright-extra
pip install playwright-stealth

from playwright_stealth import stealth_sync

# Apply stealth
browser = await playwright.chromium.launch(headless=True)
context = await browser.new_context()
page = await context.new_page()

await stealth_sync(page)

# Option 3: Spoof headless detection checks
# The script below overrides navigator.plugins, navigator.languages, the WebGL
# vendor string (parameter 37445), the screen dimensions and
# mediaDevices.enumerateDevices with fixed values.
# NOTE(review): `page` is not defined in this snippet; presumably a Playwright
# page created earlier.
await page.add_init_script('''
    // Override headless checks
    Object.defineProperty(navigator, 'plugins', {
        get: () => [1, 2, 3, 4, 5]
    });

    Object.defineProperty(navigator, 'languages', {
        get: () => ['en-US', 'en']
    });

    // Override WebGL vendor
    const getParameter = WebGLRenderingContext.prototype.getParameter;
    WebGLRenderingContext.prototype.getParameter = function(parameter) {
        if (parameter === 37445) {
            return 'Intel Inc.';
        }
        return getParameter.call(this, parameter);
    };

    // Spoof screen dimensions (headless often has 0x0)
    Object.defineProperty(screen, 'width', {
        get: () => 1920
    });
    Object.defineProperty(screen, 'height', {
        get: () => 1080
    });
    Object.defineProperty(screen, 'availWidth', {
        get: () => 1920
    });
    Object.defineProperty(screen, 'availHeight', {
        get: () => 1080
    });

    // Spoof media devices
    navigator.mediaDevices.enumerateDevices = () => Promise.resolve([
        {deviceId: 'default', kind: 'audioinput', label: '', groupId: 'default'},
        {deviceId: 'default', kind: 'videoinput', label: '', groupId: 'default'},
    ]);
''')

# Option 4: Use residential proxy with good reputation
# Cloudflare flags datacenter IPs
# The server, username and password below are placeholders; replace them with
# real proxy details and load the credentials from configuration rather than
# hard-coding them.
browser = await playwright.chromium.launch(
    proxy={
        'server': 'http://residential-proxy.com:8000',
        'username': 'user',
        'password': 'pass'
    }
)

Problem: DataDome allows first request, blocks subsequent ones with "Access denied". Cookies and headers don't help.

What I Tried: Rotated IPs, changed fingerprints - still blocked after N requests.

Actual Fix: DataDome uses behavioral analysis + device fingerprinting. Need complete browser profile:

# Solution: Use persistent context with real browser profile
# DataDome checks for consistent browser behavior

# Create browser profile once
import os

profile_path = '/path/to/browser/profile'

# First time: create profile with real browser session
# NOTE(review): despite the variable name, the later functions in this section
# treat the result of launch_persistent_context as a browser *context*
# (they call new_page/add_cookies/cookies/close on it).
browser = await playwright.chromium.launch_persistent_context(
    user_data_dir=profile_path,
    headless=False,
)

# Manually solve CAPTCHA once, save session
# Then reuse profile:

async def get_datadome_session(url):
    """
    Open ``url`` in the persistent Chrome profile, restoring saved cookies.

    When the page content mentions DataDome, the function waits for the
    operator to solve the challenge in the browser window and then saves the
    context's cookies to ``cookies.json``.

    Args:
        url: Address to open.

    Returns:
        The open page. Its context stays open and is owned by the caller;
        close it with ``await page.context.close()`` when finished.
    """
    # Local import: json is not imported at module level in this snippet.
    import json

    context = await playwright.chromium.launch_persistent_context(
        user_data_dir=profile_path,
        headless=False,
        channel='chrome',  # Use real Chrome
    )

    try:
        page = await context.new_page()

        # Load stored cookies if available
        if os.path.exists('cookies.json'):
            with open('cookies.json') as f:
                cookies = json.load(f)
            await context.add_cookies(cookies)

        await page.goto(url)

        # Check if blocked
        if 'datadome' in await page.content():
            # Need to solve CAPTCHA manually or with service.
            # input() blocks, so run it in a worker thread to keep the event
            # loop (and the browser connection) alive while waiting.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                None, input, "Solve CAPTCHA in browser, then press Enter"
            )

            # Save cookies after solving
            cookies = await context.cookies()
            with open('cookies.json', 'w') as f:
                json.dump(cookies, f)
    except BaseException:
        # Nothing is returned on failure, so nobody else can close the
        # context; release it here and re-raise the original error.
        await context.close()
        raise

    return page

# For multiple requests, reuse same context
async def datadome_requests(urls):
    """
    Fetch several pages through one persistent browser context.

    Args:
        urls: Iterable of addresses to load, one page at a time.

    Returns:
        List with the HTML content of each page, in input order.

    The context and every page are closed even when a navigation fails; the
    failure itself is propagated to the caller.
    """
    context = await playwright.chromium.launch_persistent_context(
        user_data_dir=profile_path,
        headless=False,
    )

    try:
        # Warm up session (important!)
        await asyncio.sleep(5)

        results = []
        for url in urls:
            page = await context.new_page()
            try:
                # Random delay between requests
                await asyncio.sleep(random.uniform(10, 30))

                await page.goto(url)
                results.append(await page.content())
            finally:
                await page.close()

        return results
    finally:
        await context.close()

# Alternative: Use undetected-chromedriver
import undetected_chromedriver as uc

def datadome_with_uc(url, profile_dir='/path/to/profile'):
    """
    Open ``url`` with undetected-chromedriver using a persistent profile.

    Args:
        url: Address to open.
        profile_dir: Chrome user-data directory to reuse between runs.

    Returns:
        The running driver. The caller is responsible for calling
        ``driver.quit()`` when done.
    """
    options = uc.ChromeOptions()
    options.add_argument(f'--user-data-dir={profile_dir}')

    driver = uc.Chrome(options=options)
    driver.get(url)

    # UC handles most anti-bot automatically
    return driver

Using a persistent context with a real browser profile solved the DataDome blocks. The warm-up period before the first request is critical.

Problem: Akamai shows cookie consent modal. Can't access content until clicked, but button selectors change.

What I Tried: Multiple selectors, XPath, waiting - button not found.

Actual Fix: Akamai loads consent manager dynamically. Need to wait and handle multiple consent types:

async def handle_akamai_consent(page, selector_timeout=5000):
    """
    Handle various Akamai consent managers

    Tries a list of known consent-button selectors in order and clicks the
    first one that becomes visible. If none match on the main page, every
    frame is searched for an "Accept" button.

    Args:
        page: Playwright page showing the consent dialog.
        selector_timeout: Milliseconds to wait for each selector. In the
            worst case the total wait is this value times the number of
            selectors.

    Returns:
        True if a consent button was clicked, otherwise False.
    """
    # Wait for consent manager to load
    await page.wait_for_timeout(2000)

    # Try multiple consent button selectors
    consent_selectors = [
        # Akamai consent
        '#akamai-consent-button',
        '.akamai-accept',
        '#consent-accept',

        # Generic consent managers
        'button:has-text("Accept")',
        'button:has-text("I Agree")',
        'button:has-text("Accept All")',
        '.consent-button',
        '#consent-button',
        '.cookie-accept',

        # OneTrust
        '#onetrust-accept-btn-handler',
        '.ot-btn-container',

        # Cookiebot
        '#CybotCookiebotDialogBodyButtonAccept',
    ]

    consent_clicked = False
    for selector in consent_selectors:
        try:
            element = await page.wait_for_selector(
                selector,
                timeout=selector_timeout,
                state='visible'
            )

            if element:
                await element.click()
                consent_clicked = True
                print(f"Clicked consent with selector: {selector}")
                break

        except Exception:
            # Best effort: a timeout or failed click on one selector just
            # moves on to the next. `Exception` (not a bare except) lets task
            # cancellation and KeyboardInterrupt propagate.
            continue

    # If no button found, try dismissing iframe
    if not consent_clicked:
        # Look for and close consent iframe
        for frame in page.frames:
            try:
                button = await frame.query_selector('button:has-text("Accept")')
                if button:
                    await button.click()
                    consent_clicked = True
                    break
            except Exception:
                # Frames can detach while being inspected; skip those.
                continue

    # Wait for page to reload after consent
    await page.wait_for_timeout(3000)

    return consent_clicked

# Usage
await page.goto('https://protected-site.com')
await handle_akamai_consent(page)

# Now access content
content = await page.text()

# Alternative: Set consent cookie directly
await page.context.add_cookies([{
    'name': 'akamai-consent',
    'value': 'accepted',
    'domain': '.protected-site.com',
    'path': '/'
}])

# Reload with consent
await page.reload()

Comparison with Alternatives

Playwright V1

Basic stealth setup

DrissionPage

Better for Python projects

ScrapeGraph-AI

AI-powered scraping

Playwright GitHub

Official repository