爬虫遭遇StackPath反爬的应对之策

遇到StackPath反爬时,页面会出现如下提示:
  • 处理方式很简单:先通过selenium打开页面获取cookie,再带着这些cookie用requests发起请求即可。


此处目标:https://dailynewsegypt.com/category/opinion/page/2/

代码如下:

import time
import requests
from selenium import webdriver

# User-Agent string used both for the Selenium-driven Chrome instance
# (passed as a `user-agent=` argument) and for the `requests` headers.
# NOTE(review): presumably the two must match for the harvested cookies to be
# accepted — confirm against the target site's behaviour.
UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"


def get_cookie(url):
    """Open *url* in headless Chrome and return the cookies the browser holds.

    The browser is started with the module-level ``UA`` as its user agent,
    image loading disabled, and the ``enable-automation`` switch / automation
    extension turned off. Before navigation a script is injected that makes
    ``navigator.webdriver`` read as ``undefined``.

    Args:
        url: Page to load in the browser.

    Returns:
        A dict mapping each cookie name to its value, as reported by the
        browser after the page has been loaded and a fixed wait has elapsed.

    Raises:
        Whatever Selenium raises when the driver cannot start or the page
        cannot be loaded; the browser session is shut down in either case.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('user-agent=' + UA)
    chrome_options.add_argument('blink-settings=imagesEnabled=false')
    chrome_options.add_argument('--window-size=1920,1080')
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    browser = webdriver.Chrome(options=chrome_options)
    try:
        # Runs on every new document, so the override is in place before any
        # page script can inspect navigator.webdriver.
        browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
        Object.defineProperty(navigator, 'webdriver', {
          get: () => undefined
        })
      """
        })
        browser.get(url)
        # Fixed wait; presumably gives the anti-bot challenge page time to run
        # and set its cookies before they are read.
        time.sleep(5)
        return {
            cookie.get('name'): cookie.get('value')
            for cookie in browser.get_cookies()
        }
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process); close() would only close the current window.
        browser.quit()


# Browser-like request headers sent with the follow-up `requests` call.
# The User-Agent reuses the same UA constant given to the Selenium browser.
# NOTE(review): "br" in Accept-Encoding lets the server answer with Brotli;
# `requests` only decodes that transparently when a brotli package is
# installed — confirm, or drop "br" if the printed body looks garbled.
headers = {
    "Host": "dailynewsegypt.com",
    "Connection": "keep-alive",
    "Cache-Control": "max-age=0",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": UA,
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-User": "?1",
    "Sec-Fetch-Dest": "document",
    "Referer": "https://dailynewsegypt.com/",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9,zh-TW;q=0.8,th;q=0.7,en;q=0.6",
}
url = 'https://dailynewsegypt.com/category/opinion/page/2/'
# Step 1: let a real browser load the page and collect the cookies it receives.
cookies = get_cookie(url)
# Step 2: replay those cookies with a plain HTTP request.
# The timeout keeps the script from hanging forever if the server never
# responds; requests has no timeout by default.
req = requests.get(url=url, headers=headers, cookies=cookies, timeout=30)
print(req.text)
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

友情链接更多精彩内容