Scrapy第十一(④)篇：selenium4模拟器-本地代理(单线程)

ゝ一纸荒年。 2024-04-08 09:12 80阅读 0赞

#### 1.正常使用selenium访问百度：调试模式会被浏览器检测到。 ####

import time
    
    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service as ChromeService
    from webdriver_manager.chrome import ChromeDriverManager
    
    if __name__ == '__main__':
    
        # Build the driver service from the chromedriver path returned by webdriver_manager.
        service = ChromeService(executable_path=ChromeDriverManager().install())
        # Chrome options; nothing beyond the defaults is configured here.
        options = webdriver.ChromeOptions()
        # Get the browser instance.
        driver = webdriver.Chrome(service=service, options=options)
    
        try:
            # Visit Baidu.
            driver.get("https://www.baidu.com/")
            # Pause for five seconds before shutting down.
            time.sleep(5)
        finally:
            # Always destroy the instance, even if navigation raised, so the
            # driver session is not left running.
            driver.quit()

![e809c4ebb33043de907c2a27501a7e23.png][]

#### 2.使用selenium接管本地代理浏览器访问百度：真正的用户操作 ####

import os
    import time
    
    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service as ChromeService
    from webdriver_manager.chrome import ChromeDriverManager
    
    if __name__ == '__main__':
        # Path to the local Chrome executable. A raw string keeps every
        # backslash literal instead of relying on unrecognised escape
        # sequences, which Python reports as deprecated.
        chrome_path = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe"
        # Launch Chrome with remote debugging on port 9222 and its own user-data directory.
        os.popen(rf'"{chrome_path}" --remote-debugging-port=9222 --user-data-dir="C:\selenum\AutomationProfile"')
    
        # Build the driver service from the chromedriver path returned by webdriver_manager.
        service = ChromeService(executable_path=ChromeDriverManager().install())
        # Chrome options.
        options = webdriver.ChromeOptions()
        # Point the driver at the debugging port of the browser launched above
        # rather than letting it start a browser of its own.
        options.add_experimental_option('debuggerAddress', 'localhost:9222')
        # Get the browser instance.
        driver = webdriver.Chrome(service=service, options=options)
    
        try:
            # Visit Baidu.
            driver.get("https://www.baidu.com/")
            # Pause for five seconds before shutting down.
            time.sleep(5)
        finally:
            # Always destroy the instance, even if navigation raised, so the
            # driver session is not left running.
            driver.quit()

![5e5e7c05675a4bc9bf3eb40f0577b110.png][]

#### 3.selenium本地代理中间件： ####

class SeleniumMiddleware(object):
        """Scrapy downloader middleware that re-loads each URL in a local Chrome.

        Constructing the middleware launches Chrome with remote debugging on
        port 9222. ``process_response`` then attaches Selenium to that port,
        opens ``request.url``, types a keyword into the element with id ``kw``,
        clicks the element with id ``su`` and returns the browser's page source
        as an ``HtmlResponse``.

        The enclosing module must import the names used here: ``os``,
        ``random``, ``scrapy``, ``webdriver``, ``ChromeService``,
        ``ChromeDriverManager``, ``WebDriverWait`` and ``By``.
        """

        def __init__(self):
            """Resolve the chromedriver path and launch the debuggable Chrome."""
            super().__init__()
            # Resolve the chromedriver path once here instead of on every
            # response; process_response reuses it for each new session.
            self._driver_path = ChromeDriverManager().install()
            # Path to the local Chrome executable.
            chrome_path = r"C:\Users\Lenovo\AppData\Local\Google\Chrome\Application\chrome.exe"
            # Launch Chrome with remote debugging on port 9222 and its own user-data directory.
            os.popen(rf'"{chrome_path}" --remote-debugging-port=9222 --user-data-dir="C:\selenum\AutomationProfile"')

        def process_response(self, request, response, spider):
            """Load ``request.url`` in the browser and return the rendered HTML.

            Args:
                request: The request whose ``url`` is opened in the browser.
                response: The response already downloaded for ``request``; it
                    is not used.
                spider: The running spider; it is not used.

            Returns:
                A ``scrapy.http.HtmlResponse`` for ``request.url`` whose body is
                the UTF-8 encoded page source read from the browser.

            Raises:
                Any exception raised by Selenium (for example when an element
                does not appear within the 3 second wait) propagates to the
                caller after the driver session has been closed.
            """
            # Build the driver service from the path resolved in __init__.
            service = ChromeService(executable_path=self._driver_path)
            # Attach to the browser listening on the debugging port.
            options = webdriver.ChromeOptions()
            options.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
            # Get the browser instance.
            driver = webdriver.Chrome(service=service, options=options)

            try:
                # Open the page.
                driver.get(request.url)
                # Minimise the window.
                driver.minimize_window()
                # Maximise the window.
                driver.maximize_window()
                # Set an explicit position and size (x, y, width, height).
                driver.set_window_rect(0, 0, 1000, 500)
                # Wait up to 3 seconds for the keyword input box.
                input_element = WebDriverWait(driver, timeout=3).until(lambda d: d.find_element(By.ID, "kw"))
                # Type the search keyword followed by a random digit.
                input_element.send_keys("苍穹之跃" + str(random.randint(0, 9)))
                # Wait up to 3 seconds for the search button.
                search_button_element = WebDriverWait(driver, timeout=3).until(lambda d: d.find_element(By.ID, "su"))
                # Click it.
                search_button_element.click()

                # Page source as held by the browser after the click.
                html = driver.page_source
            finally:
                # Always close the driver session, even when a wait timed out
                # or navigation failed.
                driver.quit()
            return scrapy.http.HtmlResponse(url=request.url, body=html.encode('utf-8'), encoding='utf-8', request=request)

在项目的 settings.py 中开启中间件：

DOWNLOADER_MIDDLEWARES = {
        # Import path of the middleware class mapped to its order value.
        'testproject.middlewares.SeleniumMiddleware': 543,
    }

#### 4.selenium本地代理中间件-多线程并发： ####

未实现

[e809c4ebb33043de907c2a27501a7e23.png]: https://image.dandelioncloud.cn/pgy_files/images/2024/04/08/a2876068ab824406a9bc4fccffad330d.png
[5e5e7c05675a4bc9bf3eb40f0577b110.png]: https://image.dandelioncloud.cn/pgy_files/images/2024/04/08/b5aa64ee2b6d41a686760d96a6d8448c.png