Go测试代码

package test

import (
	"context"
	"fmt"
	"net/url"
	"os"
	"slices"
	"testing"
	"time"

	"github.com/chromedp/cdproto/cdp"
	"github.com/chromedp/cdproto/network"
	"github.com/chromedp/cdproto/page"
	"github.com/chromedp/chromedp"
)

// userAgent 浏览器的UA头
//var userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36 Edg/141.0.0.0"

// stealthJS holds the browser-fingerprint evasion script from
// https://github.com/requireCool/stealth.min.js. It is read from
// "stealth.min.js" (a path relative to the working directory) when the package
// is initialised. The read error is discarded, so stealthJS is empty when the
// file cannot be read.
var stealthJS, _ = os.ReadFile("stealth.min.js")

// TestDoubao drives a remote Chrome instance through one chat round-trip on
// www.doubao.com and captures the reply the page receives over SSE.
//
// The test attaches to an already running browser, injects stealth.min.js into
// every new document, opens the chat page, types a question, clicks send and
// waits until the POST /samantha/chat/completion request has finished so that
// its response body can be fetched through the DevTools protocol. Two full-page
// screenshots (before asking, after the reply) are written to the working
// directory to help debugging.
//
// The test fails when the chromedp task list returns an error, when the reply
// body cannot be captured, or when a screenshot cannot be written.
func TestDoubao(t *testing.T) {
	// remoteChromeURL is the DevTools endpoint of the browser to attach to.
	// completionPath is the URL path of the SSE chat request to watch for.
	// overallTimeout bounds the whole browser session; it must be longer than
	// replyTimeout, which bounds only the wait for the model's answer.
	const (
		remoteChromeURL = "ws://10.0.0.131:8222"
		completionPath  = "/samantha/chat/completion"
		overallTimeout  = 300 * time.Second
		replyTimeout    = 180 * time.Second
	)

	rootCtx := context.Background()

	localOpts := []chromedp.ExecAllocatorOption{
		chromedp.Flag("headless", false),            // use false while debugging
		chromedp.Flag("disable-hang-monitor", true), // disable the "page unresponsive" detection

		// Core: turn off the automation indicators.
		chromedp.Flag("enable-automation", false),
		chromedp.Flag("useAutomationExtension", false),
		chromedp.Flag("disable-blink-features", "AutomationControlled"),
		// Auxiliary: strengthen the disguise.
		//chromedp.UserAgent(userAgent),
		chromedp.Flag("disable-web-security", false),
		chromedp.Flag("ignore-certificate-errors", false),
		// Randomise the window size so that instances do not all look alike.
		//chromedp.WindowSize(1920+rand.Intn(200), 1080+rand.Intn(200)),
	}

	// Prepend chromedp's defaults. These options only matter for a locally
	// launched browser; the remote allocator used below does not take them.
	localOpts = append(chromedp.DefaultExecAllocatorOptions[:], localOpts...)
	_ = localOpts
	//allocatorContext, cancel1 := chromedp.NewExecAllocator(rootCtx, localOpts...)
	allocatorCtx, cancelAllocator := chromedp.NewRemoteAllocator(rootCtx, remoteChromeURL)
	defer cancelAllocator()
	browserCtx, cancelBrowser := chromedp.NewContext(allocatorCtx)
	defer cancelBrowser()
	// Run an empty task list to create the Chrome instance ahead of time.
	//chromedp.Run(browserCtx, make([]chromedp.Action, 0, 1)...)

	// Bound the whole session. The deadline has to exceed replyTimeout,
	// otherwise the wait for the model's answer can never complete.
	runCtx, cancelRun := context.WithTimeout(browserCtx, overallTimeout)
	defer cancelRun()

	// sseResult carries the outcome of fetching the SSE response body from the
	// fetch goroutine to the task that waits for it, so no variable is shared
	// between goroutines.
	type sseResult struct {
		body string
		err  error
	}
	sseDone := make(chan sseResult, 1)

	// pending holds the IDs of in-flight completion requests. It is only
	// touched from the listener callback.
	var pending []network.RequestID

	// Watch Doubao's SSE chat request.
	chromedp.ListenTarget(runCtx, func(ev any) {
		switch ev := ev.(type) {
		case *network.EventRequestWillBeSent: // the SSE request is being sent
			if ev.Request.Method != "POST" {
				return
			}
			uri, err := url.Parse(ev.Request.URL)
			if err != nil || uri.Path != completionPath {
				// A URL that fails to parse cannot be the request we want;
				// skipping it also avoids dereferencing a nil *url.URL.
				return
			}
			pending = append(pending, ev.RequestID)

		case *network.EventLoadingFinished: // the SSE stream has ended
			i := slices.Index(pending, ev.RequestID)
			if i < 0 {
				return
			}
			pending = slices.Delete(pending, i, i+1)

			execCtx := cdp.WithExecutor(runCtx, chromedp.FromContext(runCtx).Target)
			requestID := ev.RequestID
			// Fetch the body in a goroutine: the callback runs while events
			// are being handled, so it must not issue a DevTools command itself.
			go func() {
				bs, err := network.GetResponseBody(requestID).Do(execCtx)
				// Non-blocking send: only the first result is consumed, and a
				// later one must not leave this goroutine blocked forever.
				select {
				case sseDone <- sseResult{body: string(bs), err: err}:
				default:
				}
			}()
		}
	})

	var (
		screenshotLogin   []byte
		screenshotSession []byte
		content           string // full SSE response body
		sseErr            error  // why the SSE body could not be captured, if so
	)

	if len(stealthJS) == 0 {
		t.Log("stealth.min.js is empty or could not be read; continuing without the stealth script")
	}

	// No login needed; restart the container before every run:
	// curl -X POST http://10.0.0.131:2375/containers/chromedp-doubao/restart
	/*
		data, err := os.ReadFile("cookies.json")
		if err != nil {
			return
		}

		// Map that stores the cookies.
		cookies := make(map[string]string)

		// Parse the JSON.
		if err := json.Unmarshal(data, &cookies); err != nil {
			return
		}
		fmt.Println("---豆包的cookies已经获取---")

		// 2. Inject the cookies given as map[string]string.

		err = chromedp.Run(chromeCtx,

			// Labelled "clear cache" originally; it only lists the current cookies.
			chromedp.ActionFunc(func(ctx context.Context) error {

				cookes, err := storage.GetCookies().Do(ctx)
				fmt.Println("cookes", cookes)

				return err
			}),
			// Wait three seconds.
			chromedp.Sleep(3*time.Second),

			chromedp.ActionFunc(func(ctx context.Context) error {
				for name, value := range cookies {
					// Build the SetCookie action and execute it with the right ctx.
					c := network.SetCookie(name, value).
						WithDomain("www.doubao.com").
						WithPath("/").
						WithHTTPOnly(false).
						WithSecure(false)
					// Note: Do(ctx) only returns an error.
					if err := c.Do(ctx); err != nil {
						fmt.Errorf("SetCookie %s failed: %w", name, err)
					}

				}
				return nil
			}),
		)
	*/

	runErr := chromedp.Run(runCtx, chromedp.Tasks{

		/*
			chromedp.ActionFunc(func(ctx context.Context) error {

				// Build the SetCookie action and execute it with the right ctx.
				c := network.SetCookie("sessionid", "4e18aba71db74332e76f9ea0e10cf05a").
					WithDomain("www.doubao.com").
					WithPath("/").
					WithHTTPOnly(false).
					WithSecure(false)
				// Note: Do(ctx) only returns an error.
				if err := c.Do(ctx); err != nil {
					fmt.Printf("SetCookie %s failed: %v", "sessionid", err)
				}

				return nil
			}),
		*/

		//chromedp.Evaluate(`Object.defineProperty(navigator, 'webdriver', {get: () => undefined})`, nil),
		// page.AddScriptToEvaluateOnNewDocument is the recommended way.
		//chromedp.Evaluate(string(stealthJS), nil),

		chromedp.ActionFunc(func(ctx context.Context) error {
			if len(stealthJS) == 0 {
				return nil // nothing to inject
			}
			_, err := page.AddScriptToEvaluateOnNewDocument(string(stealthJS)).Do(ctx)
			return err
		}),

		// Enable network events; without this the listener never fires.
		network.Enable(),

		// Override navigator.userAgent and friends.
		//emulation.SetUserAgentOverride(userAgent),
		// Extra request headers can be set at the same time.
		//network.SetExtraHTTPHeaders(network.Headers{"Accept-Language": "zh-CN,zh;q=0.9", "User-Agent": userAgent}),

		/*
			// Window with a fixed resolution.
			emulation.SetDeviceMetricsOverride(1920, 1080, 1.0, false).
				WithScreenOrientation(&emulation.ScreenOrientation{
					Type:  emulation.OrientationTypePortraitPrimary,
					Angle: 0,
				}),
		*/

		// Navigate to the chat (login) page.
		chromedp.Navigate("https://www.doubao.com/chat/"),
		// Wait three seconds.
		chromedp.Sleep(3 * time.Second),
		/*
			// Decide whether the page is already logged in.
			chromedp.ActionFunc(func(ctx context.Context) error {
				// Check for the login page by looking for login elements, using the same chromeCtx.
				var exists bool
				err := chromedp.Run(chromeCtx, chromedp.EvaluateAsDevTools(`document.evaluate('//*[text()=\"密码登录\"]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue !== null`, &exists))
				if err != nil {
					return err
				}

				if !exists {
					return nil
				}

				// Login elements found: run the login flow, using the same chromeCtx.
				return chromedp.Run(chromeCtx, chromedp.Tasks{

					// Click the "password login" tab.
					chromedp.WaitReady(`//*[text()='密码登录']`, chromedp.BySearch),
					chromedp.Click(`//*[text()='密码登录']`, chromedp.BySearch),

					// Enter the account.
					chromedp.WaitReady(`//*[@placeholder='请输入手机号/邮箱地址']`, chromedp.BySearch),
					chromedp.SendKeys(`//*[@placeholder='请输入手机号/邮箱地址']`, "账号", chromedp.BySearch),

					// Enter the password.
					chromedp.WaitReady(`//*[@placeholder='请输入密码']`, chromedp.BySearch),
					chromedp.SendKeys(`//*[@placeholder='请输入密码']`, "密码", chromedp.BySearch),

					// Click "login".
					chromedp.WaitReady(`//*[text()='登录']`, chromedp.BySearch),
					chromedp.Click(`//*[text()='登录']`, chromedp.BySearch),

					// Wait for the login to complete.
					chromedp.Sleep(3 * time.Second),
				})

			}),
		*/

		// Screenshot of the page before asking.
		chromedp.FullScreenshot(&screenshotLogin, 100),

		// 1. Type the question.
		chromedp.SendKeys(`textarea[data-testid="chat_input_input"]`, "今天的热点新闻", chromedp.ByQuery),
		// Wait three seconds.
		chromedp.Sleep(3 * time.Second),

		// 2. Wait until the button is clickable (aria-disabled="false" and no disabled attribute).
		chromedp.WaitVisible(`button#flow-end-msg-send[aria-disabled="false"]:not([disabled])`, chromedp.ByQuery),

		// 3. Click the button.
		chromedp.Click(`button#flow-end-msg-send`, chromedp.ByQuery),

		// Wait for the model's reply, at most replyTimeout.
		chromedp.ActionFunc(func(ctx context.Context) error {
			select {
			case res := <-sseDone:
				content, sseErr = res.body, res.err
				return nil
			case <-time.After(replyTimeout):
				// Record the failure but return nil so the session screenshot
				// below is still taken.
				sseErr = fmt.Errorf("no SSE reply within %s", replyTimeout)
				return nil
			case <-ctx.Done():
				// The session is over; waiting any longer cannot succeed.
				return ctx.Err()
			}
		}),
		// Screenshot of the conversation.
		chromedp.FullScreenshot(&screenshotSession, 100),
	})

	if runErr != nil {
		t.Errorf("running browser tasks: %v", runErr)
	}
	if sseErr != nil {
		t.Errorf("capturing SSE reply: %v", sseErr)
	}

	// Persist whatever screenshots were captured, even after a failure: they
	// are the main debugging aid. os.WriteFile truncates an existing file.
	save := func(name string, png []byte) {
		if len(png) == 0 {
			return
		}
		if err := os.WriteFile(name, png, 0o644); err != nil {
			t.Errorf("writing %s: %v", name, err)
		}
	}
	save("screenshotLogin.png", screenshotLogin)
	save("screenshotSession.png", screenshotSession)

	fmt.Println(content)
}

stealth.min.js

下载:stealth.min.js.zip

浏览器指纹验证网站:

docker compose

# Compose definition for the Chrome container used by the Go test.
# The container's port 9222 is published on host port 8222, which is the port
# the test's ws://10.0.0.131:8222 endpoint targets.
services:
  chromedp:
    image: chromedp/headless-shell:143.0.7445.3
    container_name: chromedp
    restart: unless-stopped
    # Command-line switches passed to the browser.
    command: 
      - "--headless=new"
      - "--window-size=1920,1080"
      - "--no-sandbox"
      - "--disable-setuid-sandbox"
      - "--disable-background-timer-throttling"
      - "--disable-backgrounding-occluded-windows"
      - "--disable-renderer-backgrounding"
      - "--disable-features=VizDisplayCompositor"
      - "--enable-unsafe-swiftshader"
      - "--disable-hang-monitor"
      # NOTE(review): the Go options disable "enable-automation"; confirm that
      # "--disable-automation" is a switch Chromium actually recognises.
      - "--disable-automation"
      - "--disable-extensions"
      - "--disable-blink-features=AutomationControlled"
      # NOTE(review): Chromium boolean switches are usually presence-based, so
      # "=false" may still turn these two switches ON. If the intent is to
      # leave them off, removing the lines is likely the safe form - confirm.
      - "--disable-web-security=false"
      - "--ignore-certificate-errors=false"
    shm_size: 4g
    # Optional: resolve www.doubao.com to the nginx proxy (see the nginx
    # fragment further down in this document).
    #extra_hosts:
    #  - "www.doubao.com:10.0.0.131"
    environment:
      TZ: Asia/Shanghai
      LANG: zh_CN.UTF-8
      LC_ALL: zh_CN.UTF-8
    ports:
      # host:container
      - "8222:9222"
    deploy:
      resources:
        limits:
          cpus: '4'
          memory: 16G
        reservations:
          cpus: '4'
          memory: 8G  

中文乱码

### 豆包返回的 Content-Type 是 text/event-stream,没有带 charset=utf-8,导致中文乱码。(CDP 会严格按照协议解析响应,无法在 CDP 侧修改 Content-Type。)
    ### 指定 www.doubao.com 的Nginx解析IP, 自签https证书
    #extra_hosts:
    #  - "www.doubao.com:10.0.0.131"

        # Proxies the chat-completion SSE endpoint to www.doubao.com and adds a
        # Content-Type header that declares charset=utf-8.
        location /samantha/chat/completion {

        ## Declare UTF-8 encoding for the event stream.
        ## NOTE(review): add_header may add a second Content-Type next to the
        ## upstream one instead of replacing it - confirm, and consider
        ## proxy_hide_header Content-Type if a single header is required.
            add_header Content-Type "text/event-stream; charset=utf-8";

            proxy_set_header                Host                            $host;
            proxy_set_header                X-Real-IP                       $remote_addr;
            proxy_set_header                X-Forwarded-For                 $proxy_add_x_forwarded_for;

            # Disable response buffering.
            proxy_buffering off;
            # Disable the proxy cache.
            proxy_cache off;
            # Forward to the real Doubao service.
            proxy_pass    https://www.doubao.com;

       }
        # Proxies every other path to www.doubao.com without touching the
        # response headers.
        location / {
            proxy_set_header                Host                            $host;
            proxy_set_header                X-Real-IP                       $remote_addr;
            proxy_set_header                X-Forwarded-For                 $proxy_add_x_forwarded_for;

            # Disable response buffering.
            proxy_buffering off;
            # Disable the proxy cache.
            proxy_cache off;
            # Forward to the real Doubao service.
            proxy_pass    https://www.doubao.com;
        }