From cbbb89315bc7267cac9b8c55c3db9f84e7098eb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=AE=B6=E5=90=8D?= Date: Wed, 1 Apr 2026 14:44:26 +0800 Subject: [PATCH 1/2] fix(browser): normalize wait timeouts --- src/browser/cdp.ts | 5 ++-- src/browser/page.ts | 5 ++-- src/browser/wait.test.ts | 23 ++++++++++++++++ src/browser/wait.ts | 17 ++++++++++++ src/clis/36kr/article.test.ts | 41 ++++++++++++++++++++++++++++ src/clis/36kr/article.ts | 3 +- src/clis/sinafinance/rolling-news.ts | 2 +- src/types.ts | 2 +- 8 files changed, 91 insertions(+), 7 deletions(-) create mode 100644 src/browser/wait.test.ts create mode 100644 src/browser/wait.ts create mode 100644 src/clis/36kr/article.test.ts diff --git a/src/browser/cdp.ts b/src/browser/cdp.ts index 72bcf87f..915c48b2 100644 --- a/src/browser/cdp.ts +++ b/src/browser/cdp.ts @@ -14,6 +14,7 @@ import { request as httpsRequest } from 'node:https'; import type { BrowserCookie, IPage, ScreenshotOptions, SnapshotOptions, WaitOptions } from '../types.js'; import type { IBrowserFactory } from '../runtime.js'; import { wrapForEval } from './utils.js'; +import { normalizeWaitTimeoutMs } from './wait.js'; import { generateSnapshotJs, scrollToRefJs, getFormStateJs } from './dom-snapshot.js'; import { generateStealthJs } from './stealth.js'; import { @@ -267,12 +268,12 @@ class CDPPage implements IPage { return; } if (options.selector) { - const timeout = (options.timeout ?? 10) * 1000; + const timeout = normalizeWaitTimeoutMs(options.timeout, 10); await this.evaluate(waitForSelectorJs(options.selector, timeout)); return; } if (options.text) { - const timeout = (options.timeout ?? 30) * 1000; + const timeout = normalizeWaitTimeoutMs(options.timeout, 30); await this.evaluate(waitForTextJs(options.text, timeout)); } } diff --git a/src/browser/page.ts b/src/browser/page.ts index a1bd2264..146625b3 100644 --- a/src/browser/page.ts +++ b/src/browser/page.ts @@ -14,6 +14,7 @@ import { formatSnapshot } from '../snapshotFormatter.js'; import type { BrowserCookie, IPage, ScreenshotOptions, SnapshotOptions, WaitOptions } from '../types.js'; import { sendCommand } from './daemon-client.js'; import { wrapForEval } from './utils.js'; +import { normalizeWaitTimeoutMs } from './wait.js'; import { saveBase64ToFile } from '../utils.js'; import { generateSnapshotJs, scrollToRefJs, getFormStateJs } from './dom-snapshot.js'; import { generateStealthJs } from './stealth.js'; @@ -239,13 +240,13 @@ export class Page implements IPage { return; } if (options.selector) { - const timeout = (options.timeout ?? 10) * 1000; + const timeout = normalizeWaitTimeoutMs(options.timeout, 10); const code = waitForSelectorJs(options.selector, timeout); await sendCommand('exec', { code, ...this._cmdOpts() }); return; } if (options.text) { - const timeout = (options.timeout ?? 30) * 1000; + const timeout = normalizeWaitTimeoutMs(options.timeout, 30); const code = waitForTextJs(options.text, timeout); await sendCommand('exec', { code, ...this._cmdOpts() }); } diff --git a/src/browser/wait.test.ts b/src/browser/wait.test.ts new file mode 100644 index 00000000..f63bdca5 --- /dev/null +++ b/src/browser/wait.test.ts @@ -0,0 +1,23 @@ +import { describe, expect, it } from 'vitest'; + +import { normalizeWaitTimeoutMs } from './wait.js'; + +describe('normalizeWaitTimeoutMs', () => { + it('uses the default timeout when no timeout is provided', () => { + expect(normalizeWaitTimeoutMs(undefined, 10)).toBe(10_000); + }); + + it('treats sub-1000 values as seconds', () => { + expect(normalizeWaitTimeoutMs(3, 10)).toBe(3_000); + expect(normalizeWaitTimeoutMs(0.5, 10)).toBe(500); + }); + + it('treats large values as milliseconds for backward compatibility', () => { + expect(normalizeWaitTimeoutMs(10_000, 10)).toBe(10_000); + }); + + it('clamps non-positive values to zero', () => { + expect(normalizeWaitTimeoutMs(0, 10)).toBe(0); + expect(normalizeWaitTimeoutMs(-2, 10)).toBe(0); + }); +}); diff --git a/src/browser/wait.ts b/src/browser/wait.ts new file mode 100644 index 00000000..dd0b98d1 --- /dev/null +++ b/src/browser/wait.ts @@ -0,0 +1,17 @@ +export function normalizeWaitTimeoutMs(timeout: number | undefined, defaultSeconds: number): number { + if (typeof timeout !== 'number' || !Number.isFinite(timeout)) { + return defaultSeconds * 1000; + } + + if (timeout <= 0) { + return 0; + } + + // Older adapters occasionally passed milliseconds here even though + // wait({ timeout }) is documented in seconds. Preserve those callers. + if (timeout >= 1000) { + return Math.round(timeout); + } + + return Math.round(timeout * 1000); +} diff --git a/src/clis/36kr/article.test.ts b/src/clis/36kr/article.test.ts new file mode 100644 index 00000000..00de82d6 --- /dev/null +++ b/src/clis/36kr/article.test.ts @@ -0,0 +1,41 @@ +import { beforeAll, describe, expect, it, vi } from 'vitest'; + +import { getRegistry } from '../../registry.js'; + +beforeAll(async () => { + await import('./article.js'); +}); + +describe('36kr/article', () => { + it('waits for capture before scraping the rendered article', async () => { + const command = getRegistry().get('36kr/article'); + expect(command?.func).toBeTypeOf('function'); + + const page = { + installInterceptor: vi.fn().mockResolvedValue(undefined), + goto: vi.fn().mockResolvedValue(undefined), + waitForCapture: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue({ + title: 'OpenCLI ships faster 36kr article scraping', + author: 'opencli', + date: '2026-03-31', + body: 'A useful article body', + }), + } as any; + + const result = await command!.func!(page, { id: 'https://www.36kr.com/p/321654987' }); + + expect(page.installInterceptor).toHaveBeenCalledWith('36kr.com/api'); + expect(page.goto).toHaveBeenCalledWith('https://www.36kr.com/p/321654987'); + expect(page.waitForCapture).toHaveBeenCalledWith(6); + expect(page.wait).toHaveBeenCalledWith({ selector: '.article-title, h1', timeout: 3 }); + expect(result).toEqual([ + { field: 'title', value: 'OpenCLI ships faster 36kr article scraping' }, + { field: 'author', value: 'opencli' }, + { field: 'date', value: '2026-03-31' }, + { field: 'url', value: 'https://36kr.com/p/321654987' }, + { field: 'body', value: 'A useful article body' }, + ]); + }); +}); diff --git a/src/clis/36kr/article.ts b/src/clis/36kr/article.ts index e8d24ef2..74765591 100644 --- a/src/clis/36kr/article.ts +++ b/src/clis/36kr/article.ts @@ -31,7 +31,8 @@ cli({ await page.installInterceptor('36kr.com/api'); await page.goto(`https://www.36kr.com/p/${articleId}`); - await page.wait(5); + await page.waitForCapture(6); + await page.wait({ selector: '.article-title, h1', timeout: 3 }); const data: any = await page.evaluate(` (() => { diff --git a/src/clis/sinafinance/rolling-news.ts b/src/clis/sinafinance/rolling-news.ts index 9e0cac9d..8f40f7b4 100644 --- a/src/clis/sinafinance/rolling-news.ts +++ b/src/clis/sinafinance/rolling-news.ts @@ -14,7 +14,7 @@ cli({ columns: ['column', 'title', 'date', 'url'], func: async (page, _args) => { await page.goto(`https://finance.sina.com.cn/roll/#pageid=384&lid=2519`); - await page.wait({ selector: '.d_list_txt li', timeout: 10000 }); + await page.wait({ selector: '.d_list_txt li', timeout: 10 }); const payload = await page.evaluate(` (() => { diff --git a/src/types.ts b/src/types.ts index 43ceb983..e3b0c590 100644 --- a/src/types.ts +++ b/src/types.ts @@ -28,7 +28,7 @@ export interface WaitOptions { text?: string; selector?: string; // wait until document.querySelector(selector) matches time?: number; - timeout?: number; + timeout?: number; // seconds by default; values >= 1000 are treated as ms for backward compatibility } export interface ScreenshotOptions { From 8d866e56dff51c014d45ced2a4d5cec12f9e5cb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=AE=B6=E5=90=8D?= Date: Sun, 5 Apr 2026 16:28:48 +0800 Subject: [PATCH 2/2] simplify: remove normalizeWaitTimeoutMs helper, fix sinafinance timeout Per review feedback, the simpler fix is to enforce one consistent unit (seconds) and correct the one wrong call site instead of introducing auto-detecting threshold logic. --- src/browser/cdp.ts | 5 ++--- src/browser/page.ts | 5 ++--- src/browser/wait.test.ts | 23 ----------------------- src/browser/wait.ts | 17 ----------------- src/types.ts | 2 +- 5 files changed, 5 insertions(+), 47 deletions(-) delete mode 100644 src/browser/wait.test.ts delete mode 100644 src/browser/wait.ts diff --git a/src/browser/cdp.ts b/src/browser/cdp.ts index 915c48b2..72bcf87f 100644 --- a/src/browser/cdp.ts +++ b/src/browser/cdp.ts @@ -14,7 +14,6 @@ import { request as httpsRequest } from 'node:https'; import type { BrowserCookie, IPage, ScreenshotOptions, SnapshotOptions, WaitOptions } from '../types.js'; import type { IBrowserFactory } from '../runtime.js'; import { wrapForEval } from './utils.js'; -import { normalizeWaitTimeoutMs } from './wait.js'; import { generateSnapshotJs, scrollToRefJs, getFormStateJs } from './dom-snapshot.js'; import { generateStealthJs } from './stealth.js'; import { @@ -268,12 +267,12 @@ class CDPPage implements IPage { return; } if (options.selector) { - const timeout = normalizeWaitTimeoutMs(options.timeout, 10); + const timeout = (options.timeout ?? 10) * 1000; await this.evaluate(waitForSelectorJs(options.selector, timeout)); return; } if (options.text) { - const timeout = normalizeWaitTimeoutMs(options.timeout, 30); + const timeout = (options.timeout ?? 30) * 1000; await this.evaluate(waitForTextJs(options.text, timeout)); } } diff --git a/src/browser/page.ts b/src/browser/page.ts index 146625b3..a1bd2264 100644 --- a/src/browser/page.ts +++ b/src/browser/page.ts @@ -14,7 +14,6 @@ import { formatSnapshot } from '../snapshotFormatter.js'; import type { BrowserCookie, IPage, ScreenshotOptions, SnapshotOptions, WaitOptions } from '../types.js'; import { sendCommand } from './daemon-client.js'; import { wrapForEval } from './utils.js'; -import { normalizeWaitTimeoutMs } from './wait.js'; import { saveBase64ToFile } from '../utils.js'; import { generateSnapshotJs, scrollToRefJs, getFormStateJs } from './dom-snapshot.js'; import { generateStealthJs } from './stealth.js'; @@ -240,13 +239,13 @@ export class Page implements IPage { return; } if (options.selector) { - const timeout = normalizeWaitTimeoutMs(options.timeout, 10); + const timeout = (options.timeout ?? 10) * 1000; const code = waitForSelectorJs(options.selector, timeout); await sendCommand('exec', { code, ...this._cmdOpts() }); return; } if (options.text) { - const timeout = normalizeWaitTimeoutMs(options.timeout, 30); + const timeout = (options.timeout ?? 30) * 1000; const code = waitForTextJs(options.text, timeout); await sendCommand('exec', { code, ...this._cmdOpts() }); } diff --git a/src/browser/wait.test.ts b/src/browser/wait.test.ts deleted file mode 100644 index f63bdca5..00000000 --- a/src/browser/wait.test.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { describe, expect, it } from 'vitest'; - -import { normalizeWaitTimeoutMs } from './wait.js'; - -describe('normalizeWaitTimeoutMs', () => { - it('uses the default timeout when no timeout is provided', () => { - expect(normalizeWaitTimeoutMs(undefined, 10)).toBe(10_000); - }); - - it('treats sub-1000 values as seconds', () => { - expect(normalizeWaitTimeoutMs(3, 10)).toBe(3_000); - expect(normalizeWaitTimeoutMs(0.5, 10)).toBe(500); - }); - - it('treats large values as milliseconds for backward compatibility', () => { - expect(normalizeWaitTimeoutMs(10_000, 10)).toBe(10_000); - }); - - it('clamps non-positive values to zero', () => { - expect(normalizeWaitTimeoutMs(0, 10)).toBe(0); - expect(normalizeWaitTimeoutMs(-2, 10)).toBe(0); - }); -}); diff --git a/src/browser/wait.ts b/src/browser/wait.ts deleted file mode 100644 index dd0b98d1..00000000 --- a/src/browser/wait.ts +++ /dev/null @@ -1,17 +0,0 @@ -export function normalizeWaitTimeoutMs(timeout: number | undefined, defaultSeconds: number): number { - if (typeof timeout !== 'number' || !Number.isFinite(timeout)) { - return defaultSeconds * 1000; - } - - if (timeout <= 0) { - return 0; - } - - // Older adapters occasionally passed milliseconds here even though - // wait({ timeout }) is documented in seconds. Preserve those callers. - if (timeout >= 1000) { - return Math.round(timeout); - } - - return Math.round(timeout * 1000); -} diff --git a/src/types.ts b/src/types.ts index e3b0c590..43ceb983 100644 --- a/src/types.ts +++ b/src/types.ts @@ -28,7 +28,7 @@ export interface WaitOptions { text?: string; selector?: string; // wait until document.querySelector(selector) matches time?: number; - timeout?: number; // seconds by default; values >= 1000 are treated as ms for backward compatibility + timeout?: number; } export interface ScreenshotOptions {