Skip to content

Commit 315c013

Browse files
VinciGit00 and claude committed
feat: add wait_ms parameter to scrape, smartscraper, and markdownify endpoints
Adds a new `waitMs` option (sent as `wait_ms` in the API payload) to control how long the scraping provider waits before capturing page content. Defaults to 3000ms on the server side. Aligns with sgai-api PR #399. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 94dcfb1 commit 315c013

4 files changed

Lines changed: 29 additions & 4 deletions

File tree

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "scrapegraph-js",
33
"author": "ScrapeGraphAI",
4-
"version": "0.2.111",
4+
"version": "0.2.112",
55
"description": "Scrape and extract structured data from a webpage using ScrapeGraphAI's APIs. Supports cookies for authentication, infinite scrolling, and pagination.",
66
"repository": {
77
"type": "git",

src/markdownify.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ import { getMockResponse } from './utils/mockResponse.js';
1111
* @param {Object} options - Optional configuration options.
1212
* @param {boolean} options.mock - Override mock mode for this request
1313
* @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection
14+
* @param {number} [options.waitMs] - Non-negative integer number of milliseconds to wait before scraping the website. When omitted, `wait_ms` is not sent and the server-side default (3000) applies.
1415
* @returns {Promise<string>} A promise that resolves to the markdown representation of the webpage.
1516
* @throws {Error} Throws an error if the HTTP request fails.
1617
*/
1718
export async function markdownify(apiKey, url, options = {}) {
18-
const { mock = null, stealth = false } = options;
19+
const { mock = null, stealth = false, waitMs = null } = options;
1920

2021
// Check if mock mode is enabled
2122
const useMock = mock !== null ? mock : isMockEnabled();
@@ -41,6 +42,13 @@ export async function markdownify(apiKey, url, options = {}) {
4142
payload.stealth = stealth;
4243
}
4344

45+
if (waitMs !== null) {
46+
if (!Number.isInteger(waitMs) || waitMs < 0) {
47+
throw new Error('waitMs must be a positive integer');
48+
}
49+
payload.wait_ms = waitMs;
50+
}
51+
4452
try {
4553
const response = await axios.post(endpoint, payload, { headers });
4654
return response.data;

src/scrape.js

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import { getMockResponse, createMockAxiosResponse } from './utils/mockResponse.j
1313
* @param {boolean} [options.branding=false] - Whether to include branding in the response (defaults to false).
1414
* @param {Object} options.headers - Optional custom headers to send with the request.
1515
* @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection
16+
* @param {number} [options.waitMs] - Non-negative integer number of milliseconds to wait before scraping the website. When omitted, `wait_ms` is not sent and the server-side default (3000) applies.
1617
* @returns {Promise<Object>} A promise that resolves to the HTML content and metadata.
1718
* @throws {Error} Throws an error if the HTTP request fails.
1819
*
@@ -57,7 +58,8 @@ export async function scrape(apiKey, url, options = {}) {
5758
branding = false,
5859
headers: customHeaders = {},
5960
mock = null,
60-
stealth = false
61+
stealth = false,
62+
waitMs = null
6163
} = options;
6264

6365
// Check if mock mode is enabled
@@ -91,6 +93,13 @@ export async function scrape(apiKey, url, options = {}) {
9193
payload.stealth = stealth;
9294
}
9395

96+
if (waitMs !== null) {
97+
if (!Number.isInteger(waitMs) || waitMs < 0) {
98+
throw new Error('waitMs must be a positive integer');
99+
}
100+
payload.wait_ms = waitMs;
101+
}
102+
94103
// Only include headers in payload if they are provided
95104
if (Object.keys(customHeaders).length > 0) {
96105
payload.headers = customHeaders;

src/smartScraper.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,11 @@ import { getMockResponse, createMockAxiosResponse } from './utils/mockResponse.j
2626
* @param {boolean} [stealth] - Optional flag to enable stealth mode to avoid bot detection
2727
* @param {string} [websiteHtml] - Optional raw HTML content to process (max 2MB, mutually exclusive with url and websiteMarkdown)
2828
* @param {string} [websiteMarkdown] - Optional Markdown content to process (max 2MB, mutually exclusive with url and websiteHtml)
29+
* @param {number} [waitMs] - Optional non-negative integer number of milliseconds to wait before scraping the website. When omitted, `wait_ms` is not sent and the server-side default (3000) applies.
2930
* @returns {Promise<string>} Extracted data in JSON format matching the provided schema
3031
* @throws - Will throw an error in case of an HTTP failure or validation error.
3132
*/
32-
export async function smartScraper(apiKey, url, prompt, schema = null, numberOfScrolls = null, totalPages = null, cookies = null, options = {}, plain_text = false, renderHeavyJs = false, stealth = false, websiteHtml = null, websiteMarkdown = null) {
33+
export async function smartScraper(apiKey, url, prompt, schema = null, numberOfScrolls = null, totalPages = null, cookies = null, options = {}, plain_text = false, renderHeavyJs = false, stealth = false, websiteHtml = null, websiteMarkdown = null, waitMs = null) {
3334
const { mock = null } = options;
3435

3536
// Validate that exactly one of url, websiteHtml, or websiteMarkdown is provided
@@ -123,6 +124,13 @@ export async function smartScraper(apiKey, url, prompt, schema = null, numberOfS
123124
payload.stealth = stealth;
124125
}
125126

127+
if (waitMs !== null) {
128+
if (!Number.isInteger(waitMs) || waitMs < 0) {
129+
throw new Error('waitMs must be a positive integer');
130+
}
131+
payload.wait_ms = waitMs;
132+
}
133+
126134
try {
127135
const response = await axios.post(endpoint, payload, { headers });
128136
return response.data;

0 commit comments

Comments
 (0)