Method Signatures
// With schema and options
await page.extract<T extends z.AnyZodObject>(options: ExtractOptions<T>): Promise<ExtractResult<T>>
// String instruction only
await page.extract(instruction: string): Promise<{ extraction: string }>
// No parameters (raw page content)
await page.extract(): Promise<{ pageText: string }>
ExtractOptions Interface:
interface ExtractOptions<T extends z.AnyZodObject> {
instruction?: string;
schema?: T;
modelName?: AvailableModel;
modelClientOptions?: ClientOptions;
domSettleTimeoutMs?: number;
selector?: string;
iframes?: boolean;
}
type ExtractResult<T> = z.infer<T>;
# With schema and parameters
await page.extract(
instruction: str = None,
schema: BaseModel = None,
selector: str = None,
iframes: bool = None,
model_name: AvailableModel = None,
model_client_options: Dict = None,
dom_settle_timeout_ms: int = None
) -> ExtractResult
# String instruction only
await page.extract(instruction: str) -> Dict[str, str]
# No parameters (raw page content)
await page.extract() -> Dict[str, str]
Parameters
instruction: Natural language description of what data to extract.
schema: Type schema defining the structure of data to extract. Ensures type safety and validation.
selector: XPath selector to limit extraction scope. Reduces token usage and improves accuracy.
iframes: Set to true if content exists within iframes. Default: false
modelName (Python: model_name): Override the default LLM model for this extraction.
modelClientOptions (Python: model_client_options): Model-specific configuration options.
domSettleTimeoutMs (Python: dom_settle_timeout_ms): Maximum time, in milliseconds, to wait for the DOM to stabilize. Default: 30000
Response Types
With Schema
String Only
No Parameters
Returns: Promise<ExtractResult<T>>, where T matches your schema. The returned object will be strictly typed according to your schema definition.
Returns: Promise<{ extraction: string }>. Simple string extraction without schema validation.
Returns: Promise<{ pageText: string }>. Raw accessibility tree representation of page content.
Code Examples
Single Object
Arrays
URLs
Scoped
Schema-less
Advanced
import { z } from 'zod';
// Schema definition
const ProductSchema = z.object({
name: z.string(),
price: z.number(),
inStock: z.boolean()
});
// Extraction
const product = await page.extract({
instruction: "extract product details",
schema: ProductSchema
});
Example Response
{
"name": "Product Name",
"price": 100,
"inStock": true
}
import { z } from 'zod';
// Schema definition
const ApartmentListingsSchema = z.object({
apartments: z.array(z.object({
address: z.string(),
price: z.string(),
bedrooms: z.number()
}))
});
// Extraction
const listings = await page.extract({
instruction: "extract all apartment listings",
schema: ApartmentListingsSchema
});
Example Response
{
"apartments": [
{
"address": "123 Main St",
"price": "$100,000",
"bedrooms": 3
},
{
"address": "456 Elm St",
"price": "$150,000",
"bedrooms": 2
}
]
}
import { z } from 'zod';
// Schema definition
const NavigationSchema = z.object({
links: z.array(z.object({
text: z.string(),
url: z.string().url() // URL validation
}))
});
// Extraction
const links = await page.extract({
instruction: "extract navigation links",
schema: NavigationSchema
});
Example Response
{
"links": [
{
"text": "Home",
"url": "https://example.com"
}
]
}
import { z } from 'zod';
const ProductSchema = z.object({
name: z.string(),
price: z.number(),
description: z.string()
});
// Extract from specific page section
const data = await page.extract({
instruction: "extract product info from this section",
selector: "xpath=/html/body/div/div",
schema: ProductSchema
});
Example Response
{
"name": "Product Name",
"price": 100,
"description": "Product description"
}
// String only extraction
const title = await page.extract("get the page title");
// Returns: { extraction: "Page Title" }
// Raw page content
const content = await page.extract();
// Returns: { pageText: "Accessibility Tree: ..." }
Example Response
{
"extraction": "Page Title"
}
import { z } from 'zod';
// Schema with descriptions and validation
const ProductSchema = z.object({
price: z.number().describe("Product price in USD"),
rating: z.number().min(0).max(5).describe("Customer rating out of 5"),
available: z.boolean().describe("Whether product is in stock"),
tags: z.array(z.string()).optional()
});
// Nested schema
const EcommerceSchema = z.object({
product: z.object({
name: z.string(),
price: z.object({
current: z.number(),
original: z.number().optional()
})
}),
reviews: z.array(z.object({
rating: z.number(),
comment: z.string()
}))
});
Example Response
{
"product": {
"name": "Product Name",
"price": {
"current": 100,
"original": 120
}
},
"reviews": [
{
"rating": 4,
"comment": "Great product!"
}
]
}