Python Examples (and REST API)¶
This page covers the most common scrape workflows, using both the Python SDK and raw REST API calls (shown with curl, Python `requests`, and JavaScript `fetch`).
Install the SDK¶
pip install toolkitapi
Scrape a page (HTML output)¶
curl -X POST "https://scrape.toolkitapi.io/v1/scrape" \
-H "X-API-Key: YOUR_KEY" \
-H "Content-Type: application/json" \
-d '{"url": "https://toolkitapi.io", "render_js": false}'
import requests
resp = requests.post(
"https://scrape.toolkitapi.io/v1/scrape",
headers={"X-API-Key": "YOUR_KEY"},
json={"url": "https://toolkitapi.io", "render_js": False},
)
data = resp.json()
print(data["status_code"], data["title"])
const resp = await fetch("https://scrape.toolkitapi.io/v1/scrape", {
method: "POST",
headers: { "X-API-Key": "YOUR_KEY", "Content-Type": "application/json" },
body: JSON.stringify({ url: "https://toolkitapi.io", render_js: false }),
});
const data = await resp.json();
console.log(data.title);
Response
{
"url": "https://toolkitapi.io",
"status_code": 200,
"content": "<!doctype html>\n<html>...",
"title": "Toolkit API",
"content_type": "text/html"
}
Python SDK equivalent¶
from toolkitapi import Scrape
with Scrape(api_key="tk_...") as scrape:
result = scrape.fetch(url="https://toolkitapi.io", output="html")
print(result["status_code"])
print(result["content"][:500])
Fetch markdown content¶
curl -X POST "https://scrape.toolkitapi.io/v1/scrape" \
-H "X-API-Key: YOUR_KEY" \
-H "Content-Type: application/json" \
-d '{"url": "https://toolkitapi.io/blog/post", "formats": ["markdown"], "include_links": true, "include_tables": true}'
import requests
resp = requests.post(
"https://scrape.toolkitapi.io/v1/scrape",
headers={"X-API-Key": "YOUR_KEY"},
json={"url": "https://toolkitapi.io/blog/post", "formats": ["markdown"], "include_links": True, "include_tables": True},
)
data = resp.json()
print(data["markdown"][:500])
const resp = await fetch("https://scrape.toolkitapi.io/v1/scrape", {
method: "POST",
headers: { "X-API-Key": "YOUR_KEY", "Content-Type": "application/json" },
body: JSON.stringify({
url: "https://toolkitapi.io/blog/post",
formats: ["markdown"],
include_links: true,
include_tables: true,
}),
});
const data = await resp.json();
console.log(data.markdown);
Python SDK equivalent¶
from toolkitapi import Scrape
with Scrape(api_key="tk_...") as scrape:
result = scrape.extract_markdown(
url="https://toolkitapi.io/blog/post",
include_links=True,
include_tables=True,
)
print(result["content"])
Fetch plain text¶
curl -X POST "https://scrape.toolkitapi.io/v1/scrape" \
-H "X-API-Key: YOUR_KEY" \
-H "Content-Type: application/json" \
-d '{"url": "https://toolkitapi.io/blog/post", "formats": ["text"]}'
import requests
resp = requests.post(
"https://scrape.toolkitapi.io/v1/scrape",
headers={"X-API-Key": "YOUR_KEY"},
json={"url": "https://toolkitapi.io/blog/post", "formats": ["text"]},
)
data = resp.json()
print(data["text"])
const resp = await fetch("https://scrape.toolkitapi.io/v1/scrape", {
method: "POST",
headers: { "X-API-Key": "YOUR_KEY", "Content-Type": "application/json" },
body: JSON.stringify({ url: "https://toolkitapi.io/blog/post", formats: ["text"] }),
});
const data = await resp.json();
console.log(data.text);
CSS selector extraction¶
Extract specific elements using CSS selectors:
curl -X POST "https://scrape.toolkitapi.io/v1/scrape" \
-H "X-API-Key: YOUR_KEY" \
-H "Content-Type: application/json" \
-d '{
"url": "https://toolkitapi.io/product/123",
"render_js": true,
"extract": {
"selectors": {
"title": "h1",
"price": ".price",
"image_urls": {"selector": ".gallery img", "attr": "src", "multiple": true}
}
}
}'
import requests
resp = requests.post(
"https://scrape.toolkitapi.io/v1/scrape",
headers={"X-API-Key": "YOUR_KEY"},
json={
"url": "https://toolkitapi.io/product/123",
"render_js": True,
"extract": {
"selectors": {
"title": "h1",
"price": ".price",
"image_urls": {"selector": ".gallery img", "attr": "src", "multiple": True},
}
},
},
)
data = resp.json()
print(data["selectors"]["title"])
print(data["selectors"]["price"])
const resp = await fetch("https://scrape.toolkitapi.io/v1/scrape", {
method: "POST",
headers: { "X-API-Key": "YOUR_KEY", "Content-Type": "application/json" },
body: JSON.stringify({
url: "https://toolkitapi.io/product/123",
render_js: true,
extract: {
selectors: {
title: "h1",
price: ".price",
image_urls: { selector: ".gallery img", attr: "src", multiple: true },
},
},
}),
});
const data = await resp.json();
console.log(data.selectors);
Python SDK equivalent¶
from toolkitapi import Scrape
with Scrape(api_key="tk_...") as scrape:
result = scrape.css_extract(
url="https://toolkitapi.io/product/123",
render_js=True,
selectors={
"title": "h1",
"price": ".price",
"buy_link": {"selector": ".buy-now", "attr": "href"},
},
)
print(result["selectors"])
Metadata and rich extraction¶
curl -X POST "https://scrape.toolkitapi.io/v1/scrape" \
-H "X-API-Key: YOUR_KEY" \
-H "Content-Type: application/json" \
-d '{
"url": "https://toolkitapi.io",
"formats": ["markdown"],
"extract": {
"meta_tags": true,
"link_preview": true,
"links": true,
"images": true
}
}'
from toolkitapi import Scrape
with Scrape(api_key="tk_...") as scrape:
result = scrape.fetch(
url="https://toolkitapi.io",
output="markdown",
extract={"meta_tags": True, "link_preview": True, "links": True, "images": True},
)
print(result.get("meta_tags"))
print(result.get("link_preview"))
AI extraction¶
Extract structured data from any page using natural language:
curl -X POST "https://scrape.toolkitapi.io/v1/scrape" \
-H "X-API-Key: YOUR_KEY" \
-H "Content-Type: application/json" \
-d '{
"url": "https://toolkitapi.io/product/123",
"render_js": true,
"extract": {
"ai_prompt": "Extract the product name, price, and availability.",
"ai_schema": {
"type": "object",
"properties": {
"name": {"type": "string"},
"price": {"type": "string"},
"availability": {"type": "string"}
}
}
}
}'
from toolkitapi import Scrape
with Scrape(api_key="tk_...") as scrape:
result = scrape.ai_extract(
url="https://toolkitapi.io/product/123",
render_js=True,
prompt="Extract the product name, price, and availability.",
schema={
"type": "object",
"properties": {
"name": {"type": "string"},
"price": {"type": "string"},
"availability": {"type": "string"},
},
},
)
print(result.get("ai_extract"))
Common parameters¶
POST /v1/scrape¶
| Parameter | Type | Required | Description |
|---|---|---|---|
| `url` | string | Yes | Target URL to scrape |
| `formats` | string[] | No | Output formats: html, markdown, text, screenshot, links |
| `render_js` | boolean | No | Execute JavaScript. Default: false |
| `extract` | object | No | Extraction configuration (selectors, metadata, AI) |
| `include_links` | boolean | No | Include hyperlink data. Default: false |
| `include_tables` | boolean | No | Extract tables as structured data. Default: false |
| `headers` | object | No | Custom HTTP headers |
| `proxy` | string | No | Proxy region: US, GB, datacenter, etc. |
GET /v1/fetch¶
| Parameter | Type | Required | Description |
|---|---|---|---|
| `url` | string | Yes | Target URL |
| `output` | string | No | html, markdown, text, clean. Default: html |
| `render_js` | boolean | No | Execute JavaScript. Default: false |
Output modes¶
| Output | Best for |
|---|---|
| `html` | Raw parsing and archival |
| `markdown` | LLM and RAG pipelines |
| `text` | NLP and search indexing |
| `clean` | Article-like readable output |
| `screenshot` | Visual snapshots |
| `links` | Link discovery and mapping |