Create and Run a Scraper Programmatically
Learn how to create an AI scraper once and use it programmatically via API. Set up once, automate everywhere.
Overview
This guide shows you how to create an AI scraper once and then use it programmatically through the API. This is perfect if you want to set up a scraper a single time and automate data extraction with simple API calls, with no need to configure it every time.
Prerequisites
Before you start, make sure you have:
- A MrScraper API key for authentication. If you don't have one yet, follow this guide to create an API key.
Step 1: Verify Your API Token
You can verify your API token and check your account status using the /subscription-accounts endpoint:
curl --location 'https://api.app.mrscraper.com/api/v1/subscription-accounts' \
--header 'accept: */*' \
--header 'x-api-token: YOUR_API_TOKEN'

JavaScript:

const token = "YOUR_API_TOKEN";
fetch("https://api.app.mrscraper.com/api/v1/subscription-accounts", {
method: "GET",
headers: {
"accept": "*/*",
"x-api-token": token
}
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));

Python:

import requests
token = "YOUR_API_TOKEN"
headers = {
"accept": "*/*",
"x-api-token": token
}
response = requests.get(
"https://api.app.mrscraper.com/api/v1/subscription-accounts",
headers=headers
)
print(response.json())

Response (200 OK):
{
"message": "Successful operation!",
"data": {
"id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"tokenLimit": 10000,
"tokenUsage": 994,
"stripeSubscriptionId": "sub_xxxxxxxxxxxxxxxxxx",
"userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"stripeStatus": "true",
"quantity": 1,
"endsAt": "2030-12-01T02:05:08.400Z",
"subscriptionItemId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"user": {
"id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"createdAt": "2025-11-24T01:59:15.866Z",
"createdById": null,
"updatedAt": "2026-01-17T08:11:43.270Z",
"updatedById": null,
"deletedAt": null,
"deletedById": null,
"name": "John Doe",
"email": "johndoe@example.com",
"username": null,
"latestApiToken": "atk_xxxx....",
"gender": null,
"phoneNumber": null,
"address": null,
"birthDate": null,
"avatar": "https://api.app.mrscraper.com/images/users/default-avatar.png",
"otp": null,
"otpExpiredAt": null,
"isVerified": true,
"stripeCustomerId": "cus_xxxxxxxxx",
"googleMail": null,
"googleId": null,
"s3Bucket": null,
"domainTargets": [
"https://books.toscrape.com"
]
},
"rateLimit": null,
"rateTtl": null,
"isEnterprise": true,
"cancelAtPeriodEnd": null,
"currentPeriodStart": null,
"currentPeriodEnd": null,
"proxyDollarPerGB": null,
"isAutoRenew": false,
"updatedAt": "2026-01-18T01:40:21.392Z",
"createdAt": "2025-11-24T02:05:08.470Z"
}
}

Key Response Fields:
Subscription Account Fields:
| Field | Type | Description |
|---|---|---|
id | string | Unique identifier for your subscription account |
tokenLimit | integer | Maximum tokens available in your subscription plan |
tokenUsage | integer | Total tokens consumed so far |
stripeSubscriptionId | string | Stripe subscription identifier (for billing) |
userId | string | Your unique user identifier |
stripeStatus | string | Payment status ("true" = active) |
quantity | integer | Subscription quantity/seats |
endsAt | string | Subscription expiration date (ISO 8601 format) |
subscriptionItemId | string | Stripe subscription item identifier |
rateLimit | integer/null | API rate limit (requests per window) |
rateTtl | integer/null | Rate limit time window in seconds |
isEnterprise | boolean | Whether account has enterprise features |
cancelAtPeriodEnd | boolean/null | If subscription will cancel at period end |
currentPeriodStart | string/null | Current billing period start date |
currentPeriodEnd | string/null | Current billing period end date |
proxyDollarPerGB | number/null | Custom proxy pricing (enterprise only) |
isAutoRenew | boolean | Whether subscription auto-renews |
User Object Fields (nested under data.user):
| Field | Type | Description |
|---|---|---|
id | string | Unique user identifier |
name | string | User's full name |
email | string | User's email address |
latestApiToken | string | Your most recently generated API token (truncated) |
avatar | string | URL to user's profile picture |
isVerified | boolean | Whether email is verified |
stripeCustomerId | string | Stripe customer identifier |
domainTargets | array | List of domains you've scraped |
createdAt | string | Account creation timestamp |
updatedAt | string | Last account update timestamp |
Tip
Checking Token Balance: Use tokenLimit - tokenUsage to calculate your remaining tokens. Plan your scraping operations accordingly to avoid running out of tokens.
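As a quick illustration, here is a minimal Python sketch (using the same endpoint and x-api-token header shown above) that computes the remaining balance from the /subscription-accounts response:

import requests

token = "YOUR_API_TOKEN"
response = requests.get(
    "https://api.app.mrscraper.com/api/v1/subscription-accounts",
    headers={"accept": "*/*", "x-api-token": token},
)
account = response.json()["data"]

# Remaining tokens = tokenLimit - tokenUsage
remaining = account["tokenLimit"] - account["tokenUsage"]
print(f"Remaining tokens: {remaining} of {account['tokenLimit']}")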
Step 2: Collect URLs from Website
Collect all relevant URLs from your target website using the AI scraper endpoint. Select the agent type you want to use from the examples below:
Initial Scrape using General Agent
Step 1: Get All URLs
Use the /scrapers-ai endpoint to create a General agent scraper to get only the URLs within the provided link.
curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
"url": "https://books.toscrape.com",
"agent": "general",
"message": "Extract all data and just only get the urls, include patterns https://books.toscrape.com/catalogue and for exclude patterns https://books.toscrape.com/catalogue/category",
"proxyCountry": ""
}'

JavaScript:

const token = "YOUR_API_TOKEN";
fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai", {
method: "POST",
headers: {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
},
body: JSON.stringify({
url: "https://books.toscrape.com",
graph: "general",
message: "Extract all data and just only get the urls, include patterns https://books.toscrape.com/catalogue and for exclude patterns https://books.toscrape.com/catalogue/category",
proxyCountry: ""
})
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));

Python:

import requests
token = "YOUR_API_TOKEN"
url = "https://api.app.mrscraper.com/api/v1/scrapers-ai"
headers = {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
}
payload = {
"url": "https://books.toscrape.com",
"agent": "general",
"message": "Extract all data and just only get the urls, include patterns https://books.toscrape.com/catalogue and for exclude patterns https://books.toscrape.com/catalogue/category",
"proxyCountry": ""
}
response = requests.post(url, headers=headers, json=payload)
print(response.json())

Step 2: Get Page Details
After collecting URLs, you'll want to extract detailed data from each individual page.
Tip
When creating a detail scraper, always use a real detail page URL.
For example, instead of https://books.toscrape.com, use a specific product page like https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html (from your URL collection step).
Do not use the homepage or listing page; this helps ensure accurate data extraction and prevents setup errors.
Create a new scraper configured for detail extraction:
curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
"url": "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
"agent": "general",
"message": "Extract data : Title, Product Description, Price (Float), Availability, and Number of reviews",
"proxyCountry": ""
}'

JavaScript:

const token = "YOUR_API_TOKEN";
fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai", {
method: "POST",
headers: {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
},
body: JSON.stringify({
url: "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
graph: "general",
message: "Extract data : Title, Product Description, Price (Float), Availability, and Number of reviews",
proxyCountry: ""
})
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));

Python:

import requests
token = "YOUR_API_TOKEN"
url = "https://api.app.mrscraper.com/api/v1/scrapers-ai"
headers = {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
}
payload = {
"url": "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
"agent": "general",
"message": "Extract data : Title, Product Description, Price (Float), Availability, and Number of reviews",
"proxyCountry": ""
}
response = requests.post(url, headers=headers, json=payload)
print(response.json())

Response (200 OK):
{
"id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"createdAt": "2026-01-18T08:37:38.124Z",
"userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
"scraperId": "YOUR_DETAIL_SCRAPER_ID",
"type": "AI",
"url": "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
"status": "Finished",
"tokenUsage": 5,
"data": {
"title": "A Light in the Attic",
"price": "Β£51.77",
"availability": "In stock (22 available)",
"description": "It's hard to imagine a world without A Light in the Attic...",
"number_of_reviews": "0"
}
}

Request Parameters:
| Parameter | Type | Required | Description |
|---|---|---|---|
url | string | Yes | Target website URL to scrape |
agent | string | Yes | AI agent to use (e.g., general, listing, detail, map) |
message | string | Yes | Natural language instruction describing what to extract |
proxyCountry | string | No | Proxy country code for geo-restricted content |
Save the Scraper ID
You'll need this ID (YOUR_DETAIL_SCRAPER_ID in this example) for bulk scraping multiple URLs with the same extraction configuration.
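As a minimal Python sketch (assuming response is the requests.post call from the example above), you can pull the ID straight out of the creation response and keep it for later:

result = response.json()

# The scraper ID may appear at the top level (as in the example response above)
# or nested under "data", depending on how the response is wrapped.
detail_scraper_id = result.get("scraperId") or result.get("data", {}).get("scraperId")
print("Detail scraper ID:", detail_scraper_id)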
Initial Scrape with Map Agent
Use the /scrapers-ai endpoint to create a Map agent scraper.
curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
"agent": "map",
"url": "https://books.toscrape.com",
"maxDepth": 2,
"maxPages": 100,
"limit": 1000,
"includePatterns": "^https:\\/\\/books\\.toscrape\\.com\\/catalogue$",
"excludePatterns": "^https:\\/\\/books\\.toscrape\\.com\\/catalogue\\/category$"
}'

JavaScript:

const token = "YOUR_API_TOKEN";
fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai", {
method: "POST",
headers: {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
},
body: JSON.stringify({
agent: "map",
url: "https://books.toscrape.com",
maxDepth: 2,
maxPages: 100,
limit: 1000,
includePatterns: "^https:\\/\\/books\\.toscrape\\.com\\/catalogue$",
excludePatterns: "^https:\\/\\/books\\.toscrape\\.com\\/catalogue\\/category$"
})
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));

Python:

import requests
token = "YOUR_API_TOKEN"
url = "https://api.app.mrscraper.com/api/v1/scrapers-ai"
headers = {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
}
payload = {
"agent": "map",
"url": "https://books.toscrape.com",
"maxDepth": 2,
"maxPages": 100,
"limit": 1000,
"includePatterns": "^https:\\/\\/books\\.toscrape\\.com\\/catalogue$",
"excludePatterns": "^https:\\/\\/books\\.toscrape\\.com\\/catalogue\\/category$"
}
response = requests.post(url, headers=headers, json=payload)
print(response.json())

Request Parameters:
| Parameter | Type | Required | Description |
|---|---|---|---|
agent | string | Yes | Must be "map" for map agent |
url | string | Yes | Target website URL to scrape |
maxDepth | number | Yes | Maximum depth to crawl (e.g., 2) |
maxPages | number | Yes | Maximum number of pages to crawl (e.g., 100) |
limit | number | Yes | Maximum number of URLs to collect (e.g., 1000) |
includePatterns | string | No | Regex pattern for URLs to include |
excludePatterns | string | No | Regex pattern for URLs to exclude (can be empty string) |
Response (200 OK):
{
"message": "Successful operation!",
"data": {
"id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
"createdAt": "2026-01-18T05:01:24.944Z",
"userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
"scraperId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
"type": "AI",
"url": "https://books.toscrape.com",
"status": "Finished",
"error": null,
"tokenUsage": 5,
"data": {
"urls": [
"https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
"https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
"https://books.toscrape.com/catalogue/soumission_998/index.html",
...
]
}
}
}

Save the Scraper ID
You'll need this ID to re-run the scraper later without reconfiguring it.
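A minimal Python sketch (assuming response is the requests.post call from the map agent example above) that keeps both the scraper ID and the collected URLs:

result = response.json()["data"]         # map agent responses are wrapped in "data"

map_scraper_id = result["scraperId"]     # save this to re-run the map scraper later
collected_urls = result["data"]["urls"]  # the URLs gathered by the crawl

print("Map scraper ID:", map_scraper_id)
print(f"Collected {len(collected_urls)} URLs")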
Bulk Scrape Multiple URLs
Use the /scrapers-ai-rerun/bulk endpoint to scrape multiple URLs in a single request:
Note
The bulk endpoint is available only for the General agent.
curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun/bulk' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
"scraperId": "YOUR_DETAIL_SCRAPER_ID",
"urls": [
"https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
"https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
"https://books.toscrape.com/catalogue/soumission_998/index.html",
...
]
}'

JavaScript:

const token = "YOUR_API_TOKEN";
const detailScraperId = "YOUR_DETAIL_SCRAPER_ID";
const urls = [
"https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
"https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
"https://books.toscrape.com/catalogue/soumission_998/index.html"
];
fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun/bulk", {
method: "POST",
headers: {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
},
body: JSON.stringify({
scraperId: detailScraperId,
urls: urls
})
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));

Python:

import requests
token = "YOUR_API_TOKEN"
detail_scraper_id = "YOUR_DETAIL_SCRAPER_ID"
urls = [
"https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
"https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
"https://books.toscrape.com/catalogue/soumission_998/index.html",
...
]
url = "https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun/bulk"
headers = {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
}
payload = {
"scraperId": detail_scraper_id,
"urls": urls
}
response = requests.post(url, headers=headers, json=payload)
print(response.json())

Request Parameters:
| Parameter | Type | Required | Description |
|---|---|---|---|
scraperId | string | Yes | The detail scraper ID you saved when creating the detail scraper |
urls | array | Yes | Array of URLs to scrape |
Response (200 OK):
{
"message": "Bulk scraping is Running",
"data": {
"bulkResultId": "YOUR_BULK_RESULT_ID"
}
}

Asynchronous Operation
Bulk scraping runs in the background. Save the bulkResultId to retrieve results later.
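For example, a minimal Python sketch (assuming response is the requests.post call from the bulk example above) that extracts and persists the bulkResultId:

bulk_result_id = response.json()["data"]["bulkResultId"]

# Persist the ID so results can still be fetched if this script exits before the job finishes.
with open("bulk_result_id.txt", "w") as f:
    f.write(bulk_result_id)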
Step 3: Retrieve Results
There are two ways to retrieve scraping results: get a specific result by its ID, or get all results for a scraper.
Get Single Result by Result ID
Use the /results/{id} endpoint to get a single result by its resultId (the id field from the scrape response):
curl --location 'https://api.app.mrscraper.com/api/v1/results/YOUR_RESULT_ID' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN'

JavaScript:

const token = "YOUR_API_TOKEN";
const resultId = "YOUR_RESULT_ID";
fetch(`https://api.app.mrscraper.com/api/v1/results/${resultId}`, {
method: "GET",
headers: {
"accept": "application/json",
"x-api-token": token
}
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));

Python:

import requests
token = "YOUR_API_TOKEN"
result_id = "YOUR_RESULT_ID"
url = f"https://api.app.mrscraper.com/api/v1/results/{result_id}"
headers = {
"accept": "application/json",
"x-api-token": token
}
response = requests.get(url, headers=headers)
print(response.json())

Response (200 OK):
{
"message": "Successful operation!",
"data": {
"createdAt": "2026-01-18T07:28:44.725Z",
"id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxx...",
"userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
"scraperId": "YOUR_SCRAPER_ID",
"type": "Rerun-AI",
"url": "https://books.toscrape.com",
"status": "Finished",
"error": "",
"tokenUsage": 4,
"data": {
"urls": [
"https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
"https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
"https://books.toscrape.com/catalogue/soumission_998/index.html",
...
]
},
"htmlPath": "results/.../page.html",
"screenshotPath": "results/.../page.jpg"
}
}

Get All Results by Scraper ID
Use the /results endpoint to retrieve all historical results for a specific scraper with pagination:
curl --location --globoff 'https://api.app.mrscraper.com/api/v1/results?filters[scraperId]=YOUR_SCRAPER_ID&page=1&pageSize=10&sort=createdAt&sortOrder=DESC' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN'

JavaScript:

const token = "YOUR_API_TOKEN";
const scraperId = "YOUR_SCRAPER_ID";
const params = new URLSearchParams({
'filters[scraperId]': scraperId,
'page': '1',
'pageSize': '10',
'sort': 'createdAt',
'sortOrder': 'DESC'
});
fetch(`https://api.app.mrscraper.com/api/v1/results?${params}`, {
method: "GET",
headers: {
"accept": "application/json",
"x-api-token": token
}
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));

Python:

import requests
token = "YOUR_API_TOKEN"
scraper_id = "YOUR_SCRAPER_ID"
url = "https://api.app.mrscraper.com/api/v1/results"
headers = {
"accept": "application/json",
"x-api-token": token
}
params = {
"filters[scraperId]": scraper_id,
"page": 1,
"pageSize": 10,
"sort": "createdAt",
"sortOrder": "DESC"
}
response = requests.get(url, headers=headers, params=params)
print(response.json())

Query Parameters:
| Parameter | Type | Required | Description |
|---|---|---|---|
filters[scraperId] | string | Yes | Filter results by scraper ID |
page | integer | No | Page number (default: 1) |
pageSize | integer | No | Results per page (default: 10) |
sort | string | No | Field to sort by (e.g., createdAt) |
sortOrder | string | No | Sort direction: ASC or DESC |
Response (200 OK):
{
"message": "Successful fetch",
"data": [
{
"createdAt": "2026-01-18T07:28:44.725Z",
"id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxx...",
"scraperId": "YOUR_SCRAPER_ID",
"type": "Rerun-AI",
"url": "https://books.toscrape.com",
"status": "Finished",
"tokenUsage": 4,
"data": {
"urls": ["..."]
}
},
{
"createdAt": "2026-01-18T07:28:04.740Z",
"id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
"scraperId": "YOUR_SCRAPER_ID",
"type": "AI",
"url": "https://books.toscrape.com",
"status": "Finished",
"tokenUsage": 5,
"data": {
"urls": ["..."]
}
}
],
"meta": {
"page": 1,
"pageSize": 10,
"total": 2,
"totalPage": 1
}
}

Retrieve Bulk Scraping Results
Bulk scraping is asynchronous, meaning you need to poll the results endpoint to check the status and retrieve the data when processing is complete.
curl --location 'https://api.app.mrscraper.com/api/v1/results/YOUR_BULK_RESULT_ID' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN'

JavaScript:

const token = "YOUR_API_TOKEN";
const bulkResultId = "YOUR_BULK_RESULT_ID";
fetch(`https://api.app.mrscraper.com/api/v1/results/${bulkResultId}`, {
method: "GET",
headers: {
"accept": "application/json",
"x-api-token": token
}
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));

Python:

import requests
token = "YOUR_API_TOKEN"
bulk_result_id = "YOUR_BULK_RESULT_ID"
url = f"https://api.app.mrscraper.com/api/v1/results/{bulk_result_id}"
headers = {
"accept": "application/json",
"x-api-token": token
}
response = requests.get(url, headers=headers)
print(response.json())

Response (Status: Running):
{
"message": "Successful operation!",
"data": {
"id": "YOUR_BULK_RESULT_ID",
"status": "Running",
"data": null
}
}

Response (Status: Finished):
{
"message": "Successful operation!",
"data": {
"createdAt": "2026-01-18T08:38:23.993Z",
"id": "YOUR_BULK_RESULT_ID",
"userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
"scraperId": "YOUR_DETAIL_SCRAPER_ID",
"type": "Bulk-AI",
"status": "Finished",
"tokenUsage": 12,
"data": {
"mergedData": [
{
"title": "A Light in the Attic",
"price": "Β£51.77",
"availability": "In stock (22 available)",
"description": "It's hard to imagine a world without A Light in the Attic...",
"product_type": "Books",
"upc": "a897fe39b1053632",
"price_excl_tax": "Β£51.77",
"price_incl_tax": "Β£51.77",
"tax": "Β£0.00",
"number_of_reviews": "0"
},
{
"title": "Tipping the Velvet",
"price": "Β£53.74",
"availability": "In stock (20 available)",
"description": "Erotic and absorbing...Written with starling power...",
"product_type": "Books",
"upc": "90fa61229261140a",
"price_excl_tax": "Β£53.74",
"price_incl_tax": "Β£53.74",
"tax": "Β£0.00",
"number_of_reviews": "0"
},
{
"title": "Soumission",
"price": "Β£50.10",
"availability": "In stock (20 available)",
"description": "Dans une France assez proche de la nΓ΄tre...",
"product_type": "Books",
"upc": "6957f44c3847a760",
"price_excl_tax": "Β£50.10",
"price_incl_tax": "Β£50.10",
"tax": "Β£0.00",
"number_of_reviews": "0"
}
],
"urlDetails": [
{
"url": "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
"status": "Finished",
"error": ""
},
{
"url": "https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
"status": "Finished",
"error": ""
},
{
"url": "https://books.toscrape.com/catalogue/soumission_998/index.html",
"status": "Finished",
"error": ""
}
],
"summary": {
"totalUrls": 3,
"successfulUrls": 3,
"failedUrls": 0,
"totalTokenUsage": 12
}
}
}
}

Key Response Fields:
| Field | Type | Description |
|---|---|---|
status | string | Current status: Running, Finished, or Failed |
data.mergedData | array | Array of extracted data from all URLs |
data.urlDetails | array | Status details for each individual URL |
data.summary | object | Summary statistics of the bulk operation |
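Because the job runs in the background, you typically poll this endpoint until status changes from Running. A minimal Python polling sketch (the 10-second interval and the YOUR_BULK_RESULT_ID placeholder are illustrative assumptions, not API requirements):

import time
import requests

token = "YOUR_API_TOKEN"
bulk_result_id = "YOUR_BULK_RESULT_ID"
url = f"https://api.app.mrscraper.com/api/v1/results/{bulk_result_id}"
headers = {"accept": "application/json", "x-api-token": token}

while True:
    data = requests.get(url, headers=headers).json()["data"]
    if data["status"] == "Finished":
        print("Done:", data["data"]["summary"])
        break
    if data["status"] == "Failed":
        print("Bulk scrape failed")
        break
    time.sleep(10)  # wait before polling again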
Step 4: Re-run Scraper (Optional)
If you want to scrape the same website again with the same configuration, use the /scrapers-ai-rerun endpoint with your saved scraperId:
Re-run General Agent
curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
"scraperId": "YOUR_SCRAPER_ID",
"url": "https://books.toscrape.com"
}'

JavaScript:

const token = "YOUR_API_TOKEN";
const scraperId = "YOUR_SCRAPER_ID";
fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun", {
method: "POST",
headers: {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
},
body: JSON.stringify({
scraperId: scraperId,
url: "https://books.toscrape.com"
})
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));

Python:

import requests
token = "YOUR_API_TOKEN"
scraper_id = "YOUR_SCRAPER_ID"
url = "https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun"
headers = {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
}
payload = {
"scraperId": scraper_id,
"url": "https://books.toscrape.com"
}
response = requests.post(url, headers=headers, json=payload)
print(response.json())

Request Parameters:
| Parameter | Type | Required | Description |
|---|---|---|---|
scraperId | string | Yes | The scraper ID from your initial scrape |
url | string | Yes | Target URL to scrape |
Re-run Map Agent
When using a scraper with the map agent, you need to provide additional parameters, such as includePatterns and excludePatterns, to control URL filtering.
curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
"scraperId": "YOUR_SCRAPER_ID",
"url": "https://books.toscrape.com",
"maxDepth": 1,
"maxPages": 50,
"limit": 100,
"includePatterns": ["https://books.toscrape.com/catalogue"],
"excludePatterns": ["https://books.toscrape.com/catalogue/category"]
}'

JavaScript:

const token = "YOUR_API_TOKEN";
const scraperId = "YOUR_SCRAPER_ID";
fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun", {
method: "POST",
headers: {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
},
body: JSON.stringify({
scraperId: scraperId,
url: "https://books.toscrape.com",
"scraperId": scraper_id,
"url": "https://books.toscrape.com",
maxDepth: 1,
maxPages: 50,
limit: 100,
includePatterns: ["https://books.toscrape.com/catalogue"],
excludePatterns: ["https://books.toscrape.com/catalogue/category"]
})
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));

Python:

import requests
token = "YOUR_API_TOKEN"
scraper_id = "YOUR_SCRAPER_ID"
url = "https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun"
headers = {
"accept": "application/json",
"x-api-token": token,
"content-type": "application/json"
}
payload = {
"scraperId": scraper_id,
"url": "https://books.toscrape.com",
"maxDepth": 1,
"maxPages": 50,
"limit": 100,
"includePatterns": ["https://books.toscrape.com/catalogue"],
"excludePatterns": ["https://books.toscrape.com/catalogue/category"]
}
response = requests.post(url, headers=headers, json=payload)
print(response.json())

Additional Map Agent Parameters:
| Parameter | Type | Required | Description |
|---|---|---|---|
scraperId | string | Yes | The scraper ID from your initial scrape |
url | string | Yes | Target URL to scrape |
maxDepth | number | Yes | Maximum depth to crawl (e.g., 2) |
maxPages | number | Yes | Maximum number of pages to crawl (e.g., 100) |
limit | number | Yes | Maximum number of URLs to collect (e.g., 1000) |
includePatterns | string[] | No | Array of regex patterns for URLs to include |
excludePatterns | string[] | No | Array of regex patterns for URLs to exclude (can be an empty array) |
Note:
When "re-running" with a map agent, you can fine-tune which URLs will be crawled usingincludePatternsandexcludePatternswhile reusing your base scraper logic and configuration.
Response (200 OK):
{
"message": "Successful operation!",
"data": {
"id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
"createdAt": "2026-01-18T07:28:44.725Z",
"userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...,
"scraperId": "YOUR_SCRAPER_ID",
"type": "Rerun-AI",
"url": "https://books.toscrape.com",
"status": "Finished",
"error": "",
"tokenUsage": 4,
"data": {
"urls": [
"https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
"https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
"https://books.toscrape.com/catalogue/soumission_998/index.html",
...
]
},
"htmlPath": "results/.../page.html",
"screenshotPath": "results/.../page.jpg"
}
}

Tips
- You don't need to create a new scraper from scratch every time you want to scrape a different but structurally similar website or page. If the data format and structure are the same, you can simply re-run your existing scraper and provide a new target URL.
- This works for every agent, including the map agent. For map agents, you can even update the URL and the include/exclude patterns without creating a new scraper, saving you time and tokens. Reuse your scrapers whenever your targets share the same page layout (see the sketch below).
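As a sketch of that reuse pattern in Python (the product URL here is just an illustrative example of a structurally similar page):

import requests

token = "YOUR_API_TOKEN"
headers = {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json",
}

# Reuse the detail scraper created earlier against a different, structurally similar page.
payload = {
    "scraperId": "YOUR_DETAIL_SCRAPER_ID",
    "url": "https://books.toscrape.com/catalogue/sharp-objects_997/index.html",
}
response = requests.post(
    "https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun",
    headers=headers,
    json=payload,
)
print(response.json())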
Complete Workflow Example
Below is a full example that runs all steps in a single automated workflow, first in Python and then in JavaScript.

Python:
import requests
import json
import time
# Configuration
API_TOKEN = "YOUR_API_TOKEN"
BASE_URL = "https://api.app.mrscraper.com/api/v1"
headers = {
"accept": "application/json",
"x-api-token": API_TOKEN,
"content-type": "application/json"
}
def step1_verify_token():
    """Step 1: Verify API token and check account status"""
    print("Step 1: Verifying API token...")
    response = requests.get(
        f"{BASE_URL}/subscription-accounts",
        headers={"accept": "*/*", "x-api-token": API_TOKEN}
    )
    data = response.json()
    if response.status_code == 200:
        print(f"Token valid! Tokens: {data['data']['tokenUsage']}/{data['data']['tokenLimit']}")
        return True
    else:
        print(f"Token verification failed: {data}")
        return False

def step2_collect_urls(target_url, message):
    """Step 2: Create scraper and collect URLs"""
    print(f"\nStep 2: Collecting URLs from {target_url}...")
    payload = {
        "url": target_url,
        "agent": "general",
        "message": message,
        "proxyCountry": ""
    }
    response = requests.post(f"{BASE_URL}/scrapers-ai", headers=headers, json=payload)
    result = response.json()
    # cache to a file
    with open("step2_collect_urls_response.json", "w") as f:
        json.dump(result, f, indent=2)
    if "scraperId" in result.get("data", {}):
        scraper_id = result["data"]["scraperId"]
        print("Scraping finished")
        print(f"URL Scraper ID: {scraper_id}")
        return [], scraper_id
    else:
        print(f"Failed: {result}")
        return [], None

def step3_rerun_collect_urls(url, scraper_id):
    """Step 3: Rerun URL collection scraper"""
    print(f"\nStep 3: Rerunning URL collection scraper {scraper_id}...")
    payload = {
        "scraperId": scraper_id,
        "url": url
    }
    response = requests.post(f"{BASE_URL}/scrapers-ai-rerun/", headers=headers, json=payload)
    result = response.json()
    # cache to a file
    with open("step3_rerun_collect_urls_response.json", "w") as f:
        json.dump(result, f, indent=2)
    if result.get("message") == "Successful operation!":
        urls = result["data"]["data"]["urls"]
        print(f"Rerun collected {len(urls)} URLs")
        return urls
    else:
        print(f"Failed: {result}")
        return []

def step4_create_detail_scraper(sample_url, message):
    """Step 4: Create detail scraper"""
    print("\nStep 4: Creating detail scraper...")
    payload = {
        "url": sample_url,
        "agent": "general",
        "message": message,
        "proxyCountry": ""
    }
    response = requests.post(f"{BASE_URL}/scrapers-ai", headers=headers, json=payload)
    result = response.json()
    # cache to a file
    with open("step4_create_detail_scraper_response.json", "w") as f:
        json.dump(result, f, indent=2)
    if "scraperId" in result.get("data", {}):
        scraper_id = result["data"]["scraperId"]
        print("Detail scraper created")
        print(f"Detail Scraper ID: {scraper_id}")
        return scraper_id
    else:
        print(f"Failed: {result}")
        return None

def step5_bulk_scrape(scraper_id, urls):
    """Step 5: Bulk scrape all URLs"""
    print(f"\nStep 5: Starting bulk scrape for {len(urls)} URLs...")
    payload = {
        "scraperId": scraper_id,
        "urls": urls
    }
    response = requests.post(f"{BASE_URL}/scrapers-ai-rerun/bulk", headers=headers, json=payload)
    result = response.json()
    # cache to a file
    with open("step5_bulk_scrape_response.json", "w") as f:
        json.dump(result, f, indent=2)
    bulk_result_id = result["data"]["bulkResultId"]
    print("Bulk scraping started")
    print(f"Bulk Result ID: {bulk_result_id}")
    return bulk_result_id

def step6_get_results(bulk_result_id, poll_interval=10):
    """Step 6: Poll for results"""
    print(f"\nStep 6: Waiting for results (polling every {poll_interval}s)...")
    while True:
        response = requests.get(
            f"{BASE_URL}/results/{bulk_result_id}",
            headers={"accept": "application/json", "x-api-token": API_TOKEN}
        )
        result = response.json()
        status = result["data"]["status"]
        if status == "Finished":
            print("Scraping completed!")
            return result["data"]["data"]
        elif status == "Running":
            print(f"Still running... checking again in {poll_interval}s")
            time.sleep(poll_interval)
        else:
            print(f"Error: {status}")
            return None

# Main workflow
if __name__ == "__main__":
    print("Starting N2N Programmatic Workflow\n")
    print("=" * 60)

    # Step 1: Verify token
    if not step1_verify_token():
        exit(1)

    # Step 2: Collect URLs
    url = "https://books.toscrape.com"
    urls, url_scraper_id = step2_collect_urls(
        url,
        "Return array of urls that follow patterns https://books.toscrape.com/catalogue but exclude that has patterns https://books.toscrape.com/catalogue/category"
    )

    # Step 3: Rerun URL collection scraper
    if url_scraper_id:
        urls = step3_rerun_collect_urls(url, url_scraper_id)
    if not urls:
        exit(1)

    # Use first 3 URLs for demo
    urls = urls[:3]
    print(f"Using {len(urls)} URLs for demo")

    # Step 4: Create detail scraper
    detail_scraper_id = step4_create_detail_scraper(
        urls[0],
        "Extract all data detail"
    )
    if not detail_scraper_id:
        exit(1)

    # Step 5: Bulk scrape
    bulk_result_id = step5_bulk_scrape(detail_scraper_id, urls)
    # Optional: Uncomment to use a specific bulk result ID
    # bulk_result_id = "a438e4c2-8489-4473-b405-6b4b5e18ed3f"

    # Step 6: Get results
    results = step6_get_results(bulk_result_id)
    if results:
        print("\n" + "=" * 60)
        print("RESULTS SUMMARY")
        print("=" * 60)
        print(f"Total items: {len(results['mergedData'])}")
        print(f"Successful: {results['summary']['successfulUrls']}")
        print(f"Failed: {results['summary']['failedUrls']}")
        print(f"Tokens used: {results['summary']['totalTokenUsage']}")
        print("\nEXTRACTED DATA:")
        print(json.dumps(results['mergedData'], indent=2))

JavaScript:

// Configuration
const API_TOKEN = "YOUR_API_TOKEN";
const BASE_URL = "https://api.app.mrscraper.com/api/v1";
const fs = require('fs').promises;
const headers = {
"accept": "application/json",
"x-api-token": API_TOKEN,
"content-type": "application/json"
};
// Step 1: Verify API token
async function step1VerifyToken() {
console.log("π Step 1: Verifying API token...");
const response = await fetch(`${BASE_URL}/subscription-accounts`, {
headers: { "accept": "*/*", "x-api-token": API_TOKEN }
});
const data = await response.json();
if (response.ok) {
console.log(`Token valid! Tokens: ${data.data.tokenUsage}/${data.data.tokenLimit}`);
return true;
}
console.log(`Token verification failed:`, data);
return false;
}
// Step 2: Collect URLs
async function step2CollectUrls(targetUrl, message) {
console.log(`\nStep 2: Collecting URLs from ${targetUrl}...`);
const response = await fetch(`${BASE_URL}/scrapers-ai`, {
method: "POST",
headers,
body: JSON.stringify({
url: targetUrl,
agent: "general",
message,
proxyCountry: ""
})
});
const result = await response.json();
// Cache to a file
await fs.writeFile("step2_collect_urls_response.json", JSON.stringify(result, null, 2));
if (result && 'scraperId' in (result.data || {})) {
const scraperId = result.data.scraperId;
console.log(`Scraping finished`);
console.log(`URL Scraper ID: ${scraperId}`);
return { urls: [], scraperId };
}
console.log(`Failed:`, result);
return { urls: [], scraperId: null };
}
// Step 3: Rerun URL collection scraper
async function step3RerunCollectUrls(url, scraperId) {
console.log(`\nStep 3: Rerunning URL collection scraper ${scraperId}...`);
const response = await fetch(`${BASE_URL}/scrapers-ai-rerun/`, {
method: "POST",
headers,
body: JSON.stringify({
scraperId,
url
})
});
const result = await response.json();
// Cache to a file
await fs.writeFile("step3_rerun_collect_urls_response.json", JSON.stringify(result, null, 2));
if (result.message === "Successful operation!") {
const urls = result.data.data.urls;
console.log(`Rerun collected ${urls.length} URLs`);
return urls;
}
console.log(`Failed:`, result);
return [];
}
// Step 4: Create detail scraper
async function step4CreateDetailScraper(sampleUrl, message) {
console.log(`\nStep 4: Creating detail scraper...`);
const response = await fetch(`${BASE_URL}/scrapers-ai`, {
method: "POST",
headers,
body: JSON.stringify({
url: sampleUrl,
agent: "general",
message,
proxyCountry: ""
})
});
const result = await response.json();
// Cache to a file
await fs.writeFile("step4_create_detail_scraper_response.json", JSON.stringify(result, null, 2));
if (result && 'scraperId' in (result.data || {})) {
const scraperId = result.data.scraperId;
console.log(`Detail scraper created`);
console.log(`Detail Scraper ID: ${scraperId}`);
return scraperId;
}
console.log(`Failed:`, result);
return null;
}
// Step 5: Bulk scrape
async function step5BulkScrape(scraperId, urls) {
console.log(`\nStep 5: Starting bulk scrape for ${urls.length} URLs...`);
const response = await fetch(`${BASE_URL}/scrapers-ai-rerun/bulk`, {
method: "POST",
headers,
body: JSON.stringify({ scraperId, urls })
});
const result = await response.json();
// Cache to a file
await fs.writeFile("step5_bulk_scrape_response.json", JSON.stringify(result, null, 2));
const bulkResultId = result.data.bulkResultId;
console.log(`Bulk scraping started`);
console.log(`Bulk Result ID: ${bulkResultId}`);
return bulkResultId;
}
// Step 6: Get results with polling
async function step6GetResults(bulkResultId, pollInterval = 10000) {
console.log(`\nStep 6: Waiting for results (polling every ${pollInterval/1000}s)...`);
while (true) {
const response = await fetch(`${BASE_URL}/results/${bulkResultId}`, {
headers: { "accept": "application/json", "x-api-token": API_TOKEN }
});
const result = await response.json();
const status = result.data.status;
if (status === "Finished") {
console.log("β
Scraping completed!");
return result.data.data;
} else if (status === "Running") {
console.log(`Still running... checking again in ${pollInterval/1000}s`);
await new Promise(r => setTimeout(r, pollInterval));
} else {
console.log(`Error: ${status}`);
return null;
}
}
}
// Main workflow
(async () => {
console.log("π Starting N2N Programmatic Workflow\n");
console.log("=".repeat(60));
// Step 1: Verify token
if (!await step1VerifyToken()) {
process.exit(1);
}
// Step 2: Collect URLs
const url = "https://books.toscrape.com";
const { urls: initialUrls, scraperId: urlScraperId } = await step2CollectUrls(
url,
"Return array of urls that follow patterns https://books.toscrape.com/catalogue but exclude that has patterns https://books.toscrape.com/catalogue/category"
);
// Step 3: Rerun URL collection scraper
let urls = [];
if (urlScraperId) {
urls = await step3RerunCollectUrls(url, urlScraperId);
}
if (!urls.length) {
process.exit(1);
}
// Use first 3 URLs for demo
urls = urls.slice(0, 3);
console.log(` Using ${urls.length} URLs for demo`);
// Step 4: Create detail scraper
const detailScraperId = await step4CreateDetailScraper(
urls[0],
"Extract all data detail"
);
if (!detailScraperId) {
process.exit(1);
}
// Step 5: Bulk scrape
let bulkResultId = await step5BulkScrape(detailScraperId, urls);
// Optional: Uncomment to use a specific bulk result ID
// bulkResultId = "a438e4c2-8489-4473-b405-6b4b5e18ed3f";
// Step 6: Get results
const results = await step6GetResults(bulkResultId);
if (results) {
console.log("\n" + "=".repeat(60));
console.log("π RESULTS SUMMARY");
console.log("=".repeat(60));
console.log(`Total items: ${results.mergedData.length}`);
console.log(`Successful: ${results.summary.successfulUrls}`);
console.log(`Failed: ${results.summary.failedUrls}`);
console.log(`Tokens used: ${results.summary.totalTokenUsage}`);
console.log("\nEXTRACTED DATA:");
console.log(JSON.stringify(results.mergedData, null, 2));
}
})();