Create and Run a Scraper Programmatically

Learn how to create an AI scraper once and use it programmatically via API. Set up once, automate everywhere.

Overview

This guide shows you how to create an AI scraper in the dashboard and then use it programmatically through the API. This is perfect if you want to set up a scraper once and automate data extraction with simple API calls, with no need to configure it every time.

Prerequisites

Before you start, make sure you have:

  • An active MrScraper account with a subscription
  • Your API token

Step 1: Verify Your API Token

You can verify your API token and check your account status using the /subscription-accounts endpoint:

curl --location 'https://api.app.mrscraper.com/api/v1/subscription-accounts' \
--header 'accept: */*' \
--header 'x-api-token: YOUR_API_TOKEN'
const token = "YOUR_API_TOKEN";

fetch("https://api.app.mrscraper.com/api/v1/subscription-accounts", {
  method: "GET",
  headers: {
    "accept": "*/*",
    "x-api-token": token
  }
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));
import requests

token = "YOUR_API_TOKEN"

headers = {
    "accept": "*/*",
    "x-api-token": token
}

response = requests.get(
    "https://api.app.mrscraper.com/api/v1/subscription-accounts",
    headers=headers
)

print(response.json())

Response (200 OK):

{
    "message": "Successful operation!",
    "data": {
        "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
        "tokenLimit": 10000,
        "tokenUsage": 994,
        "stripeSubscriptionId": "sub_xxxxxxxxxxxxxxxxxx",
        "userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
        "stripeStatus": "true",
        "quantity": 1,
        "endsAt": "2030-12-01T02:05:08.400Z",
        "subscriptionItemId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
        "user": {
            "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
            "createdAt": "2025-11-24T01:59:15.866Z",
            "createdById": null,
            "updatedAt": "2026-01-17T08:11:43.270Z",
            "updatedById": null,
            "deletedAt": null,
            "deletedById": null,
            "name": "John Doe",
            "email": "johndoe@example.com",
            "username": null,
            "latestApiToken": "atk_xxxx....",
            "gender": null,
            "phoneNumber": null,
            "address": null,
            "birthDate": null,
            "avatar": "https://api.app.mrscraper.com/images/users/default-avatar.png",
            "otp": null,
            "otpExpiredAt": null,
            "isVerified": true,
            "stripeCustomerId": "cus_xxxxxxxxx",
            "googleMail": null,
            "googleId": null,
            "s3Bucket": null,
            "domainTargets": [
                "https://books.toscrape.com"
            ]
        },
        "rateLimit": null,
        "rateTtl": null,
        "isEnterprise": true,
        "cancelAtPeriodEnd": null,
        "currentPeriodStart": null,
        "currentPeriodEnd": null,
        "proxyDollarPerGB": null,
        "isAutoRenew": false,
        "updatedAt": "2026-01-18T01:40:21.392Z",
        "createdAt": "2025-11-24T02:05:08.470Z"
    }
}

Key Response Fields:

Subscription Account Fields:

| Field | Type | Description |
| --- | --- | --- |
| id | string | Unique identifier for your subscription account |
| tokenLimit | integer | Maximum tokens available in your subscription plan |
| tokenUsage | integer | Total tokens consumed so far |
| stripeSubscriptionId | string | Stripe subscription identifier (for billing) |
| userId | string | Your unique user identifier |
| stripeStatus | string | Payment status ("true" = active) |
| quantity | integer | Subscription quantity/seats |
| endsAt | string | Subscription expiration date (ISO 8601 format) |
| subscriptionItemId | string | Stripe subscription item identifier |
| rateLimit | integer/null | API rate limit (requests per window) |
| rateTtl | integer/null | Rate limit time window in seconds |
| isEnterprise | boolean | Whether the account has enterprise features |
| cancelAtPeriodEnd | boolean/null | Whether the subscription will cancel at the end of the period |
| currentPeriodStart | string/null | Current billing period start date |
| currentPeriodEnd | string/null | Current billing period end date |
| proxyDollarPerGB | number/null | Custom proxy pricing (enterprise only) |
| isAutoRenew | boolean | Whether the subscription auto-renews |

User Object Fields (nested under data.user):

| Field | Type | Description |
| --- | --- | --- |
| id | string | Unique user identifier |
| name | string | User's full name |
| email | string | User's email address |
| latestApiToken | string | Your most recently generated API token (truncated) |
| avatar | string | URL to the user's profile picture |
| isVerified | boolean | Whether the email address is verified |
| stripeCustomerId | string | Stripe customer identifier |
| domainTargets | array | List of domains you've scraped |
| createdAt | string | Account creation timestamp |
| updatedAt | string | Last account update timestamp |

Tip

Checking Token Balance: Use tokenLimit - tokenUsage to calculate your remaining tokens. Plan your scraping operations accordingly to avoid running out of tokens.
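
For instance, a minimal Python sketch that reads both fields from the /subscription-accounts response shown above and prints the remaining balance:

import requests

token = "YOUR_API_TOKEN"

response = requests.get(
    "https://api.app.mrscraper.com/api/v1/subscription-accounts",
    headers={"accept": "*/*", "x-api-token": token},
)
account = response.json()["data"]

# Remaining balance = tokenLimit - tokenUsage
remaining = account["tokenLimit"] - account["tokenUsage"]
print(f"{remaining} of {account['tokenLimit']} tokens remaining")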

Step 2: Collect URLs from Website

Collect all relevant URLs from your target website using the AI scraper endpoint. Choose the agent type you want to use from the examples below:

Initial Scrape using General Agent

Step 1: Get All URLs

Use the /scrapers-ai endpoint to create a General agent scraper that returns only the URLs found on the provided page.

curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
    "url": "https://books.toscrape.com",
    "agent": "general",
    "message": "Extract all data and just only get the urls, include patterns https://books.toscrape.com/catalogue and for exclude patterns https://books.toscrape.com/catalogue/category",
    "proxyCountry": ""
}'
const token = "YOUR_API_TOKEN";

fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai", {
  method: "POST",
  headers: {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
  },
  body: JSON.stringify({
    url: "https://books.toscrape.com",
    agent: "general",
    message: "Extract all data and just only get the urls, include patterns https://books.toscrape.com/catalogue and for exclude patterns https://books.toscrape.com/catalogue/category",
    proxyCountry: ""
  })
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));
import requests

token = "YOUR_API_TOKEN"

url = "https://api.app.mrscraper.com/api/v1/scrapers-ai"
headers = {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
}
payload = {
    "url": "https://books.toscrape.com",
    "agent": "general",
    "message": "Extract all data and just only get the urls, include patterns https://books.toscrape.com/catalogue and for exclude patterns https://books.toscrape.com/catalogue/category",
    "proxyCountry": ""
}

response = requests.post(url, headers=headers, json=payload)
print(response.json())

Step 2: Get Page Details

After collecting URLs, you'll want to extract detailed data from each individual page.

Tip

When creating a detail scraper, always use a real detail page URL.

For example, instead of https://books.toscrape.com, use a specific product page like https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html (from your URL collection step).

Do not use the homepage or a listing page; this helps ensure accurate data extraction and prevents setup errors.

Create a new scraper configured for detail extraction:

curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
    "url": "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
    "agent": "general",
    "message": "Extract data : Title, Product Description, Price (Float), Availability, and Number of reviews",
    "proxyCountry": ""
}'
const token = "YOUR_API_TOKEN";

fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai", {
  method: "POST",
  headers: {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
  },
  body: JSON.stringify({
    url: "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
    agent: "general",
    message: "Extract data : Title, Product Description, Price (Float), Availability, and Number of reviews",
    proxyCountry: ""
  })
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));
import requests

token = "YOUR_API_TOKEN"

url = "https://api.app.mrscraper.com/api/v1/scrapers-ai"
headers = {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
}
payload = {
    "url": "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
    "agent": "general",
    "message": "Extract data : Title, Product Description, Price (Float), Availability, and Number of reviews",
    "proxyCountry": ""
}

response = requests.post(url, headers=headers, json=payload)
print(response.json())

Response (200 OK):

{
  "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
  "createdAt": "2026-01-18T08:37:38.124Z",
  "userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
  "scraperId": "YOUR_DETAIL_SCRAPER_ID",
  "type": "AI",
  "url": "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
  "status": "Finished",
  "tokenUsage": 5,
  "data": {
    "title": "A Light in the Attic",
    "price": "Β£51.77",
    "availability": "In stock (22 available)",
    "description": "It's hard to imagine a world without A Light in the Attic...",  
    "number_of_reviews": "0"
  }
}

Request Parameters:

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| url | string | Yes | Target website URL to scrape |
| agent | string | Yes | AI agent to use (e.g., general, listing, detail, map) |
| message | string | Yes | Natural language instruction describing what to extract |
| proxyCountry | string | No | Proxy country code for geo-restricted content |

Save the Scraper ID

You'll need this ID (YOUR_DETAIL_SCRAPER_ID in this example) for bulk scraping multiple URLs with the same extraction configuration.
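
One way to capture the ID right after the create call is sketched below. Note that the example response above returns scraperId at the top level, while other responses in this guide wrap it under data, so the snippet checks both; the output file name is just an illustration:

import requests

token = "YOUR_API_TOKEN"

response = requests.post(
    "https://api.app.mrscraper.com/api/v1/scrapers-ai",
    headers={
        "accept": "application/json",
        "x-api-token": token,
        "content-type": "application/json",
    },
    json={
        "url": "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
        "agent": "general",
        "message": "Extract data : Title, Product Description, Price (Float), Availability, and Number of reviews",
        "proxyCountry": "",
    },
)
result = response.json()

# The response may carry scraperId at the top level or nested under "data"
detail_scraper_id = result.get("scraperId") or result.get("data", {}).get("scraperId")
print("Detail Scraper ID:", detail_scraper_id)

# Keep it for the bulk scraping step
with open("detail_scraper_id.txt", "w") as f:
    f.write(detail_scraper_id or "")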

Initial Scrape with Map Agent

Use the /scrapers-ai endpoint to create a Map agent scraper.

curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
    "agent": "map",
    "url": "https://books.toscrape.com",
    "maxDepth": 2,
    "maxPages": 100,
    "limit": 1000,
    "includePatterns": "^https:\\/\\/books\\.toscrape\\.com\\/catalogue$",
    "excludePatterns": "^https:\\/\\/books\\.toscrape\\.com\\/catalogue\\/category$"
}'
const token = "YOUR_API_TOKEN";

fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai", {
  method: "POST",
  headers: {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
  },
  body: JSON.stringify({
    agent: "map",
    url: "https://books.toscrape.com",
    maxDepth: 2,
    maxPages: 100,
    limit: 1000,
    includePatterns: "^https:\\/\\/books\\.toscrape\\.com\\/catalogue$",
    excludePatterns: "^https:\\/\\/books\\.toscrape\\.com\\/catalogue\\/category$"
  })
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));
import requests

token = "YOUR_API_TOKEN"

url = "https://api.app.mrscraper.com/api/v1/scrapers-ai"
headers = {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
}
payload = {
    "agent": "map",
    "url": "https://books.toscrape.com",
    "maxDepth": 2,
    "maxPages": 100,
    "limit": 1000,
    "includePatterns": "^https:\\/\\/books\\.toscrape\\.com\\/catalogue$",
    "excludePatterns": "^https:\\/\\/books\\.toscrape\\.com\\/catalogue\\/category$"
}

response = requests.post(url, headers=headers, json=payload)
print(response.json())

Request Parameters:

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| agent | string | Yes | Must be "map" for the map agent |
| url | string | Yes | Target website URL to scrape |
| maxDepth | number | Yes | Maximum depth to crawl (e.g., 2) |
| maxPages | number | Yes | Maximum number of pages to crawl (e.g., 100) |
| limit | number | Yes | Maximum number of URLs to collect (e.g., 1000) |
| includePatterns | string | No | Regex pattern for URLs to include |
| excludePatterns | string | No | Regex pattern for URLs to exclude (can be an empty string) |

Response (200 OK):

{
  "message": "Successful operation!",
  "data": {
    "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
    "createdAt": "2026-01-18T05:01:24.944Z",
    "userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
    "scraperId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
    "type": "AI",
    "url": "https://books.toscrape.com",
    "status": "Finished",
    "error": null,
    "tokenUsage": 5,
    "data": {
      "urls": [
        "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
        "https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
        "https://books.toscrape.com/catalogue/soumission_998/index.html",
        ...
      ]
    }
  }
}

Save the Scraper ID

You'll need this ID to re-run the scraper later without reconfiguring it.
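
For example, a small sketch that pulls both the scraper ID and the collected URLs out of the map-agent response above (assuming result holds the parsed JSON body):

# `result` is the parsed JSON body of the map-agent create call above
payload = result["data"]

map_scraper_id = payload["scraperId"]     # save this ID for later re-runs
collected_urls = payload["data"]["urls"]  # the URLs the map agent collected

print("Map Scraper ID:", map_scraper_id)
print(f"Collected {len(collected_urls)} URLs")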

Bulk Scrape Multiple URLs

Use the /scrapers-ai-rerun/bulk endpoint to scrape multiple URLs in a single request:

Note

The bulk endpoint is available only for the General agent.

curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun/bulk' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
    "scraperId": "YOUR_DETAIL_SCRAPER_ID",
    "urls": [
        "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
        "https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
        "https://books.toscrape.com/catalogue/soumission_998/index.html",
        ...
    ]
}'
const token = "YOUR_API_TOKEN";
const detailScraperId = "YOUR_DETAIL_SCRAPER_ID";
const urls = [
  "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
  "https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
  "https://books.toscrape.com/catalogue/soumission_998/index.html"
];

fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun/bulk", {
  method: "POST",
  headers: {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
  },
  body: JSON.stringify({
    scraperId: detailScraperId,
    urls: urls
  })
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));
import requests

token = "YOUR_API_TOKEN"
detail_scraper_id = "YOUR_DETAIL_SCRAPER_ID"
urls = [
    "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
    "https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
    "https://books.toscrape.com/catalogue/soumission_998/index.html",
    # add more URLs from your collection step as needed
]

url = "https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun/bulk"
headers = {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
}
payload = {
    "scraperId": detail_scraper_id,
    "urls": urls
}

response = requests.post(url, headers=headers, json=payload)
print(response.json())

Request Parameters:

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| scraperId | string | Yes | The detail scraper ID created in the Get Page Details step |
| urls | array | Yes | Array of URLs to scrape |

Response (200 OK):

{
  "message": "Bulk scraping is Running",
  "data": {
    "bulkResultId": "YOUR_BULK_RESULT_ID"
  }
}

Asynchronous Operation

Bulk scraping runs in the background. Save the bulkResultId to retrieve results later.
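
A minimal polling sketch (it reuses the /results/{id} endpoint described in Step 3; the 10-second interval is just an example):

import time
import requests

token = "YOUR_API_TOKEN"
bulk_result_id = "YOUR_BULK_RESULT_ID"

url = f"https://api.app.mrscraper.com/api/v1/results/{bulk_result_id}"
headers = {"accept": "application/json", "x-api-token": token}

# Poll until the bulk run leaves the "Running" state
while True:
    data = requests.get(url, headers=headers).json()["data"]
    if data["status"] != "Running":
        break
    time.sleep(10)

print("Final status:", data["status"])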

Step 3: Retrieve Results

There are two ways to retrieve scraping results: get a specific result by its ID, or get all results for a scraper.

Get Single Result by Result ID

Use the /results/{id} endpoint to retrieve a single result by its resultId (the id field from the scrape response):

curl --location 'https://api.app.mrscraper.com/api/v1/results/YOUR_RESULT_ID' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN'
const token = "YOUR_API_TOKEN";
const resultId = "YOUR_RESULT_ID";

fetch(`https://api.app.mrscraper.com/api/v1/results/${resultId}`, {
  method: "GET",
  headers: {
    "accept": "application/json",
    "x-api-token": token
  }
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));
import requests

token = "YOUR_API_TOKEN"
result_id = "YOUR_RESULT_ID"

url = f"https://api.app.mrscraper.com/api/v1/results/{result_id}"
headers = {
    "accept": "application/json",
    "x-api-token": token
}

response = requests.get(url, headers=headers)
print(response.json())

Response (200 OK):

{
  "message": "Successful operation!",
  "data": {
    "createdAt": "2026-01-18T07:28:44.725Z",
    "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxx...",
    "userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
    "scraperId": "YOUR_SCRAPER_ID",
    "type": "Rerun-AI",
    "url": "https://books.toscrape.com",
    "status": "Finished",
    "error": "",
    "tokenUsage": 4,
    "data": {
      "urls": [
        "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
        "https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
        "https://books.toscrape.com/catalogue/soumission_998/index.html",
        ...
      ]
    },
    "htmlPath": "results/.../page.html",
    "screenshotPath": "results/.../page.jpg"
  }
}

Get All Results by Scraper ID

Use the /results endpoint to retrieve all historical results for a specific scraper with pagination:

curl --location --globoff 'https://api.app.mrscraper.com/api/v1/results?filters[scraperId]=YOUR_SCRAPER_ID&page=1&pageSize=10&sort=createdAt&sortOrder=DESC' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN'
const token = "YOUR_API_TOKEN";
const scraperId = "YOUR_SCRAPER_ID";

const params = new URLSearchParams({
  'filters[scraperId]': scraperId,
  'page': '1',
  'pageSize': '10',
  'sort': 'createdAt',
  'sortOrder': 'DESC'
});

fetch(`https://api.app.mrscraper.com/api/v1/results?${params}`, {
  method: "GET",
  headers: {
    "accept": "application/json",
    "x-api-token": token
  }
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));
import requests

token = "YOUR_API_TOKEN"
scraper_id = "YOUR_SCRAPER_ID"

url = "https://api.app.mrscraper.com/api/v1/results"
headers = {
    "accept": "application/json",
    "x-api-token": token
}
params = {
    "filters[scraperId]": scraper_id,
    "page": 1,
    "pageSize": 10,
    "sort": "createdAt",
    "sortOrder": "DESC"
}

response = requests.get(url, headers=headers, params=params)
print(response.json())

Query Parameters:

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| filters[scraperId] | string | Yes | Filter results by scraper ID |
| page | integer | No | Page number (default: 1) |
| pageSize | integer | No | Results per page (default: 10) |
| sort | string | No | Field to sort by (e.g., createdAt) |
| sortOrder | string | No | Sort direction: ASC or DESC |

Response (200 OK):

{
  "message": "Successful fetch",
  "data": [
    {
      "createdAt": "2026-01-18T07:28:44.725Z",
      "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxx...",
      "scraperId": "YOUR_SCRAPER_ID",
      "type": "Rerun-AI",
      "url": "https://books.toscrape.com",
      "status": "Finished",
      "tokenUsage": 4,
      "data": {
        "urls": ["..."]
      }
    },
    {
      "createdAt": "2026-01-18T07:28:04.740Z",
      "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
      "scraperId": "YOUR_SCRAPER_ID",
      "type": "AI",
      "url": "https://books.toscrape.com",
      "status": "Finished",
      "tokenUsage": 5,
      "data": {
        "urls": ["..."]
      }
    }
  ],
  "meta": {
    "page": 1,
    "pageSize": 10,
    "total": 2,
    "totalPage": 1
  }
}
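
If a scraper has more results than fit on one page, you can walk through the pages using the meta.totalPage field from the response above. A sketch:

import requests

token = "YOUR_API_TOKEN"
scraper_id = "YOUR_SCRAPER_ID"

url = "https://api.app.mrscraper.com/api/v1/results"
headers = {"accept": "application/json", "x-api-token": token}

all_results = []
page = 1
while True:
    params = {
        "filters[scraperId]": scraper_id,
        "page": page,
        "pageSize": 10,
        "sort": "createdAt",
        "sortOrder": "DESC",
    }
    body = requests.get(url, headers=headers, params=params).json()
    all_results.extend(body["data"])
    if page >= body["meta"]["totalPage"]:
        break
    page += 1

print(f"Fetched {len(all_results)} results")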

Retrieve Bulk Scraping Results

Bulk scraping is asynchronous, meaning you need to poll the results endpoint to check the status and retrieve the data when processing is complete.

curl --location 'https://api.app.mrscraper.com/api/v1/results/YOUR_BULK_RESULT_ID' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN'
const token = "YOUR_API_TOKEN";
const bulkResultId = "YOUR_BULK_RESULT_ID";

fetch(`https://api.app.mrscraper.com/api/v1/results/${bulkResultId}`, {
  method: "GET",
  headers: {
    "accept": "application/json",
    "x-api-token": token
  }
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));
import requests

token = "YOUR_API_TOKEN"
bulk_result_id = "YOUR_BULK_RESULT_ID"

url = f"https://api.app.mrscraper.com/api/v1/results/{bulk_result_id}"
headers = {
    "accept": "application/json",
    "x-api-token": token
}

response = requests.get(url, headers=headers)
print(response.json())

Response (Status: Running):

{
  "message": "Successful operation!",
  "data": {
    "id": "YOUR_BULK_RESULT_ID",
    "status": "Running",
    "data": null
  }
}

Response (Status: Finished):

{
  "message": "Successful operation!",
  "data": {
    "createdAt": "2026-01-18T08:38:23.993Z",
    "id": "YOUR_BULK_RESULT_ID",
    "userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
    "scraperId": "YOUR_DETAIL_SCRAPER_ID",
    "type": "Bulk-AI",
    "status": "Finished",
    "tokenUsage": 12,
    "data": {
      "mergedData": [
        {
          "title": "A Light in the Attic",
          "price": "Β£51.77",
          "availability": "In stock (22 available)",
          "description": "It's hard to imagine a world without A Light in the Attic...",
          "product_type": "Books",
          "upc": "a897fe39b1053632",
          "price_excl_tax": "Β£51.77",
          "price_incl_tax": "Β£51.77",
          "tax": "Β£0.00",
          "number_of_reviews": "0"
        },
        {
          "title": "Tipping the Velvet",
          "price": "Β£53.74",
          "availability": "In stock (20 available)",
          "description": "Erotic and absorbing...Written with starling power...",
          "product_type": "Books",
          "upc": "90fa61229261140a",
          "price_excl_tax": "Β£53.74",
          "price_incl_tax": "Β£53.74",
          "tax": "Β£0.00",
          "number_of_reviews": "0"
        },
        {
          "title": "Soumission",
          "price": "Β£50.10",
          "availability": "In stock (20 available)",
          "description": "Dans une France assez proche de la nΓ΄tre...",
          "product_type": "Books",
          "upc": "6957f44c3847a760",
          "price_excl_tax": "Β£50.10",
          "price_incl_tax": "Β£50.10",
          "tax": "Β£0.00",
          "number_of_reviews": "0"
        }
      ],
      "urlDetails": [
        {
          "url": "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
          "status": "Finished",
          "error": ""
        },
        {
          "url": "https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
          "status": "Finished",
          "error": ""
        },
        {
          "url": "https://books.toscrape.com/catalogue/soumission_998/index.html",
          "status": "Finished",
          "error": ""
        }
      ],
      "summary": {
        "totalUrls": 3,
        "successfulUrls": 3,
        "failedUrls": 0,
        "totalTokenUsage": 12
      }
    }
  }
}

Key Response Fields:

| Field | Type | Description |
| --- | --- | --- |
| status | string | Current status: Running, Finished, or Failed |
| data.mergedData | array | Array of extracted data from all URLs |
| data.urlDetails | array | Status details for each individual URL |
| data.summary | object | Summary statistics of the bulk operation |
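
As an illustration, here is one way to split a finished bulk result into records, failures, and summary counts (assuming body is the parsed JSON shown above):

# `body` is the parsed JSON returned by GET /results/{bulkResultId} once status is "Finished"
bulk = body["data"]["data"]

records = bulk["mergedData"]  # one extracted record per successfully scraped URL
failed = [d["url"] for d in bulk["urlDetails"] if d["status"] != "Finished"]

summary = bulk["summary"]
print(f"{summary['successfulUrls']}/{summary['totalUrls']} URLs succeeded, "
      f"{summary['totalTokenUsage']} tokens used")
if failed:
    print("Failed URLs:", failed)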

Step 4: Re-run Scraper (Optional)

If you want to scrape the same website again with the same configuration, use the /scrapers-ai-rerun endpoint with your saved scraperId:

Re-run General Agent

curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
    "scraperId": "YOUR_SCRAPER_ID",
    "url": "https://books.toscrape.com"
}'
const token = "YOUR_API_TOKEN";
const scraperId = "YOUR_SCRAPER_ID";

fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun", {
  method: "POST",
  headers: {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
  },
  body: JSON.stringify({
    scraperId: scraperId,
    url: "https://books.toscrape.com"
  })
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));
import requests

token = "YOUR_API_TOKEN"
scraper_id = "YOUR_SCRAPER_ID"

url = "https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun"
headers = {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
}
payload = {
    "scraperId": scraper_id,
    "url": "https://books.toscrape.com"
}

response = requests.post(url, headers=headers, json=payload)
print(response.json())

Request Parameters:

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| scraperId | string | Yes | The scraper ID from your initial scrape |
| url | string | Yes | Target URL to scrape |

Re-run Map Agent

When using a scraper with the map agent, you need to provide additional parameters, such as includePatterns and excludePatterns, to control URL filtering.

curl --location 'https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun' \
--header 'accept: application/json' \
--header 'x-api-token: YOUR_API_TOKEN' \
--header 'content-type: application/json' \
--data '{
    "scraperId": "YOUR_SCRAPER_ID",
    "url": "https://books.toscrape.com",
    "maxDepth": 1,
    "maxPages": 50,
    "limit": 100,
    "includePatterns": ["https://books.toscrape.com/catalogue"],
    "excludePatterns": ["https://books.toscrape.com/catalogue/category"]
}'
const token = "YOUR_API_TOKEN";
const scraperId = "YOUR_SCRAPER_ID";

fetch("https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun", {
  method: "POST",
  headers: {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
  },
  body: JSON.stringify({
    scraperId: scraperId,
    url: "https://books.toscrape.com",
    "scraperId": scraper_id,
    "url": "https://books.toscrape.com",
    maxDepth: 1,
    maxPages: 50,
    limit: 100,
    includePatterns: ["https://books.toscrape.com/catalogue"],
    excludePatterns: ["https://books.toscrape.com/catalogue/category"]
  })
})
.then(res => res.json())
.then(data => console.log(data))
.catch(err => console.error(err));
import requests

token = "YOUR_API_TOKEN"
scraper_id = "YOUR_SCRAPER_ID"

url = "https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun"
headers = {
    "accept": "application/json",
    "x-api-token": token,
    "content-type": "application/json"
}
payload = {
    "scraperId": scraper_id,
    "url": "https://books.toscrape.com",
    "maxDepth": 1,
    "maxPages": 50,
    "limit": 100,
    "includePatterns": ["https://books.toscrape.com/catalogue"],
    "excludePatterns": ["https://books.toscrape.com/catalogue/category"]
}

response = requests.post(url, headers=headers, json=payload)
print(response.json())

Additional Map Agent Parameters:

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| scraperId | string | Yes | The scraper ID from your initial scrape |
| url | string | Yes | Target URL to scrape |
| maxDepth | number | Yes | Maximum depth to crawl (e.g., 2) |
| maxPages | number | Yes | Maximum number of pages to crawl (e.g., 100) |
| limit | number | Yes | Maximum number of URLs to collect (e.g., 1000) |
| includePatterns | string[] | No | Array of regex patterns for URLs to include |
| excludePatterns | string[] | No | Array of regex patterns for URLs to exclude (can be empty) |

Note:
When re-running a map agent scraper, you can fine-tune which URLs are crawled using includePatterns and excludePatterns while reusing your base scraper logic and configuration.

Response (200 OK):

{
  "message": "Successful operation!",
  "data": {
    "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...",
    "createdAt": "2026-01-18T07:28:44.725Z",
    "userId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxx...,
    "scraperId": "YOUR_SCRAPER_ID",
    "type": "Rerun-AI",
    "url": "https://books.toscrape.com",
    "status": "Finished",
    "error": "",
    "tokenUsage": 4,
    "data": {
      "urls": [
        "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
        "https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
        "https://books.toscrape.com/catalogue/soumission_998/index.html",
        ...
      ]
    },
    "htmlPath": "results/.../page.html",
    "screenshotPath": "results/.../page.jpg"
  }
}

Tips

  • You don't need to create a new scraper from scratch every time you want to scrape a different (but structurally similar) website or page. If the data format and structure are the same, you can simply re-run your existing scraper with a new target URL (see the sketch after this list).
  • This works for every agent, including the map agent. For map agents, you can also update the URL and the include/exclude patterns instead of creating a new scraper, saving time and tokens. Reuse your scrapers for maximum efficiency whenever your targets share the same page layout.
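
For example, a sketch that re-runs an existing detail scraper against a different product page with the same layout (the target URL here is purely illustrative):

import requests

token = "YOUR_API_TOKEN"
scraper_id = "YOUR_SCRAPER_ID"  # the detail scraper you already created

# Illustrative target: any page that shares the layout the scraper was built for
new_url = "https://books.toscrape.com/catalogue/sharp-objects_997/index.html"

response = requests.post(
    "https://api.app.mrscraper.com/api/v1/scrapers-ai-rerun",
    headers={
        "accept": "application/json",
        "x-api-token": token,
        "content-type": "application/json",
    },
    json={"scraperId": scraper_id, "url": new_url},
)
print(response.json())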

Complete Workflow Example

Below is a full example that runs all steps in a single automated workflow:

import requests
import json
import time

# Configuration
API_TOKEN = "YOUR_API_TOKEN"
BASE_URL = "https://api.app.mrscraper.com/api/v1"

headers = {
    "accept": "application/json",
    "x-api-token": API_TOKEN,
    "content-type": "application/json"
}

def step1_verify_token():
    """Step 1: Verify API token and check account status"""
    print("πŸ” Step 1: Verifying API token...")
    
    response = requests.get(
        f"{BASE_URL}/subscription-accounts",
        headers={"accept": "*/*", "x-api-token": API_TOKEN}
    )
    data = response.json()
    
    if response.status_code == 200:
        print(f"βœ“ Token valid! Tokens: {data['data']['tokenUsage']}/{data['data']['tokenLimit']}")
        return True
    else:
        print(f"❌ Token verification failed: {data}")
        return False

def step2_collect_urls(target_url, message):
    """Step 2: Create scraper and collect URLs"""
    print(f"\nπŸ“ Step 2: Collecting URLs from {target_url}...")
    
    payload = {
        "url": target_url,
        "agent": "general",
        "message": message,
        "proxyCountry": ""
    }
    
    response = requests.post(f"{BASE_URL}/scrapers-ai", headers=headers, json=payload)
    result = response.json()
    # cache to a file
    with open("step2_collect_urls_response.json", "w") as f:
        json.dump(result, f, indent=2)
    
    if "scraperId" in result.get("data", {}):
        scraper_id = result["data"]["scraperId"]
        print(f"βœ“ Scraping finished")
        print(f"πŸ“‹ URL Scraper ID: {scraper_id}")
        return [], scraper_id
    else:
        print(f"❌ Failed: {result}")
        return [], None


def step3_rerun_collect_urls(url, scraper_id):
    """Step 3: Rerun URL collection scraper"""
    print(f"\nπŸ”„ Step 3: Rerunning URL collection scraper {scraper_id}...")
    
    payload = {
        "scraperId": scraper_id,
        "url": url
    }
    
    response = requests.post(f"{BASE_URL}/scrapers-ai-rerun/", headers=headers, json=payload)
    result = response.json()
    # cache to a file
    with open("step3_rerun_collect_urls_response.json", "w") as f:
        json.dump(result, f, indent=2)
    
    if result.get("message") == "Successful operation!":
        urls = result["data"]["data"]["urls"]
        print(f"βœ“ Rerun collected {len(urls)} URLs")
        return urls
    else:
        print(f"❌ Failed: {result}")
        return []
    
def step4_create_detail_scraper(sample_url, message):
    """Step 4: Create detail scraper"""
    print(f"\nπŸ”§ Step 4: Creating detail scraper...")
    
    payload = {
        "url": sample_url,
        "agent": "general",
        "message": message,
        "proxyCountry": ""
    }
    
    response = requests.post(f"{BASE_URL}/scrapers-ai", headers=headers, json=payload)
    result = response.json()
    # cache to a file
    with open("step4_create_detail_scraper_response.json", "w") as f:
        json.dump(result, f, indent=2)
    
    if "scraperId" in result.get("data", {}):
        scraper_id = result["data"]["scraperId"]
        print(f"βœ“ Detail scraper created")
        print(f"πŸ“‹ Detail Scraper ID: {scraper_id}")
        return scraper_id
    else:
        print(f"❌ Failed: {result}")
        return None

def step5_bulk_scrape(scraper_id, urls):
    """Step 5: Bulk scrape all URLs"""
    print(f"\nπŸ€– Step 5: Starting bulk scrape for {len(urls)} URLs...")
    
    payload = {
        "scraperId": scraper_id,
        "urls": urls
    }
    
    response = requests.post(f"{BASE_URL}/scrapers-ai-rerun/bulk", headers=headers, json=payload)
    result = response.json()

    # cache to a file
    with open("step5_bulk_scrape_response.json", "w") as f:
        json.dump(result, f, indent=2)
    
    bulk_result_id = result["data"]["bulkResultId"]
    print(f"βœ“ Bulk scraping started")
    print(f"πŸ“‹ Bulk Result ID: {bulk_result_id}")
    return bulk_result_id

def step6_get_results(bulk_result_id, poll_interval=10):
    """Step 6: Poll for results"""
    print(f"\n⏳ Step 6: Waiting for results (polling every {poll_interval}s)...")
    
    while True:
        response = requests.get(
            f"{BASE_URL}/results/{bulk_result_id}",
            headers={"accept": "application/json", "x-api-token": API_TOKEN}
        )
        result = response.json()
        status = result["data"]["status"]
        
        if status == "Finished":
            print("βœ… Scraping completed!")
            return result["data"]["data"]
        elif status == "Running":
            print(f"  ⏳ Still running... checking again in {poll_interval}s")
            time.sleep(poll_interval)
        else:
            print(f"❌ Error: {status}")
            return None

# Main workflow
if __name__ == "__main__":
    print("πŸš€ Starting N2N Programmatic Workflow\n")
    print("=" * 60)
    
    # Step 1: Verify token
    if not step1_verify_token():
        exit(1)
    
    # Step 2: Collect URLs
    url = "https://books.toscrape.com"
    urls, url_scraper_id = step2_collect_urls(
        url,
        "Return array of urls that follow patterns https://books.toscrape.com/catalogue but exclude that has patterns https://books.toscrape.com/catalogue/category"
    )

    # Step 3: Rerun URL collection scraper
    if url_scraper_id:
        urls = step3_rerun_collect_urls(url, url_scraper_id)
    
    if not urls:
        exit(1)
    
    # Use first 3 URLs for demo
    urls = urls[:3]
    print(f"  Using {len(urls)} URLs for demo")


    # Step 4: Create detail scraper
    detail_scraper_id = step4_create_detail_scraper(
        urls[0],
        "Extract all data detail"
    )
    
    if not detail_scraper_id:
        exit(1)
    
    # Step 5: Bulk scrape
    bulk_result_id = step5_bulk_scrape(detail_scraper_id, urls)

    bulk_result_id = "a438e4c2-8489-4473-b405-6b4b5e18ed3f"
    
    # Step 6: Get results
    results = step6_get_results(bulk_result_id)
    
    if results:
        print("\n" + "=" * 60)
        print("πŸ“Š RESULTS SUMMARY")
        print("=" * 60)
        print(f"βœ“ Total items: {len(results['mergedData'])}")
        print(f"βœ“ Successful: {results['summary']['successfulUrls']}")
        print(f"βœ“ Failed: {results['summary']['failedUrls']}")
        print(f"βœ“ Tokens used: {results['summary']['totalTokenUsage']}")
        
        print("\nπŸ“¦ EXTRACTED DATA:")
        print(json.dumps(results['mergedData'], indent=2))
// Configuration
const API_TOKEN = "YOUR_API_TOKEN";
const BASE_URL = "https://api.app.mrscraper.com/api/v1";
const fs = require('fs').promises;

const headers = {
  "accept": "application/json",
  "x-api-token": API_TOKEN,
  "content-type": "application/json"
};

// Step 1: Verify API token
async function step1VerifyToken() {
  console.log("πŸ” Step 1: Verifying API token...");
  
  const response = await fetch(`${BASE_URL}/subscription-accounts`, {
    headers: { "accept": "*/*", "x-api-token": API_TOKEN }
  });
  const data = await response.json();
  
  if (response.ok) {
    console.log(`✓ Token valid! Tokens: ${data.data.tokenUsage}/${data.data.tokenLimit}`);
    return true;
  }
  console.log(`❌ Token verification failed:`, data);
  return false;
}

// Step 2: Collect URLs
async function step2CollectUrls(targetUrl, message) {
  console.log(`\n📍 Step 2: Collecting URLs from ${targetUrl}...`);
  
  const response = await fetch(`${BASE_URL}/scrapers-ai`, {
    method: "POST",
    headers,
    body: JSON.stringify({
      url: targetUrl,
      agent: "general",
      message,
      proxyCountry: ""
    })
  });
  const result = await response.json();
  
  // Cache to a file
  await fs.writeFile("step2_collect_urls_response.json", JSON.stringify(result, null, 2));
  
  if (result && 'scraperId' in (result.data || {})) {
    const scraperId = result.data.scraperId;
    console.log(`✓ Scraping finished`);
    console.log(`📋 URL Scraper ID: ${scraperId}`);
    return { urls: [], scraperId };
  }
  console.log(`❌ Failed:`, result);
  return { urls: [], scraperId: null };
}

// Step 3: Rerun URL collection scraper
async function step3RerunCollectUrls(url, scraperId) {
  console.log(`\n🔄 Step 3: Rerunning URL collection scraper ${scraperId}...`);
  
  const response = await fetch(`${BASE_URL}/scrapers-ai-rerun/`, {
    method: "POST",
    headers,
    body: JSON.stringify({
      scraperId,
      url
    })
  });
  const result = await response.json();
  
  // Cache to a file
  await fs.writeFile("step3_rerun_collect_urls_response.json", JSON.stringify(result, null, 2));
  
  if (result.message === "Successful operation!") {
    const urls = result.data.data.urls;
    console.log(`✓ Rerun collected ${urls.length} URLs`);
    return urls;
  }
  console.log(`❌ Failed:`, result);
  return [];
}

// Step 4: Create detail scraper
async function step4CreateDetailScraper(sampleUrl, message) {
  console.log(`\n🔧 Step 4: Creating detail scraper...`);
  
  const response = await fetch(`${BASE_URL}/scrapers-ai`, {
    method: "POST",
    headers,
    body: JSON.stringify({
      url: sampleUrl,
      agent: "general",
      message,
      proxyCountry: ""
    })
  });
  const result = await response.json();
  
  // Cache to a file
  await fs.writeFile("step4_create_detail_scraper_response.json", JSON.stringify(result, null, 2));
  
  if (result && 'scraperId' in (result.data || {})) {
    const scraperId = result.data.scraperId;
    console.log(`✓ Detail scraper created`);
    console.log(`📋 Detail Scraper ID: ${scraperId}`);
    return scraperId;
  }
  console.log(`❌ Failed:`, result);
  return null;
}

// Step 5: Bulk scrape
async function step5BulkScrape(scraperId, urls) {
  console.log(`\n🤖 Step 5: Starting bulk scrape for ${urls.length} URLs...`);
  
  const response = await fetch(`${BASE_URL}/scrapers-ai-rerun/bulk`, {
    method: "POST",
    headers,
    body: JSON.stringify({ scraperId, urls })
  });
  const result = await response.json();
  
  // Cache to a file
  await fs.writeFile("step5_bulk_scrape_response.json", JSON.stringify(result, null, 2));
  
  const bulkResultId = result.data.bulkResultId;
  console.log(`✓ Bulk scraping started`);
  console.log(`📋 Bulk Result ID: ${bulkResultId}`);
  return bulkResultId;
}

// Step 6: Get results with polling
async function step6GetResults(bulkResultId, pollInterval = 10000) {
  console.log(`\n⏳ Step 6: Waiting for results (polling every ${pollInterval/1000}s)...`);
  
  while (true) {
    const response = await fetch(`${BASE_URL}/results/${bulkResultId}`, {
      headers: { "accept": "application/json", "x-api-token": API_TOKEN }
    });
    const result = await response.json();
    const status = result.data.status;
    
    if (status === "Finished") {
      console.log("βœ… Scraping completed!");
      return result.data.data;
    } else if (status === "Running") {
      console.log(`  ⏳ Still running... checking again in ${pollInterval/1000}s`);
      await new Promise(r => setTimeout(r, pollInterval));
    } else {
      console.log(`❌ Error: ${status}`);
      return null;
    }
  }
}

// Main workflow
(async () => {
  console.log("πŸš€ Starting N2N Programmatic Workflow\n");
  console.log("=".repeat(60));
  
  // Step 1: Verify token
  if (!await step1VerifyToken()) {
    process.exit(1);
  }
  
  // Step 2: Collect URLs
  const url = "https://books.toscrape.com";
  const { urls: initialUrls, scraperId: urlScraperId } = await step2CollectUrls(
    url,
    "Return array of urls that follow patterns https://books.toscrape.com/catalogue but exclude that has patterns https://books.toscrape.com/catalogue/category"
  );
  
  // Step 3: Rerun URL collection scraper
  let urls = [];
  if (urlScraperId) {
    urls = await step3RerunCollectUrls(url, urlScraperId);
  }
  
  if (!urls.length) {
    process.exit(1);
  }
  
  // Use first 3 URLs for demo
  urls = urls.slice(0, 3);
  console.log(`  Using ${urls.length} URLs for demo`);
  
  // Step 4: Create detail scraper
  const detailScraperId = await step4CreateDetailScraper(
    urls[0],
    "Extract all data detail"
  );
  
  if (!detailScraperId) {
    process.exit(1);
  }
  
  // Step 5: Bulk scrape
  let bulkResultId = await step5BulkScrape(detailScraperId, urls);
  
  // Optional: Uncomment to use a specific bulk result ID
  // bulkResultId = "a438e4c2-8489-4473-b405-6b4b5e18ed3f";
  
  // Step 6: Get results
  const results = await step6GetResults(bulkResultId);
  
  if (results) {
    console.log("\n" + "=".repeat(60));
    console.log("πŸ“Š RESULTS SUMMARY");
    console.log("=".repeat(60));
    console.log(`βœ“ Total items: ${results.mergedData.length}`);
    console.log(`βœ“ Successful: ${results.summary.successfulUrls}`);
    console.log(`βœ“ Failed: ${results.summary.failedUrls}`);
    console.log(`βœ“ Tokens used: ${results.summary.totalTokenUsage}`);
    
    console.log("\nπŸ“¦ EXTRACTED DATA:");
    console.log(JSON.stringify(results.mergedData, null, 2));
  }
})();
