PDF Scraper
Create and run a PDF scraper
API Reference
Endpoint Examples
- Account Summary
- Results
- Scrapers
- Scraping Runs
- AI Scraper
- X (Twitter) Scraper
- Google Scraper
- Job Board Scraper
- Facebook Marketplace Scraper
- PDF Scraper
- Realestate.com.au Scraper
- Shopee Scraper
PDF Scraper
Create and run a PDF scraper
This endpoint allows you to create and run a PDF scraper.
POST /api/scrapers/pdf/create-and-run
curl --request POST \
--url https://app.mrscraper.com/api/scrapers/pdf/create-and-run \
--header 'Authorization: Bearer <token>' \
--header 'Content-Type: application/json' \
--data '{
"name": "PDF Scraper",
"unique": true,
"expected_pdf": 200,
"keywords": [
{
"keyword": "Architectural plan PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
},
{
"keyword": "Floor plan PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
},
{
"keyword": "Building layout PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
}
]
}'
{
"message": "Scraping queued successfully",
"scraper": {
"id": 3669,
"name": "Test PDF Scraper",
"url": [
"Default"
],
"urls": [
"Default"
],
"scheduled": false,
"schedule": null,
"created_at": "2024-10-15T07:38:15.000000Z",
"updated_at": "2024-10-15T07:38:15.000000Z"
},
"results": [
{
"scraping_run_id": 375965,
"user_id": 5573,
"scraper_name": "Test PDF Scraper",
"scrapped_url": "Architectural plan PDF",
"scraped_url": "Architectural plan PDF",
"scraper_id": 3669,
"status": "running",
"updated_at": "2024-10-15T07:38:15.000000Z",
"created_at": "2024-10-15T07:38:15.000000Z",
"id": 1169956,
"scraper": {
"id": 3669,
"user_id": 5573,
"sharing": false,
"share_uuid": null,
"name": "Test PDF Scraper",
"type": "pdf",
"urls": "Default",
"pdf_urls": "Default",
"ai_prompt": null,
"ai_scope": null,
"headers": null,
"cookies": null,
"user_agent": null,
"disabled_resources": null,
"delay": 0,
"html_wanted": false,
"screenshot_wanted": false,
"screenshot_type": null,
"locale": null,
"scheduled": false,
"cron": null,
"cron_timezone": "UTC",
"paginate": 0,
"pagination_type": null,
"infinite_pagination_type": null,
"infinite_pagination_seconds": null,
"infinite_pagination_text": null,
"infinite_pagination_css_selector": null,
"infinite_pagination_n_selector": null,
"load_more_selector": null,
"pagination_query_parameter": null,
"pagination_next_page_selector": null,
"pagination_limit_type": null,
"pagination_max_page": null,
"max_next_page": null,
"pagination_max_variable": null,
"created_at": "2024-10-15T07:38:15.000000Z",
"updated_at": "2024-10-15T07:38:15.000000Z",
"cron_minutes": "*",
"cron_minutes_n_detail": null,
"cron_minutes_x_detail": null,
"cron_hours": "*",
"cron_hours_n_detail": null,
"cron_hours_x_detail": null,
"cron_day_of_month": "*",
"cron_day_of_month_x_detail": null,
"cron_month": "*",
"cron_month_x_detail": null,
"cron_day_of_week": "*",
"cron_day_of_week_x_detail": null,
"click_action_enabled": false,
"click_action_selector": null,
"click_action_wait": "no",
"workflow": [
{
"type": "options",
"data": {
"keywords": [
{
"type": "keyword",
"data": {
"keyword": "Architectural plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Floor plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Building layout PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
}
]
}
}
],
"version": 2,
"proxy_type": "rotation",
"proxy_host": null,
"proxy_port": null,
"proxy_username": null,
"proxy_password": null,
"parsers": null,
"deleted_at": null,
"external_auth": null
}
}
]
}
Authorizations
You can retrieve your token from the API Tokens section of your profile page. See https://docs.mrscraper.com/documentation/api-token for details.
Body
application/json
name (string): The name of the scraping task.
unique (boolean): Whether to exclude previously scraped PDF files from the results.
expected_pdf (integer): The total number of PDF files expected in the results. Alternative keywords are added automatically as needed to reach this number.
curl --request POST \
--url https://app.mrscraper.com/api/scrapers/pdf/create-and-run \
--header 'Authorization: Bearer <token>' \
--header 'Content-Type: application/json' \
--data '{
"name": "PDF Scraper",
"unique": true,
"expected_pdf": 200,
"keywords": [
{
"keyword": "Architectural plan PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
},
{
"keyword": "Floor plan PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
},
{
"keyword": "Building layout PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
}
]
}'
{
"message": "Scraping queued successfully",
"scraper": {
"id": 3669,
"name": "Test PDF Scraper",
"url": [
"Default"
],
"urls": [
"Default"
],
"scheduled": false,
"schedule": null,
"created_at": "2024-10-15T07:38:15.000000Z",
"updated_at": "2024-10-15T07:38:15.000000Z"
},
"results": [
{
"scraping_run_id": 375965,
"user_id": 5573,
"scraper_name": "Test PDF Scraper",
"scrapped_url": "Architectural plan PDF",
"scraped_url": "Architectural plan PDF",
"scraper_id": 3669,
"status": "running",
"updated_at": "2024-10-15T07:38:15.000000Z",
"created_at": "2024-10-15T07:38:15.000000Z",
"id": 1169956,
"scraper": {
"id": 3669,
"user_id": 5573,
"sharing": false,
"share_uuid": null,
"name": "Test PDF Scraper",
"type": "pdf",
"urls": "Default",
"pdf_urls": "Default",
"ai_prompt": null,
"ai_scope": null,
"headers": null,
"cookies": null,
"user_agent": null,
"disabled_resources": null,
"delay": 0,
"html_wanted": false,
"screenshot_wanted": false,
"screenshot_type": null,
"locale": null,
"scheduled": false,
"cron": null,
"cron_timezone": "UTC",
"paginate": 0,
"pagination_type": null,
"infinite_pagination_type": null,
"infinite_pagination_seconds": null,
"infinite_pagination_text": null,
"infinite_pagination_css_selector": null,
"infinite_pagination_n_selector": null,
"load_more_selector": null,
"pagination_query_parameter": null,
"pagination_next_page_selector": null,
"pagination_limit_type": null,
"pagination_max_page": null,
"max_next_page": null,
"pagination_max_variable": null,
"created_at": "2024-10-15T07:38:15.000000Z",
"updated_at": "2024-10-15T07:38:15.000000Z",
"cron_minutes": "*",
"cron_minutes_n_detail": null,
"cron_minutes_x_detail": null,
"cron_hours": "*",
"cron_hours_n_detail": null,
"cron_hours_x_detail": null,
"cron_day_of_month": "*",
"cron_day_of_month_x_detail": null,
"cron_month": "*",
"cron_month_x_detail": null,
"cron_day_of_week": "*",
"cron_day_of_week_x_detail": null,
"click_action_enabled": false,
"click_action_selector": null,
"click_action_wait": "no",
"workflow": [
{
"type": "options",
"data": {
"keywords": [
{
"type": "keyword",
"data": {
"keyword": "Architectural plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Floor plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Building layout PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
}
]
}
}
],
"version": 2,
"proxy_type": "rotation",
"proxy_host": null,
"proxy_port": null,
"proxy_username": null,
"proxy_password": null,
"parsers": null,
"deleted_at": null,
"external_auth": null
}
}
]
}