PDF Scraper
Create and run a PDF scraper
This endpoint allows you to create and run a PDF scraper.
POST
Authorizations
You can retrieve your token from the API Tokens section of your profile page; see https://docs.mrscraper.com/documentation/api-token for details.
Body
application/json
The name of the scraping task.
Example:
"PDF Scraper"
The list of keywords used to search for PDF files. Each entry pairs a keyword with a prompt, as shown in the example below.
Example:
[
{
"keyword": "Architectural plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
},
{
"keyword": "Floor plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
},
{
"keyword": "Building layout PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
]
Whether to exclude previously scraped PDF files from the results.
Example:
true
The total number of PDF file results expected. Alternative keywords are added automatically as needed to reach this number.
Example:
200
Response
200 - application/json
Example:
"Scraping queued successfully"
Example:
{
"id": 3669,
"name": "PDF Scraper",
"url": ["Default"],
"urls": ["Default"],
"scheduled": false,
"schedule": null,
"created_at": "2024-10-15T07:38:15.000000Z",
"updated_at": "2024-10-15T07:38:15.000000Z"
}
Example:
[
{
"scraping_run_id": 375965,
"user_id": 26,
"scraper_name": "PDF Scraper",
"scrapped_url": "Architectural plan PDF",
"scraped_url": "Architectural plan PDF",
"scraper_id": 3669,
"status": "running",
"updated_at": "2024-09-20T06:50:17.000000Z",
"created_at": "2024-09-20T06:50:16.000000Z",
"id": 1169956,
"scraper": {
"id": 3669,
"user_id": 26,
"sharing": false,
"share_uuid": null,
"name": "PDF Scraper",
"type": "pdf",
"urls": "Default",
"pdf_urls": "Default",
"ai_prompt": null,
"ai_scope": null,
"headers": null,
"cookies": null,
"user_agent": null,
"disabled_resources": null,
"delay": 0,
"html_wanted": false,
"screenshot_wanted": false,
"screenshot_type": null,
"locale": null,
"scheduled": false,
"cron": null,
"cron_timezone": "UTC",
"paginate": 0,
"pagination_type": null,
"infinite_pagination_type": null,
"infinite_pagination_seconds": null,
"infinite_pagination_text": null,
"infinite_pagination_css_selector": null,
"infinite_pagination_n_selector": null,
"load_more_selector": null,
"pagination_query_parameter": null,
"pagination_next_page_selector": null,
"pagination_limit_type": null,
"pagination_max_page": null,
"max_next_page": null,
"pagination_max_variable": null,
"created_at": "2024-09-20T06:50:16.000000Z",
"updated_at": "2024-09-20T06:50:16.000000Z",
"cron_minutes": "*",
"cron_minutes_n_detail": null,
"cron_minutes_x_detail": null,
"cron_hours": "*",
"cron_hours_n_detail": null,
"cron_hours_x_detail": null,
"cron_day_of_month": "*",
"cron_day_of_month_x_detail": null,
"cron_month": "*",
"cron_month_x_detail": null,
"cron_day_of_week": "*",
"cron_day_of_week_x_detail": null,
"click_action_enabled": false,
"click_action_selector": null,
"click_action_wait": "no",
"workflow": [
{
"type": "options",
"data": {
"keywords": [
{
"type": "keyword",
"data": {
"keyword": "Architectural plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Floor plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Building layout PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
}
]
}
}
],
"version": 2,
"proxy_type": "rotation",
"proxy_host": null,
"proxy_port": null,
"proxy_username": null,
"proxy_password": null,
"parsers": null,
"deleted_at": null
}
}
]