POST /api/scrapers/pdf/create-and-run
curl --request POST \
  --url https://app.mrscraper.com/api/scrapers/pdf/create-and-run \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '{
  "name": "Test PDF Scraper",
  "unique": true,
  "expected_pdf": 200,
  "keywords": [
    {
      "keyword": "Architectural plan PDF",
      "prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
    },
    {
      "keyword": "Floor plan PDF",
      "prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
    },
    {
      "keyword": "Building layout PDF",
      "prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
    }
  ]
}'
{
  "message": "Scraping queued successfully",
  "scraper": {
    "id": 3669,
    "name": "Test PDF Scraper",
    "url": [
      "Default"
    ],
    "urls": [
      "Default"
    ],
    "scheduled": false,
    "schedule": null,
    "created_at": "2024-10-15T07:38:15.000000Z",
    "updated_at": "2024-10-15T07:38:15.000000Z"
  },
  "results": [
    {
      "scraping_run_id": 375965,
      "user_id": 5573,
      "scraper_name": "Test PDF Scraper",
      "scrapped_url": "Architectural plan PDF",
      "scraped_url": "Architectural plan PDF",
      "scraper_id": 3669,
      "status": "running",
      "updated_at": "2024-10-15T07:38:15.000000Z",
      "created_at": "2024-10-15T07:38:15.000000Z",
      "id": 1169956,
      "scraper": {
        "id": 3669,
        "user_id": 5573,
        "sharing": false,
        "share_uuid": null,
        "name": "Test PDF Scraper",
        "type": "pdf",
        "urls": "Default",
        "pdf_urls": "Default",
        "ai_prompt": null,
        "ai_scope": null,
        "headers": null,
        "cookies": null,
        "user_agent": null,
        "disabled_resources": null,
        "delay": 0,
        "html_wanted": false,
        "screenshot_wanted": false,
        "screenshot_type": null,
        "locale": null,
        "scheduled": false,
        "cron": null,
        "cron_timezone": "UTC",
        "paginate": 0,
        "pagination_type": null,
        "infinite_pagination_type": null,
        "infinite_pagination_seconds": null,
        "infinite_pagination_text": null,
        "infinite_pagination_css_selector": null,
        "infinite_pagination_n_selector": null,
        "load_more_selector": null,
        "pagination_query_parameter": null,
        "pagination_next_page_selector": null,
        "pagination_limit_type": null,
        "pagination_max_page": null,
        "max_next_page": null,
        "pagination_max_variable": null,
        "created_at": "2024-10-15T07:38:15.000000Z",
        "updated_at": "2024-10-15T07:38:15.000000Z",
        "cron_minutes": "*",
        "cron_minutes_n_detail": null,
        "cron_minutes_x_detail": null,
        "cron_hours": "*",
        "cron_hours_n_detail": null,
        "cron_hours_x_detail": null,
        "cron_day_of_month": "*",
        "cron_day_of_month_x_detail": null,
        "cron_month": "*",
        "cron_month_x_detail": null,
        "cron_day_of_week": "*",
        "cron_day_of_week_x_detail": null,
        "click_action_enabled": false,
        "click_action_selector": null,
        "click_action_wait": "no",
        "workflow": [
          {
            "type": "options",
            "data": {
              "keywords": [
                {
                  "type": "keyword",
                  "data": {
                    "keyword": "Architectural plan PDF",
                    "prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
                  }
                },
                {
                  "type": "keyword",
                  "data": {
                    "keyword": "Floor plan PDF",
                    "prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
                  }
                },
                {
                  "type": "keyword",
                  "data": {
                    "keyword": "Building layout PDF",
                    "prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
                  }
                }
              ]
            }
          }
        ],
        "version": 2,
        "proxy_type": "rotation",
        "proxy_host": null,
        "proxy_port": null,
        "proxy_username": null,
        "proxy_password": null,
        "parsers": null,
        "deleted_at": null,
        "external_auth": null
      }
    }
  ]
}

Authorizations

Authorization
string
header
required

Your API token, sent as a Bearer token. You can retrieve it from the API Tokens section of your profile page; see https://docs.mrscraper.com/documentation/api-token for details.

Body

application/json
name
string
required

The name of the scraping task.

keywords
object[]
required

The keywords used to search for PDF files. Each item is an object with a `keyword` and a `prompt`, as shown in the request example.

unique
boolean

Whether to exclude PDF files that have already been scraped.

expected_pdf
number

The total number of PDF files expected in the results. Alternative keywords are added automatically as needed to reach this number.

Response

200 - application/json
message
string
scraper
object
results
any[]