Submit a Batch Scrape Job

POST /api/batch/scrape
curl --request POST \
  --url https://api.spidra.io/api/batch/scrape \
  --header 'Content-Type: application/json' \
  --header 'x-api-key: <api-key>' \
  --data '
{
  "urls": [
    "https://example.com/product/1",
    "https://example.com/product/2"
  ],
  "prompt": "Extract the product name, price, and availability",
  "output": "json"
}
'
Response:

{
  "status": "queued",
  "batchId": "f3a2b1c0-0000-0000-0000-000000000000",
  "total": 2
}

How It Works

Batch scrape jobs are asynchronous. Submitting returns a batchId immediately. Each URL is processed in parallel by independent workers.
  1. Submit — Send your URL list. Receive batchId in the response.
  2. Process — Each URL is opened in a real browser, CAPTCHAs solved, content extracted.
  3. Poll — Call GET /api/batch/scrape/{batchId} every 2–5 seconds until status is terminal.
Credits are reserved upfront when you submit. The final amount is reconciled per item once processing completes.
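The submit-then-poll flow can be sketched as follows. This is a minimal sketch: the terminal status names and the `get_status` transport callable are assumptions for illustration, not part of this reference (see the Get Batch Status endpoint for the authoritative status values).

```python
import time

# Assumed terminal statuses; check the Get Batch Status endpoint for the real set.
TERMINAL_STATUSES = {"completed", "failed", "cancelled"}

def poll_batch(get_status, batch_id, interval=3.0, max_attempts=60):
    """Poll GET /api/batch/scrape/{batchId} until the batch finishes.

    get_status(batch_id) is any callable that performs the GET request
    and returns the decoded JSON body.
    """
    for _ in range(max_attempts):
        body = get_status(batch_id)
        if body["status"] in TERMINAL_STATUSES:
            return body
        time.sleep(interval)  # the docs suggest polling every 2-5 seconds
    raise TimeoutError(f"batch {batch_id} did not reach a terminal status")

# Stubbed transport for illustration: two "processing" polls, then done.
replies = iter([{"status": "processing"}] * 2 + [{"status": "completed", "total": 2}])
result = poll_batch(lambda _id: next(replies), "f3a2b1c0-...", interval=0)
print(result["status"])  # completed
```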

Minimal Example

curl -X POST https://api.spidra.io/api/batch/scrape \
  -H "x-api-key: YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "urls": ["https://example.com/page-1", "https://example.com/page-2"],
    "prompt": "Extract the headline and summary",
    "output": "json"
  }'
Response 202 Accepted:
{
  "status": "queued",
  "batchId": "f3a2b1c0-0000-0000-0000-000000000000",
  "total": 2
}
The Location response header is also set to /api/batch/scrape/{batchId} for convenience.
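For reference, the same submission can be assembled with Python's standard library. This is a sketch: `YOUR_API_KEY` is a placeholder, and the final `urlopen` call is left commented out so the request is built but not sent.

```python
import json
import urllib.request

payload = {
    "urls": ["https://example.com/page-1", "https://example.com/page-2"],
    "prompt": "Extract the headline and summary",
    "output": "json",
}
req = urllib.request.Request(
    "https://api.spidra.io/api/batch/scrape",
    data=json.dumps(payload).encode("utf-8"),
    headers={"x-api-key": "YOUR_API_KEY", "Content-Type": "application/json"},
    method="POST",
)
# resp = urllib.request.urlopen(req)  # expect 202 Accepted plus a Location header
# batch = json.load(resp)             # {"status": "queued", "batchId": ..., "total": 2}
```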

With Structured Output

Pass a schema to receive a consistent JSON shape for every item:
{
  "urls": [
    "https://shop.example.com/item/100",
    "https://shop.example.com/item/101"
  ],
  "prompt": "Extract the product details",
  "schema": {
    "type": "object",
    "required": ["name", "price"],
    "properties": {
      "name":      { "type": "string" },
      "price":     { "type": "number" },
      "currency":  { "type": ["string", "null"] },
      "available": { "type": ["boolean", "null"] }
    }
  }
}
When schema is provided, output is automatically forced to "json". Non-fatal schema issues are returned as schema_warnings in the submission response.
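As a sanity check before submitting, you can mirror the schema's constraints client-side. This is illustrative only, not a full JSON Schema validator; the API performs the authoritative validation server-side.

```python
def matches_schema(item):
    """Rough client-side mirror of the schema above: name/price required,
    currency/available nullable. Not a full JSON Schema validator."""
    if not all(key in item for key in ("name", "price")):
        return False
    return (isinstance(item.get("name"), str)
            and isinstance(item.get("price"), (int, float))
            and not isinstance(item.get("price"), bool)
            and isinstance(item.get("currency"), (str, type(None)))
            and isinstance(item.get("available"), (bool, type(None))))

print(matches_schema({"name": "Item 100", "price": 19.99, "currency": "EUR"}))  # True
print(matches_schema({"name": "Item 101"}))                                     # False: price missing
```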

With Proxy

{
  "urls": ["https://amazon.de/dp/B123", "https://amazon.de/dp/B456"],
  "prompt": "Extract price and availability",
  "output": "json",
  "useProxy": true,
  "proxyCountry": "de"
}

With Screenshots

{
  "urls": ["https://example.com"],
  "screenshot": true,
  "fullPageScreenshot": true
}
Screenshot URLs are returned in each item’s screenshotUrl field once processing is complete.

Request Body

| Field | Type | Required | Default | Description |
|---|---|---|---|---|
| urls | string[] | Yes | | URLs to scrape. 1–50 URLs per request. Must be http:// or https://. Private/internal IPs are rejected. |
| prompt | string | No | | AI extraction instruction applied to every URL in the batch. |
| output | "json" or "markdown" | No | "json" | Output format for extracted content. Automatically "json" when schema is set. |
| schema | object | No | | JSON Schema object that constrains the AI output. Validated before queuing; returns 422 if invalid. |
| useProxy | boolean | No | false | Route each URL through residential stealth proxies. |
| proxyCountry | string | No | | ISO country code ("us", "de", "gb") or region ("eu", "global"). Requires useProxy: true. |
| crawlerMode | string | No | "default" | Browser rendering mode: "default", "fast", or "ai". |
| extractContentOnly | boolean | No | false | Strip navigation, headers, and sidebars; keeps only the main content. |
| cookies | string | No | | Session cookies for authenticated pages. Never persisted to the database; passed ephemerally to the worker only. |
| screenshot | boolean | No | false | Capture a viewport screenshot of each page. |
| fullPageScreenshot | boolean | No | false | Capture the full scrollable page. Requires screenshot: true. |

Response

| Field | Type | Description |
|---|---|---|
| status | "queued" | Always "queued" on a successful submission. |
| batchId | string | UUID; use this to poll status and manage the batch. |
| total | number | Number of URLs accepted into the batch. |
| schema_warnings | string[] | Non-fatal schema issues (e.g., unsupported keywords). Only present if there are warnings. |

Errors

| Code | Reason |
|---|---|
| 400 | urls is missing, not an array, empty, or exceeds 50 items. |
| 401 | Missing x-api-key header. |
| 402 | Payment overdue; update your payment method. |
| 403 | Monthly credit limit reached. |
| 422 | One or more URLs are invalid, or schema is malformed. An errors array is returned with per-URL details. |
| 429 | More than 20 batch submissions per minute. |
Validation error example:
{
  "status": "error",
  "message": "Request validation failed. Fix the errors below and try again.",
  "errors": [
    "URL 2: \"ftp://example.com\" is not a valid URL — must use http or https",
    "URL 4: private and internal URLs are not allowed"
  ]
}
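A hypothetical client-side mapping of these status codes to handling strategies. The groupings below are a suggestion for illustration, not part of the API contract.

```python
def handle_status(code):
    """Map the documented response codes to a client action (illustrative)."""
    if code == 202:
        return "accepted"       # batch queued; start polling
    if code == 429:
        return "retry"          # rate limited (>20 submissions/min): back off
    if code in (400, 422):
        return "fix-request"    # bad urls array or malformed schema
    if code in (401, 402, 403):
        return "check-account"  # API key, billing, or credit-limit problem
    return "unexpected"

print(handle_status(429))  # retry
```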

See Also

Get Batch Status: poll for results
Batch Scraping Guide: full feature walkthrough

Authorizations

x-api-key (string, header, required)

Body

application/json

urls (string<uri>[], required)
URLs to scrape. 1–50 per request. Must be http:// or https://. Private/internal IPs are rejected. Required array length: 1–50 elements.

prompt (string)
AI extraction instruction applied to every URL in the batch.

output (enum<string>, default: json)
Output format. Automatically set to 'json' when schema is provided. Available options: json, markdown.

schema (object)
JSON Schema that constrains AI output shape. Validated before queuing — returns 422 if invalid.

useProxy (boolean, default: false)
Route each URL through residential stealth proxies. Usage is billed from your bandwidth quota.

proxyCountry (string)
ISO country code (e.g. 'us', 'de', 'gb') or region ('eu', 'global'). Requires useProxy: true.

crawlerMode (string, default: default)
Browser rendering mode: 'default', 'fast', or 'ai'.

extractContentOnly (boolean, default: false)
Strip navigation, headers, and sidebars — keep only the main content.

cookies (string)
Session cookies for authenticated pages. Never persisted — passed ephemerally to the worker.

screenshot (boolean, default: false)
Capture a viewport screenshot of each page.

fullPageScreenshot (boolean, default: false)
Capture the full scrollable page. Requires screenshot: true.

Response

202: Batch accepted and queued

status (enum<string>)
Available options: queued.

batchId (string<uuid>)

total (integer)

schema_warnings (string[])
Non-fatal schema issues. Only present if there are warnings.