Get Scrape Job Status

curl --request GET \
  --url https://api.spidra.io/api/scrape/{jobId} \
  --header 'Authorization: Bearer <token>'

import requests

url = "https://api.spidra.io/api/scrape/{jobId}"

headers = {"Authorization": "Bearer <token>"}

response = requests.get(url, headers=headers)

print(response.text)

const options = {method: 'GET', headers: {Authorization: 'Bearer <token>'}};

fetch('https://api.spidra.io/api/scrape/{jobId}', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.spidra.io/api/scrape/{jobId}",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main requests the status of a scrape job from the Spidra API and prints the
// raw response body to standard output.
//
// The {jobId} segment of the URL and the <token> part of the Authorization
// header are placeholders; substitute real values before running.
func main() {
	url := "https://api.spidra.io/api/scrape/{jobId}"

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}
	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so res.Body must not be touched here.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

HttpResponse<String> response = Unirest.get("https://api.spidra.io/api/scrape/{jobId}")
  .header("Authorization", "Bearer <token>")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.spidra.io/api/scrape/{jobId}")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Get.new(url)
request["Authorization"] = 'Bearer <token>'

response = http.request(request)
puts response.read_body

{
  "status": "active",
  "progress": {
    "message": "Processing content with AI...",
    "progress": 0.6
  },
  "result": null,
  "error": null
}

GET

scrape

{jobId}

Get Scrape Job Status

curl --request GET \
  --url https://api.spidra.io/api/scrape/{jobId} \
  --header 'Authorization: Bearer <token>'

import requests

url = "https://api.spidra.io/api/scrape/{jobId}"

headers = {"Authorization": "Bearer <token>"}

response = requests.get(url, headers=headers)

print(response.text)

const options = {method: 'GET', headers: {Authorization: 'Bearer <token>'}};

fetch('https://api.spidra.io/api/scrape/{jobId}', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.spidra.io/api/scrape/{jobId}",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"net/http"
	"io"
)

// main requests the status of a scrape job from the Spidra API and prints the
// raw response body to standard output.
//
// The {jobId} segment of the URL and the <token> part of the Authorization
// header are placeholders; substitute real values before running.
func main() {
	url := "https://api.spidra.io/api/scrape/{jobId}"

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}
	req.Header.Add("Authorization", "Bearer <token>")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so res.Body must not be touched here.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(string(body))
}

HttpResponse<String> response = Unirest.get("https://api.spidra.io/api/scrape/{jobId}")
  .header("Authorization", "Bearer <token>")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.spidra.io/api/scrape/{jobId}")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Get.new(url)
request["Authorization"] = 'Bearer <token>'

response = http.request(request)
puts response.read_body

{
  "status": "active",
  "progress": {
    "message": "Processing content with AI...",
    "progress": 0.6
  },
  "result": null,
  "error": null
}

Polling Pattern

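Scrape jobs are processed asynchronously. When you submit a job you get a jobId back immediately. You then poll this endpoint every 2-5 seconds until status is completed or failed; any other status (waiting, active, or delayed) means the job has not finished yet, so keep polling.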

/**
 * Polls the scrape job status endpoint until the job reaches a final status.
 *
 * @param {string} jobId - Job ID returned when the scrape job was submitted.
 * @returns {Promise<*>} Resolves with the job's `result` once status is `completed`.
 * @throws {Error} If a status request returns a non-2xx HTTP response, or if
 *   the job status is `failed` (the message is the job's `error` field).
 */
async function waitForResult(jobId) {
  while (true) {
    const res = await fetch(`https://api.spidra.io/api/scrape/${jobId}`, {
      headers: { Authorization: 'Bearer YOUR_API_KEY' }
    });
    // A non-2xx response (for example a rejected token) is not a job status;
    // fail fast instead of polling on it indefinitely.
    if (!res.ok) {
      throw new Error(`Status request failed with HTTP ${res.status}`);
    }
    const data = await res.json();

    if (data.status === 'completed') return data.result;
    if (data.status === 'failed') throw new Error(data.error);

    // Job has not finished yet: wait 3 seconds before asking again.
    await new Promise(r => setTimeout(r, 3000));
  }
}

Status Values

Status	Meaning
`waiting`	In queue, not started yet
`active`	Running right now
`completed`	Done, results are ready
`failed`	Something went wrong, check `error`

Response Structure

When status is completed, everything you need is inside result.

{
  "status": "completed",
  "progress": {
    "message": "Scrape completed successfully",
    "progress": 1
  },
  "result": {
    "content": "...",
    "screenshots": [],
    "ai_extraction_failed": false,
    "stats": {
      "durationMs": 4200,
      "captchaSolvedCount": 0,
      "inputTokens": 312,
      "outputTokens": 84,
      "totalTokens": 396
    }
  },
  "error": null
}

result.content

This is the main output field. What it contains depends on whether you provided a prompt:

With prompt: the AI-extracted result, formatted according to output ("markdown" or "json")
Without prompt: the raw scraped page content as markdown

If AI extraction fails for any reason, content still returns the raw markdown as a fallback, and ai_extraction_failed is set to true so you can detect this.

result.stats

Timing and usage information for the job.

Field	Description
`durationMs`	How long the whole job took in milliseconds
`captchaSolvedCount`	Number of CAPTCHAs that were automatically solved
`inputTokens`	Tokens sent to the AI model
`outputTokens`	Tokens returned from the AI model
`totalTokens`	Total tokens used (input + output)

Failed Jobs

When status is failed, the error field contains the reason:

{
  "status": "failed",
  "error": "Failed to scrape https://example.com — net::ERR_NAME_NOT_RESOLVED"
}

Authorizations

Authorization

string

header

required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Path Parameters

jobId

string

required

The job ID returned from POST /scrape

Response

Job status and results

status

enum<string>

Current status of the scrape job

Available options:

waiting,

active,

completed,

failed,

delayed

progress

object

Show child attributes

result

object | null

Populated only when status is 'completed'; null for every other status

Show child attributes

error

string | null

Error message if status is 'failed'; null otherwise

Submit a Scrape Job Submit a Batch Scrape Job

​Polling Pattern

​Status Values

​Response Structure

​result.content

​result.stats

​Failed Jobs