Documentation /docs/extract

Extract

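POST /v1/extract is the core production endpoint for accounts payable (AP) invoice extraction. It accepts a file plus an explicit schema and returns grounded invoice-level fields and line-item structures; each extracted value is returned together with its region_ids, as shown in the example response fragment below.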

AP / GST invoice schema example

{
  "doc_class": "invoice",
  "invoice_level_fields": {
    "invoice_number": "string: invoice identifier",
    "invoice_date": "date: invoice date",
    "due_date": "date: payment due date",
    "supplier_name": "string: supplier legal name",
    "supplier": {
      "GSTIN": "string: supplier GST registration number"
    },
    "po_number": "string: purchase order reference",
    "currency": "string: invoice currency",
    "subtotal": "number: subtotal before tax",
    "tax": "number: total tax amount",
    "total_due": "number: final invoice amount due",
    "totals": {
      "CGST_Amount": "number: central GST amount",
      "SGST_Amount": "number: state GST amount",
      "IGST_Amount": "number: integrated GST amount"
    }
  },
  "line_item_structures": {
    "line_items": {
      "description": "Invoice line items",
      "target_fields": [
        "description (string): line item description",
        "quantity (number): billed quantity",
        "unit_price (number): unit price",
        "amount (number): line amount",
        "HSN_Code (string): HSN or SAC classification code"
      ]
    },
    "line_item_notes": {
      "description": "Line-level notes or tax notes",
      "target_fields": []
    }
  }
}

cURL

# POST the JSON request body stored in extract-ap-invoice.json to the extract endpoint.
# Expects the DOCSPEED_API_KEY shell variable to hold your API key.
curl --silent --show-error --request POST "https://api.docspeed.ai/v1/extract" \
  --header "Authorization: Bearer ${DOCSPEED_API_KEY}" \
  --header "Content-Type: application/json" \
  --data @extract-ap-invoice.json

Python

import requests

# Request body for POST /v1/extract: the file to read, the run options,
# and the schema listing the fields to extract.
payload = {
    "input": {"file_id": "file_gst_invoice"},
    "execution_mode": "sync",
    "grounding": "cell",
    "schema": {
        "doc_class": "invoice",
        # Document-wide fields; nested objects group related fields.
        "invoice_level_fields": {
            "invoice_number": "string: invoice identifier",
            "supplier": {"GSTIN": "string: supplier GST registration number"},
            "totals": {
                "CGST_Amount": "number: central GST amount",
                "SGST_Amount": "number: state GST amount",
                "IGST_Amount": "number: integrated GST amount",
            },
        },
        # Repeating structures; each names the per-row fields it wants.
        "line_item_structures": {
            "line_items": {
                "description": "Invoice line items",
                "target_fields": [
                    "description (string): line item description",
                    "HSN_Code (string): HSN or SAC classification code",
                ],
            },
            "line_item_notes": {
                "description": "Line-level notes",
                "target_fields": [],
            },
        },
    },
}

response = requests.post(
    "https://api.docspeed.ai/v1/extract",
    headers={
        # Replace YOUR_API_KEY with your actual API key.
        "Authorization": "Bearer YOUR_API_KEY",
        "Content-Type": "application/json",
    },
    json=payload,
    timeout=300,  # requests timeout, in seconds
)
# Raise on 4xx/5xx so an error response is not mistaken for an extraction result.
response.raise_for_status()
print(response.json())

TypeScript

// Send the extraction request: the file to read, the run options,
// and the schema listing the fields to extract.
const response = await fetch("https://api.docspeed.ai/v1/extract", {
  method: "POST",
  headers: {
    // Replace YOUR_API_KEY with your actual API key.
    Authorization: "Bearer YOUR_API_KEY",
    "Content-Type": "application/json",
  },
  body: JSON.stringify({
    input: { file_id: "file_gst_invoice" },
    execution_mode: "sync",
    grounding: "cell",
    schema: {
      doc_class: "invoice",
      // Document-wide fields; nested objects group related fields.
      invoice_level_fields: {
        invoice_number: "string: invoice identifier",
        supplier: {
          GSTIN: "string: supplier GST registration number",
        },
        totals: {
          CGST_Amount: "number: central GST amount",
          SGST_Amount: "number: state GST amount",
          IGST_Amount: "number: integrated GST amount",
        },
      },
      // Repeating structures; each names the per-row fields it wants.
      line_item_structures: {
        line_items: {
          description: "Invoice line items",
          target_fields: [
            "description (string): line item description",
            "HSN_Code (string): HSN or SAC classification code",
          ],
        },
        line_item_notes: {
          description: "Line-level notes",
          target_fields: [],
        },
      },
    },
  }),
});

// fetch only rejects on network failure; HTTP error statuses must be checked explicitly.
if (!response.ok) {
  throw new Error(
    `Extract request failed: ${response.status} ${response.statusText}`,
  );
}

console.log(await response.json());

Example response fragment

{
  "invoice_level_fields": {
    "supplier": {
      "GSTIN": {
        "value": "29ABCDE1234F1Z5",
        "region_ids": ["p1:l7"]
      }
    },
    "totals": {
      "CGST_Amount": {
        "value": 180.0,
        "region_ids": ["p1:l28"]
      }
    }
  },
  "line_item_structures": {
    "line_items": [
      {
        "HSN_Code": {
          "value": "8471",
          "region_ids": ["p1:t1:c4"]
        }
      }
    ]
  }
}