For AI agents: a documentation index is available at the root level at /llms.txt and /llms-full.txt. Append /llms.txt to any URL for a page-level index, or .md for the markdown version of any page.
Guides | API Reference | Changelog | Model Versioning
Guides | API Reference | Changelog | Model Versioning
    • Getting Started
    • Authentication
    • API Versioning
    • SDKs
    • Deployments
    • Error Codes
    • Async Processing
  • Endpoints
      • POST Extract File (Sync)
      • POST Extract File (Async)
      • GET Get Extract Run
      • POST Cancel Extract Run
      • DEL Delete Extract Run
      • GET List Extract Runs
      • POST Create Extractor
      • GET Get Extractor
      • POST Update Extractor
      • GET List Extractors
      • POST Create Extractor Version
      • GET Get Extractor Version
      • GET List Extractor Versions
  • Webhook Events
LogoLogo
EndpointsExtract

Extract File (Sync)

POST
/extract
POST
/extract
1import { ExtendClient } from "extend-ai";
2
3const client = new ExtendClient({ token: "YOUR_TOKEN" });
4await client.extract({
5 config: {
6 schema: {
7 "type": "object",
8 "properties": {
9 "vendor_name": {
10 "type": "string",
11 "description": "The name of the vendor"
12 },
13 "invoice_number": {
14 "type": "string",
15 "description": "The invoice number"
16 },
17 "total_amount": {
18 "type": "number",
19 "description": "The total amount due"
20 }
21 }
22 }
23 },
24 file: {
25 url: "https://example.com/invoice.pdf"
26 }
27});
1{
2 "object": "extract_run",
3 "id": "exr_Xj8mK2pL9nR4vT7qY5wZ",
4 "status": "PROCESSING",
5 "output": {
6 "value": {},
7 "metadata": {}
8 },
9 "initialOutput": {
10 "value": {},
11 "metadata": {}
12 },
13 "reviewedOutput": {
14 "value": {},
15 "metadata": {}
16 },
17 "failureReason": "PARSING_ERROR",
18 "failureMessage": "string",
19 "metadata": {},
20 "reviewed": false,
21 "edited": false,
22 "edits": {},
23 "config": {
24 "baseProcessor": "extraction_performance",
25 "baseVersion": "string",
26 "extractionRules": "string",
27 "schema": {},
28 "advancedOptions": {
29 "modelReasoningInsightsEnabled": true,
30 "advancedMultimodalEnabled": true,
31 "citationsEnabled": true,
32 "citationMode": "line",
33 "arrayCitationStrategy": "item",
34 "arrayStrategy": {
35 "type": "large_array_heuristics"
36 },
37 "chunkingOptions": {
38 "chunkingStrategy": "standard",
39 "pageChunkSize": 1,
40 "chunkSelectionStrategy": "intelligent",
41 "customSemanticChunkingRules": "string"
42 },
43 "excelSheetRanges": [
44 {
45 "start": 1,
46 "end": 1
47 }
48 ],
49 "excelSheetSelectionStrategy": "intelligent",
50 "pageRanges": [
51 {
52 "start": 1,
53 "end": 10
54 },
55 {
56 "start": 20,
57 "end": 30
58 }
59 ],
60 "reviewAgent": {
61 "enabled": true
62 },
63 "currentDateEnabled": true
64 },
65 "parseConfig": {
66 "target": "markdown",
67 "chunkingStrategy": {
68 "type": "page",
69 "options": {
70 "minCharacters": 500,
71 "maxCharacters": 10000
72 }
73 },
74 "engine": "parse_performance",
75 "engineVersion": "latest",
76 "blockOptions": {
77 "figures": {
78 "enabled": true,
79 "figureImageClippingEnabled": true,
80 "advancedChartExtractionEnabled": false
81 },
82 "tables": {
83 "enabled": true,
84 "targetFormat": "html",
85 "tableHeaderContinuationEnabled": false,
86 "cellBlocksEnabled": false,
87 "agentic": {
88 "enabled": false,
89 "customInstructions": "string"
90 }
91 },
92 "text": {
93 "signatureDetectionEnabled": false,
94 "agentic": {
95 "enabled": false,
96 "customInstructions": "string"
97 }
98 },
99 "keyValue": {
100 "blankFieldFormattingEnabled": false
101 },
102 "barcodes": {
103 "imageClippingEnabled": false,
104 "readingEnabled": false
105 },
106 "formulas": {
107 "enabled": false
108 }
109 },
110 "advancedOptions": {
111 "pageRotationEnabled": true,
112 "pageRanges": [
113 {
114 "start": 1,
115 "end": 10
116 },
117 {
118 "start": 20,
119 "end": 30
120 }
121 ],
122 "excelParsingMode": "basic",
123 "excelSkipHiddenContent": false,
124 "excelUseRawCellValues": false,
125 "excelSkipCalculation": true,
126 "verticalGroupingThreshold": 1,
127 "returnOcr": {
128 "words": false
129 },
130 "alwaysConvertToPdf": false,
131 "enrichmentFormat": "xml",
132 "imageConversionQuality": "medium",
133 "formattingDetection": [
134 {
135 "type": "change_tracking"
136 }
137 ]
138 }
139 }
140 },
141 "extractor": {
142 "object": "extractor",
143 "id": "ex_Xj8mK2pL9nR4vT7qY5wZ",
144 "name": "Invoice Extractor",
145 "createdAt": "2024-03-21T16:45:00Z",
146 "updatedAt": "2024-03-21T16:45:00Z"
147 },
148 "extractorVersion": {
149 "object": "extractor_version",
150 "id": "exv_xK9mLPqRtN3vS8wF5hB2cQ",
151 "description": "Updated extraction fields for new invoice format",
152 "version": "draft",
153 "extractorId": "ex_Xj8mK2pL9nR4vT7qY5wZ",
154 "createdAt": "2024-03-21T16:45:00Z"
155 },
156 "file": {
157 "object": "file",
158 "id": "file_xK9mLPqRtN3vS8wF5hB2cQ",
159 "name": "Invoices.pdf",
160 "type": "PDF",
161 "parentFileId": "file_Zk9mNP12Qw4yTv8BdR3H",
162 "metadata": {
163 "pageCount": 30,
164 "parentSplit": {
165 "id": "string",
166 "type": "Invoice",
167 "identifier": "other_2_9",
168 "startPage": 1,
169 "endPage": 10
170 }
171 },
172 "createdAt": "2024-03-21T16:45:00Z",
173 "updatedAt": "2024-03-21T16:45:00Z"
174 },
175 "parseRunId": "pr_Xj8mK2pL9nR4vT7qY5wZ",
176 "dashboardUrl": "https://dashboard.extend.ai/runs/exr_Xj8mK2pL9nR4vT7qY5wZ",
177 "usage": {
178 "credits": 9,
179 "totalCredits": 15,
180 "breakdown": [
181 {
182 "object": "parse_run",
183 "id": "pr_3UZSj69pYZDKHFuuX57ic",
184 "credits": 6
185 }
186 ]
187 },
188 "createdAt": "2024-03-21T16:45:00Z",
189 "updatedAt": "2024-03-21T16:45:00Z"
190}
Extract structured data from a file synchronously, waiting for the result before returning. This endpoint has a **5-minute timeout** — if processing takes longer, the request will fail. **Note:** This endpoint is intended for onboarding and testing only. For production workloads, use `POST /extract_runs` with [polling or webhooks](https://docs.extend.ai/2026-02-09/developers/async-processing) instead, as it provides better reliability for large files and avoids timeout issues. The Extract endpoint allows you to extract structured data from files using an existing extractor or an inline configuration. For more details, see the [Extract File guide](https://docs.extend.ai/2026-02-09/product/extraction/quick-start-5-minutes).
Was this page helpful?
Previous

Extract File (Async)

Next
Built with

Extract structured data from a file synchronously, waiting for the result before returning. This endpoint has a 5-minute timeout — if processing takes longer, the request will fail.

Note: This endpoint is intended for onboarding and testing only. For production workloads, use POST /extract_runs with polling or webhooks instead, as it provides better reliability for large files and avoids timeout issues.

The Extract endpoint allows you to extract structured data from files using an existing extractor or an inline configuration.

For more details, see the Extract File guide.

Authentication

Authorization: Bearer

Bearer authentication of the form Bearer <token>, where token is your auth token.

Headers

x-extend-api-version ("2026-02-09", Optional)
API version to use for the request. If you're using an SDK, you can ignore this parameter. If you are not using an SDK and do not specify a version, your request will either receive a `400 Bad Request` or fall back to a previous legacy version. See [API Versioning](https://docs.extend.ai/2026-02-09/developers/api-versioning) for more details.

Request

This endpoint expects an object.
file (object, Required)
The file to be extracted from. Files can be provided as a URL, Extend file ID, or raw text.
extractor (object, Optional)

Reference to an existing extractor. One of extractor or config must be provided.

config (object, Optional)

Inline extract configuration. One of extractor or config must be provided.

metadata (map from strings to any, Optional)
An optional object that can be passed in to identify the run. It will be returned back to you in the response and webhooks. Maximum size is 10KB. To categorize runs for billing and usage tracking, include `extend:usage_tags` with an array of string values (e.g., `{"extend:usage_tags": ["production", "team-eng", "customer-123"]}`). Tags must contain only alphanumeric characters, hyphens, and underscores; any special characters will be automatically removed.

Response

Successfully extracted data from file
object (enum)

The type of object. Will always be "extract_run".

Allowed values: "extract_run"
id (string)

The unique identifier for this extract run.

Example: "exr_Xj8mK2pL9nR4vT7qY5wZ"

status (enum)

The status of a processor run (extract, classify, or split):

  • "PENDING" - The run has been created and is waiting to be processed
  • "PROCESSING" - The run is in progress
  • "PROCESSED" - The run completed successfully
  • "FAILED" - The run failed
  • "CANCELLED" - The run was cancelled
Allowed values: "PENDING", "PROCESSING", "PROCESSED", "FAILED", "CANCELLED"
output (object or map from strings to objects or null)

The final output, either reviewed or initial. This is a union of two possible shapes:

  • JSON Schema output: The current output format, returned for runs created with a JSON Schema config.
  • Legacy output: A legacy output format from a previous API version. This shape is only returned for runs that were originally created with a legacy config.

Availability: Present when status is "PROCESSED".

initialOutput (object or map from strings to objects or null)

The initial output from the extract run, before any review edits.

Availability: Present when reviewed is true.

reviewedOutput (object or map from strings to objects or null)

The output after human review.

Availability: Present when reviewed is true.

failureReason (string or null)

The reason for failure.

Availability: Present when status is "FAILED".

Possible values include:

  • ABORTED - The run was aborted by the user
  • INTERNAL_ERROR - An unexpected internal error occurred
  • FAILED_TO_PROCESS_FILE - Failed to process the file (e.g., OCR failure, file access issues)
  • INVALID_PROCESSOR - The processor configuration is invalid
  • INVALID_CONFIGURATION - The provided configuration is incompatible with the selected model
  • PARSING_ERROR - Failed to parse the extraction output
  • PRE_PROCESSING_FAILURE - An error occurred during preprocessing (e.g., chunking)
  • POST_PROCESSING_FAILURE - An error occurred during postprocessing
  • OUT_OF_CREDITS - Insufficient credits to run the extraction

Note: Additional failure reasons may be added in the future. Your integration should handle unknown values gracefully.

failureMessage (string or null)

A detailed message about the failure.

Availability: Present when status is "FAILED".

metadata (map from strings to any or null)

Any metadata that was provided when creating the extract run.

Availability: Present when metadata was provided during creation.

reviewed (boolean)
Indicates whether the run has been reviewed by a human.
edited (boolean)
Indicates whether the run results have been edited during review.
edits (map from strings to objects or null)

Details of edits made during review.

Availability: Present when edited is true.

config (object)

The configuration used for this extract run. This is a union of two possible shapes:

  • JSON Schema config: The current config format. All runs created through this API version use this shape.
  • Legacy config: A fields-array config from a previous API version. This shape is only returned when retrieving runs that were originally created with the legacy format. This API version does not support creating runs with legacy configs.
extractor (object or null)

The extractor that was used for this run.

Availability: Present when an extractor reference was provided. Not present when using inline config.

extractorVersion (object or null)

The version of the extractor that was used for this run.

Availability: Present when an extractor reference was provided. Not present when using inline config.

file (object)
The file that was processed.
parseRunId (string or null)

The ID of the parse run that was used for this extract run.

Availability: Present when a parse run was created.

dashboardUrl (string)
The URL to view the extract run in the Extend dashboard.
usage (object or null)

Usage credits consumed by this extract run.

Availability: Present when status is "PROCESSED". Will not be returned for runs created before October 7, 2025 or for customers on legacy billing systems.

createdAt (string, format: "date-time")

The time (in UTC) at which the object was created. Will follow the RFC 3339 format.

Example: "2024-03-21T16:45:00Z"

updatedAt (string, format: "date-time")

The time (in UTC) at which the object was last updated. Will follow the RFC 3339 format.

Example: "2024-03-21T16:45:00Z"

Errors

400
Bad Request Error
401
Unauthorized Error
402
Payment Required Error
403
Forbidden Error
404
Not Found Error
422
Unprocessable Entity Error
429
Too Many Requests Error
500
Internal Server Error

API version to use for the request. If you’re using an SDK, you can ignore this parameter. If you are not using an SDK and do not specify a version, your request will either receive a 400 Bad Request or fall back to a previous legacy version. See API Versioning for more details.

An optional object that can be passed in to identify the run. It will be returned back to you in the response and webhooks. Maximum size is 10KB.

To categorize runs for billing and usage tracking, include extend:usage_tags with an array of string values (e.g., {"extend:usage_tags": ["production", "team-eng", "customer-123"]}). Tags must contain only alphanumeric characters, hyphens, and underscores; any special characters will be automatically removed.