Files
In addition to plain text, Sapling also supports PDF and DOCX processing with its PDF-to-text and DOCX-to-text endpoints.
Note that, unlike other Sapling endpoints, these endpoints do not accept a plain JSON request body: the files must be submitted as binary data in a multipart form request.
We plan to add support for file uploads to the SDK soon. We provide sample code in JavaScript and Python below; please contact us if you need assistance for other languages.
PDF to Text POST
https://api.sapling.ai/api/v1/ingest/pdf_to_text
HTTP method: POST
Request Parameters
Please refer to the code examples below.
You must submit the file in a `file` field, as well as a `jsonParams` field that contains the API key.
- Python
- JavaScript
import json
import requests
def process_pdf(file_path, json_params):
    """Upload a PDF to the Sapling PDF-to-text endpoint and print the result.

    Args:
        file_path: Path of the PDF file to upload.
        json_params: JSON-serializable dict of request parameters; it must
            contain the API key under 'key'.

    On HTTP 200 the decoded JSON response is printed; otherwise the status
    code and the raw response body are printed.
    """
    url = 'https://api.sapling.ai/api/v1/ingest/pdf_to_text'
    # Use a context manager so the file handle is closed once the upload has
    # finished, even if the request raises.
    with open(file_path, 'rb') as pdf_file:
        files = {
            'file': pdf_file,
            # Sent as its own multipart part with a JSON content type.
            'jsonParams': ('jsonParams', json.dumps(json_params), 'application/json')
        }
        response = requests.post(url, files=files)
    if response.status_code == 200:
        print(response.json())
    else:
        print(f'Failed, status code: {response.status_code}')
        print(response.text)
if __name__ == '__main__':
    # Location of the PDF to convert; replace with a real path.
    pdf_file_path = 'path_to_your_pdf_file.pdf'
    # Do not expose private key in production
    json_data = dict(key='<api-key>')
    process_pdf(file_path=pdf_file_path, json_params=json_data)
// Assumes there is a <input type="file" id="pdfFile" accept=".pdf">
let fileInput = document.getElementById('pdfFile');
let file = fileInput.files[0];
if (!file) {
  // Nothing was selected, so stop instead of uploading `undefined`.
  throw new Error('No PDF file selected');
}

// The endpoint takes a multipart form: the binary file in `file` and the
// JSON-encoded parameters (including the API key) in `jsonParams`.
let formData = new FormData();
formData.append('file', file);
const jsonParams = JSON.stringify({
  key: '<api-key>', // Do not expose private key in production
});
formData.append('jsonParams', new Blob([jsonParams], { type: 'application/json' }));

const response = await fetch('https://api.sapling.ai/api/v1/ingest/pdf_to_text', {
  method: 'POST',
  credentials: 'include',
  body: formData
});
if (response.ok) {
  // response.json() returns a Promise; await it so the parsed body is
  // logged rather than a pending Promise object.
  console.log(await response.json());
} else {
  // A failed request may not carry a JSON body, so log the raw text.
  console.log(`Failed, status code: ${response.status}`);
  console.log(await response.text());
}
Response Parameters
{
"text": "[EXTRACTED TEXT]"
}
DOCX to Text POST
https://api.sapling.ai/api/v1/ingest/docx_to_text
HTTP method: POST
Request Parameters
Please refer to the code examples below.
You must submit the file in a `file` field, as well as a `jsonParams` field that contains the API key.
- Python
- JavaScript
import json
import requests
def process_docx(file_path, json_params):
    """Upload a DOCX to the Sapling DOCX-to-text endpoint and print the result.

    Args:
        file_path: Path of the DOCX file to upload.
        json_params: JSON-serializable dict of request parameters; it must
            contain the API key under 'key'.

    On HTTP 200 the decoded JSON response is printed; otherwise the status
    code and the raw response body are printed.
    """
    url = 'https://api.sapling.ai/api/v1/ingest/docx_to_text'
    # Use a context manager so the file handle is closed once the upload has
    # finished, even if the request raises.
    with open(file_path, 'rb') as docx_file:
        files = {
            'file': docx_file,
            # Sent as its own multipart part with a JSON content type.
            'jsonParams': ('jsonParams', json.dumps(json_params), 'application/json')
        }
        response = requests.post(url, files=files)
    if response.status_code == 200:
        print(response.json())
    else:
        print(f'Failed, status code: {response.status_code}')
        print(response.text)
if __name__ == '__main__':
    # Location of the DOCX to convert; replace with a real path.
    docx_file_path = 'path_to_your_docx_file.docx'
    # Do not expose private key in production
    json_data = dict(key='<api-key>')
    process_docx(file_path=docx_file_path, json_params=json_data)
// Assumes there is a <input type="file" id="docxFile" accept=".docx">
let fileInput = document.getElementById('docxFile');
let file = fileInput.files[0];
if (!file) {
  // Nothing was selected, so stop instead of uploading `undefined`.
  throw new Error('No DOCX file selected');
}

// The endpoint takes a multipart form: the binary file in `file` and the
// JSON-encoded parameters (including the API key) in `jsonParams`.
let formData = new FormData();
formData.append('file', file);
const jsonParams = JSON.stringify({
  key: '<api-key>', // Do not expose private key in production
});
formData.append('jsonParams', new Blob([jsonParams], { type: 'application/json' }));

const response = await fetch('https://api.sapling.ai/api/v1/ingest/docx_to_text', {
  method: 'POST',
  credentials: 'include',
  body: formData
});
if (response.ok) {
  // response.json() returns a Promise; await it so the parsed body is
  // logged rather than a pending Promise object.
  console.log(await response.json());
} else {
  // A failed request may not carry a JSON body, so log the raw text.
  console.log(`Failed, status code: ${response.status}`);
  console.log(await response.text());
}
Response Parameters
{
"text": "[EXTRACTED TEXT]"
}