Перейти к основному содержимому

Python

import json, requests, time, json

class ExtractorClient:
    """Thin HTTP client for the extractor REST API at extractor.correct.su.

    Every request carries an ``Authorization: Bearer <token>`` header built
    from the token given to the constructor. Each public method raises
    ``requests.HTTPError`` (via ``raise_for_status``) on a 4xx/5xx response.
    """

    def __init__(self, authTokenBase64, timeout=None):
        """Store the credentials used for all subsequent requests.

        Args:
            authTokenBase64: Token sent verbatim after ``Bearer `` in the
                ``Authorization`` header.
            timeout: Optional timeout in seconds (or a ``(connect, read)``
                tuple) forwarded to ``requests``. ``None`` keeps the
                original behavior of waiting indefinitely.
        """
        self.authTokenBase64 = authTokenBase64
        self.timeout = timeout

    def __getUrl(self, path):
        """Return the absolute service URL for a relative API *path*."""
        return 'https://extractor.correct.su/' + path

    def __getHeaders(self, contentType=None):
        """Build request headers, adding ``Content-Type`` only when given."""
        headers = {
            'Authorization': 'Bearer ' + self.authTokenBase64
        }
        if contentType:
            headers['Content-Type'] = contentType
        return headers

    def __send(self, method, path, contentType=None, files=None):
        """Issue one HTTP request and return the raw ``requests`` response."""
        return requests.request(
            method,
            self.__getUrl(path),
            headers=self.__getHeaders(contentType),
            files=files,
            timeout=self.timeout,
        )

    def __postJson(self, path):
        """POST with an empty body and a JSON content type."""
        return self.__send('POST', path, contentType='application/json')

    def __postFile(self, path, fileName):
        """POST the file at *fileName* as multipart form field ``content``."""
        # The context manager closes the handle once the upload finishes;
        # the original code left it open.
        with open(fileName, 'rb') as fileObject:
            # No explicit Content-Type: requests must generate the
            # multipart boundary header itself.
            return self.__send('POST', path, files=[('content', fileObject)])

    def __get(self, path):
        """GET *path* with a JSON content type header."""
        return self.__send('GET', path, contentType='application/json')

    def createPackage(self):
        """Create a package and return its ``packageId``."""
        response = self.__postJson('api/packages')
        response.raise_for_status()
        return response.json()['packageId']

    def uploadImage(self, packageId, fileName):
        """Upload an image file to a package and return the ``imageIds``."""
        response = self.__postFile('api/images/package/%s' % packageId, fileName)
        response.raise_for_status()
        return response.json()['imageIds']

    def getImage(self, imageId):
        """Return the raw bytes of the image with the given identifier."""
        response = self.__get('api/images/%s' % imageId)
        response.raise_for_status()
        return response.content

    def getImages(self, packageId):
        """Return the ``images`` entry (image DTOs) for a package."""
        response = self.__get('api/images/package/%s' % packageId)
        response.raise_for_status()
        return response.json()['images']

    def getDocument(self, packageId, documentId):
        """Return the raw bytes of the PDF with a document's images."""
        response = self.__get(f'api/images/package/{packageId}/document/{documentId}')
        response.raise_for_status()
        return response.content

    def startRecognition(self, packageId):
        """Start recognition of a package. Returns nothing."""
        response = self.__postJson('api/packages/%s/start' % packageId)
        response.raise_for_status()

    def getRecognitionResult(self, packageId, includeStamps=True, includeSignatures=True, includeBarcodes=True, includeUnrecognizedTexts=True):
        """Return the decoded JSON recognition result of a package.

        The four ``include*`` flags are passed through as query parameters
        using their Python string form (``True`` / ``False``).
        """
        response = self.__get(
            f'api/packages/{packageId}'
            f'?includeStamps={includeStamps}'
            f'&includeSignatures={includeSignatures}'
            f'&includeBarcodes={includeBarcodes}'
            f'&includeUnrecognizedTexts={includeUnrecognizedTexts}'
        )
        response.raise_for_status()
        return response.json()

# Example: recognize a package end to end
print('CORRECT SaaS client started')

extractorClient = ExtractorClient('INSERT_TOKEN_HERE')
packageId = extractorClient.createPackage()
print('created package %s' % packageId)
imagePaths = [
    'd:\\correct-saas-client\\sources\\1.jpg',
    'd:\\correct-saas-client\\sources\\2.png',
]
for imagePath in imagePaths:
    extractorClient.uploadImage(packageId, imagePath)
    print('uploaded image %s' % imagePath)
extractorClient.startRecognition(packageId)
print('recognition is started')

# Poll every 10 seconds until the package reports the 'Recognized' state.
# NOTE(review): the loop has no exit for any other terminal state or for a
# maximum wait time - confirm which states the service can return.
result = None
recognized = False
while not recognized:
    time.sleep(10)
    result = extractorClient.getRecognitionResult(packageId)
    recognized = result['state'] == 'Recognized'
    if not recognized:
        print('current recognition state is %s' % result['state'])

print('package is recognized')


# Save the recognized package as JSON
packageFileName = f'd:\\correct-saas-client\\results\\package{packageId}.json'
# Written as UTF-8 bytes with non-ASCII characters kept unescaped; the
# context manager closes the file even if serialization or the write fails.
with open(packageFileName, 'wb') as packageFile:
    packageFile.write(json.dumps(result, ensure_ascii=False).encode('utf-8'))
print('package recognition result saved to file %s' % packageFileName)

imageDtos = extractorClient.getImages(packageId)


# Save the correctly rotated and aligned images
for imageDto in imageDtos:
    imageId = imageDto['id']
    data = extractorClient.getImage(imageId)
    imageFileName = f'd:\\correct-saas-client\\results\\package{packageId}-image{imageId}.png'
    # The context manager closes the file even if the write fails.
    with open(imageFileName, 'wb') as imageFile:
        imageFile.write(data)
    print(f'rotated and aligned image {imageId} saved to file {imageFileName}')


# Save a PDF with the images of each document
for document in result['documents']:
    documentId = document['id']
    documentType = document['docType']
    data = extractorClient.getDocument(packageId, documentId)
    documentFileName = f'd:\\correct-saas-client\\results\\package{packageId}-document{documentId}-{documentType}.pdf'
    # getDocument returns the response body, which is written as-is (no
    # intermediate bytearray copy); the context manager closes the file
    # even if the write fails.
    with open(documentFileName, 'wb') as documentFile:
        documentFile.write(data)
    print(f'document "{documentType}" with id {documentId} saved to file {documentFileName}')

print('CORRECT SaaS client finished')