本文整理汇总了Python中pyocr.get_available_tools函数的典型用法代码示例。如果您正苦于以下问题：Python get_available_tools函数的具体用法？Python get_available_tools怎么用？Python get_available_tools使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_available_tools函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: pdf2text
def pdf2text(pdf_filename):
    """Run OCR over every page image of a PDF and return the text.

    The PDF is rasterised at 300 DPI with Wand, each image of the
    resulting sequence is encoded as JPEG, and every JPEG is handed to
    the first OCR tool reported by pyocr.

    :param pdf_filename: path of the PDF file to read.
    :return: list of recognised text, one entry per image in the
        converted sequence, in sequence order.
    :raises IndexError: if pyocr reports no OCR tool, or the tool
        reports fewer than two languages.
    """
    tool = pyocr.get_available_tools()[0]
    # NOTE(review): index 1 presumably picks the wanted language on the
    # author's machine; the list depends on the local OCR installation.
    lang = tool.get_available_languages()[1]

    page_blobs = []
    # Wand images wrap native ImageMagick resources; the context managers
    # release them as soon as the JPEG blobs have been extracted.
    with Image(filename=pdf_filename, resolution=300) as image_pdf:
        with image_pdf.convert('jpeg') as image_jpeg:
            for page in image_jpeg.sequence:
                with Image(image=page) as img_page:
                    page_blobs.append(img_page.make_blob('jpeg'))

    return [
        tool.image_to_string(
            PI.open(io.BytesIO(blob)),
            lang=lang,
            builder=pyocr.builders.TextBuilder()
        )
        for blob in page_blobs
    ]
开发者ID:jonesram,项目名称:manhattan-project-scratch,代码行数:26,代码来源:ocr_pdf.py
示例2: build_config_info
def build_config_info():
    '''Describe every OCR tool that pyocr can find.

    Returns a list with one dict per tool, holding the tool's ``name``
    and the ``langs`` it reports as available.
    '''
    infos = []
    for ocr_tool in pyocr.get_available_tools():
        infos.append({
            'name': ocr_tool.get_name(),
            'langs': ocr_tool.get_available_languages(),
        })
    return infos
开发者ID:tuttlem,项目名称:ocr,代码行数:7,代码来源:ocr.py
示例3: readFile
def readFile(self, pdfFile, mode = 'top-right' ):
    """OCR a cropped region of each page of a PDF into a ``.plain`` file.

    Pages are processed in order until one contains an end-of-transcript
    marker (one of the "case is submitted" / "Whereupon, at hh:mm"
    patterns below); the text of that page is still written.  The output
    goes to ``<outputDir><file name>.plain``.

    :param pdfFile: path of the PDF to read.
    :param mode: ``'top-right'`` selects crop preset 0, any other value
        selects crop preset 1.
    :raises IndexError: if pyocr reports no OCR tool, or the tool
        reports fewer than two languages.
    """
    self.__img_crop = self.__img_crop_modes[0 if mode == 'top-right' else 1]

    tool = pyocr.get_available_tools()[0]
    lang = tool.get_available_languages()[1]

    # this could've been done better, but for now will do
    # Plain raw strings replace the Python-2-only ``ur''`` prefix so the
    # block parses on Python 2 and 3; the patterns are unchanged.
    end_markers = (
        re.compile(r'The case is submitted\.\s*\(Whereupon,', re.UNICODE),
        re.compile(r'Case is submitted\.\s*\(Whereupon,', re.UNICODE),
        re.compile(r'\(?Whereupon, at \w\w:\w\w', re.UNICODE),  # the most-relaxed
    )

    # get file name
    out_path = self.__outputDir + FileHelper().GetFileName(pdfFile) + ".plain"

    # ``with`` guarantees the output file is flushed and closed, even when
    # the conversion or the OCR step raises.
    with open(out_path, 'w') as outfp:
        pieces = []
        with Image(filename=pdfFile, resolution=self.__dpi) as image_pdf:
            image_pngs = image_pdf.convert('png')
            for page_no, img in enumerate(image_pngs.sequence, 1):
                if self.__debug:
                    print("Parsing Page: " + str(page_no))

                cloneImg = img[self.__img_crop[0] : self.__img_crop[2],
                               self.__img_crop[1] : self.__img_crop[3]]
                cloneImg.alpha_channel = False
                self.evaluate(cloneImg, 'threshold', self.__threshold)

                txt = tool.image_to_string(
                    PI.open(io.BytesIO(cloneImg.make_blob('png'))),
                    lang=lang,
                    builder=pyocr.builders.TextBuilder()
                ) + "\n"
                pieces.append(self._clean_text(txt))

                # The markers are searched in the raw OCR text, not the
                # cleaned text.
                if any(marker.search(txt) is not None for marker in end_markers):
                    break

        outfp.write(''.join(pieces))
开发者ID:polakluk,项目名称:supreme-court-analysis,代码行数:34,代码来源:tesseractocr.py
示例4: testPdf
def testPdf(self, pdfFile, border):
    """Debug helper: OCR an image in two parts and report marker matches.

    The image is thresholded, split at ``border`` along the second
    slicing axis (presumably the vertical axis in Wand -- TODO confirm),
    and both parts are OCR'd and printed.  Finally ``TRUE`` or ``FALSE``
    is printed depending on whether the combined text contains one of
    the end-of-transcript markers.

    :param pdfFile: path of the file to open with Wand.
    :param border: split position used for the two slices.
    :raises IndexError: if pyocr reports no OCR tool, or the tool
        reports fewer than two languages.
    """
    # Plain raw strings replace the Python-2-only ``ur''`` prefix so the
    # block parses on Python 2 and 3; the patterns are unchanged.
    end_markers = (
        re.compile(r'The case is submitted\.\s*\(Whereupon,', re.UNICODE),
        re.compile(r'Case is submitted\.\s*\(Whereupon,', re.UNICODE),
        re.compile(r'\(?Whereupon, at \w\w:\w\w', re.UNICODE),  # the most-relaxed
    )

    tool = pyocr.get_available_tools()[0]
    lang = tool.get_available_languages()[1]

    def _ocr(part):
        # OCR one image slice and terminate the text with a newline.
        return tool.image_to_string(
            PI.open(io.BytesIO(part.make_blob('png'))),
            lang=lang,
            builder=pyocr.builders.TextBuilder()
        ) + "\n"

    with Image(filename = pdfFile, resolution=self.__dpi) as img:
        img.alpha_channel = False
        self.evaluate(img, 'threshold', self.__threshold)

        txt1 = _ocr(img[:, :border])
        txt2 = _ocr(img[:, border:])
        txt = txt1 + txt2

        # NOTE(review): the result was never used in the original either;
        # the call is kept in case _clean_text has side effects -- confirm
        # and remove.
        self._clean_text(txt1)

        print(txt1)
        print(txt2)
        if any(marker.search(txt) is not None for marker in end_markers):
            print( 'TRUE' )
        else:
            print('FALSE')
开发者ID:polakluk,项目名称:supreme-court-analysis,代码行数:29,代码来源:tesseractocr.py
示例5: image_to_string
def image_to_string(self, filename):
    """OCR an image with every available tool and collect all results.

    Tesseract is run on the original image only.  Every other tool is
    run on three variants: the original, a grayscale copy
    (``<name>_gray<ext>``) and an adaptive-threshold copy
    (``<name>_adt<ext>``).  Each run happens in its own
    ``_OCRProcessingThread``; the two generated files are removed
    afterwards.

    :param filename: path of the image to recognise.
    :return: list of each worker's ``return_value``, in the order the
        workers were started.
    :raises PyOCRIntegrationNoOCRFound: if pyocr reports no OCR tool.
    """
    tools = pyocr.get_available_tools()
    if not tools:
        raise PyOCRIntegrationNoOCRFound('No OCR tool has been found on '
                                         'this system. Make sure it\'s on '
                                         'PATH variable of your system')

    filename_split, fileextension_split = os.path.splitext(filename)

    grayscale_filename = filename_split + '_gray' + fileextension_split
    with WandImage(filename=filename) as img:
        img.type = 'grayscale'
        img.save(filename=grayscale_filename)

    adaptive_thresh_filename = filename_split + '_adt' + fileextension_split
    OpenCVIntegration.adaptive_threshold(filename, adaptive_thresh_filename)

    processes = []
    try:
        for tool in tools:
            if tool.get_name() == "Tesseract":
                inputs = (filename,)
            else:
                inputs = (filename, grayscale_filename,
                          adaptive_thresh_filename)
            for path in inputs:
                worker = self._OCRProcessingThread(tool, self.lang, path)
                worker.start()
                processes.append(worker)
    finally:
        # Block until every started worker has finished instead of
        # spinning on is_alive(), which kept a CPU core busy.
        # NOTE(review): assumes _OCRProcessingThread is a
        # threading.Thread subclass (it exposes start()/is_alive()).
        for worker in processes:
            worker.join()

        # Removing generated files, also when a worker could not be started
        self._cleanup(grayscale_filename)
        self._cleanup(adaptive_thresh_filename)

    return [worker.return_value for worker in processes]
开发者ID:omar331,项目名称:ocr-process-service,代码行数:60,代码来源:ocr.py
示例6: pdf2ocr
def pdf2ocr(pdffile):
    """
    Optical Character Recognition on PDF files using Python

    see https://pythontips.com/2016/02/25/ocr-on-pdf-files-using-python/

    The PDF is rasterised at 300 DPI, every image of the sequence is
    encoded as JPEG and passed to the first OCR tool pyocr reports.
    Progress messages are printed to stdout.

    :param pdffile: pdffile to be OCR'd
    :return: list of recognised text, one entry per image of the sequence
    :raises IndexError: if pyocr reports no OCR tool or no language
    """
    from wand.image import Image
    from PIL import Image as PI
    import pyocr
    import pyocr.builders
    import io

    tool = pyocr.get_available_tools()[0]
    # NOTE(review): the original comment says "[0] for english"; which
    # language sits at index 0 depends on the local OCR installation.
    lang = tool.get_available_languages()[0]

    req_image = []

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Reading {0}".format(pdffile))

    # Wand images hold native ImageMagick resources; release them as soon
    # as the JPEG blobs have been extracted.
    with Image(filename=pdffile, resolution=300) as image_pdf:
        with image_pdf.convert("jpeg") as image_jpeg:
            for img in image_jpeg.sequence:
                with Image(image=img) as img_page:
                    print("appending image")
                    req_image.append(img_page.make_blob("jpeg"))

    print("Generating text")

    return [
        tool.image_to_string(
            PI.open(io.BytesIO(blob)),
            lang=lang,
            builder=pyocr.builders.TextBuilder()
        )
        for blob in req_image
    ]
开发者ID:mfacorcoran,项目名称:utils,代码行数:29,代码来源:pdf2txt.py
示例7: extract_text
def extract_text(filename, output_filename):
    """OCR an image file and save the recognised text as UTF-8.

    Uses the first OCR tool and the first language pyocr reports, and
    prints the choices made along the way.  Exits the process with
    status 1 when no OCR tool is installed.

    :param filename: path of the image to recognise.
    :param output_filename: path of the text file to (over)write.
    """
    import io

    tools = pyocr.get_available_tools()
    if not tools:
        print("No OCR tool found")
        sys.exit(1)

    # The tools are returned in the recommended order of usage
    tool = tools[0]
    print("Will use tool '%s'" % (tool.get_name()))

    langs = tool.get_available_languages()
    print("Available languages: %s" % ", ".join(langs))
    lang = langs[0]
    print("Will use lang '%s'" % (lang))

    # Close the image file as soon as OCR is done.
    with Image.open(filename) as image:
        txt = tool.image_to_string(
            image,
            lang=lang,
            builder=pyocr.builders.TextBuilder()
        )

    # Writing txt.encode(...) to a text-mode file raises TypeError on
    # Python 3; io.open with an explicit encoding works on Python 2 and 3.
    # NOTE(review): assumes the OCR result is a text (unicode) string.
    with io.open(output_filename, 'w', encoding='utf-8') as txtfile:
        txtfile.write(txt)

    print("Text contents saved as '{}'".format(output_filename))
开发者ID:ssmall,项目名称:archive-scan,代码行数:25,代码来源:scan.py
示例8: test
def test(image_name):
    """
    Recognise a captcha image and print the result.

    Only works when the captcha sits right in the middle of the image.
    The pre-processed image is also saved as ``test.jpg``.

    :param image_name: path of the captcha image
    :return: None
    """
    with Image.open(image_name) as image:
        # Turn the colour image into a grayscale one.  There are many ways
        # to do that; here the image is converted to mode "L".
        gray = image.convert("L")

        # Remove the noise.  The image is simple enough for plain
        # thresholding; cut_noise is expected to map pixels above the
        # threshold to 1 and the rest to 0 via a lookup table.
        cleaned = cut_noise(gray)

        available = pyocr.get_available_tools()
        if not available:
            print("No OCR tool found")
            sys.exit(1)
        engine = available[0]

        cleaned.save("test.jpg")

        text_builder = pyocr.builders.TextBuilder()
        txt = engine.image_to_string(cleaned, lang="eng", builder=text_builder)

        # Digits - Only Tesseract
        digit_builder = pyocr.tesseract.DigitBuilder()
        digits = engine.image_to_string(cleaned, lang="eng", builder=digit_builder)

        print(txt)
        print(digits)
开发者ID:L1nwatch,项目名称:Mac-Python-3.X,代码行数:25,代码来源:captcha.py
示例9: picture2text
def picture2text(self, picture):
    """OCR *picture* as Japanese text and store the result in ``self.txt``.

    Uses the first OCR tool pyocr reports, with a text builder
    configured with ``tesseract_layout=6``.
    """
    first_tool = pyocr.get_available_tools()[0]
    text_builder = pyocr.builders.TextBuilder(tesseract_layout=6)
    self.txt = first_tool.image_to_string(picture, lang="jpn", builder=text_builder)
开发者ID:m-sakano,项目名称:unidice,代码行数:8,代码来源:unidice.py
示例10: GetLanguages
def GetLanguages(cls):
    """Return the languages of the 'Tesseract (sh)' tool joined by ``+``.

    :return: e.g. ``'deu+eng'``, or an empty string when pyocr does not
        report a tool named 'Tesseract (sh)'.
    """
    # Start from None so a missing tool yields '' instead of raising
    # UnboundLocalError at the check below.
    ocr = None
    for tool in pyocr.get_available_tools():
        if tool.get_name() == 'Tesseract (sh)':
            ocr = tool
            break

    if ocr:
        return '+'.join(ocr.get_available_languages())
    return ''
开发者ID:angoru,项目名称:ambar,代码行数:9,代码来源:ocrproxy.py
示例11: get_named_ocr_tool
def get_named_ocr_tool(toolname):
    '''Look up an OCR tool by the name it reports.

    Returns the first available tool whose ``get_name()`` equals
    ``toolname``, or None when there is no such tool.
    '''
    candidates = (
        candidate
        for candidate in pyocr.get_available_tools()
        if candidate.get_name() == toolname
    )
    return next(candidates, None)
开发者ID:tuttlem,项目名称:ocr,代码行数:9,代码来源:ocr.py
示例12: __init__
def __init__(self):
    """Initialise the OCR tool and the browser.

    Exits the process with status 1 when pyocr reports no OCR tool.
    """
    # Check for an OCR tool before launching Firefox, so that exiting
    # early does not leave an orphaned browser process behind.
    tools = pyocr.get_available_tools()
    if not tools:
        sys.exit(1)
    self.tool = tools[0]

    self.driver = webdriver.Firefox(executable_path='./geckodriver')

    self.im = None
    self.path = None
    # Four 4-tuples of pixel coordinates.
    # NOTE(review): presumably (left, upper, right, lower) crop boxes --
    # confirm against where crop_xy is consumed.
    self.crop_xy = [(35, 300, 770, 610), (35, 550, 770, 720), (35, 730, 770, 830), (35, 840, 770, 940)]
开发者ID:codehai,项目名称:bot,代码行数:10,代码来源:cddh.py
示例13: ocr_recipe_name
def ocr_recipe_name(coords, img):
    """OCR the region *coords* of *img* and normalise it into a name.

    Double quotes, parentheses and exclamation marks are dropped, spaces
    and slashes become underscores, and the result is lower-cased.
    """
    region = img.crop(coords)

    engine = pyocr.get_available_tools()[0]
    language = engine.get_available_languages()[2]
    name = engine.image_to_string(
        region, lang=language, builder=pyocr.builders.TextBuilder()
    )

    # Applied in the same order as the original chained replace() calls.
    substitutions = (
        ('"', ''),
        (' ', '_'),
        ('/', '_'),
        ('(', ''),
        (')', ''),
        ('!', ''),
    )
    for old, new in substitutions:
        name = name.replace(old, new)
    return name.lower()
开发者ID:pydo,项目名称:cookServeDelicious-Bot,代码行数:10,代码来源:async_utils.py
示例14: recognition_phone
def recognition_phone(phone_img_file):
    """OCR an image containing a phone number, print and return the text.

    :param phone_img_file: path (or file object) of the image to read.
    :return: the recognised text.
    :raises IndexError: if pyocr reports no OCR tool, or the tool
        reports fewer than two languages.
    """
    tools = pyocr.get_available_tools()
    tool = tools[0]
    langs = tool.get_available_languages()
    lang = langs[1]

    # The context manager closes the image file once OCR is done; the
    # original left the handle open.
    with Image.open(phone_img_file) as phone_img:
        phone_text = tool.image_to_string(
            phone_img, lang=lang, builder=pyocr.builders.TextBuilder()
        )

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(phone_text)
    return phone_text
开发者ID:novomirskoy,项目名称:python_parser,代码行数:10,代码来源:parser.py
示例15: ptoi
def ptoi(name):
    """Convert an image to 1-bit black and white and OCR it.

    The converted image is also saved as ``tmp.bmp`` in the current
    directory.  Exits the process with status 1 when no OCR tool is
    installed.

    :param name: path of the image file.
    :return: text recognised by the first available OCR tool.
    """
    # The original additionally opened the file in text mode and never
    # used that handle; Image.open alone is enough and already fails for
    # a missing file.
    with Image.open(name) as source:
        img = source.convert('1')
    img.save('tmp.bmp')

    tools = pyocr.get_available_tools()
    if not tools:
        print('No OCR tool found!')
        sys.exit(1)

    print("Using '%s'" % (tools[0].get_name()))
    return tools[0].image_to_string(img)
开发者ID:iaalm,项目名称:CheckDecoder,代码行数:11,代码来源:1.py
示例16: __init__
def __init__(self):
    """Pick the 'Tesseract (sh)' tool and build its language string.

    ``self.ocr`` is that tool, or None when pyocr does not report it;
    ``self.lang`` is its languages joined with ``+``, or None.
    """
    self.ocr = next(
        (
            candidate
            for candidate in pyocr.get_available_tools()
            if candidate.get_name() == 'Tesseract (sh)'
        ),
        None,
    )
    self.lang = '+'.join(self.ocr.get_available_languages()) if self.ocr else None
开发者ID:angoru,项目名称:ambar,代码行数:11,代码来源:ocrproxy.py
示例17: image_to_string
def image_to_string(args):
    """OCR one image from the scratch directory.

    ``args`` is an ``(img, lang)`` pair: the file name relative to
    ``Consumer.SCRATCH`` and the OCR language.  When the tool can detect
    orientation, the image is rotated by the detected angle first; a
    TesseractError during detection is ignored and the image is used as
    it is.
    """
    image_name, lang = args
    engine = pyocr.get_available_tools()[0]
    image_path = os.path.join(Consumer.SCRATCH, image_name)

    with Image.open(image_path) as opened:
        page = opened
        if engine.can_detect_orientation():
            try:
                detected = engine.detect_orientation(page, lang=lang)
                page = page.rotate(detected["angle"], expand=1)
            except TesseractError:
                # Best effort: fall back to the unrotated image.
                pass
        return engine.image_to_string(page, lang=lang)
开发者ID:pitkley,项目名称:paperless,代码行数:11,代码来源:consumer.py
示例18: check_required_software
def check_required_software():
    """Check that OCR software is installed and log what was found.

    Logs an informational message when one or more OCR tools are
    available.

    :raises PyOCRIntegrationNoOCRFound: if pyocr reports no OCR tool.
    """
    logger = logging.getLogger(__name__)
    tools = pyocr.get_available_tools()

    if not tools:
        # The original message stopped mid-sentence ("Make sure it's on").
        raise PyOCRIntegrationNoOCRFound('No OCR tool has been found on '
                                         'this system. Make sure it\'s on '
                                         'PATH variable of your system')

    if len(tools) == 1:
        # Pass the name as a logging argument so the message is only
        # formatted when INFO is enabled; the emitted text is unchanged.
        logger.info("I've found only one ocr tool [%s]. This is not exactly "
                    "an error but you should get better results if you have "
                    "both Tesseract and Cuneiform installed",
                    tools[0].get_name())
    else:
        logger.info("I've found all required software. We're good to go =)")
开发者ID:omar331,项目名称:ocr-process-service,代码行数:13,代码来源:ocr.py
示例19: ocr
def ocr(self, img, angles=None):
    """
    Schedule an OCR job for img and return the job.

    Returns immediately.
    Listen for the signal ocr-done to get the result.

    angles is forced to 0 when OCR is disabled in the configuration or
    no OCR tool is available; otherwise it defaults to 4 when None.
    """
    ocr_usable = (self.__config['ocr_enabled'].value and
                  len(pyocr.get_available_tools()) != 0)
    if not ocr_usable:
        angles = 0
    elif angles is None:
        angles = 4

    img.load()
    ocr_job = self.factories['ocr'].make(img, angles)
    self.schedulers['ocr'].schedule(ocr_job)
    return ocr_job
开发者ID:jflesch,项目名称:paperwork,代码行数:14,代码来源:scan.py
示例20: get_default_ocr_lang
def get_default_ocr_lang():
    """Guess the best OCR language from the system locale.

    Returns the locale language's ``iso639_3_code`` or, failing that,
    its ``terminology`` code when the first OCR tool supports it.
    Falls back to DEFAULT_OCR_LANG when no OCR tool is available or
    neither code is supported.
    """
    available_tools = pyocr.get_available_tools()
    if not available_tools:
        return DEFAULT_OCR_LANG

    supported = available_tools[0].get_available_languages()
    locale_lang = find_language()

    # Checked in the same order as the original: ISO 639-3 code first.
    for attr_name in ('iso639_3_code', 'terminology'):
        if hasattr(locale_lang, attr_name) and getattr(locale_lang, attr_name) in supported:
            return getattr(locale_lang, attr_name)
    return DEFAULT_OCR_LANG
开发者ID:jflesch,项目名称:paperwork,代码行数:15,代码来源:config.py
注：本文中的pyocr.get_available_tools函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论