本文整理汇总了Python中pyocr.tesseract.image_to_string函数的典型用法代码示例。如果您正苦于以下问题：Python image_to_string函数的具体用法？Python image_to_string怎么用？Python image_to_string使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了image_to_string函数的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_text
def test_text(image_file, lang='eng'):
    """Run OCR with a ``DigitBuilder`` on an image file and return the result.

    :param image_file: path of the image to open; it is also echoed to stdout.
    :param lang: language forwarded to tesseract (defaults to ``'eng'``).
    :return: the value produced by ``tesseract.image_to_string`` for the
        ``tesseract.DigitBuilder`` builder.
    """
    # The single-argument call form prints the same text on Python 2 and
    # Python 3; the bare ``print image_file`` statement only parses on 2.
    print(image_file)
    return tesseract.image_to_string(
        Image.open(image_file),
        lang=lang,
        builder=tesseract.DigitBuilder())
开发者ID:altsen,项目名称:SearchHouseOwner,代码行数:7,代码来源:digit_test.py
示例2: __test_txt
def __test_txt(self, image_file, expected_box_file, lang='eng'):
    """Compare the boxes OCR'd from an image with a stored reference file.

    :param image_file: image file name, resolved under ``tests/data/``.
    :param expected_box_file: reference file name, resolved under
        ``tests/tesseract/`` and read as UTF-8 through ``self.builder``.
    :param lang: language forwarded to tesseract.
    """
    image_path = "tests/data/" + image_file
    reference_path = "tests/tesseract/" + expected_box_file

    with codecs.open(reference_path, 'r', encoding='utf-8') as reference:
        expected_boxes = self.builder.read_file(reference)
    expected_boxes.sort()

    boxes = tesseract.image_to_string(
        Image.open(image_path), lang=lang, builder=self.builder)
    boxes.sort()

    self.assertTrue(len(boxes) > 0)
    self.assertEqual(len(boxes), len(expected_boxes))

    # zip() stops at the shorter list, matching the min() bound of an
    # index-based walk over both lists.
    for actual, expected in zip(boxes, expected_boxes):
        try:
            # python 2.7
            self.assertEqual(type(expected.content), unicode)
            self.assertEqual(type(actual.content), unicode)
        except NameError:
            # python 3
            self.assertEqual(type(expected.content), str)
            self.assertEqual(type(actual.content), str)
        self.assertEqual(actual, expected)
开发者ID:alistairwalsh,项目名称:pyocr,代码行数:26,代码来源:tests_tesseract.py
示例3: _read_from_img
def _read_from_img(self, image_path, lang=None):
    """Return the sorted OCR result for the image at *image_path*.

    The image is run through tesseract with ``self._builder``; the result is
    sorted in place before being returned.
    """
    image = Image.open(image_path)
    boxes = tesseract.image_to_string(image, lang=lang, builder=self._builder)
    boxes.sort()
    return boxes
开发者ID:ZhangXinNan,项目名称:pyocr,代码行数:9,代码来源:tests_base.py
示例4: find_secret_rects
def find_secret_rects(image, secret_res, lang, tesseract_configs=None):
    """
    Locate the rectangles that cover text matching any of the secret patterns.

    The image is OCR'd into boxes, the box contents are joined into a single
    string, and each regex match in that string is mapped back to the boxes
    it spans.

    param: Image image
    param: list secret_res -- objects exposing finditer() (compiled regexes)
    param: str lang
    param: str tesseract_configs
    return: list of rects
    rtype: list
    """
    # When using pyocr, an input image is converted to RGB (not RGBA), and
    # transparent pixels become BLACK during that conversion. Those black
    # pixels can hurt recognition: e.g. a macOS window screenshot
    # (Command+Shift+4+Space) has transparent pixels around the window, which
    # end up as a thick black border that degrades OCR of nearby text.
    # Pasting the image onto a WHITE background first avoids that.
    #
    # See: http://stackoverflow.com/questions/9166400/convert-rgba-png-to-rgb-with-pil
    if image.mode == 'RGBA':
        alpha_channel = image.split()[3]  # band [3] of an RGBA image is alpha
        flattened = Image.new('RGB', image.size, (255, 255, 255))
        flattened.paste(image, mask=alpha_channel)
        image = flattened

    # No cropping is applied at the moment, so the whole image is scanned and
    # the offset added back to each rect is zero.
    offset = (0, 0)
    cropped_image = image

    builder = ModifiedCharBoxBuilder(cropped_image.size[1])
    if tesseract_configs:
        builder.tesseract_configs = tesseract_configs
    boxes = image_to_string(cropped_image, lang=lang, builder=builder)

    if os.environ.get('DEBUG'):
        for box in boxes:
            print(box.content, box.position)

    content = ''.join(box.content for box in boxes)
    # String offsets are used as box indices below, so the joined text must
    # be exactly as long as the list of boxes.
    assert len(boxes) == len(content)

    secret_rects = []
    for secret_re in secret_res:
        for m in secret_re.finditer(content):
            positions = [b.position for b in boxes[m.start():m.end()]]
            secret_rects.extend(
                offset_rect(offset, padding_box(line_rect, 2))
                for line_rect in bounding_boxes_by_line(positions)
            )
    return secret_rects
开发者ID:orangain,项目名称:masecret,代码行数:56,代码来源:cli.py
示例5: __test_txt
def __test_txt(self, image_file, expected_output_file, lang="eng"):
    """Check that the text OCR'd from an image equals the stored expectation.

    :param image_file: image file name, resolved under ``tests/data/``.
    :param expected_output_file: file name under ``tests/tesseract/`` holding
        the expected text, read as UTF-8 and stripped of surrounding
        whitespace.
    :param lang: language forwarded to tesseract.
    """
    image_file = "tests/data/" + image_file
    expected_output_file = "tests/tesseract/" + expected_output_file
    with codecs.open(
            expected_output_file, "r", encoding="utf-8") as file_descriptor:
        # A single read() yields the same text as concatenating every line,
        # without rebuilding the string once per line.
        expected_output = file_descriptor.read().strip()
    output = tesseract.image_to_string(Image.open(image_file), lang=lang)
    self.assertEqual(output, expected_output)
开发者ID:guoyonggang178,项目名称:pyocr,代码行数:13,代码来源:tests_tesseract.py
示例6: __test_text
def __test_text(self, image_file, expected_output_file, lang='eng'):
    """Check OCR output produced with ``self.builder`` against a stored file.

    :param image_file: image file name, resolved under ``tests/data/``.
    :param expected_output_file: file name under ``tests/tesseract/`` holding
        the expected text, read as UTF-8 and stripped of surrounding
        whitespace.
    :param lang: language forwarded to tesseract.
    """
    image_file = "tests/data/" + image_file
    expected_output_file = "tests/tesseract/" + expected_output_file
    with codecs.open(expected_output_file, 'r', encoding='utf-8') \
            as file_descriptor:
        # A single read() yields the same text as concatenating every line,
        # without rebuilding the string once per line.
        expected_output = file_descriptor.read().strip()
    output = tesseract.image_to_string(
        Image.open(image_file), lang=lang, builder=self.builder)
    self.assertEqual(output, expected_output)
开发者ID:gsh45,项目名称:pyocr,代码行数:14,代码来源:tests_tesseract.py
示例7: __test_txt
def __test_txt(self, image_file, expected_output_file, lang='eng'):
    """Check that the text OCR'd from an image equals the stored expectation.

    :param image_file: image file name, resolved under
        ``tests/input/specific``.
    :param expected_output_file: file name under
        ``tests/output/specific/tesseract`` holding the expected text, read
        as UTF-8 and stripped of surrounding whitespace.
    :param lang: language forwarded to tesseract.
    """
    image_file = os.path.join("tests", "input", "specific", image_file)
    expected_output_file = os.path.join(
        "tests", "output", "specific", "tesseract", expected_output_file
    )
    with codecs.open(expected_output_file, 'r', encoding='utf-8') \
            as file_descriptor:
        # A single read() yields the same text as concatenating every line,
        # without rebuilding the string once per line.
        expected_output = file_descriptor.read().strip()
    output = tesseract.image_to_string(Image.open(image_file), lang=lang)
    self.assertEqual(output, expected_output)
开发者ID:TeisD,项目名称:pyocr,代码行数:16,代码来源:tests_tesseract.py
示例8: get_data
def get_data(self, region, rerun=0,
             image_optimizer=None, data_optimizer=None, threshold=None):
    '''Gets data out of a single region

    :param region: image region handed to tesseract (after optional
        optimization).
    :param rerun: incrementing integer for every rerun.
    :param image_optimizer: callback, takes PIL.Image and rerun as params.
    :param data_optimizer: callback, takes String and rerun as params.
    :param threshold: callback, takes String and should return False for a rerun.
    :return: the (optionally post-processed) OCR result.
    '''
    # Without an image optimizer the region is OCR'd as-is. The name used to
    # stay unbound in that case, so calling with the default arguments
    # failed before tesseract was ever reached.
    if image_optimizer:
        optiregion = image_optimizer(region, rerun)
    else:
        optiregion = region
    data = tesseract.image_to_string(optiregion)
    if data_optimizer:
        data = data_optimizer(data, rerun)
    if threshold and not threshold(data):
        # Result rejected: try again with the rerun counter bumped so the
        # callbacks can vary their behaviour. NOTE(review): nothing here
        # caps the number of reruns; that is left to the threshold callback.
        return self.get_data(
            region, rerun + 1,
            image_optimizer, data_optimizer, threshold)
    return data
开发者ID:barraponto,项目名称:sabesp,代码行数:19,代码来源:cropandread.py
示例9: test_write_read
def test_write_read(self):
    """Round-trip OCR boxes through a temporary file via ``self.builder``.

    The boxes read from ``tests/data/test.png`` are written out, read back,
    and must compare equal element by element. The temporary file is removed
    whether or not the assertions pass.
    """
    source_image = Image.open("tests/data/test.png")
    original_boxes = tesseract.image_to_string(
        source_image, builder=self.builder)
    self.assertTrue(len(original_boxes) > 0)

    handle, tmp_path = tempfile.mkstemp()
    try:
        # we must open the file with codecs.open() for utf-8 support
        os.close(handle)
        with codecs.open(tmp_path, "w", encoding="utf-8") as writer:
            self.builder.write_file(writer, original_boxes)
        with codecs.open(tmp_path, "r", encoding="utf-8") as reader:
            new_boxes = self.builder.read_file(reader)
        self.assertEqual(len(new_boxes), len(original_boxes))
        # Lengths were just asserted equal, so pairing covers every box.
        for restored, original in zip(new_boxes, original_boxes):
            self.assertEqual(restored, original)
    finally:
        os.remove(tmp_path)
开发者ID:guoyonggang178,项目名称:pyocr,代码行数:20,代码来源:tests_tesseract.py
示例10: getText
def getText(filename):
    """Return what tesseract recognizes in the image stored at *filename*."""
    return tesseract.image_to_string(Image.open(filename))
开发者ID:gtfierro,项目名称:invalidpatents,代码行数:4,代码来源:insertTexts.py
示例11:
from pyocr import tesseract
from PIL import Image
# Open the image to be recognized; the path is relative to the working
# directory.
imagefile = Image.open('code.jpg')
# Run OCR on the image; no lang or builder argument is passed.
val = tesseract.image_to_string(imagefile)
# Show the OCR result on stdout.
print(val)
开发者ID:wubiao239,项目名称:python,代码行数:5,代码来源:piltest.py
注：本文中的pyocr.tesseract.image_to_string函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论