• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python pyocr.get_available_tools函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中pyocr.get_available_tools函数的典型用法代码示例。如果您正苦于以下问题：Python get_available_tools函数的具体用法？Python get_available_tools怎么用？Python get_available_tools使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了get_available_tools函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: pdf2text

def pdf2text(pdf_filename):
    """OCR every page of a PDF and return the recognised text per page.

    The PDF is opened at a resolution of 300, converted to JPEG, and each
    page is handed as a JPEG blob to the first OCR tool pyocr reports.

    :param pdf_filename: path of the PDF file to read.
    :return: list with one text string per page, in page order.
    :raises IndexError: if pyocr reports no tool, or the tool reports
        fewer than two languages.
    """
    tool = pyocr.get_available_tools()[0]
    # NOTE(review): index 1 is kept from the original code; which language
    # that selects depends on the local OCR installation -- confirm.
    lang = tool.get_available_languages()[1]

    page_blobs = []
    # The images are used as context managers so their resources are
    # released as soon as the blobs exist; they were previously never closed.
    # (assumes Image is wand.image.Image -- confirm against the file imports)
    with Image(filename=pdf_filename, resolution=300) as image_pdf:
        with image_pdf.convert('jpeg') as image_jpeg:
            for page in image_jpeg.sequence:
                with Image(image=page) as img_page:
                    page_blobs.append(img_page.make_blob('jpeg'))

    final_text = []
    for blob in page_blobs:
        txt = tool.image_to_string(
            PI.open(io.BytesIO(blob)),
            lang=lang,
            builder=pyocr.builders.TextBuilder()
        )
        final_text.append(txt)

    return final_text
开发者ID:jonesram,项目名称:manhattan-project-scratch,代码行数:26,代码来源:ocr_pdf.py


示例2: build_config_info

def build_config_info():
	'''Describe every OCR tool that pyocr reports as available.

	Returns a list with one dict per tool, holding the tool's 'name' and
	the 'langs' it reports as available.
	'''
	report = []
	for ocr_tool in pyocr.get_available_tools():
		entry = {
			'name': ocr_tool.get_name(),
			'langs': ocr_tool.get_available_languages(),
		}
		report.append(entry)
	return report
开发者ID:tuttlem,项目名称:ocr,代码行数:7,代码来源:ocr.py


示例3: readFile

	def readFile(self, pdfFile, mode = 'top-right' ):
		mode = 0 if mode == 'top-right' else 1
		self.__img_crop = self.__img_crop_modes[mode]
		tool = pyocr.get_available_tools()[0]
		lang = tool.get_available_languages()[1]

		# this could've been done better, but for now will do
		reCaseSubmitted = re.compile(ur'The case is submitted\.\s*\(Whereupon,', re.UNICODE)
		reCaseSubmitted2 = re.compile(ur'Case is submitted\.\s*\(Whereupon,', re.UNICODE)
		reCaseSubmitted3 = re.compile(ur'\(?Whereupon, at \w\w:\w\w', re.UNICODE) # the most-relaxed

		# get file name
		helper = FileHelper()
		outfp = open(self.__outputDir + helper.GetFileName(pdfFile) + ".plain", 'w')

		with Image(filename=pdfFile, resolution=self.__dpi) as image_pdf:
			image_pngs = image_pdf.convert('png')
			idx = 0
			output_text = ''
			for img in image_pngs.sequence:
				if self.__debug:
					print "Parsing Page: " + str(idx + 1)
				cloneImg = img[self.__img_crop[0] : self.__img_crop[2], self.__img_crop[1] : self.__img_crop[3] ]
				cloneImg.alpha_channel = False
#				cloneImg.save(filename = './img_{}.png'.format(idx))
				self.evaluate( cloneImg, 'threshold', self.__threshold)

				txt = tool.image_to_string( PI.open(io.BytesIO(cloneImg.make_blob('png'))), lang=lang, builder=pyocr.builders.TextBuilder())+ "\n"
				output_text = output_text + self._clean_text(txt)
				if reCaseSubmitted.search(txt) != None or reCaseSubmitted2.search(txt) != None or reCaseSubmitted3.search(txt) != None:
					break
				idx += 1

		outfp.write(output_text)
开发者ID:polakluk,项目名称:supreme-court-analysis,代码行数:34,代码来源:tesseractocr.py


示例4: testPdf

	def testPdf(self, pdfFile, border):
		reCaseSubmitted = re.compile(ur'The case is submitted\.\s*\(Whereupon,', re.UNICODE)
		reCaseSubmitted2 = re.compile(ur'Case is submitted\.\s*\(Whereupon,', re.UNICODE)
		reCaseSubmitted3 = re.compile(ur'\(?Whereupon, at \w\w:\w\w', re.UNICODE)  # the most-relaxed
		tool = pyocr.get_available_tools()[0]
		lang = tool.get_available_languages()[1]

		req_image = []
		final_text = []
		# this could've been done better, but for now will do
		idx = 0
		with Image(filename = pdfFile, resolution=self.__dpi) as img:
#			if self.__debug:
#				print "Parsing Page: " + str(idx + 1)
			cloneImg =img

			cloneImg.alpha_channel = False
			self.evaluate( cloneImg, 'threshold', self.__threshold)

			txt1 = tool.image_to_string( PI.open(io.BytesIO(cloneImg[:, :border].make_blob('png'))), lang=lang, builder=pyocr.builders.TextBuilder())+ "\n"
			txt2 = tool.image_to_string( PI.open(io.BytesIO(cloneImg[:, border:].make_blob('png'))), lang=lang, builder=pyocr.builders.TextBuilder())+ "\n"
			txt = txt1+txt2
			test = self._clean_text(txt1)
			print(txt1)
			print(txt2)
			if reCaseSubmitted.search(txt) != None or reCaseSubmitted2.search(txt) != None or reCaseSubmitted3.search(txt) != None:
				print( 'TRUE' )
			else:
				print('FALSE')
开发者ID:polakluk,项目名称:supreme-court-analysis,代码行数:29,代码来源:tesseractocr.py


示例5: image_to_string

    def image_to_string(self, filename):
        """Run every available OCR tool on *filename* and collect the results.

        A grayscale copy (``<name>_gray<ext>``) and an adaptive-threshold copy
        (``<name>_adt<ext>``) are written next to the input. The tool named
        "Tesseract" is run on the original image only; every other tool is
        run on the original and on both copies. Each run happens in its own
        ``_OCRProcessingThread``; the two generated files are removed once
        all threads have finished.

        :param filename: path of the image to recognise.
        :return: list of each thread's ``return_value``, in start order.
        :raises PyOCRIntegrationNoOCRFound: if pyocr reports no tool.
        """
        tools = pyocr.get_available_tools()
        if not tools:
            # A trailing space was missing here, yielding "it's onPATH".
            raise PyOCRIntegrationNoOCRFound('No OCR tool has been found on '
                                             'this system. Make sure it\'s on '
                                             'PATH variable of your system')

        filename_split, fileextension_split = os.path.splitext(filename)

        grayscale_filename = filename_split + '_gray' + fileextension_split
        with WandImage(filename=filename) as img:
            img.type = 'grayscale'
            img.save(filename=grayscale_filename)

        adaptive_thresh_filename = filename_split + '_adt' + fileextension_split
        OpenCVIntegration.adaptive_threshold(filename, adaptive_thresh_filename)

        processes = []
        for tool in tools:
            if tool.get_name() == "Tesseract":
                inputs = (filename,)
            else:
                inputs = (filename, grayscale_filename,
                          adaptive_thresh_filename)

            for path in inputs:
                worker = self._OCRProcessingThread(tool, self.lang, path)
                worker.start()
                processes.append(worker)

        # Block until every worker is done instead of spinning on is_alive().
        # NOTE(review): assumes _OCRProcessingThread subclasses
        # threading.Thread (it already exposes start()/is_alive()) -- confirm.
        for worker in processes:
            worker.join()
        result = [worker.return_value for worker in processes]

        # Removing generated files
        self._cleanup(grayscale_filename)
        self._cleanup(adaptive_thresh_filename)

        return result
开发者ID:omar331,项目名称:ocr-process-service,代码行数:60,代码来源:ocr.py


示例6: pdf2ocr

def pdf2ocr(pdffile):
    """
    Optical Character Recognition on PDF files using Python
    see https://pythontips.com/2016/02/25/ocr-on-pdf-files-using-python/

    Progress messages are printed while pages are converted and recognised.

    :param pdffile: pdffile to be OCR'd
    :return: list of recognised text, one entry per page in page order
    :raises IndexError: if pyocr reports no tool or the tool no language
    """
    from wand.image import Image
    from PIL import Image as PI
    import pyocr
    import pyocr.builders
    import io

    tool = pyocr.get_available_tools()[0]
    # NOTE(review): the original comment claimed index 0 is English; the
    # order depends on the local OCR installation -- confirm.
    lang = tool.get_available_languages()[0]
    req_image = []
    final_text = []
    # print() with a single argument behaves the same on Python 2 and 3.
    print("Reading {0}".format(pdffile))
    # Context managers release the wand images, which were never closed.
    with Image(filename=pdffile, resolution=300) as image_pdf:
        with image_pdf.convert("jpeg") as image_jpeg:
            for img in image_jpeg.sequence:
                with Image(image=img) as img_page:
                    print("appending image")
                    req_image.append(img_page.make_blob("jpeg"))
    print("Generating text")
    for blob in req_image:
        txt = tool.image_to_string(PI.open(io.BytesIO(blob)), lang=lang, builder=pyocr.builders.TextBuilder())
        final_text.append(txt)
    return final_text
开发者ID:mfacorcoran,项目名称:utils,代码行数:29,代码来源:pdf2txt.py


示例7: extract_text

def extract_text(filename, output_filename):
    """OCR an image file and save the recognised text as UTF-8.

    Uses the first tool pyocr reports and the first language that tool
    reports, printing both choices. Exits the process with status 1 when
    no OCR tool is available.

    :param filename: path of the image to recognise.
    :param output_filename: path of the text file to (over)write.
    :raises IndexError: if the chosen tool reports no language.
    """
    # Local import so the block does not rely on a file-level "import io".
    import io

    tools = pyocr.get_available_tools()
    if not tools:
        print("No OCR tool found")
        sys.exit(1)
    # The tools are returned in the recommended order of usage
    tool = tools[0]
    print("Will use tool '%s'" % (tool.get_name()))

    langs = tool.get_available_languages()
    print("Available languages: %s" % ", ".join(langs))
    lang = langs[0]
    print("Will use lang '%s'" % (lang))

    # Close the image once OCR is done; it used to stay open.
    with Image.open(filename) as image:
        txt = tool.image_to_string(
            image,
            lang=lang,
            builder=pyocr.builders.TextBuilder()
        )

    # Writing txt.encode(...) to a text-mode file raises TypeError on
    # Python 3; io.open encodes on write and works on Python 2 and 3.
    # NOTE(review): assumes txt is a unicode string, as the original
    # .encode("UTF-8") call implies -- confirm.
    with io.open(output_filename, 'w', encoding='utf-8') as txtfile:
        txtfile.write(txt)

    print("Text contents saved as '{}'".format(output_filename))
开发者ID:ssmall,项目名称:archive-scan,代码行数:25,代码来源:scan.py


示例8: test

def test(image_name):
    """
    Only works when the captcha sits right in the middle of the image.
    :param image_name: value handed to Image.open
    :return: None; both OCR results are printed
    """
    with Image.open(image_name) as source:
        # Turn the colour image into a grayscale one (mode "L"). The original
        # note describes this as taking the L component after an RGB -> HSI
        # conversion.
        gray = source.convert("L")

        # Strip the noise. Per the original note the image is simple enough
        # for plain thresholding: pixels above the threshold become 1, the
        # rest 0, done through a lookup table (cut_noise is defined elsewhere).
        cleaned = cut_noise(gray)

        available = pyocr.get_available_tools()
        if not available:
            print("No OCR tool found")
            sys.exit(1)
        engine = available[0]
        cleaned.save("test.jpg")

        text_result = engine.image_to_string(
            cleaned,
            lang="eng",
            builder=pyocr.builders.TextBuilder(),
        )
        # Digits - Only Tesseract
        digit_result = engine.image_to_string(
            cleaned,
            lang="eng",
            builder=pyocr.tesseract.DigitBuilder(),
        )
        print(text_result)
        print(digit_result)
开发者ID:L1nwatch,项目名称:Mac-Python-3.X,代码行数:25,代码来源:captcha.py


示例9: picture2text

 def picture2text(self, picture):
     """Recognise *picture* with lang "jpn" and store the text in ``self.txt``.

     The first tool pyocr reports is used; IndexError is raised when there
     is none.
     """
     engine = pyocr.get_available_tools()[0]
     # tesseract_layout=6 is handed to the builder unchanged.
     text_builder = pyocr.builders.TextBuilder(tesseract_layout=6)
     self.txt = engine.image_to_string(picture, lang="jpn", builder=text_builder)
开发者ID:m-sakano,项目名称:unidice,代码行数:8,代码来源:unidice.py


示例10: GetLanguages

 def GetLanguages(cls):
     """Return the languages of the 'Tesseract (sh)' tool joined with '+'.

     Returns '' when pyocr reports no tool with that name. Previously that
     case raised UnboundLocalError because ``ocr`` was never assigned.
     """
     ocr = next(
         (tool for tool in pyocr.get_available_tools()
          if tool.get_name() == 'Tesseract (sh)'),
         None,
     )
     if not ocr:
         return ''
     return '+'.join(ocr.get_available_languages())
开发者ID:angoru,项目名称:ambar,代码行数:9,代码来源:ocrproxy.py


示例11: get_named_ocr_tool

def get_named_ocr_tool(toolname):
	'''Look up an available OCR tool by the name it reports.

	Returns the first tool whose get_name() equals toolname, or None when
	no available tool has that name.
	'''
	matching = (
		candidate
		for candidate in pyocr.get_available_tools()
		if candidate.get_name() == toolname
	)
	return next(matching, None)
开发者ID:tuttlem,项目名称:ocr,代码行数:9,代码来源:ocr.py


示例12: __init__

	def __init__(self):
		"""Initialise the OCR tool and the Firefox browser driver.

		Exits the process with status 1 when pyocr reports no OCR tool.
		"""
		# The OCR tool is looked up before the browser is launched, so a
		# missing tool no longer leaves a Firefox instance behind on exit.
		tools = pyocr.get_available_tools()
		if not tools:
			sys.stderr.write('No OCR tool found\n')
			sys.exit(1)
		self.tool = tools[0]
		self.driver = webdriver.Firefox(executable_path='./geckodriver')
		self.im = None
		self.path = None
		# NOTE(review): presumably (left, upper, right, lower) crop boxes in
		# pixels -- confirm against the code that reads crop_xy.
		self.crop_xy = [(35, 300, 770, 610), (35, 550, 770, 720), (35, 730, 770, 830), (35, 840, 770, 940)]
开发者ID:codehai,项目名称:bot,代码行数:10,代码来源:cddh.py


示例13: ocr_recipe_name

def ocr_recipe_name(coords, img):
    """OCR the *coords* region of *img* and normalise the text into a name.

    Double quotes, parentheses and exclamation marks are dropped, spaces
    and slashes become underscores, and the result is lower-cased. The
    first pyocr tool and the third language it reports are used.
    """
    region = img.crop(coords)
    engine = pyocr.get_available_tools()[0]
    language = engine.get_available_languages()[2]
    name = engine.image_to_string(
        region, lang=language, builder=pyocr.builders.TextBuilder()
    )

    substitutions = (
        ('"', ''),
        (' ', '_'),
        ('/', '_'),
        ('(', ''),
        (')', ''),
        ('!', ''),
    )
    for old, new in substitutions:
        name = name.replace(old, new)
    return name.lower()
开发者ID:pydo,项目名称:cookServeDelicious-Bot,代码行数:10,代码来源:async_utils.py


示例14: recognition_phone

def recognition_phone(phone_img_file):
    """OCR the image at *phone_img_file*, print the text and return it.

    Uses the first tool pyocr reports and the second language that tool
    reports.

    :param phone_img_file: value handed to Image.open.
    :return: the recognised text.
    :raises IndexError: if pyocr reports no tool, or the tool reports
        fewer than two languages.
    """
    tool = pyocr.get_available_tools()[0]
    lang = tool.get_available_languages()[1]
    # Close the image as soon as OCR is done; it used to stay open.
    with Image.open(phone_img_file) as phone_img:
        phone_text = tool.image_to_string(phone_img, lang=lang, builder=pyocr.builders.TextBuilder())

    # print() with a single argument behaves the same on Python 2 and 3.
    print(phone_text)

    return phone_text
开发者ID:novomirskoy,项目名称:python_parser,代码行数:10,代码来源:parser.py


示例15: ptoi

def ptoi(name):
	"""Convert the image *name* to mode '1' and return its OCR text.

	The converted image is also saved as 'tmp.bmp'. The first tool pyocr
	reports is used with its default settings; the process exits with
	status 1 when there is none.
	"""
	# Image.open() opens the file itself. The text-mode open() that used to
	# wrap it was never read from and has been dropped; the image is now
	# closed once the converted copy exists.
	with Image.open(name) as source:
		img = source.convert('1')
	img.save('tmp.bmp')
	tools = pyocr.get_available_tools()
	if not tools:
		print('No OCR tool found!')
		sys.exit(1)
	print("Using '%s'" % (tools[0].get_name()))
	return tools[0].image_to_string(img)
开发者ID:iaalm,项目名称:CheckDecoder,代码行数:11,代码来源:1.py


示例16: __init__

    def __init__(self):
        """Pick the 'Tesseract (sh)' tool and record its languages.

        ``self.ocr`` is that tool, or None when pyocr does not report it.
        ``self.lang`` is the tool's languages joined with '+', or None.
        """
        self.ocr = None
        self.lang = None

        wanted = 'Tesseract (sh)'
        self.ocr = next(
            (candidate for candidate in pyocr.get_available_tools()
             if candidate.get_name() == wanted),
            None,
        )
        if not self.ocr:
            return
        self.lang = '+'.join(self.ocr.get_available_languages())
开发者ID:angoru,项目名称:ambar,代码行数:11,代码来源:ocrproxy.py


示例17: image_to_string

def image_to_string(args):
    """OCR one image from the Consumer.SCRATCH directory.

    *args* is an ``(image file name, language)`` pair. When the first pyocr
    tool can detect orientation, the image is rotated by the detected angle
    before recognition.
    """
    image_name, language = args
    engine = pyocr.get_available_tools()[0]
    image_path = os.path.join(Consumer.SCRATCH, image_name)
    with Image.open(image_path) as page:
        if not engine.can_detect_orientation():
            return engine.image_to_string(page, lang=language)
        try:
            detected = engine.detect_orientation(page, lang=language)
            page = page.rotate(detected["angle"], expand=1)
        except TesseractError:
            # Orientation detection failed: recognise the image unrotated.
            pass
        return engine.image_to_string(page, lang=language)
开发者ID:pitkley,项目名称:paperless,代码行数:11,代码来源:consumer.py


示例18: check_required_software

 def check_required_software():
     """Check that at least one OCR tool is available and log what was found.

     Logs an informational note when only one tool is present.

     :raises PyOCRIntegrationNoOCRFound: if pyocr reports no tool.
     """
     logger = logging.getLogger(__name__)
     tools = pyocr.get_available_tools()
     if not tools:
         # The message used to stop mid-sentence after "Make sure it's on".
         raise PyOCRIntegrationNoOCRFound('No OCR tool has been found on '
                                          'this system. Make sure it\'s on '
                                          'PATH variable of your system')
     if len(tools) == 1:
         # The tool name is passed as a logging argument so the message is
         # only formatted when the record is actually emitted.
         logger.info("I've found only one ocr tool [%s]. This is not exactly "
                     "an error but you should get better results if you have "
                     "both Tesseract and Cuneiform installed",
                     tools[0].get_name())
     else:
         logger.info("I've found all required software. We're good to go =)")
开发者ID:omar331,项目名称:ocr-process-service,代码行数:13,代码来源:ocr.py


示例19: ocr

 def ocr(self, img, angles=None):
     """
     Create an OCR job for img, hand it to the 'ocr' scheduler and return it.

     angles is forced to 0 when OCR is disabled in the configuration or
     pyocr reports no tool; otherwise it defaults to 4 when not given.

     Returns immediately.
     Listen for the signal ocr-done to get the result
     """
     ocr_possible = (self.__config['ocr_enabled'].value and
                     len(pyocr.get_available_tools()) > 0)
     if not ocr_possible:
         angles = 0
     elif angles is None:
         angles = 4
     img.load()
     ocr_job = self.factories['ocr'].make(img, angles)
     self.schedulers['ocr'].schedule(ocr_job)
     return ocr_job
开发者ID:jflesch,项目名称:paperwork,代码行数:14,代码来源:scan.py


示例20: get_default_ocr_lang

def get_default_ocr_lang():
    """Pick the OCR language to use by default.

    Per the original comment, this tries to guess the best OCR language
    from the system locale. Returns DEFAULT_OCR_LANG when pyocr reports no
    tool, or when neither code of the language returned by find_language()
    is supported by the first tool.
    """
    available_tools = pyocr.get_available_tools()
    if not available_tools:
        return DEFAULT_OCR_LANG
    supported = available_tools[0].get_available_languages()

    locale_lang = find_language()
    # Order matters: iso639_3_code is preferred over terminology.
    for attr_name in ('iso639_3_code', 'terminology'):
        if not hasattr(locale_lang, attr_name):
            continue
        code = getattr(locale_lang, attr_name)
        if code in supported:
            return code
    return DEFAULT_OCR_LANG
开发者ID:jflesch,项目名称:paperwork,代码行数:15,代码来源:config.py



注:本文中的pyocr.get_available_tools函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python tesseract.image_to_string函数代码示例发布时间:2022-05-27
下一篇:
Python pyobjus.autoclass函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap