• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python writer.PlaintextWriter类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 pyth.plugins.plaintext.writer.PlaintextWriter 的典型用法代码示例。如果您正苦于以下问题：Python PlaintextWriter 类的具体用法？Python PlaintextWriter 怎么用？Python PlaintextWriter 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了 PlaintextWriter 类的 20 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。

示例1: read_rtf_text

def read_rtf_text(fp, errors='strict', encoding='utf-8'):
    """Return the text-like content of an RTF stream as a decoded string.

    :param fp: object handed to ``CustomRtf15Reader.read`` as the RTF source.
    :param errors: error-handling mode forwarded to ``CustomRtf15Reader.read``.
    :param encoding: codec used to decode the output of ``PlaintextWriter``.
    :return: the decoded plain text.
    """
    doc = CustomRtf15Reader.read(fp, errors=errors)

    for paragraph in doc.content:
        # Build a real list: on Python 3 ``filter`` returns a one-shot lazy
        # iterator, which would leave ``content`` unusable as a sequence.
        paragraph.content = [
            item for item in paragraph.content
            if paragraph_is_text_like(item)
        ]

    return PlaintextWriter.write(doc).read().decode(encoding)
开发者ID:labhackercd,项目名称:novo-retorica,代码行数:7,代码来源:utils.py


示例2: convert_to_txt

def convert_to_txt(file_path):
    """Return the text content of the file at ``file_path``.

    The reader is chosen from the file extension: ``.txt``, ``.docx`` and
    ``.rtf`` have dedicated branches; any other extension is read through
    ``codecs.open`` using ``ENCODING_UTF_8``.

    :param file_path: path of the file to convert.
    :return: the extracted text; an empty string when a ``.txt`` file cannot
        be read.
    :raises OSError: if ``file_path`` does not exist (raised by ``os.stat``).
    """
    logger.debug("convert_to_txt: %s" % file_path)
    words = None
    if not os.path.exists(file_path):
        # Only logged here; the os.stat call below still raises for a
        # missing file, exactly as before.
        logger.error("missing file %s", file_path)
    file_size = os.stat(file_path).st_size
    logger.debug("convert_to_txt: %d bytes at %s", file_size, file_path)
    ext = _get_extension(file_path)
    if ext == '.txt':
        logger.debug("loading txt file")
        try:
            encoding, file_handle, words = open_with_correct_encoding(file_path)
        except Exception:
            # Best effort: an unreadable text file yields empty content.
            logger.error("Wasn't able to read the words from the file %s" % file_path)
            words = ""
    elif ext == '.docx':
        logger.debug("loading docx file")
        words = _docx_to_txt(file_path)
    elif ext == '.rtf':
        logger.debug("loading rtf file")
        # Close the handle as soon as the document has been parsed.
        with open(file_path) as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
        words = PlaintextWriter.write(doc).getvalue()
    else:
        # Use the module logger like every other message in this function.
        logger.warning("Couldn't find an extension on the file, so assuming text")
        with codecs.open(file_path, 'r', ENCODING_UTF_8) as myfile:
            words = myfile.read()
    logger.debug("loaded %d chars" % len(words))
    return words
开发者ID:c4fcm,项目名称:DataBasic,代码行数:29,代码来源:filehandler.py


示例3: GetExternal

def GetExternal(version, odl_data, source, class_id):
    """Return the plain text of the ``_Art1_RTF`` attribute of ``version``.

    Each entry of ``version[2]`` whose first two fields are ``"Attribute"``
    and ``"_Art1_RTF"`` carries the RTF either inline (one-element payload)
    or as a reference to a file (two-element payload) that is read from
    ``source`` -- a ``ZipFile`` or a directory path.

    :param version: record whose third element is the list of items to scan.
    :param odl_data: forwarded unchanged to ``ReplaceTextNames``.
    :param source: ``ZipFile`` instance or directory holding external files.
    :param class_id: forwarded unchanged to ``ReplaceTextNames``.
    :return: ``""`` as soon as an attribute with empty data is met, otherwise
        the result of ``ReplaceTextNames`` applied to the converted text.
    """
    external = ""

    for item in version[2]:
        if item[0] != "Attribute" or item[1] != "_Art1_RTF":
            continue

        payload = item[2]
        if len(payload) == 2:
            if isinstance(source, ZipFile):
                data = source.open(payload[0]).read()
            else:
                file_name = join(source, payload[0])
                with open(file_name, 'rb') as rtf_file:
                    data = rtf_file.read()
            # Form feeds are stripped from externally stored RTF.
            data = data.replace("\x0c", "")
        elif len(payload) == 1:
            data = payload[0]
        else:
            # Unexpected payload shape: previously this left ``data`` unbound
            # (NameError) or silently reused the previous item's data.
            continue

        if data == "":
            return ""

        buf = StringIO()
        buf.write(data)
        doc = Rtf15Reader.read(buf, clean_paragraphs = False)
        external = PlaintextWriter.write(doc).getvalue()
        external = external.replace("\n\n", "\n")

    return ReplaceTextNames(external, version, odl_data, class_id)
开发者ID:jeroenk,项目名称:artisanConvert,代码行数:29,代码来源:odl_extract.py


示例4: read_recommendations

 def read_recommendations(self, file_name):
     """
     Read the targeted values from an RTF file such as
     "WHO Daily Recommended Values.rtf".

     The plain-text rendering is split on blank lines; each entry is expected
     to look like ``name(unit),value``. The part of the first field before
     ``(`` is used as the nutrient name and the second comma-separated field
     as its value. The mapping is also stored on ``self.target_values``.

     :param file_name: path of the RTF file to read.
     :return: tuple ``(target, filtered_col)`` -- the name -> value dict and
         the list of names in file order.
     :raises IndexError: if a non-blank entry contains no comma.
     """
     target = dict()
     filtered_col = list()
     # Close the handle once the document has been parsed.
     with open(file_name) as rtf_file:
         doc = Rtf15Reader.read(rtf_file)
     entities = PlaintextWriter.write(doc).getvalue().split('\n\n')
     for item in entities:
         if not item.strip():
             # Blank entries (e.g. an empty document) carry no data.
             continue
         splited = item.split(',')
         name = splited[0].split('(')[0]
         value = splited[1]
         target[name] = value
         filtered_col.append(name)
     self.target_values = target
     return target, filtered_col
开发者ID:Basit-qc,项目名称:WHO---Food-Menu,代码行数:27,代码来源:buildmenu.py


示例5: upload

def upload(request):
	"""Handle the upload view.

	Three cases are served:

	* a request carrying ``request.FILES['file']``: the upload is stored
	  under ``shake_v3/static/data/``, converted to text (PDF and RTF have
	  dedicated converters, anything else is copied as-is) and the result of
	  ``generate_term_dict`` is returned as JSON;
	* any other request with POST data: the score computed by
	  ``custom_POST_to_score`` is mapped to a letter grade;
	* everything else: the ``upload.html`` template is rendered.

	:param request: the incoming request object.
	:return: an ``HttpResponse`` (JSON or grade) or the rendered template.
	"""
	# user uploads a document -> convert into a dict of the terms found
	# Requiring the 'file' key here avoids the NameError the old code hit
	# when FILES was non-empty but held no 'file' entry.
	if request.FILES and 'file' in request.FILES:
		result = ''
		f = request.FILES['file']
		fp = 'shake_v3/static/data/' + str(f)
		fp2 = fp[:len(fp)-3] + 'txt'
		extension = fp[-3:]
		if extension == 'pdf':
			with open(fp, 'wb+') as pdff:
				for chunk in f.chunks():
					pdff.write(chunk)
			result = pdf_to_txt(fp)
			with open(fp2, 'wb+') as txtf:
				txtf.write(result)
		elif extension == 'rtf':
			with open(fp, 'wb+') as rtff:
				for line in f:
					rtff.write(line)
			with open(fp, 'rb') as rtff:
				doc = Rtf15Reader.read(rtff)
			doctxt = PlaintextWriter.write(doc).getvalue()
			with open(fp2, 'wb+') as txtf:
				# One write instead of one call per character.
				txtf.write(doctxt)
			f = str(f)[:-4] + ".txt"
			result = doctxt
		else:
			with open(fp2, 'wb+') as txtf:
				for line in f:
					txtf.write(line)
			with open(fp2, 'r') as txtf:
				result = txtf.read()
		response_dict = generate_term_dict(result)
		response_dict['fp'] = 'static/data/' + str(f)
		return HttpResponse(simplejson.dumps(response_dict), mimetype='application/javascript')
	# user indicates terms -> give a grade
	elif request.POST:
		#TO DO: implement saving the data
		score = custom_POST_to_score(request)
		if score > 4.5:
			rating = 'A+'
		elif score > 4:
			rating = 'A'
		elif score > 3.5:
			rating = 'B+'
		elif score > 3:
			rating = 'B'
		elif score > 2.5:
			rating = 'C+'
		elif score > 2:
			rating = 'C'
		elif score > 1:
			rating = 'D'
		else:
			rating = 'F'
		return HttpResponse(rating)
	# display the upload part 1
	else:
		score = 0
		return render_to_response('upload.html', {'score': score}, context_instance = RequestContext(request))
开发者ID:vickimo,项目名称:shakev3,代码行数:60,代码来源:views.py


示例6: analyze

def analyze(committeeFile):
    """Extract the participants of a committee protocol and write them out.

    The RTF file is converted to plain text, ``find_participants`` is run on
    it, and every participant whose name contains the text before the first
    ``:`` of a line is flagged as a speaker. One line per participant is then
    written to the output file, whose path is derived from ``committeeFile``
    by swapping ``global_options.indir`` for ``global_options.outdir`` and
    ``rtf`` for ``txt``.

    :param committeeFile: path of the RTF protocol to analyze.
    :return: ``True`` on success; ``False`` if the RTF could not be read, in
        which case the file is copied to ``global_options.errdir``.
    """
    try:
        with open(committeeFile, "rb") as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
    except Exception:
        print("%s - skipped..." % committeeFile)
        errFile = committeeFile.replace(global_options.indir, global_options.errdir)
        shutil.copyfile(committeeFile, errFile)
        return False

    docstring = PlaintextWriter.write(doc).getvalue()

    # The plain-text dump is still written to test.out as before, but the
    # text is no longer re-read from disk twice.
    with open("test.out", 'w') as dump:
        dump.write(docstring)

    participants = find_participants(docstring)

    # Getting the indication whether the participant spoke in the committee
    for line in docstring.splitlines():
        if ":" not in line:
            continue
        participant = line.split(":")[0]
        for p in participants:
            if participant in p['name']:
                p['speaker'] = True
                p['speak_count'] += 1

    fname = committeeFile.replace(global_options.indir, global_options.outdir)
    # NOTE(review): this replaces every "rtf" in the path, not only the
    # extension -- confirm no directory name contains "rtf".
    fname = fname.replace("rtf", "txt")
    out_file = codecs.open(fname, "w", "utf-8")
    try:
        for participant in participants:
            string_builder = []
            for key, val in participant.iteritems():
                if val is None:
                    continue
                if isinstance(val, str):
                    val = val.replace("'", "")
                    val = val.replace('"', '')
                string_builder.append(u"'%s': '%s'" % (key, print_unicode(val)))
            wrt_ln = ', '.join(string_builder)
            wrt_ln += ',\n'
            try:
                out_file.write(wrt_ln)
            except UnicodeEncodeError:
                print(wrt_ln)
    finally:
        # Close the output even if formatting a participant fails.
        out_file.close()

    verbose("Generated participants file: " + fname)
    return True
开发者ID:assafsinvani,项目名称:gknesset,代码行数:59,代码来源:analyze_protocols.py


示例7: extract_terms

def extract_terms(rtffile):
    """Return every fourth line of the plain-text rendering of ``rtffile``.

    ``rtffile`` is passed straight to ``PlaintextWriter.write``; the lines at
    indices 0, 4, 8, ... of the resulting text are returned as a list.
    """
    plain_text = PlaintextWriter.write(rtffile).getvalue()
    # Start at the first line, run to the end, step of four.
    return plain_text.split('\n')[::4]
开发者ID:JonathanBowker,项目名称:memex-gate,代码行数:8,代码来源:scrape_legal_lexicon.py


示例8: load_stickies

def load_stickies(path):
    """Return the plain text of every sticky stored in the file at ``path``.

    The file content is split into RTF chunks by ``parse_sticky_database``
    and each chunk is converted to plain text.
    """
    with open(path) as fd:
        raw = fd.read()
    return [
        PlaintextWriter.write(Rtf15Reader.read(StringIO.StringIO(rtf))).getvalue()
        for rtf in parse_sticky_database(raw)
    ]
开发者ID:alexflint,项目名称:sticky-sync,代码行数:8,代码来源:client.py


示例9: get_rtf_text

def get_rtf_text(path):
	"""
	Take the path of an rtf file as an argument and return the text.

	The file is closed as soon as the document has been parsed.
	"""
	with open(path) as rtf_file:
		doc = Rtf15Reader.read(rtf_file)

	return PlaintextWriter.write(doc).getvalue()
开发者ID:vignesh117,项目名称:MusicalText,代码行数:9,代码来源:makecorpusfirstset.py


示例10: readRtf

 def readRtf(self, path):
     """Return the plain text of the RTF file at ``path``.

     If the file cannot be opened or parsed, the failure is logged through
     ``self._log`` and a fixed fallback message is returned instead.
     """
     try:
         # ``with`` closes the handle whether or not parsing succeeds.
         with open(path, "rb") as rtf_file:
             doc = Rtf15Reader.read(rtf_file)
     except Exception:
         self._log("Some screwy rtf shit going on with " + path)
         return "Can't process ur shitty rtf <3 dfbot"
     return PlaintextWriter.write(doc).getvalue()
开发者ID:danielhfrank,项目名称:Tumbox,代码行数:9,代码来源:tumbox.py


示例11: parse

	def parse(self, path):
		"""Build a ``Sample`` from the RTF file at ``path``.

		:param path: path of an RTF file.
		:return: ``Sample(path, None, text)`` where ``text`` is the plain-text
			rendering of the document.
		:raises NotImplementedError: if ``path`` is a directory.
		"""
		# Directory
		if os.path.isdir(path):
			raise NotImplementedError()
		# File
		with open(path) as rtf_file:
			doc = Rtf15Reader.read(rtf_file)
		return Sample(path, None, PlaintextWriter.write(doc).getvalue())
开发者ID:hcouch21,项目名称:styloproject,代码行数:9,代码来源:RtfParser.py


示例12: test_read2

    def test_read2(self):
        rtf = StringIO("""{\\rtf1\\ansi\\ansicpg1252\\cocoartf1343\\cocoasubrtf160\\cocoascreenfonts1{\\fonttbl\\f0\\fnil\\fcharset222 Thonburi;}
{\\colortbl;\\red255\\green255\\blue255;}
\\pard\\tx560\\tx1120\\tx1680\\tx2240\\tx2800\\tx3360\\tx3920\\tx4480\\tx5040\\tx5600\\tx6160\\tx6720\\pardirnatural\\qc

{\\f0\\fs24 \\cf0 \\'b9\\'e9\\'d3\\'b5\\'a1}""")
        doc = Rtf15Reader.read(rtf)
        text = PlaintextWriter.write(doc).read()
        print text
        self.assertEquals(u"น้ำตก", text.decode('utf8'))
开发者ID:pphetra,项目名称:pyth,代码行数:10,代码来源:test_readosxrtf.py


示例13: clean_rtf

def clean_rtf(fname):
    """Parse the RTF file ``fname`` into rows of ``;``-separated values.

    Blank lines of the plain-text rendering are dropped; every remaining line
    is split on ``;`` and the first and last character of each value are
    stripped.

    :param fname: path of the RTF file.
    :return: list of rows, each a list of strings.
    """
    # Close the handle once the document has been parsed.
    with open(fname) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    plain = PlaintextWriter.write(doc).getvalue()
    return [
        [val[1:-1] for val in line.split(";")]
        for line in plain.split("\n")
        if line
    ]
开发者ID:embr,项目名称:nonce,代码行数:11,代码来源:upload_jami_rtf.py


示例14: _rtf_to_txt

def _rtf_to_txt(file_path, dst_dir, file_name):
    """
    Uses the pyth python module to extract text from a rtf file and save
    to .txt in dst_dir.

    :param file_path: path of the source RTF file.
    :param dst_dir: directory receiving the ``.txt`` file.
    :param file_name: name used for the output (with a trailing ``.rtf``
        replaced by ``.txt``); when ``None`` the base name of ``file_path``
        is used.
    :return: always ``0``.
    """
    if file_name is None:
        file_name = os.path.split(file_path)[1]
    file_dst = os.path.join(dst_dir, re.sub(r'\.rtf$', '.txt', file_name))
    # Close the source handle once the document has been parsed.
    with open(file_path) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    txt = unidecode(PlaintextWriter.write(doc).getvalue())
    with open(file_dst, 'w') as f:
        f.write(txt)
    return 0
开发者ID:rjweiss,项目名称:rosetta,代码行数:14,代码来源:converters.py


示例15: _convert_rtf_to_text

    def _convert_rtf_to_text(self, password=None):
        """Convert ``self.cvFile`` (RTF) to a text file in ``self.scratchDir``.

        The output name is the input base name without its extension, followed
        by the current Unix timestamp and ``.txt``; its full path is stored in
        ``self.cvTextFile``.

        :param password: not used by this converter.
        :return: always ``0``.
        """
        input_rtf = self.cvFile
        # Close the source handle once the document has been parsed.
        with open(input_rtf) as rtf_file:
            rtf = Rtf15Reader.read(rtf_file)
        # splitext keeps the stem of names without a dot, where the previous
        # split/pop approach produced an empty stem.
        stem = os.path.splitext(os.path.basename(input_rtf))[0]
        timestamp = str(int(time.time()))
        self.cvTextFile = self.scratchDir + os.path.sep + stem + timestamp + r".txt"
        with open(self.cvTextFile, "w") as fw:
            fw.write(PlaintextWriter.write(rtf).getvalue())
        return (0)
开发者ID:arshpreetsingh,项目名称:cv-parser,代码行数:17,代码来源:cvparser.py


示例16: loadAllRTFToDB

def loadAllRTFToDB(folderPath):
	"""Load every article found in the ``.rtf`` files under ``folderPath``.

	Each file is converted to plain text and split into articles at lines of
	the form ``Document <id>``. Within an article, the line starting with
	``by ``, the line ending with `` words`` and the line equal to
	``english`` (all compared in lower case) locate the byline, the word
	count and the start of the body. Articles whose layout cannot be
	recognised are printed; the others are saved through
	``DBController.saveArticle``.

	:param folderPath: root directory that is walked recursively.
	"""
	db = DBController()
	for dirPath, dirNames, fileNames in os.walk(folderPath):
		for fileName in fileNames:
			if not fileName.endswith('.rtf'):
				continue
			filePath = os.path.join(dirPath, fileName)
			print(filePath)
			try:
				with open(filePath) as rtfFile:
					doc = Rtf15Reader.read(rtfFile)
				text = PlaintextWriter.write(doc).getvalue()
			except Exception:
				# Best effort: files that cannot be parsed are skipped.
				continue
			lines = [line.strip() for line in text.split('\n') if line]
			articleLinesDict, articleStartIndex = {}, 0
			for i, line in enumerate(lines):
				if line.startswith('Document ') and len(line.split(' ')) == 2:
					articleId = line.split(' ')[-1]
					articleLinesDict[articleId] = lines[articleStartIndex : i]
					articleStartIndex = i + 1

			# A distinct name keeps the file-level ``lines`` list intact.
			for articleId, articleLines in articleLinesDict.iteritems():
				bylineIndex, wordCountIndex, textStartIndex = -1, -1, -1
				for i, line in enumerate(articleLines):
					line = line.lower()
					if line.startswith('by '):
						bylineIndex = i
					elif line.endswith(' words'):
						wordCountIndex = i
					elif line == 'english':
						textStartIndex = i + 2

				if wordCountIndex == -1 or textStartIndex == -1 or wordCountIndex > textStartIndex:
					print(filePath + ', ' + articleId)
					continue

				# Without a byline the headline runs up to the word count.
				headlineEnd = wordCountIndex if bylineIndex == -1 else bylineIndex
				headline = ' '.join(articleLines[: headlineEnd])
				byline = '' if bylineIndex == -1 else articleLines[bylineIndex]
				date = parser.parse(articleLines[wordCountIndex + 1])
				# A line holding a time of day pushes the source name down
				# by one line.
				sourceName = articleLines[wordCountIndex + 2]
				if ' AM' in sourceName or ' PM' in sourceName:
					sourceName = articleLines[wordCountIndex + 3]

				articleDict = {
					'_id': articleId,
					'filePath' : filePath.split('Marshall_RA/')[-1],
					'headline': headline,
					'byline' : byline,
					'date' : date,
					'sourceName' : sourceName,
					'leadParagraph' : '',
					'tailParagraph' : '\n'.join(articleLines[textStartIndex:]),
					'sourceCode' : '', 'industry' : [], 'region' : [], 'newsSubject' : [], 'company' : [],
				}
				db.saveArticle(articleDict)
开发者ID:exsonic,项目名称:CorpusAnalysis,代码行数:45,代码来源:FileUtils.py


示例17: documentToText

def documentToText(path):
    """Return the text of the document at ``path`` passed through
    ``removeNonAscii``.

    The converter is picked from the file suffix (``.doc``, ``.docx``,
    ``.txt``, ``.pdf``, ``.rtf``).

    :param path: path of the document.
    :return: the converted text, or the literal ``"Returned Nothing."`` for
        any other suffix.
    """
    if path.endswith(".doc"):
        p = Popen(['antiword', path], stdout=PIPE)
        stdout, stderr = p.communicate()
        return removeNonAscii(stdout)
    if path.endswith(".docx"):
        return removeNonAscii(doc.process(path))
    if path.endswith(".txt"):
        with open(path) as inputFile:
            text = inputFile.read()
        return removeNonAscii(text)
    if path.endswith(".pdf"):
        return removeNonAscii(convert_pdf_to_txt(path))
    if path.endswith(".rtf"):
        # Close the handle once the document has been parsed.
        with open(path) as rtfFile:
            document = Rtf15Reader.read(rtfFile)
        return removeNonAscii(PlaintextWriter.write(document).getvalue())
    return "Returned Nothing."
开发者ID:zcatbear,项目名称:Expelliarmus,代码行数:19,代码来源:expelliarmus.py


示例18: Run

def Run(journal_file):
  """Import the entries of a journal plist.

  Keys of the plist are dates (``MM/DD/YYYY`` keys are rewritten to
  ``YYYY-MM-DD`` before parsing); values are either ``plistlib.Data`` holding
  RTF, or another value that is stored as the entry text unchanged. For every
  day with non-empty text a summary and the original content are written
  through ``utils``.

  :param journal_file: plist source handed to ``plistlib.readPlist``.
  :raises ValueError: re-raised from ``Rtf15Reader.read`` after printing the
      offending data.
  """
  raw_entries = plistlib.readPlist(journal_file)

  acc = utils.EntryAccumulator(lambda x: x['date'])
  for k, v in raw_entries.iteritems():
    if not v:
      continue
    # 12/29/2001 -> 2001-12-29
    new_k = re.sub(r'(\d\d)/(\d\d)/(\d\d\d\d)', r'\3-\1-\2', k)
    d = parser.parse(new_k)

    if isinstance(v, plistlib.Data):
      f = StringIO.StringIO(v.data)
      try:
        doc = Rtf15Reader.read(f)
      except ValueError:
        print(v.data)
        # Bare raise keeps the original traceback.
        raise
      txt = PlaintextWriter.write(doc).getvalue()
      acc.add({
        'date': d,
        'rtf': v.data,
        'text': txt
      })
    else:
      acc.add({
        'date': d,
        'text': v
      })

  for day, entries in acc.iteritems():
    assert len(entries) == 1
    entry = entries[0]

    if not entry['text']:
      continue

    summary = utils.SummarizeText(entry['text'])
    utils.WriteSingleSummary(day, maker='osxapp', summary=summary, dry_run=dry_run)
    if 'rtf' in entry:
      utils.WriteOriginal(day, maker='osxapp', contents=entry['rtf'], filename='journal.rtf', dry_run=dry_run)
    else:
      utils.WriteOriginal(day, maker='osxapp', contents=entry['text'].encode('utf8'), filename='journal.txt', dry_run=dry_run)
开发者ID:danvk,项目名称:personal-archive,代码行数:42,代码来源:import_osxjournal.py


示例19: upload_file

def upload_file(request):
    """Handle the document upload form.

    On a valid POST, an ``.rtf`` upload is converted to plain text and a
    ``.txt`` upload is read as-is; the content is run through
    ``LawHtmlParser``, stored as a ``Document`` and the user is redirected to
    ``document:list``. In every other case the upload form is rendered,
    together with an error message when there is one.

    :param request: the incoming request object.
    :return: a redirect after a successful upload, otherwise the rendered
        upload page.
    """
    error_message = ""
    if request.method == "POST":
        form = UploadForm(request.POST, request.FILES)
        if form.is_valid():
            doc_name = UploadedFile(request.FILES["doc_file"])
            doc_uploaded_date = timezone.now()
            doc = request.FILES["doc_file"]

            file_type = get_file_type(doc_name)
            if file_type == ".rtf":
                result = Rtf15Reader.read(doc)
                parser = LawHtmlParser(PlaintextWriter.write(result).read())
            elif file_type == ".txt":
                parser = LawHtmlParser(doc.read())
            else:
                # Previously ``parser`` stayed unbound here and the view
                # crashed with a NameError.
                parser = None

            if parser is None:
                error_message = "Unsupported file type."
            else:
                parsed_doc_content = parser.get_parsed_text()
                new_doc = Document(name=doc_name, content=parsed_doc_content, uploaded_date=doc_uploaded_date, file=doc)
                new_doc.save()
                return HttpResponseRedirect(reverse("document:list"))
        else:
            error_message = "Please select a file."

    form = UploadForm()
    return render(request, "document/upload.html", {"form": form, "error_message": error_message})
开发者ID:vteremasov,项目名称:zakon,代码行数:23,代码来源:views.py


示例20: document_create_index

    def document_create_index(document, user_id=None):
        """Extract the text of an uploaded document and index it in Solr.

        The text is obtained with a converter chosen from the file extension
        (``pdf2txt.py``, ``antiword``, ``xlrd``, pyth for RTF, ``strings`` for
        anything else), stripped of control characters, added to the Solr
        index, and the ``doc_document`` row is flagged ``has_been_indexed``.

        :param document: JSON string with at least the keys ``id``, ``name``
            and ``filename``.
        :param user_id: not used by this function.
        """

        import os
        import subprocess
        from xlrd import open_workbook
        from pyth.plugins.rtf15.reader import Rtf15Reader
        from pyth.plugins.plaintext.writer import PlaintextWriter
        import sunburnt

        def _capture(argv):
            # Run the converter without a shell so that a crafted file name
            # cannot inject commands. A missing executable yields empty
            # output, as it did with os.popen.
            try:
                return subprocess.Popen(argv, stdout=subprocess.PIPE).communicate()[0]
            except OSError:
                return ""

        document = json.loads(document)
        table = s3db.doc_document
        doc_id = document["id"]

        name = document["name"]
        filename = document["filename"]

        filename = "%s/%s/uploads/%s" % (os.path.abspath("applications"), \
                                        request.application, filename)

        si = sunburnt.SolrInterface(settings.get_base_solr_url())

        extension = os.path.splitext(filename)[1][1:]

        if extension == "pdf":
            data = _capture(["pdf2txt.py", filename])
        elif extension == "doc":
            data = _capture(["antiword", filename])
        elif extension == "xls":
            wb = open_workbook(filename)
            data=" "
            for s in wb.sheets():
                for row in range(s.nrows):
                    values = []
                    for col in range(s.ncols):
                        values.append(str(s.cell(row, col).value))
                    data = data + ",".join(values) + "\n"
        elif extension == "rtf":
            # Close the handle once the document has been parsed.
            with open(filename) as rtf_file:
                doct = Rtf15Reader.read(rtf_file)
            data = PlaintextWriter.write(doct).getvalue()
        else:
            data = _capture(["strings", filename])

        # The text needs to be in unicode or ascii, with no contol characters
        data = str(unicode(data, errors="ignore"))
        data = "".join(c if ord(c) >= 32 else " " for c in data)

        # Put the data according to the Multiple Fields
        # @ToDo: Also, would change this according to requirement of Eden
        document = {"id": str(doc_id), # doc_document.id
                    "name": data, # the data of the file
                    "url": filename, # the encoded file name stored in uploads/
                    "filename": name, # the filename actually uploaded by the user
                    "filetype": extension  # x.pdf -> pdf is the extension of the file
                    }

        # Add and commit Indices
        si.add(document)
        si.commit()
        # After Indexing, set the value for has_been_indexed to True in the database
        db(table.id == doc_id).update(has_been_indexed = True)

        db.commit()
开发者ID:gnarula,项目名称:eden_deployment,代码行数:61,代码来源:tasks.py



注:本文中的pyth.plugins.plaintext.writer.PlaintextWriter类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python reader.Rtf15Reader类代码示例发布时间:2022-05-27
下一篇:
Python pytestemb.assert_true函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap