本文整理汇总了Python中pyth.plugins.rtf15.reader.Rtf15Reader类的典型用法代码示例。如果您正苦于以下问题:Python Rtf15Reader类的具体用法?Python Rtf15Reader怎么用?Python Rtf15Reader使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Rtf15Reader类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: rtf
def rtf(f):
    """Return the text of the RTF file at path ``f`` as one string.

    The file is parsed with ``Rtf15Reader``; the content of every text run
    of every top-level element is concatenated without any separator.
    """
    # A context manager closes the handle even if parsing raises; the
    # previous bare ``open()`` result was never closed.
    with open(f, "rb") as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    return "".join(
        "".join(text.content)
        for element in doc.content
        for text in element.content
    )
开发者ID:jmunoz298,项目名称:libreQDA,代码行数:7,代码来源:text_extraction.py
示例2: get_one_month_from_rtf
def get_one_month_from_rtf(url):
    """Download the RTF document at ``url`` and return its table as a DataFrame.

    Paragraphs are skipped until one whose repr (lower-cased) contains both
    "capacity" and "use cna"; after that header, every paragraph whose first
    text fragment splits on tabs into exactly 7 fields becomes a row.
    "%" and "," are stripped from all columns but the first, columns are
    converted with ``pd.to_numeric(..., errors='ignore')``, and the columns
    whose name contains "perc" are divided by 100.
    """
    column_names = [
        "prison_name",
        "baseline_cna",
        "in_use_cna",
        "operational_capacity",
        "population",
        "perc_pop_to_used_cna",
        "perc_acc_available",
    ]

    response = urllib2.urlopen(url)
    document = Rtf15Reader.read(StringIO(response.read()))

    rows = []
    seen_header = False
    for paragraph in document.content:
        paragraph_repr = repr(paragraph.content).lower()
        if "capacity" in paragraph_repr and "use cna" in paragraph_repr:
            seen_header = True
            continue
        if not seen_header:
            continue
        fields = re.split(r"\t", paragraph.content[0].content[0])
        if len(fields) == 7:
            rows.append(fields)

    frame = pd.DataFrame(rows, columns=column_names)
    without_percent = frame.iloc[:, 1:].replace("%", "", regex=True)
    frame.iloc[:, 1:] = without_percent.replace(",", "", regex=True)
    for column in frame.columns:
        frame[column] = pd.to_numeric(frame[column], errors='ignore')

    percentage_columns = [column for column in frame.columns if "perc" in column]
    frame.loc[:, percentage_columns] = frame.loc[:, percentage_columns] / 100
    return frame
开发者ID:RobinL,项目名称:prison_population_viewer,代码行数:32,代码来源:get_one_month.py
示例3: convert_to_txt
def convert_to_txt(file_path):
    """Return the text content of the file at ``file_path``.

    The loader is picked from the value returned by ``_get_extension``:

    * ``.txt``  -- ``open_with_correct_encoding``; if that raises, the error
      is logged and an empty string is returned.
    * ``.docx`` -- ``_docx_to_txt``.
    * ``.rtf``  -- pyth's ``Rtf15Reader`` followed by ``PlaintextWriter``.
    * anything else -- the file is read as ``ENCODING_UTF_8`` text.

    A missing file is logged, but the ``os.stat`` call that follows still
    raises for it.
    """
    logger.debug("convert_to_txt: %s", file_path)
    if not os.path.exists(file_path):
        # Logging only: os.stat below raises for a missing path, exactly as
        # it did before this function was tidied up.
        logger.error("missing file %s", file_path)
    file_size = os.stat(file_path).st_size
    logger.debug("convert_to_txt: %d bytes at %s", file_size, file_path)

    ext = _get_extension(file_path)
    if ext == '.txt':
        logger.debug("loading txt file")
        try:
            # Only the text is used here.
            # NOTE(review): the second element looks like an open handle that
            # is never closed -- confirm against open_with_correct_encoding.
            _encoding, _file_handle, words = open_with_correct_encoding(file_path)
        except Exception:
            logger.error("Wasn't able to read the words from the file %s", file_path)
            words = ""
    elif ext == '.docx':
        logger.debug("loading docx file")
        words = _docx_to_txt(file_path)
    elif ext == '.rtf':
        logger.debug("loading rtf file")
        # Close the handle once parsed instead of leaking it.
        with open(file_path) as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
        words = PlaintextWriter.write(doc).getvalue()
    else:
        # Use the same logger as the rest of the function (was the root
        # ``logging`` module).
        logger.warning("Couldn't find an extension on the file, so assuming text")
        with codecs.open(file_path, 'r', ENCODING_UTF_8) as myfile:
            words = myfile.read()

    logger.debug("loaded %d chars", len(words))
    return words
开发者ID:c4fcm,项目名称:DataBasic,代码行数:29,代码来源:filehandler.py
示例4: GetExternal
def GetExternal(version, odl_data, source, class_id):
    """Return the plain text of the ``_Art1_RTF`` attribute(s) of ``version``.

    ``version[2]`` is scanned for items whose first two fields are
    ``"Attribute"`` and ``"_Art1_RTF"``.  When the item's third field has two
    entries, the first names a member of ``source`` (a ``ZipFile``) or a file
    below the directory ``source``; when it has one entry, that entry is the
    RTF data itself.  The RTF is converted to plain text, doubled newlines
    are collapsed once, and the result is passed through
    ``ReplaceTextNames``.

    Returns ``""`` immediately when a matching attribute has empty data.
    """
    external = ""

    for item in version[2]:
        if item[0] != "Attribute" or item[1] != "_Art1_RTF":
            continue

        if len(item[2]) == 2:
            if isinstance(source, ZipFile):
                data = source.open(item[2][0]).read()
            else:
                # ``with`` closes the file even when read() raises; the
                # previous explicit close() was skipped on error.
                with open(join(source, item[2][0]), 'rb') as rtf_file:
                    data = rtf_file.read()
            # Drop form-feed characters before parsing.
            data = data.replace("\x0c", "")
        elif len(item[2]) == 1:
            data = item[2][0]
        # NOTE(review): for any other length ``data`` is left unset (or keeps
        # the value from a previous item), as before -- confirm this cannot
        # happen in practice.

        if data == "":
            return ""

        f = StringIO()
        f.write(data)
        doc = Rtf15Reader.read(f, clean_paragraphs = False)
        external = PlaintextWriter.write(doc).getvalue()
        external = external.replace("\n\n", "\n")

    return ReplaceTextNames(external, version, odl_data, class_id)
开发者ID:jeroenk,项目名称:artisanConvert,代码行数:29,代码来源:odl_extract.py
示例5: upload
def upload(request):
    """Serve the upload page, process an uploaded document, or grade a POST.

    * With an uploaded ``file``: the upload is stored under
      ``shake_v3/static/data/``, converted to text (``pdf`` via
      ``pdf_to_txt``, ``rtf`` via pyth, anything else copied verbatim), a
      ``.txt`` copy is written next to it, and the term dictionary from
      ``generate_term_dict`` is returned as JSON with an extra ``fp`` key.
    * With POST data: the score from ``custom_POST_to_score`` is turned into
      a letter rating and returned as the response body.
    * Otherwise: ``upload.html`` is rendered with a score of 0.
    """
    # user uploads a document -> convert into a dict of the terms found
    if request.FILES:
        if 'file' in request.FILES:
            f = request.FILES['file']
            # NOTE(review): the path is built from the client-supplied upload
            # name -- confirm it can never contain path separators.
            fp = 'shake_v3/static/data/' + str(f)
            fp2 = fp[:-3] + 'txt'
            extension = fp[-3:]
            if extension == 'pdf':
                with open(fp, 'wb+') as pdff:
                    for chunk in f.chunks():
                        pdff.write(chunk)
                result = pdf_to_txt(fp)
                with open(fp2, 'wb+') as txtf:
                    txtf.write(result)
            elif extension == 'rtf':
                with open(fp, 'wb+') as rtff:
                    for line in f:
                        rtff.write(line)
                # Re-open for reading inside ``with`` so the handle is closed
                # (it used to be leaked).
                with open(fp, 'rb') as rtff:
                    doc = Rtf15Reader.read(rtff)
                doctxt = PlaintextWriter.write(doc).getvalue()
                with open(fp2, 'wb+') as txtf:
                    # One write instead of one write per character.
                    txtf.write(doctxt)
                # The reported path points at the converted .txt file.
                f = str(f)[:-4] + ".txt"
                result = doctxt
            else:
                with open(fp2, 'wb+') as txtf:
                    for line in f:
                        txtf.write(line)
                with open(fp2, 'r') as txtf:
                    result = txtf.read()
            response_dict = generate_term_dict(result)
            response_dict['fp'] = 'static/data/' + str(f)
            return HttpResponse(simplejson.dumps(response_dict), mimetype='application/javascript')
        # NOTE(review): uploads that carry no 'file' field fall through and
        # return None, as before.
    # user indicates terms -> give a grade
    elif request.POST:
        #TO DO: implement saving the data
        score = custom_POST_to_score(request)
        return HttpResponse(_score_to_rating(score))
    # display the upload part 1
    else:
        score = 0
        return render_to_response('upload.html', {'score': score}, context_instance = RequestContext(request))


def _score_to_rating(score):
    """Map a numeric score to its letter rating ('A+' down to 'F')."""
    # Strictly-greater-than thresholds, checked from the highest down.
    thresholds = (
        (4.5, 'A+'),
        (4, 'A'),
        (3.5, 'B+'),
        (3, 'B'),
        (2.5, 'C+'),
        (2, 'C'),
        (1, 'D'),
    )
    for threshold, rating in thresholds:
        if score > threshold:
            return rating
    return 'F'
开发者ID:vickimo,项目名称:shakev3,代码行数:60,代码来源:views.py
示例6: compute
def compute(self):
    """ compute() -> None
    Load the input file and hand its HTML to the spreadsheet cell.

    'html' input is passed on as read; 'rtf' input is converted to XHTML
    with pyth. A ModuleError is raised when pyth cannot be imported or the
    format is neither 'html' nor 'rtf'.
    """
    source_path = self.get_input("File").name
    requested_format = self.get_input("Format")

    with open(source_path, 'rb') as stream:
        if requested_format == 'html':
            # Binary mode: the markup is kept as raw bytes.
            markup = stream.read()
        elif requested_format == 'rtf':
            try:
                py_import('pyth', {'pip': 'pyth'})
            except ImportError:
                raise ModuleError(self, "'rtf' format requires the pyth "
                                        "Python library")
            # pyth is imported only once py_import has succeeded.
            from pyth.plugins.rtf15.reader import Rtf15Reader
            from pyth.plugins.xhtml.writer import XHTMLWriter
            parsed = Rtf15Reader.read(stream)
            markup = XHTMLWriter.write(parsed).read()
        else:
            raise ModuleError(self, "'%s' format is unknown" % requested_format)

    self.displayAndWait(RichTextCellWidget, (markup,))
开发者ID:hjanime,项目名称:VisTrails,代码行数:25,代码来源:richtext.py
示例7: read_recommendations
def read_recommendations(self, file_name):
    """
    Read the targeted values from an RTF file such as
    "WHO Daily Recommended Values.rtf".

    The plain-text rendering of the document is split on blank lines. For
    each entry, the nutrient name is the text before the first "(" of the
    first comma-separated field, and the value is the second field. The
    unit between the parentheses is not stored.

    The resulting dictionary is also saved as ``self.target_values``.

    :param file_name: path of the RTF file to read
    :return: tuple ``(target, filtered_col)`` -- the name -> value dict and
        the list of names in document order
    :raises IndexError: if an entry contains no comma
    """
    # Close the handle once parsed instead of leaking it.
    with open(file_name) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    entities = PlaintextWriter.write(doc).getvalue().split('\n\n')

    target = dict()
    filtered_col = list()
    for item in entities:
        splited = item.split(',')
        name = splited[0].split('(')[0]
        # The unit used to be parsed here behind a bare ``except`` and then
        # thrown away; that dead code is gone.
        target[name] = splited[1]
        filtered_col.append(name)

    self.target_values = target
    return target, filtered_col
开发者ID:Basit-qc,项目名称:WHO---Food-Menu,代码行数:27,代码来源:buildmenu.py
示例8: main
def main():
    '''
    Purpose::
        Read the RTF file named by the ``url`` argument, extract its terms
        with ``extract_terms`` and write them, one per line and UTF-8
        encoded, to ``US_legal_lexicon.txt``.
    Input::
        Command-line arguments, obtained from ``parse_arguments()``.
    Output::
        Returns 1 when no input was given or it could not be read;
        returns None on success.
    '''
    # Get arguments
    args = parse_arguments()
    if not args.url:
        # ``url`` used to be left unbound in this case, which crashed with a
        # NameError further down.
        print('No input file given')
        return 1
    url = args.url

    # Get file and read it into structure
    try:
        with open(url, 'rb') as rtffile:
            judges = extract_terms(Rtf15Reader.read(rtffile))
    except IOError as e:
        # Format the exception itself: a plain IOError has no ``reason``
        # attribute, so the old handler raised AttributeError.
        print('An error occured fetching %s \n %s' % (url, e))
        return 1

    # Print data
    with open('US_legal_lexicon.txt', 'w') as f:
        for i in judges:
            f.write((i).encode('utf8') +'\n')
开发者ID:JonathanBowker,项目名称:memex-gate,代码行数:32,代码来源:scrape_legal_lexicon.py
示例9: decode_cell
def decode_cell(cell):
    '''Convert one RTF-formatted cell into whitespace-normalised plain text.'''
    # Replace \u and \N sequences (with one or two backslashes) by a space
    # before decoding the remaining escapes.
    sanitized = re.sub(r'\\u|\\\\u|\\N|\\\\N', ' ', cell)
    sanitized = sanitized.decode('unicode_escape')
    # Keep printable characters only.
    sanitized = filter(lambda ch: ch in string.printable, sanitized)

    document = Rtf15Reader.read(StringIO(sanitized))

    # Flatten paragraph -> text run -> fragment into one list of fragments.
    paragraphs = [paragraph.content for paragraph in document.content]
    fragments = [
        fragment
        for runs in paragraphs
        for run in runs
        for fragment in run.content
    ]

    # One string per cell, with underscores removed.
    flattened = re.sub('_', '', ' '.join(fragments))

    # Drop the final character, then collapse every run of whitespace.
    return ' '.join(flattened[:-1].split())
开发者ID:icdoctor2017,项目名称:DrLulz,代码行数:32,代码来源:Lulz_working.py
示例10: analyze
def analyze(committeeFile):
    """Extract the participants of a committee protocol and write them out.

    The RTF file is converted to plain text, ``find_participants`` is run on
    it, and every participant whose name contains the text before the first
    ":" of a line is marked as a speaker (``speaker`` set, ``speak_count``
    incremented). One line per participant is then written to the output
    file, whose name is the input name with ``indir`` replaced by ``outdir``
    and "rtf" replaced by "txt".

    Returns True on success. If the RTF cannot be read, the file is copied
    to the error directory and False is returned.
    """
    try:
        with open(committeeFile, "rb") as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.
        print("%s - skipped..." % committeeFile)
        errFile = committeeFile.replace(global_options.indir, global_options.errdir)
        shutil.copyfile(committeeFile, errFile)
        return False

    # The text is still round-tripped through test.out, but the file is now
    # read back once and every handle is closed by ``with``.
    with open("test.out", 'w') as scratch:
        scratch.write(PlaintextWriter.write(doc).getvalue())
    with open("test.out", 'r') as scratch:
        docstring = scratch.read()

    participants = find_participants(docstring)

    # Getting the indication whether the participant spoke in the committee
    for line in docstring.splitlines():
        if ":" not in line:
            continue
        participant = line.split(":")[0]
        for p in participants:
            if participant in p['name']:
                p['speaker'] = True
                p['speak_count'] += 1

    fname = committeeFile.replace(global_options.indir, global_options.outdir)
    # NOTE(review): this replaces every "rtf" in the path, not only the
    # extension -- confirm no directory name contains it.
    fname = fname.replace("rtf", "txt")

    with codecs.open(fname, "w", "utf-8") as out_file:
        for participant in participants:
            string_builder = []
            for key, val in participant.iteritems():
                if isinstance(val, str):
                    # Strip quotes so they cannot break the quoted output.
                    val = val.replace("'", "").replace('"', '')
                string_builder.append(u"'%s': '%s'" % (key, print_unicode(val)))
            wrt_ln = ', '.join(string_builder) + ',\n'
            try:
                out_file.write(wrt_ln)
            except UnicodeEncodeError:
                print(wrt_ln)

    verbose("Generated participants file: " + fname)
    return True
开发者ID:assafsinvani,项目名称:gknesset,代码行数:59,代码来源:analyze_protocols.py
示例11: test_inline_png
def test_inline_png(self):
    """The first inline image of sample-with-image.rtf carries the expected properties."""
    sample_with_image = os.path.join(os.path.abspath(os.path.dirname(__file__)), "rtfs", "sample-with-image.rtf")
    with open(sample_with_image, 'rb') as rtf:
        doc = Rtf15Reader.read(rtf)
    # First paragraph whose leading node is an image.
    image = next(node.content[0] for node in doc.content if isinstance(node.content[0], pyth.document.Image))
    expected = {'pngblip': True, 'picw': '20714', 'picwgoal': '750', 'pich': '12143',
                'pichgoal': '750', 'picscaley': '100', 'picscalex': '100'}
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    self.assertEqual(expected, image.properties)
开发者ID:kippr,项目名称:pyth,代码行数:8,代码来源:test_readrtf15.py
示例12: test_inline_png
def test_inline_png(self):
    """The XHTML written for sample-with-image.rtf embeds the PNG at 50x50 px."""
    tests_dir = os.path.abspath(os.path.dirname(__file__))
    sample_path = os.path.join(tests_dir, "rtfs", "sample-with-image.rtf")
    with open(sample_path, 'rb') as handle:
        parsed = Rtf15Reader.read(handle)
    markup = XHTMLWriter.write(parsed).getvalue()

    # Checked in this order: the data URI first, then both dimensions.
    expected_fragments = (
        '<img src="data:image/png;base64,',
        'width:50px',
        'height:50px',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, markup)
开发者ID:kippr,项目名称:pyth,代码行数:8,代码来源:test_writexhtml.py
示例13: load_stickies
def load_stickies(path):
    """Return the plain text of every note found in the database at ``path``.

    The file content is handed to ``parse_sticky_database``; each RTF item
    it yields is converted to plain text with pyth, in order.
    """
    with open(path) as database:
        raw_notes = parse_sticky_database(database.read())
        # Built inside the ``with`` block, like the original loop.
        return [
            PlaintextWriter.write(Rtf15Reader.read(StringIO.StringIO(raw))).getvalue()
            for raw in raw_notes
        ]
开发者ID:alexflint,项目名称:sticky-sync,代码行数:8,代码来源:client.py
示例14: rtf
def rtf(f):
    """Return the text runs of the RTF file at path ``f``, joined by CRLF."""
    with open(f, "rb") as handle:
        document = Rtf15Reader.read(handle)
    # One string per text run, in document order.
    runs = [
        ''.join(run.content)
        for block in document.content
        for run in block.content
    ]
    return '\r\n'.join(runs)
开发者ID:DarioGT,项目名称:libreQDA,代码行数:8,代码来源:text_extraction.py
示例15: parse
def parse(self, path):
    """Build a ``Sample`` from the RTF file at ``path``.

    The sample is created from the path, ``None`` and the plain-text
    rendering of the document.

    Raises NotImplementedError when ``path`` is a directory.
    """
    # Directory
    if os.path.isdir(path):
        raise NotImplementedError()
    # File: close the handle once parsed instead of leaking it.
    with open(path) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    return Sample(path, None, PlaintextWriter.write(doc).getvalue())
开发者ID:hcouch21,项目名称:styloproject,代码行数:9,代码来源:RtfParser.py
示例16: readRtf
def readRtf(self, path):
    """Return the plain text of the RTF file at ``path``.

    If the file cannot be opened or parsed, the failure is logged through
    ``self._log`` and a fixed apology string is returned instead.
    """
    try:
        # ``with`` closes the handle, which used to be leaked.
        with open(path, "rb") as rtf_file:
            doc = Rtf15Reader.read(rtf_file)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.
        self._log("Some screwy rtf shit going on with " + path)
        return "Can't process ur shitty rtf <3 dfbot"
    return PlaintextWriter.write(doc).getvalue()
开发者ID:danielhfrank,项目名称:Tumbox,代码行数:9,代码来源:tumbox.py
示例17: get_rtf_text
def get_rtf_text(path):
    """
    Take the path of an rtf file as an argument and return the text
    """
    # Close the handle once parsed instead of leaking it.
    with open(path) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    return PlaintextWriter.write(doc).getvalue()
开发者ID:vignesh117,项目名称:MusicalText,代码行数:9,代码来源:makecorpusfirstset.py
示例18: test_read2
def test_read2(self):
    """A cocoartf document with hex-escaped bytes is rendered as the expected UTF-8 text."""
    rtf = StringIO("""{\\rtf1\\ansi\\ansicpg1252\\cocoartf1343\\cocoasubrtf160\\cocoascreenfonts1{\\fonttbl\\f0\\fnil\\fcharset222 Thonburi;}
{\\colortbl;\\red255\\green255\\blue255;}
\\pard\\tx560\\tx1120\\tx1680\\tx2240\\tx2800\\tx3360\\tx3920\\tx4480\\tx5040\\tx5600\\tx6160\\tx6720\\pardirnatural\\qc
{\\f0\\fs24 \\cf0 \\'b9\\'e9\\'d3\\'b5\\'a1}""")
    doc = Rtf15Reader.read(rtf)
    text = PlaintextWriter.write(doc).read()
    # Parenthesised so the line is valid whether print is a statement or a
    # function.
    print(text)
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # Python versions no longer provide.
    self.assertEqual(u"น้ำตก", text.decode('utf8'))
开发者ID:pphetra,项目名称:pyth,代码行数:10,代码来源:test_readosxrtf.py
示例19: clean_rtf
def clean_rtf(fname):
    """Return the rows of the RTF file ``fname`` as lists of field strings.

    The document is rendered as plain text; each non-empty line is split on
    ";" and the first and last character of every field are dropped.
    """
    # Close the handle once parsed instead of leaking it.
    with open(fname) as rtf_file:
        doc = Rtf15Reader.read(rtf_file)
    plain = PlaintextWriter.write(doc).getvalue()
    return [
        [val[1:-1] for val in line.split(";")]
        for line in plain.split("\n")
        if line
    ]
开发者ID:embr,项目名称:nonce,代码行数:11,代码来源:upload_jami_rtf.py
示例20: transform
def transform(self, data, options=None):
    """Convert RTF ``data`` to XHTML and wrap it in a ``TransformResult``.

    Returns None when ``self._validate(data)`` returns None. ``options`` is
    accepted but not used here.
    """
    if self._validate(data) is None:
        return None

    # Collect the filtered input in an in-memory buffer and rewind it.
    rtf_buffer = cStringIO.StringIO()
    filtered = ''.join(self.filter(data))
    rtf_buffer.write(filtered)
    rtf_buffer.seek(0)

    document = Rtf15Reader.read(rtf_buffer, errors='replace')

    rendered = XHTMLWriter.write(document)
    markup = rendered.read()
    rendered.close()
    return TransformResult(StringIter(markup))
开发者ID:joka,项目名称:plone.transforms,代码行数:13,代码来源:rtf_html.py
注:本文中的pyth.plugins.rtf15.reader.Rtf15Reader类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论