本文整理汇总了Python中pysrt.open函数的典型用法代码示例。如果您正苦于以下问题:Python open函数的具体用法?Python open怎么用?Python open使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了open函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: get_word_freq_dict
def get_word_freq_dict(inputfiles, verification_list):
    """Build a word-frequency dictionary from a list of subtitle files.

    Each file is opened with pysrt's default decoding first; when that raises
    ``UnicodeDecodeError`` or yields no subtitles, 'utf8' and 'iso-8859-1'
    are tried in turn.  Every whitespace-separated word of every subtitle is
    passed to ``insert_word`` together with ``verification_list``.  Files
    that still yield nothing are reported and skipped.

    Returns the populated frequency dictionary.
    """
    # NOTE(review): the original indentation was lost; the running-total
    # print is assumed to happen once per processed file -- confirm.
    fallback_encodings = ('utf8', "iso-8859-1")
    freq_dict = {}
    for inputfile in inputfiles:
        print('processing %s' % (inputfile,))
        try:
            subs = pysrt.open(inputfile)
        except UnicodeDecodeError:
            subs = []
        if not subs:
            for enc in fallback_encodings:
                print('trying with %s' % (enc,))
                try:
                    subs = pysrt.open(inputfile, encoding=enc)
                except UnicodeDecodeError:
                    subs = []
                if subs:
                    break
        if not subs:
            print('couldnt open  %s' % (inputfile,))
            continue
        for sub in subs:
            for word in sub.text.split():
                insert_word(freq_dict, word, verification_list)
        print('%s %s' % (len(freq_dict), sum(freq_dict.values())))
    return freq_dict
开发者ID:jeroendecroos,项目名称:vocabulary_master,代码行数:27,代码来源:extract_words.py
示例2: collect_subtitles_lines
def collect_subtitles_lines(self, subtitle_file, file_path=None):
    """Create one ``SubtitlesLine`` row per cue of a subtitle file.

    ``file_path`` defaults to ``subtitle_file.directory`` joined with
    ``subtitle_file.file_name``.  The file is read with pysrt's default
    decoding, falling back to iso-8859-1 on ``UnicodeDecodeError``.  Cues
    whose creation raises ``ValidationError`` or ``ValueError`` are reported
    and skipped; every stored line gets its ``text_vector`` set and is saved.
    """
    path = file_path
    if not path:
        path = os.path.join(subtitle_file.directory, subtitle_file.file_name)

    try:
        subs = pysrt.open(path)
    except UnicodeDecodeError:
        subs = pysrt.open(path, encoding='iso-8859-1')

    for sub in subs:
        # Cue times are stored as the string form of a timedelta.
        start = str(datetime.timedelta(milliseconds=sub.start.ordinal))
        end = str(datetime.timedelta(milliseconds=sub.end.ordinal))
        text = sub.text
        try:
            line = SubtitlesLine.objects.create(
                subtitlefile=subtitle_file,
                index=sub.index,
                start=start,
                end=end,
                text=text,
            )
        except (ValidationError, ValueError):
            print('Ignoring: {t}'.format(t=text.encode('utf8')))
            continue
        line.text_vector = SearchVector('text', config=subtitle_file.language)
        line.save()
开发者ID:davidbt,项目名称:djmediastreamer,代码行数:32,代码来源:collect_subtitles.py
示例3: _detect_subtitle_language
def _detect_subtitle_language(srt_path):
    """Guess the language of a subtitle file from its first five cues.

    Returns ``Language.fromietf(...)`` of the most probable language when the
    file has at least five cues and the probability reaches
    ``autosubliminal.DETECTEDLANGUAGEPROBABILITY``; otherwise ``None``
    (also when the file cannot be read).
    """
    log.debug('Detecting subtitle language')

    # utf-8 has to be tried first: iso-8859-1 maps every byte to a character,
    # so it never fails to decode and a utf-8 fallback placed after it could
    # never run -- utf-8 files would reach the detector as mojibake.
    subtitle = None
    for encoding in ('utf-8', 'iso-8859-1'):
        try:
            subtitle = pysrt.open(path=srt_path, encoding=encoding)
            break
        except Exception:
            continue
    if subtitle is None:
        # If we can't read it, we can't detect, so return
        return None

    # Read first 5 subtitle lines to determine the language
    if len(subtitle) >= 5:
        text = ''.join(sub.text for sub in subtitle[0:5])

        # Detect the language with highest probability and return it if it's
        # more than the required minimum probability
        detected_languages = langdetect.detect_langs(text)
        log.debug('Detected subtitle language(s): %s', detected_languages)
        if len(detected_languages) > 0:
            # List is sorted according to probability, highest first
            detected_language = detected_languages[0]
            language_probability = detected_language.prob
            if language_probability >= autosubliminal.DETECTEDLANGUAGEPROBABILITY:
                log.debug('Probability of detected subtitle language accepted: %s', detected_language)
                return Language.fromietf(detected_language.lang)
            else:
                log.debug('Probability of detected subtitle language too low: %s', detected_language)

    return None
开发者ID:h3llrais3r,项目名称:Auto-Subliminal,代码行数:33,代码来源:filesystem.py
示例4: readAllFiles
def readAllFiles(path):
    """Cut movies and their subtitles into the parts listed in ``*.scenes`` files.

    A ``.scenes`` file holds the movie file name on its first line, followed
    by one ``start,end`` pair (``HH:MM:SS``) per line.  For every pair the
    movie is cut with ffmpeg into ``parted/<name>_<n>.mp4`` and the matching
    ``<name>.srt`` is sliced into ``parted/<name>_<n>.srt``.  When at least
    one subtitle part was written, ``srtToTxt`` is run on the ``parted``
    directory.

    ``path`` is the directory to walk.
    """
    import subprocess  # local import: only this function needs it

    for root, dirs, files in os.walk(path):
        files.sort()
        # Address files relative to the directory they were found in, so
        # that ``.scenes`` files in sub-directories can actually be opened.
        base = os.path.join(root, '')
        for fileread in files:
            if not fileread.endswith(".scenes"):
                continue
            with open(base + fileread) as a_file:
                lines = a_file.readlines()
            if not lines:
                continue
            fileName = lines[0].rstrip()
            # Same split point as ``split(".", 1)``, but tolerant of names
            # without a dot.
            partName, _, ext = fileName.partition(".")
            scenes = [item for item in lines[1:] if item.strip()]
            newPath = base + 'parted/'
            listSrt = list()
            for number, item in enumerate(scenes, 1):
                lhs, rhs = item.split(",", 1)
                rhs = rhs.rstrip()
                if not os.path.exists(newPath):
                    os.makedirs(newPath)
                newFileNameMovie = newPath + partName + '_' + str(number) + '.mp4'
                newFileNameSrt = newPath + partName + '_' + str(number) + '.srt'

                # Split movie file.  An argument list (no shell) keeps file
                # names containing quotes or shell metacharacters from being
                # interpreted as commands.
                if ext == 'mp4':
                    codec_args = ['-c', 'copy']
                else:
                    codec_args = ['-c:v', 'libx264', '-c:a', 'copy']
                command = ['ffmpeg', '-i', base + fileName,
                           '-ss', lhs, '-to', rhs] + codec_args + [newFileNameMovie]
                try:
                    failed = subprocess.call(command) != 0
                except OSError:
                    # ffmpeg is missing or not executable
                    failed = True
                if failed:
                    print("Error with spliting movie file")

                # Split *.srt file
                try:
                    srtPath = base + partName + '.srt'
                    try:
                        subs = pysrt.open(srtPath)
                    except UnicodeDecodeError:
                        subs = pysrt.open(srtPath, encoding='iso-8859-1')
                    Hs, Ms, Ss = lhs.split(":", 2)
                    He, Me, Se = rhs.split(":", 2)
                    part = subs.slice(
                        starts_after={'hours': int(Hs), 'minutes': int(Ms), 'seconds': int(Ss)},
                        ends_before={'hours': int(He), 'minutes': int(Me), 'seconds': int(Se)})
                    part.save(newFileNameSrt)
                    listSrt.append(newFileNameSrt)
                except Exception:
                    print("Error with spliting srt file")

            if not listSrt:
                print("Error there are no srt files")
            else:
                srtToTxt(newPath)
开发者ID:NickKok,项目名称:irss2014,代码行数:60,代码来源:parserMovie.py
示例5: srtToTxt
def srtToTxt(dirName):
    """Dump the text of every ``*.srt`` file in ``dirName`` to a ``.txt`` file.

    Each subtitle file is read with pysrt's default decoding, falling back to
    iso-8859-1 on ``UnicodeDecodeError``.  The cue texts are written
    back-to-back (no separator) as utf-8 to a file with the same name and a
    ``.txt`` extension.
    """
    for infile in glob.glob(os.path.join(dirName, '*.srt')):
        try:
            subs = pysrt.open(infile)
        except UnicodeDecodeError:
            subs = pysrt.open(infile, encoding='iso-8859-1')
        outfile = infile[:-4] + '.txt'
        # ``with`` closes the output even when a write fails.
        with codecs.open(outfile, "w", encoding="utf-8") as f:
            for sub in subs:
                f.write(sub.text)
开发者ID:NickKok,项目名称:irss2014,代码行数:14,代码来源:parserMovie.py
示例6: join_srt_files
def join_srt_files(srt_top, srt_btm, srt_out):
    """Merge two subtitle files into one and write it to ``srt_out``.

    Cues from ``srt_top`` have their text wrapped with ``TOP_SRT_TEMPLATE``
    before being added to the cues of ``srt_btm``; the result is sorted,
    re-indexed and saved.
    """
    upper_subs = pysrt.open(srt_top)
    lower_subs = pysrt.open(srt_btm)

    combined = pysrt.SubRipFile(items=lower_subs)
    for cue in upper_subs:
        cue.text = TOP_SRT_TEMPLATE.format(cue.text)
        combined.append(cue)

    combined.sort()
    combined.clean_indexes()
    combined.save(srt_out)
开发者ID:radioxoma,项目名称:langmix,代码行数:14,代码来源:langmix.py
示例7: emptyEntries
def emptyEntries(myFile, keep, verbose):
    """Delete subtitle entries whose text is empty and rewrite the file.

    An entry counts as empty when the third line of its string form is blank
    (or one of the whitespace literals below) and the fourth line is empty.
    When ``keep`` is true the original file is first copied to
    ``<myFile>.emptyEntries``.  ``verbose`` enables progress messages.

    Returns True when at least one empty entry was found, else False.
    """
    # NOTE(review): indentation was lost in the source; ``copyfile`` is
    # assumed to depend on ``keep`` only, not on ``verbose`` -- confirm.
    if verbose:
        print("--- Searching for empty entries")

    subs = pysrt.open(myFile, encoding='utf-8')
    entries = len(subs)
    if verbose:
        print("--- %s entries total" % entries)

    entriesToDelete = []
    for entryNo in range(0, entries):
        rows = (u"%s" % subs[entryNo]).split('\n')
        blankText = False
        for lineNo, row in enumerate(rows):
            # NOTE(review): the two whitespace literals look identical here;
            # one may originally have been a non-breaking space -- verify.
            if lineNo == 2 and (row == " " or row == " " or not row):
                blankText = True
            if blankText and lineNo == 3 and row == "":
                entriesToDelete.append(entryNo)

    emptyEntryFound = len(entriesToDelete) > 0
    if emptyEntryFound:
        print("*** %s empty entries found" % len(entriesToDelete))
        # Delete from the back so the remaining indexes stay valid.
        for entryNo in reversed(entriesToDelete):
            del subs[entryNo]
        if keep:
            if verbose:
                print("--- Copying original file to %s.emptyEntries" % myFile)
            copyfile(myFile, "%s.emptyEntries" % myFile)
        subs.save(myFile, encoding='utf-8')
        subs = pysrt.open(myFile, encoding='utf-8')
        print("--- Now has %s entries" % len(subs))
    return emptyEntryFound
开发者ID:jonsag,项目名称:pySubs,代码行数:49,代码来源:format.py
示例8: extract_lines
def extract_lines(subtitle_path):
    """Return the cue texts of a subtitle file, cleaned for analysis.

    The file is read with pysrt's default decoding, falling back to latin1
    on ``UnicodeDecodeError``.  HTML-style tags are stripped from every cue,
    and cues matching ``URL_REGEX`` are left out.
    """
    try:
        subtitle_object = pysrt.open(subtitle_path)
    except UnicodeDecodeError:
        subtitle_object = pysrt.open(subtitle_path, encoding='latin1')

    # Remove formatting tags first, then drop cues that contain a link.
    untagged = (re.sub('<[^<]+?>', '', sub.text) for sub in subtitle_object)
    return [text for text in untagged if not re.search(URL_REGEX, text)]
开发者ID:AmiZya,项目名称:tv-inspector,代码行数:15,代码来源:features.py
示例9: cut_subtitle
def cut_subtitle(self):
    """Cut the picked subtitle file to the selected time range.

    Nothing happens when the picked path is not a file.  The encoding
    guessed by chardet is tried first, then utf-8 and latin1; the first one
    for which opening and ``subtitle_cut`` succeed ends the method (returns
    ``None``).  When every encoding fails a warning box is shown and the
    intended output path is returned.
    """
    sbt_in = self.subtitle_pick.get_text()
    if os.path.isfile(sbt_in):
        sbt_out = self.save_pick.get_text() + os.path.splitext(sbt_in)[1]
        h1, m1, s1 = self.start.get_h_m_s()
        h2, m2, s2 = self.stop.get_h_m_s()

        import chardet

        # Close the file deterministically instead of leaking the handle.
        with open(sbt_in, "rb") as raw:
            detected = chardet.detect(raw.read(1024 * 1024))
        enc = detected["encoding"]
        cnf = detected["confidence"]

        # chardet reports None when it cannot guess; leave that out so the
        # candidate list only holds real encoding names (and can be joined).
        encs = OrderedSet([c for c in (enc, "utf-8", "latin1") if c])

        e = None
        for encoding in encs:
            try:
                logger.info("Trying to open subtitle with encoding %s" % encoding)
                subs = pysrt.open(sbt_in, error_handling=pysrt.ERROR_LOG, encoding=encoding)
                subtitle_cut(h1, m1, s1, h2, m2, s2, subs, sbt_out)
                return
            except Exception as ae:
                e = e or ae  # remember the first failure for the report
                logger.warning("encoding %s failed", encoding, exc_info=1)

        msg = (
            "Could not open {} with any of the following encodings:\n  {}\n\n"
            "Confidence on {} was {}.\nFirst error was: {}"
        )
        msg = msg.format(os.path.basename(sbt_in), ", ".join(encs), enc, cnf, str(e))
        QMessageBox.warning(self, "Opening subtitle failed", msg, defaultButton=QMessageBox.NoButton)
        return sbt_out
开发者ID:eddy-geek,项目名称:quickcut,代码行数:30,代码来源:__init__.py
示例10: start
def start(text_input,language_analysis_stimulated):
    """Replay a subtitle file in real time and store its words as memories.

    Each subtitle is processed once the wall-clock time since the call
    (plus 0.8 s) passes its start time.  Its text is reduced to ASCII, split
    into words, converted with ``LanguageAnalyzer.word_to_phones`` and every
    non-empty phone group is written through
    ``LanguageMemoryUtil.add_memory`` in a separate process.

    ``text_input`` is the path of the subtitle file.
    ``language_analysis_stimulated`` is an object whose ``value`` is set to 1
    while a subtitle is being processed and back to 0 afterwards.

    Raises ValueError when ``text_input`` does not exist.
    """
    t0 = time.time()  # Initiation time
    if not os.path.exists(text_input):
        raise ValueError('VTT file doesn\'t exist!')

    subs = pysrt.open(text_input)
    i = 0
    while i < len(subs):
        time.sleep(0.1)  # Wait 0.1 seconds to prevent an aggressive loop
        current = subs[i]
        # ``.seconds`` is only the 0-59 component of the timestamp; use the
        # total (``ordinal``, in milliseconds) so timing stays right after
        # the first minute.
        if (time.time() - t0 + 0.8) <= current.start.ordinal / 1000.0:
            continue

        sub_starting_time = datetime.datetime.now()
        language_analysis_stimulated.value = 1
        sub_duration = datetime.timedelta(
            milliseconds=(current.end - current.start).ordinal)
        sub_ending_time = sub_starting_time + sub_duration

        sub = current.text.encode('ascii', 'ignore')
        # NOTE(review): the source showed '[email protected]#$?,' here, which
        # looks like an e-mail obfuscation artefact of '!@#$?,' -- confirm.
        sub = sub.translate(None, '!@#$?,')
        phone_groups = [LanguageAnalyzer.word_to_phones(word) for word in sub.split()]
        phones = " ".join(phone_groups)

        if phones:  # nothing to time (and nothing to divide by) otherwise
            phone_duration = sub_duration / len(phones)
            starting_time = sub_starting_time
            for word_inphones in phone_groups:
                ending_time = starting_time + phone_duration * len(word_inphones.split())
                if ending_time <= sub_ending_time and word_inphones != "":
                    process5 = multiprocessing.Process(
                        target=LanguageMemoryUtil.add_memory,
                        args=(word_inphones, starting_time, ending_time))
                    process5.start()
                starting_time = ending_time + datetime.timedelta(milliseconds=50)

        print(current.text + "\n")
        print(phones + "\n")
        print("_____________________________________________________________________________________\n")
        language_analysis_stimulated.value = 0
        i += 1
开发者ID:Unmukt,项目名称:Cerebrum,代码行数:35,代码来源:analysis.py
示例11: _read_subtitle
def _read_subtitle(self, subtitle_filename):
    """Read a subtitle file and return its dialogs as a list of strings.

    The file is decoded as iso-8859-1.  Its text is split into lines, each
    stripped and cleaned with ``quote_matches``; blank lines are dropped.
    Consecutive lines are joined with spaces into one dialog until a line
    ends with one of ``. ! ? : )``, which closes the dialog.
    """
    srt_file = pysrt.open(subtitle_filename, encoding='iso-8859-1')
    stripped = [piece.strip() for piece in srt_file.text.split('\n')]
    cleaned = [quote_matches.sub('', piece).strip() for piece in stripped]

    closing_chars = ['.', '!', '?', ':', ')']
    grouped = []
    open_new = True
    for piece in cleaned:
        if not piece:  # skip empty lines
            continue
        if open_new:
            grouped.append([piece])
        else:
            grouped[-1].append(piece)
        # The ending of this line decides whether the next one starts a
        # fresh dialog.
        open_new = piece[-1] in closing_chars

    return [' '.join(parts) for parts in grouped]
开发者ID:YCKung,项目名称:MovieQA,代码行数:28,代码来源:story_loader.py
示例12: handle_subtitle
def handle_subtitle(cls, filename, target=None, to='zh', by_words=True):
    """Annotate a subtitle file with translations and save the result.

    With ``by_words`` true, each cue's words are translated individually and
    the cue text is replaced by the words, each valid one followed by its
    translation in parentheses.  Otherwise the whole cue is translated to
    ``to`` and the translations are appended on new lines; cues whose request
    times out are logged and left unchanged.

    The file is saved to ``target``, or to ``<filename>.<to>.srt`` when no
    target is given.  Returns True.
    """
    subs = pysrt.open(filename)
    words_list = cls.init_word_list()

    for sub in subs:
        if not by_words:
            try:
                result = BaiduTranslate.translate(sub.text, to=to)
            except requests.exceptions.ReadTimeout:
                time.sleep(10)
                BaiduTranslate.log('HTTP TIME OUT : ' + sub.text)
                continue
            for r in result:
                sub.text += '\n' + result[r]
            continue

        # Word-by-word mode: one word per line is sent to the translator.
        result_dict = BaiduTranslate.translate(sub.text.replace(' ', '\n'))
        pieces = []
        for k in result_dict:
            if cls.is_word_valid(k, words_list):
                pieces.append(k + '(' + result_dict.get(k) + ') ')
            else:
                pieces.append(k + ' ')
        result = ''.join(pieces)
        sub.text = result
        print(result)

    subs.save(target or filename + '.' + to + '.srt')
    return True
开发者ID:joway,项目名称:PySubtitle,代码行数:25,代码来源:handle_subtitle.py
示例13: download_sub
def download_sub(self):
    """Download candidate subtitles and keep the first one that validates.

    Candidates come from ``DBSub`` followed by ``OpenSubs``.  When
    ``self.validate`` is set, the cue text around ``validate.start_min`` is
    checked with ``Validate.validate``; the first matching file ends the
    search with ``self._final(True)``, non-matching files are removed.
    When validation is off, or no candidate matches, ``self._final(False)``
    is called.
    """
    print('Validation: ' + str(self.validate))
    if self.validate:
        validate = Validate(self.movie_path)

    candidates = chain(DBSub().download_sub(self.movie_path),
                       OpenSubs().download_sub(self.movie_path))
    for file_path in candidates:
        if not self.validate:
            continue

        subs = pysrt.open(file_path)
        window = subs.slice(
            starts_after={'minutes': validate.start_min - 1, 'seconds': 59},
            ends_before={'minutes': validate.start_min, 'seconds': 11})

        # Concatenate the cue texts, then collapse all whitespace runs.
        joined = ''.join(t_slice.text + ' ' for t_slice in window.data)
        text = ' '.join(joined.split())
        print("For file : {} Movie Text is : {}".format(file_path, text))

        if validate.validate(text):
            print("Found validated subtitle")
            self._final(True)
            return
        os.remove(file_path)

    self._final(False)
开发者ID:abbi031892,项目名称:FUN,代码行数:27,代码来源:subtitle.py
示例14: parseSubs
def parseSubs(subtitles):
    """Parse every file in ``SUB_PATH_BASE_DIR`` and collect its cues.

    Each cue except the last of a file has its end time extended: by half
    the gap to the next cue when that gap is at most 2 seconds, otherwise by
    1 second.  Cues with no text are skipped; the others are wrapped in
    ``Subtitle`` and appended to ``subtitles``.  Files that cannot be parsed
    are reported and skipped.
    """
    # NOTE(review): indentation was lost in the source; the ``else`` branch
    # (+1 second) is assumed to pair with the gap test -- confirm.
    for filename in os.listdir(SUB_PATH_BASE_DIR):
        print("Parsing srt file: " + filename)
        try:
            subs = pysrt.open(os.path.join(SUB_PATH_BASE_DIR, filename))
        except Exception:  # not a bare except: Ctrl-C must still work
            print("Could not parse " + filename)
            continue

        last_index = len(subs) - 1
        for i, sub in enumerate(subs):
            if i != last_index:
                # Some subs drop out prematurely, so stretch them towards
                # the start of the next one.
                timeToNextSub = subs[i + 1].start - sub.end
                # ``ordinal`` is the whole gap in milliseconds; the seconds
                # and milliseconds components alone ignore minutes/hours.
                secondsToNextSub = timeToNextSub.ordinal / 1000.0
                if secondsToNextSub <= 2:
                    sub.end.seconds += secondsToNextSub / 2.0
                else:
                    sub.end.seconds += 1
            if not sub.text.split():
                continue
            subtitles.append(Subtitle(sub, filename))
开发者ID:chrisjwei,项目名称:iacd-final,代码行数:26,代码来源:main.py
示例15: parse_srt
def parse_srt(sub_file=None):
    """Return the text of a subtitle file as a list of split pieces.

    ``sub_file`` defaults to ``get_random_srt()``.  The file is opened with
    the default decoding and, if that fails, with latin1.  Newlines are
    replaced by spaces, ``junk_re`` matches are blanked out, and the text is
    split with ``split_re``; consecutive pieces are glued together in pairs.
    """
    sub_file = sub_file or get_random_srt()
    debug(u"Using {} as SRT".format(sub_file))
    try:
        subs = srt.open(sub_file)
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt/SystemExit must propagate.
        subs = srt.open(sub_file, "latin1")

    flat_subs = subs.text.replace("\n", " ")
    clean_subs = junk_re.sub(" ", flat_subs)

    # Both the loop and ``next`` pull from the same iterator, so each result
    # is piece N followed by the stripped piece N+1.
    piece_iter = iter(split_re.split(clean_subs))
    split_subs = [l + next(piece_iter, '').strip() for l in piece_iter]
    return split_subs
开发者ID:BooDoo,项目名称:animecommentbot,代码行数:16,代码来源:scratch.py
示例16: ldSrtFile
def ldSrtFile(path):
    """Load the subtitle file at ``path``.

    Returns the parsed subtitles, or ``False`` when the file contains no
    entries.
    """
    subs = pysrt.open(path)
    if len(subs) == 0:
        # ``false`` (lower case) is not a Python name and raised NameError.
        return False
    return subs
开发者ID:csrgxtu,项目名称:transcript,代码行数:8,代码来源:transcript.py
示例17: main
def main():
    """
    Entry point: parse the command line and rewrite the subtitle file.

    Every subtitle's text is split into tokens; each token is passed through
    the selected filters and the concatenated results replace the text.
    Returns 0 on success, -1 when no data option was selected.
    """
    parser = argparse.ArgumentParser(
        description='This scipt helps to customize '
                    'the .srt file of a DJI Phantom 3.')
    parser.add_argument('-i', '--input',
                        help='input .srt file', required=True)
    parser.add_argument('-o', '--output',
                        help='output .srt file', required=True)
    # The data switches share the same shape, so declare them from a table.
    switches = (
        ('-hb', '--barometer', 'add barometer height'),
        ('-hu', '--ultrasonic', 'add ultrasonic height'),
        ('-da', '--date', 'add date of flight'),
        ('-ti', '--time', 'add time of flight'),
        ('-du', '--duration', 'add duration of flight'),
        ('-sp', '--speed', 'add speed'),
        ('-l', '--label', 'add text label to each piece of data'),
    )
    for short_flag, long_flag, help_text in switches:
        parser.add_argument(short_flag, long_flag, action='store_true',
                            help=help_text)
    args = parser.parse_args()

    # Selected filters, in the order their output is concatenated.
    steps = []
    if args.barometer:
        steps.append(lambda token: filter_height(
            token=token, ultrasonic=False, use_label=args.label))
    if args.ultrasonic:
        steps.append(lambda token: filter_height(
            token=token, ultrasonic=True, use_label=args.label))
    if args.date:
        steps.append(lambda token: filter_date(
            token=token, use_label=args.label))
    if args.time:
        steps.append(lambda token: filter_time(
            token=token, use_label=args.label))
    if args.duration:
        steps.append(lambda token: compute_duration(
            token=token, use_label=args.label))
    if args.speed:
        steps.append(lambda token: compute_speed(
            token=token, use_label=args.label))

    if not steps:
        print('Please specify some data to add!\n')
        parser.print_help()
        return -1

    subs = pysrt.open(args.input)
    for s in subs:
        t = ''
        for token in re.split(' |\n', s.text):
            for step in steps:
                t += step(token)
        s.text = t
    subs.save(args.output, encoding='utf-8')
    return 0
开发者ID:wachjose88,项目名称:phantom3-srt-customizer,代码行数:58,代码来源:phantom3-srt-customizer.py
示例18: parseSrt
def parseSrt(file, overlap, output):
    """Flatten a subtitle file into one text plus per-word timestamps.

    ``file`` is the subtitle path.  With ``overlap`` true the subtitles are
    shifted by -3 seconds and, after the first entry, only the last word and
    the end time of each entry are kept; otherwise every word of every entry
    gets an evenly spaced timestamp between the entry's start and end.
    When ``output`` is given, the text, a blank line and the space-separated
    timestamps are written to that path.
    """
    def spread(begin, finish, word_count):
        # One running-sum timestamp per word, each followed by a space.
        stamps = ''
        step = begin
        for _ in range(word_count):
            step += (finish - begin) / word_count
            stamps += str(step) + ' '
        return stamps

    subs = pysrt.open(file)
    entries = len(subs)
    fullText = subs[0].text.replace('\n', ' ')

    if overlap:
        subs.shift(seconds=-3)

    begin = getTimeInMilliseconds(subs[0].start)
    finish = getTimeInMilliseconds(subs[0].end)
    firstWords = subs[0].text.replace('\n', ' ').split(' ')
    timeArray = spread(begin, finish, len(firstWords))
    if overlap:
        # NOTE(review): indentation was lost in the source; this debug print
        # is assumed to run once, after the first entry's timestamps.
        print(timeArray)

    # all remaining subtitle entries
    for idx in range(1, entries):
        sub = subs[idx]
        begin = getTimeInMilliseconds(sub.start)
        finish = getTimeInMilliseconds(sub.end)
        text = sub.text.replace('\n', ' ')
        textArray = text.split(' ')
        if overlap:
            fullText += ' ' + textArray[-1]
            timeArray += str(finish) + ' '
        else:
            fullText += ' ' + text
            timeArray += spread(begin, finish, len(textArray))

    # remove trailing whitespace
    timeArray = timeArray[:-1]

    print(len(fullText.split(' ')))
    print(len(timeArray.split(' ')))

    if output:
        # ``with`` closes the file even when a write fails.
        with open(output, "w") as text_file:
            text_file.write(fullText.encode('UTF-8'))
            text_file.write('\n\n')
            text_file.write(timeArray)
开发者ID:sebastiankirch,项目名称:scripts,代码行数:58,代码来源:parse_srt.py
示例19: get_subtitles
def get_subtitles(srt_filename):
    """Load a subtitle file into a DataFrame.

    Returns a DataFrame with one row per cue and the columns ``start`` and
    ``end`` (seconds from the beginning of the file) and ``text``.
    """
    subs = pysrt.open(srt_filename)
    subs.shift(seconds=0)
    # ``ordinal`` is the full timestamp in milliseconds, so cues past the
    # first hour are timed correctly (minutes*60 + seconds drops the hours).
    # Building the frame in one go also avoids growing it row by row.
    rows = [
        [sub.start.ordinal / 1000.0, sub.end.ordinal / 1000.0, sub.text]
        for sub in subs
    ]
    return pd.DataFrame(rows, columns=['start', 'end', 'text'])
开发者ID:bveber,项目名称:speaker_seeker,代码行数:9,代码来源:audio_processor.py
示例20: loadSubtitles
def loadSubtitles(self):
    """Load the current file's ``.srt`` subtitles into the reader.

    Does nothing unless ".srt" is among the current file's loaded
    extensions.  Line breaks inside each cue are replaced by spaces, the
    file is saved back as utf-8, and the cue texts, one per line, become the
    reader's text content.
    """
    current = self.reader.currentFile
    if ".srt" not in current.loadedExtensions:
        return

    srt_path = current.name + ".srt"
    self.srt = pysrt.open(srt_path)
    for cue in self.srt:
        cue.text = " ".join(cue.text.split("\n"))
    self.srt.save(srt_path, encoding="utf-8")

    entireText = "\n".join([cue.text for cue in self.srt])
    self.reader.textContent.setPlainText(entireText)
开发者ID:dayjaby,项目名称:yomisama,代码行数:9,代码来源:movie.py
注:本文中的pysrt.open函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论