Python utils.grouper Function Code Examples


This article collects typical usage examples of the utils.grouper function in Python. If you have been wondering what grouper does, how to call it, or what it looks like in real code, the curated examples below should help.



The following section shows 20 code examples of the grouper function, ordered by popularity.
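None of the examples below show grouper itself: in these projects it is a small helper that splits an iterable into fixed-size chunks, usually built on the classic itertools recipe. The sketch below is a hypothetical reference implementation for orientation only; the actual signature varies between projects (some take the chunk size first, as in grouper(2, tests) in Example 3, others take the iterable first, as in grouper(f, args.batch_size) in Example 2), so treat it as an assumption rather than the exact code each project uses.

from itertools import zip_longest  # izip_longest on Python 2

def grouper(iterable, n, fillvalue=None):
    """Assumed reference implementation: collect data into fixed-length chunks,
    padding the last chunk with fillvalue (the itertools recipe)."""
    args = [iter(iterable)] * n   # n references to the *same* iterator
    return zip_longest(*args, fillvalue=fillvalue)

For example, list(grouper('ABCDE', 2, fillvalue='x')) yields [('A', 'B'), ('C', 'D'), ('E', 'x')].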

Example 1: compute_descriptors

def compute_descriptors(infile, descriptor_types):
    """Reads low-level descriptors from DenseTracks."""

    LEN_LINE = 436

    POS_IDXS = [1, 2, 0]        # Position coordinates (X, Y, T).
    NORM_POS_IDXS = [7, 8, 9]   # Normalized position coordinates (X, Y, T).

    dense_tracks = subprocess.Popen(
        [DENSE_TRACK, infile],
        stdout=subprocess.PIPE)

    for lines in grouper(dense_tracks.stdout, NR_DESCRIPTORS):
        all_descs = np.vstack([
            map(float, line.split())
            for line in lines
            if line is not None]
        ).astype(np.float32)

        assert all_descs.shape[0] <= NR_DESCRIPTORS
        assert all_descs.shape[1] == LEN_LINE

        positions = all_descs[:, POS_IDXS]
        normalized_positions = all_descs[:, NORM_POS_IDXS]
        descriptors = {
            desc_type: all_descs[:, DESC_IDXS[desc_type]]
            for desc_type in descriptor_types}

        yield positions, normalized_positions, descriptors
Developer: martin-xavier | Project: medpackage | Lines: 29 | Source: densetrack_to_fisher_shot_errorprotect.py
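Note that Example 1 (like Example 16 below) assumes Python 2, where map(float, line.split()) returns a list that np.vstack can stack directly. Under Python 3, map() returns a lazy iterator, so a port would need an explicit list per line, roughly as in this small standalone sketch (the sample lines are illustrative, not real DenseTrack output):

import numpy as np

lines = ["1 2 3", "4 5 6", None]          # e.g. one None-padded chunk from grouper
all_descs = np.vstack([
    [float(x) for x in line.split()]      # explicit list per line; map() alone is lazy in Python 3
    for line in lines
    if line is not None
]).astype(np.float32)
# all_descs.shape == (2, 3)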


Example 2: main

def main():
    logger = configure_logging('parse_serverstatus')
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl, verify_ssl=False, port=8086, database=args.database)
    with open(args.input_file, 'r') as f:
        for line_number, chunk in enumerate(grouper(f, args.batch_size)):
            # print(line_number)
            json_points = []
            for line in chunk:
                # zip_longest will backfill any missing values with None, so we need to handle this, otherwise we'll miss the last batch
                if line:
                    try:
                        server_status_json = json.loads(line)
                        # print((line_number + 0) * _BATCH_SIZE)
                        # print((line_number + 1) * _BATCH_SIZE)
                        common_metric_data = get_metrics("serverstatus", server_status_json, common_metrics, line_number)
                        json_points.append(create_point(*common_metric_data))
                        wiredtiger_metric_data = get_metrics("serverstatus_wiredtiger", server_status_json, wiredtiger_metrics, line_number)
                        json_points.append(create_point(*wiredtiger_metric_data))
                        # for metric_data in get_metrics(server_status_json, common_metrics, line_number):
                        #     import ipdb; ipdb.set_trace()
                        #     print(json_points)
                        #     json_points.append(create_point(*metric_data))
                        # # for metric in get_metrics(server_status_json, wiredtiger_metrics, line_number):
                        #     json_points.append(create_point(*metric))
                        # for metric in get_metrics(server_status_json, mmapv1_metrics, line_number):
                        #     json_points.append(create_point(*metric))
                    except ValueError:
                        logger.error("Line {} does not appear to be valid JSON - \"{}\"".format(line_number, line.strip()))
            write_points(logger, client, json_points, line_number)
Developer: jimoleary | Project: mongo-insight | Lines: 29 | Source: parse_serverstatus.py
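The inline comment in Example 2 notes that zip_longest backfills missing values with None; that is exactly how a recipe-style grouper pads the final, short chunk, and it is why the loop guards each element with if line. A minimal sketch of the padding and the corresponding guard, assuming the reference grouper sketched above:

chunks = list(grouper(['a', 'b', 'c', 'd', 'e'], 2))
# -> [('a', 'b'), ('c', 'd'), ('e', None)]   the last chunk is padded with None

for chunk in chunks:
    for line in chunk:
        if line is not None:   # skip the padding (Example 2 uses the truthiness check `if line:`)
            print(line)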


Example 3: main

def main(args):
    global DEBUG
    if len(args) == 1:
        # no args - repl
        while True:
            print 'que?>',
            try:
                print google_it(raw_input())
            except EOFError:
                break
            except:
                import traceback
                traceback.print_exc()
    else:
        # test mode
        DEBUG = False
        print 'Loading testfile...'
        tests = filter(bool, open(args[1]).read().split('\n'))

        print len(tests), 'tests'
        for clue, answer in utils.grouper(2, tests):
            clue = clue.split('~!clue')[1]
            answer = answer.split("~!answer")[1]
            try:
                print '----------------------------------------------------------------'
                print 'clue:', clue
                print 'correct:', answer
                print 'eubank:', google_it(clue)
            except KeyboardInterrupt:
                sys.exit(0)
            except:
                import traceback
                traceback.print_exc()
Developer: andrewgjohnson-forks | Project: Eubank | Lines: 33 | Source: jeopardy.py


Example 4: main

def main():
    description = 'Split a FASTA file into multiple subfiles.'
    parser = ArgumentParser(description=description,
                            parents=[get_default_argument_parser()])
    parser.add_argument('-f', '--in-format',
                        default=_DEFAULT_FMT,
                        help="A biopython file format string.")
    parser.add_argument('-n', '--num-files', type=int,
                        default=_DEFAULT_N,
                        help=("The number of splits. "
                              "DEFAULT=%d") % _DEFAULT_N)
    parser.add_argument('in_path', nargs='?', default=None,
                        help=("The path of the file to be read in. "
                              "If no argument given, reads from STDIN."))
    parser.add_argument('out_pattern', default=None,
                        help=("Output file names format string. "
                              "Must contain one '%%d' for the file number."))
    args = parser.parse_args()

    if args.in_path is None:
        record_parser = SeqIO.parse(sys.stdin, args.in_format)
    else:
        record_parser = SeqIO.parse(args.in_path, args.in_format)

    write_multithread(grouper(record_parser, 100),
                      lambda recs, handle:
                          SeqIO.write(recs, handle, args.in_format),
                      args.out_pattern, n=args.num_files)
Developer: bsmith89 | Project: rrnum | Lines: 28 | Source: split_seqs.py


Example 5: train

    def train(self, sentences, total_words=None, word_count=0, chunksize=100):
        """
        Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
        Each sentence must be a list of utf8 strings.

        """
        logger.info("training model on %i vocabulary and %i features" % (len(self.vocab), self.layer1_size))
        if not self.vocab:
            raise RuntimeError("you must first build vocabulary before training the model")

        start, next_report = time.time(), 1.0
        if not total_words:
            total_words = sum(v.count for v in self.vocab.itervalues())
        # convert input string lists to Vocab objects (or None for OOV words)
        no_oov = ([self.vocab.get(word, None) for word in sentence] for sentence in sentences)
        # run in chunks of e.g. 100 sentences (= 1 job) 
        for job in utils.grouper(no_oov, chunksize):
            # update the learning rate before every job
            alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count / total_words))
            # how many words did we train on? out-of-vocabulary (unknown) words do not count
            job_words = sum(train_sentences(self, sentence, alpha) for sentence in job)
            word_count += job_words
            # report progress
            elapsed = time.time() - start
            if elapsed >= next_report:
                logger.info("PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s" %
                    (100.0 * word_count / total_words, alpha, word_count / elapsed if elapsed else 0.0))
                next_report = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports
        elapsed = time.time() - start
        logger.info("training on %i words took %.1fs, %.0f words/s" %
            (word_count, elapsed, word_count / elapsed if elapsed else 0.0))
        return word_count
Developer: nudles | Project: word2vec | Lines: 32 | Source: word2vec.py


Example 6: __iter__

 def __iter__(self):
     if self.chunksize:
         for chunk in utils.grouper(self.corpus, self.chunksize):
             for transformed in self.obj.__getitem__(chunk, chunksize=None):
                 yield transformed
     else:
         for doc in self.corpus:
             yield self.obj[doc]
Developer: Anikacyp | Project: gensim | Lines: 8 | Source: interfaces.py


Example 7: train

    def train(self, sentences, total_words=None, word_count=0, chunksize=100):
        """
        Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
        Each sentence must be a list of utf8 strings.

        """
        logger.info("training model with %i workers on %i vocabulary and %i features" % (self.workers, len(self.vocab), self.layer1_size))

        if not self.vocab:
            raise RuntimeError("you must first build vocabulary before training the model")

        start, next_report = time.time(), [1.0]
        word_count, total_words = [word_count], total_words or sum(v.count for v in self.vocab.itervalues())
        jobs = Queue(maxsize=2 * self.workers)  # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :(
        lock = threading.Lock()  # for shared state (=number of words trained so far, log reports...)

        def worker_train():
            """Train the model, lifting lists of sentences from the jobs queue."""
            work = matutils.zeros_aligned(self.layer1_size, dtype=REAL)  # each thread must have its own work memory

            while True:
                job = jobs.get()
                if job is None:  # data finished, exit
                    break
                # update the learning rate before every job
                alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words))
                # how many words did we train on? out-of-vocabulary (unknown) words do not count
                job_words = sum(train_sentence(self, sentence, alpha, work) for sentence in job)
                with lock:
                    word_count[0] += job_words
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        logger.info("PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s" %
                            (100.0 * word_count[0] / total_words, alpha, word_count[0] / elapsed if elapsed else 0.0))
                        next_report[0] = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports

        workers = [threading.Thread(target=worker_train) for _ in xrange(self.workers)]
        for thread in workers:
            thread.daemon = True  # make interrupting the process with ctrl+c easier
            thread.start()

        # convert input strings to Vocab objects (or None for OOV words), and start filling the jobs queue
        no_oov = ([self.vocab.get(word, None) for word in sentence] for sentence in sentences)
        for job_no, job in enumerate(utils.grouper(no_oov, chunksize)):
            logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize()))
            jobs.put(job)
        logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize())
        for _ in xrange(self.workers):
            jobs.put(None)  # give the workers heads up that they can finish -- no more work!

        for thread in workers:
            thread.join()

        elapsed = time.time() - start
        logger.info("training on %i words took %.1fs, %.0f words/s" %
            (word_count[0], elapsed, word_count[0] / elapsed if elapsed else 0.0))

        return word_count[0]
Developer: MorLong | Project: word2vec-1 | Lines: 58 | Source: word2vec.py


Example 8: import_json

def import_json():
    for g in grouper(1000,sys.stdin):
        try:
            Model.database.bulk_save([json.loads(l) for l in g if l])
        except BulkSaveError as err:
            if any(d['error']!='conflict' for d in err.errors):
                raise
            else:
                logging.warn("conflicts for %r",[d['id'] for d in err.errors])
Developer: JeffAMcGee | Project: localcrawl | Lines: 9 | Source: admin.py


Example 9: read_slr

def read_slr(fh):
    stats = fh.readline()
    seqs = []

    for l in utils.grouper(fh, 2):
        name = l[0].rstrip()
        seq = l[1].rstrip()
        seqs.append(SeqRecord(id=name, seq=Seq(seq), description=""))
        
    return seqs
Developer: pomeranz | Project: tree_stats | Lines: 10 | Source: prepare_fubar.py


Example 10: __init__

 def __init__(self, horn_pointing=False, siamfile=None):
     self.horn_pointing = horn_pointing
     if siamfile is None:
         siamfile = private.siam
     f = open(siamfile)
     lines = f.readlines()
     self.siam = {}
     for line in grouper(4,lines[1:]):
         chtag = line[0].split()[0]
         m = np.array(np.matrix(';'.join(line[1:])))
         self.siam[chtag] = m
Developer: tskisner | Project: planck | Lines: 11 | Source: pointingtools.py


Example 11: import_old_json

def import_old_json():
    for g in grouper(1000,sys.stdin):
        docs = [json.loads(l) for l in g if l]
        for d in docs:
            del d['doc_type']
            for k,v in d.iteritems():
                if k[-2:]=='id' or k in ('rtt','rtu'):
                    d[k]=v[1:]
            for field in ['ats','fols','frs']:
                if field in d and isinstance(d[field],list):
                    d[field] = [u[1:] for u in d[field]]
        Model.database.bulk_save(docs)
Developer: JeffAMcGee | Project: localcrawl | Lines: 12 | Source: admin.py


Example 12: xfory

def xfory(price_info, units):
    """ function to discount per groups. if you pay Y you get X """
    total = 0
    x = price_info.get('x')
    y = price_info.get('y')
    price = price_info.get('unitPrice')

    for group in grouper(x, range(0, units)):
        has_discount = len(group) == x
        per_unit = price if not has_discount else y / x * price
        total = total + (per_unit * len(group))

    return total / units
Developer: abelgvidal | Project: exercises | Lines: 13 | Source: rules.py


Example 13: command_service

    def command_service(self, rawCommand):
        """
        Parse raw input and execute specified function with args

        :param rawCommand: csv string from Matlab/Simulink of the form:
                'command, namedArg1, arg1, namedArg2, arg2, ..., namedArgN, argN'
        :return: the command and arguments as a dictionary
        """
        pack = [x.strip() for x in split('[,()]*', rawCommand.strip())]
        raw_cmd = pack[0]
        argDict = {key: literal_eval(value) for key, value in utils.grouper(pack[1:], 2)}
        cmd = self.mapInterface.commands[raw_cmd]
        ret = cmd(**argDict)
        logger.info("Command '{}' run with args {}".format(raw_cmd, argDict))
        return raw_cmd, ret
Developer: friend0 | Project: world_engine | Lines: 15 | Source: server.py


Example 14: train

    def train(self,triples, total_triples=None, triples_count = 0, chunksize=1000):
        if not self.vocab or not self.vocab_rel:
            raise RuntimeError("you must first build entity and relation vocabulary before training the model")
        start,next_report = time.time(),[1.0]
        triples_count = [triples_count]
        total_triples = total_triples or int(sum(1 for v in triples))
        jobs = Queue(maxsize=2*self.workers)
        lock = threading.Lock()

        def worker_train():
            work = zeros(self.layer1_size, dtype=REAL)
            detR = zeros((self.layer1_size,self.layer1_size),dtype=REAL)
            # neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL)
            while True:
                job = jobs.get()
                if job is None:
                    break
                alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * triples_count[0] / total_triples))
                job_triples = self._get_job_triples(alpha,job,work,detR)
                with lock:
                    triples_count[0] += job_triples
                    elapsed = time.time() - start
                    if elapsed>= next_report[0]:
                        logger.info("PROGRESS: at %.2f%% triplrs, alpha %.05f, %.0f triples/s" %
                            (100.0 * triples_count[0] / total_triples, alpha, triples_count[0] / elapsed if elapsed else 0.0))
                        next_report[0] = elapsed + 1.0

        workers = [threading.Thread(target=worker_train) for _ in xrange(self.workers)]
        for thread in workers:
            thread.daemon = True  # make interrupting the process with ctrl+c easier
            thread.start()

        # convert input strings to Vocab objects (eliding OOV/downsampled words), and start filling the jobs queue
        for job_no, job in enumerate(utils.grouper(self._prepare_triples(triples), chunksize)):
            logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize()))
            jobs.put(job)
        logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize())
        for _ in xrange(self.workers):
            jobs.put(None)  # give the workers heads up that they can finish -- no more work!

        for thread in workers:
            thread.join()

        elapsed = time.time() - start
        logger.info("training on %i triples took %.1fs, %.0f triples/s" %
            (triples_count[0], elapsed, triples_count[0] / elapsed if elapsed else 0.0))
        self.syn0norm = None
        return triples_count[0]
Developer: v-shinc | Project: KB2Vec | Lines: 48 | Source: kb2vec.py


Example 15: fetch_edges

def fetch_edges():
    Edges.database = connect("houtx_edges")
    User.database = connect("away_user")
    old_edges = set(int(row['id']) for row in Edges.database.paged_view("_all_docs",endkey="_"))
    uids = set(_users_from_scores())-old_edges
    settings.pdb()
    for g in grouper(100,uids):
        for user in twitter.user_lookup(g):
            if user is None or user.protected: continue
            try:
                edges = twitter.get_edges(user._id)
            except restkit.errors.Unauthorized:
                logging.warn("unauthorized!")
                continue
            except restkit.errors.ResourceNotFound:
                logging.warn("resource not found!?")
                continue
            edges.save()
            user.save()
            sleep_if_needed()
Developer: JeffAMcGee | Project: localcrawl | Lines: 20 | Source: admin.py


Example 16: compute_descriptors

def compute_descriptors(infile, descriptor_type):
    """Reads low-level descriptors from DenseTracks."""

    LEN_LINE = 436
    POS_IDXS = [1, 2, 0]  # Positional coordinates (X, Y, T).

    dense_tracks = subprocess.Popen(
        ['./DenseTrack', infile], stdout=subprocess.PIPE)
    descriptor_idxs = DESC_IDXS[descriptor_type]

    for lines in grouper(dense_tracks.stdout, NR_DESCRIPTORS):

        all_descs = np.vstack([
            map(float, line.split())
            for line in lines
            if line is not None]
        ).astype(np.float32)

        assert all_descs.shape[0] <= NR_DESCRIPTORS
        assert all_descs.shape[1] == LEN_LINE

        yield all_descs[:, POS_IDXS], all_descs[:, descriptor_idxs]
Developer: martin-xavier | Project: medpackage | Lines: 22 | Source: fisher_vector.py


Example 17: main

def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('-model', type=str, required=True)
	parser.add_argument('-weights', type=str, required=True)
	parser.add_argument('-results', type=str, required=True)
	args = parser.parse_args()

	model = model_from_json(open(args.model).read())
	model.load_weights(args.weights)
	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

	questions_val = open('../data/preprocessed/questions_val2014.txt', 
						'r').read().decode('utf8').splitlines()
	answers_val = open('../data/preprocessed/answers_val2014.txt', 
						'r').read().decode('utf8').splitlines()
	images_val = open('../data/preprocessed/images_val2014.txt', 
						'r').read().decode('utf8').splitlines()
	vgg_model_path = '../features/coco/vgg_feats.mat'
	
	print 'Model compiled, weights loaded...'
	labelencoder = joblib.load('../models/labelencoder.pkl')

	features_struct = scipy.io.loadmat(vgg_model_path)
	VGGfeatures = features_struct['feats']
	print 'loaded vgg features'
	image_ids = open('../features/coco_vgg_IDMap.txt').read().splitlines()
	img_map = {}
	for ids in image_ids:
		id_split = ids.split()
		img_map[id_split[0]] = int(id_split[1])

	nlp = English()
	print 'loaded word2vec features'

	nb_classes = 1000
	y_predict_text = []
	batchSize = 128
	widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#',left='[',right=']'),
           ' ', ETA()]
	pbar = ProgressBar(widgets=widgets)

	for qu_batch,an_batch,im_batch in pbar(zip(grouper(questions_val, batchSize, fillvalue=questions_val[0]), 
												grouper(answers_val, batchSize, fillvalue=answers_val[0]), 
												grouper(images_val, batchSize, fillvalue=images_val[0]))):
		X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
		if 'language_only' in args.model:
			X_batch = X_q_batch
		else:
			X_i_batch = get_images_matrix(im_batch, img_map , VGGfeatures)
			X_batch = np.hstack((X_q_batch, X_i_batch))
		y_predict = model.predict_classes(X_batch, verbose=0)
		y_predict_text.extend(labelencoder.inverse_transform(y_predict))

	correct_val=0
	incorrect_val=0	
	f1 = open(args.results, 'w')

	for prediction, truth, question, image in zip(y_predict_text, answers_val, questions_val, images_val):
		temp_count=0
		for _truth in truth.split(';'):
			if prediction == _truth:
				temp_count+=1

		if temp_count>2:
			correct_val+=1
		else:
			incorrect_val+=1

		f1.write(question.encode('utf-8'))
		f1.write('\n')
		f1.write(image.encode('utf-8'))
		f1.write('\n')
		f1.write(prediction)
		f1.write('\n')
		f1.write(truth.encode('utf-8'))
		f1.write('\n')
		f1.write('\n')

	f1.write('Final Accuracy is ' + str(float(correct_val)/(incorrect_val+correct_val)))
	f1.close()
	f1 = open('../results/overall_results.txt', 'a')
	f1.write(args.weights + '\n')
	f1.write(str(float(correct_val)/(incorrect_val+correct_val)) + '\n')
	f1.close()
	print 'Final Accuracy on the validation set is', float(correct_val)/(incorrect_val+correct_val)
Developer: Goddard | Project: visual-qa | Lines: 85 | Source: evaluateMLP.py
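Example 17 (and Example 18 below) passes fillvalue=<first element of the list> so that every batch, including the last one, contains exactly batchSize items and the model always sees fixed-size input. The extra predictions produced for the padded copies are harmless because the later zip(...) against the un-padded lists stops at the shortest sequence. A small illustration of the padding effect, using made-up names and the reference grouper sketched earlier:

questions = ['q1', 'q2', 'q3', 'q4', 'q5']
batch_size = 2
batches = list(grouper(questions, batch_size, fillvalue=questions[0]))
# -> [('q1', 'q2'), ('q3', 'q4'), ('q5', 'q1')]   last batch padded with the first question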


Example 18: main

def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('-num_hidden_units', type=int, default=512)
	parser.add_argument('-num_lstm_layers', type=int, default=2)
	parser.add_argument('-dropout', type=float, default=0.2)
	parser.add_argument('-activation', type=str, default='tanh')
	parser.add_argument('-num_epochs', type=int, default=100)
	parser.add_argument('-model_save_interval', type=int, default=5)
	parser.add_argument('-batch_size', type=int, default=128)
	parser.add_argument('-word_vector', type=str, default='')
	args = parser.parse_args()

	questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines()
	questions_lengths_train = open('../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines()
	answers_train = open('../data/preprocessed/answers_train2014.txt', 'r').read().decode('utf8').splitlines()
	images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines()
	max_answers = 1000
	questions_train, answers_train, images_train = selectFrequentAnswers(questions_train,answers_train,images_train, max_answers)

	print 'Loaded questions, sorting by length...'
	questions_lengths_train, questions_train, answers_train = (list(t) for t in zip(*sorted(zip(questions_lengths_train, questions_train, answers_train))))
	
	#encode the remaining answers
	labelencoder = preprocessing.LabelEncoder()
	labelencoder.fit(answers_train)
	nb_classes = len(list(labelencoder.classes_))
	joblib.dump(labelencoder,'../models/labelencoder.pkl')
	max_len = 30 #25 is max for training, 27 is max for validation
	word_vec_dim = 300

	model = Sequential()
	model.add(LSTM(output_dim = args.num_hidden_units, activation='tanh', 
			return_sequences=True, input_shape=(max_len, word_vec_dim)))
	model.add(Dropout(args.dropout))
	model.add(LSTM(args.num_hidden_units, return_sequences=False))
	model.add(Dense(nb_classes, init='uniform'))
	model.add(Activation('softmax'))

	json_string = model.to_json()
	model_file_name = '../models/lstm_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_lstm_layers_' + str(args.num_lstm_layers) + '_dropout_' + str(args.dropout)
	open(model_file_name  + '.json', 'w').write(json_string)
	
	print 'Compiling model...'
	model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
	print 'Compilation done...'

	#set up word vectors
        # Code to choose the word vectors, default is Goldberg but GLOVE is preferred
        if args.word_vector == 'glove':
            nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
        else:
            nlp = English()

	print 'loaded ' + args.word_vector + ' word2vec features...'

	## training
        # Moved few variables to args.parser (num_epochs, batch_size, model_save_interval)
	print 'Training started...'
	for k in xrange(args.num_epochs):

		progbar = generic_utils.Progbar(len(questions_train))

		for qu_batch,an_batch,im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[0]), 
												grouper(answers_train, args.batch_size, fillvalue=answers_train[0]), 
												grouper(images_train, args.batch_size, fillvalue=images_train[0])):
			timesteps = len(nlp(qu_batch[-1])) #questions sorted in descending order of length
			X_q_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
			Y_batch = get_answers_matrix(an_batch, labelencoder)
			loss = model.train_on_batch(X_q_batch, Y_batch)
			# fix for the Keras v0.3 issue #9
			progbar.add(args.batch_size, values=[("train loss", loss[0])])

		
		if k%args.model_save_interval == 0:
			model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

	model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k+1))
Developer: iamaaditya | Project: visual-qa | Lines: 77 | Source: trainLSTM_language.py


Example 19: BeautifulSoup

    learnset_url += "{0}_(Pok%C3%A9mon)/Generation_I_learnset".format(pname)

    html_file = urllib2.urlopen(learnset_url)
    learnset_html = html_file.read()
    html_file.close()

    bs = BeautifulSoup(learnset_html)
    x = [td.text for td in bs.findAll("td")
         if 0 < len(td.text) < 60]
    # if td.text in movename_to_num.values()] worked pretty well, but...
    # Just grabbing everything that appears anywhere and is a valid move
    # name will grab Psychic, when those characters only appeared to indicate
    # the type of a move and not the Move Psychic

    # So instead, group them into clumps... it seems to group very consistently
    grouped = list(grouper(x, 6))

    # Pikachu had a weird move: Light Screen, which he learns at Level 50 in
    # Pokemon Yellow, but never in Red/Blue. So just grabbing the values in the
    # table that are valid moves would actually lead us to believe that Pikachu
    # can learn Light Screen, which he can, but it doesn't have a TM until Gen
    # 3. Instead, let's group the entries, drop the ones from Pokemon Yellow,
    # and then grab the remaining moves
    not_yellow = [entry for entry in grouped if not entry[0].endswith("Y")]

    # fix a problem that Vaporean == 106 was having
    valid_starts = ("T", "H", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9")
    valid = [entry for entry in not_yellow
             if entry[0].startswith(valid_starts)]

    moves = [standardize(entry[1]) for entry in valid
Developer: hoxiea | Project: pokemodel | Lines: 31 | Source: get_learnsets.py


Example 20: train

    def train(self, sentences, total_words=None, word_count=0, sent_count=0, chunksize=100):
        """
        Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
        Each sentence must be a list of unicode strings.

        """
        logger.info("training model with %i workers on %i sentences and %i features, "
                    "using 'skipgram'=%s 'hierarchical softmax'=%s 'subsample'=%s and 'negative sampling'=%s" %
                    (self.workers, self.sents_len, self.layer1_size, self.sg, self.hs, self.sample, self.negative))

        if not self.vocab:
            raise RuntimeError("you must first build vocabulary before training the model")

        start, next_report = time.time(), [1.0]
        word_count = [word_count]
        sent_count = [sent_count]
        total_words = total_words or sum(v.count * v.sample_probability for v in itervalues(self.vocab))
        total_sents = self.total_sents #it's now different from self.sents_len
        jobs = Queue(maxsize=2 * self.workers)  # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :(
        lock = threading.Lock()  # for shared state (=number of words trained so far, log reports...)

        def worker_train():
            """Train the model, lifting lists of sentences from the jobs queue."""
            work = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)  # each thread must have its own work memory
            neu1 = matutils.zeros_aligned(self.layer1_size + 8, dtype=REAL)

            while True:
                job = jobs.get()
                if job is None:  # data finished, exit
                    break
                # update the learning rate before every job
                if self.update_mode == 0:
                    alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words))
                else:
                    alpha = self.alpha
                job_words = sum(train_sent_vec(self, self.sents[sent_no], sentence, alpha, work, neu1, self.sents_grad[sent_no])
                                for sent_no, sentence in job)
                with lock:
                    word_count[0] += job_words
                    sent_count[0] += chunksize
                    elapsed = time.time() - start
                    if elapsed >= next_report[0]:
                        logger.info("PROGRESS: at %.2f%% sents, alpha %.05f, %.0f words/s" %
                                    (100.0 * sent_count[0] / total_sents, alpha, word_count[0] / elapsed if elapsed else 0.0))
                        next_report[0] = elapsed + 1.0  # don't flood the log, wait at least a second between progress reports

        workers = [threading.Thread(target=worker_train) for _ in xrange(self.workers)]
        for thread in workers:
            thread.daemon = True  # make interrupting the process with ctrl+c easier
            thread.start()

        def prepare_sentences():
            for sent_tuple in sentences:
                sentence = sent_tuple[0]
                sent_id  = sent_tuple[1]
                sent_no = self.sent_no_hash[sent_id]
                sampled = [self.vocab.get(word, None) for word in sentence
                           if word in self.vocab and (self.vocab[word].sample_probability >= 1.0 or self.vocab[word].sample_probability >= random.random_sample())]
                yield (sent_no, sampled)

        # convert input strings to Vocab objects (eliding OOV/downsampled words), and start filling the jobs queue
        for job_no, job in enumerate(utils.grouper(prepare_sentences(), chunksize)):
            logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize()))
            jobs.put(job)
        logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize())
        for _ in xrange(self.workers):
            jobs.put(None)  # give the workers heads up that they can finish -- no more work!

        for thread in workers:
            thread.join()

        elapsed = time.time() - start
        logger.info("training on %i words took %.1fs, %.0f words/s" %
                    (word_count[0], elapsed, word_count[0] / elapsed if elapsed else 0.0))

        return word_count[0]
Developer: nathan2718 | Project: category2vec | Lines: 76 | Source: sent2vec.py



Note: the utils.grouper examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation platforms; the snippets are taken from open-source projects contributed by their respective developers. Copyright of the source code remains with the original authors; please consult each project's License before distributing or using the code, and do not reproduce this article without permission.

