Python tensorflow.string_split Code Examples


This article collects typical usage examples of the Python function tensorflow.string_split. If you are unsure what string_split does, how to call it, or want to see it used in real code, the curated examples below should help.



Twenty code examples of string_split are shown below, sorted by popularity by default. Upvote the examples you find useful; your feedback helps the system surface better Python code examples.
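Before diving into the examples, here is a minimal sketch of the API itself (TensorFlow 1.x): tf.string_split takes a batch of strings and returns a tf.SparseTensor whose values are the tokens, whose indices pair (row, token position), and whose dense_shape is [batch size, max tokens in any row].

import tensorflow as tf  # TF 1.x

# string_split returns a SparseTensor, not a dense tensor
tokens = tf.string_split(["hello world", "tensorflow"], delimiter=" ")

with tf.Session() as sess:
    indices, values, shape = sess.run(tokens)
    print(values)   # [b'hello' b'world' b'tensorflow']
    print(indices)  # [[0 0] [0 1] [1 0]]
    print(shape)    # [2 2]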

Example 1: init

  def init(self):
    # init
    self.global_step = global_step = tf.Variable(0, trainable=False, name='global_step')
    self.learning_rate = learning_rate = tf.train.exponential_decay(1e-2, global_step, 500, 0.95, staircase=True)

    # Load classes
    src_table = tf.contrib.lookup.index_table_from_file('./iwslt15/vocab.en', default_value=0)
    tgt_table = tf.contrib.lookup.index_table_from_file('./iwslt15/vocab.vi', default_value=0)

    #src_table_size = src_table.size()
    #tgt_table_size = tgt_table.size()
    src_table_size = 17191
    tgt_table_size = 7709
    src_eos_id = tf.cast(src_table.lookup(tf.constant('</s>')), tf.int64)
    self.tgt_eos_id = tgt_eos_id = tf.cast(tgt_table.lookup(tf.constant('</s>')), tf.int64)
    self.tgt_sos_id = tgt_sos_id = tf.cast(tgt_table.lookup(tf.constant('<s>')), tf.int64)

    # file placeholder
    src_files = tf.placeholder(tf.string, shape=[None])
    tgt_files = tf.placeholder(tf.string, shape=[None])

    # Read data
    src_dataset = tf.contrib.data.TextLineDataset(src_files)
    tgt_dataset = tf.contrib.data.TextLineDataset(tgt_files)

    # Convert data to word indices
    src_dataset = src_dataset.map(lambda string: tf.concat([['<s>'], tf.string_split([string]).values, ['</s>']], 0))
    src_dataset = src_dataset.map(lambda words: (words, tf.size(words)))
    src_dataset = src_dataset.map(lambda words, size: (src_table.lookup(words), size))

    tgt_dataset = tgt_dataset.map(lambda string: tf.concat([['<s>'], tf.string_split([string]).values, ['</s>']], 0))
    tgt_dataset = tgt_dataset.map(lambda words: (words, tf.size(words)))
    tgt_dataset = tgt_dataset.map(lambda words, size: (tgt_table.lookup(words), size))

    # zip data
    dataset = tf.contrib.data.Dataset.zip((src_dataset, tgt_dataset))

    # batch
    batched_dataset = dataset.padded_batch(self.batch_size,
        padded_shapes=((tf.TensorShape([None]), tf.TensorShape([])),(tf.TensorShape([None]), tf.TensorShape([]))),
        padding_values=((src_eos_id, 0), (tgt_eos_id, 0)))
    batched_iterator = batched_dataset.make_initializable_iterator()
    ((source, source_lengths), (target, target_lengths)) = batched_iterator.get_next()

    self.target = target
    self.target_lengths = target_lengths
    self.source_lengths = source_lengths

    # Load embedding (dic limits to 100000)
    src_embed = tf.Variable(tf.random_normal([100000, self.embed_vector_size], stddev=0.1))
    self.tgt_embed = tgt_embed = tf.Variable(tf.random_normal([100000, self.embed_vector_size], stddev=0.1))

    self.src_lookup = src_lookup = tf.nn.embedding_lookup(src_embed, source)
    self.tgt_lookup = tgt_lookup = tf.nn.embedding_lookup(tgt_embed, target)

    # Projection Layer
    self.projection_layer = projection_layer = layers_core.Dense(tgt_table_size)

    return batched_iterator, src_files, tgt_files
Author: flrngel | Project: understanding-ai | Lines: 59 | Source: model.py
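A hedged usage note, not part of the original project: in TF 1.x, the lookup tables and the initializable iterator created by init must both be initialized before any batch can be fetched. A session setup might look like the sketch below; the model object and the training file names are assumptions.

# Sketch only: `model` and the file names are hypothetical.
batched_iterator, src_files, tgt_files = model.init()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())  # required by index_table_from_file
    sess.run(batched_iterator.initializer,
             feed_dict={src_files: ['./iwslt15/train.en'],
                        tgt_files: ['./iwslt15/train.vi']})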


Example 2: create_char_vectors_from_post

 def create_char_vectors_from_post(self, raw_post, mxlen):
     char2index = self.index
     if self.do_lowercase:
         raw_post = self.lowercase(raw_post)
     raw_post = tf.string_split(tf.reshape(raw_post, [-1]))
     culled_word_token_vals = tf.substr(raw_post.values, 0, self.mxwlen)
     char_tokens = tf.string_split(culled_word_token_vals, delimiter='')
     char_indices = char2index.lookup(char_tokens)
     return self.reshape_indices(char_indices, [mxlen, self.mxwlen])
Author: dpressel | Project: baseline | Lines: 9 | Source: preprocessors.py


Example 3: decode_libsvm

 def decode_libsvm(line):
     columns = tf.string_split([line], ' ')
     labels = tf.string_to_number(columns.values[0], out_type=tf.float32)
     splits = tf.string_split(columns.values[1:], ':')
     id_vals = tf.reshape(splits.values,splits.dense_shape)
     feat_ids, feat_vals = tf.split(id_vals,num_or_size_splits=2,axis=1)
     feat_ids = tf.string_to_number(feat_ids, out_type=tf.int32)
     feat_vals = tf.string_to_number(feat_vals, out_type=tf.float32)
     return {"feat_ids": feat_ids, "feat_vals": feat_vals}, labels
Author: chenxingqiang | Project: ML_CIA | Lines: 9 | Source: NFM.py
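A hedged usage sketch (not from the original NFM project): decode_libsvm is designed to be mapped over a TextLineDataset. The file path is hypothetical, and plain batch assumes every line carries the same number of features; variable-length lines would need padded_batch instead.

# Sketch only: "train.libsvm" is a hypothetical path.
dataset = tf.data.TextLineDataset("train.libsvm")
dataset = dataset.map(decode_libsvm, num_parallel_calls=4)
dataset = dataset.batch(32)  # assumes a fixed feature count per line
features, labels = dataset.make_one_shot_iterator().get_next()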


Example 4: __init__

    def __init__(self, args, txt_file, num_classes, mode, batch_size, num_preprocess_threads=1, shuffle=True,
                 min_queue_examples=1):
        self.args = args
        self.txt_file = txt_file
        self.num_preprocess_threads = num_preprocess_threads
        self.min_queue_examples = min_queue_examples
        self.batch_size = batch_size
        self.mode = mode
        self.imgShape = [self.args.imageHeight, self.args.imageWidth, self.args.imageChannels]
        self.maskShape = tf.stack([self.args.imageHeight, self.args.imageWidth])
        self.num_classes = int(num_classes)

        input_queue = tf.train.string_input_producer([txt_file], shuffle=False)
        line_reader = tf.TextLineReader()
        _, line = line_reader.read(input_queue)
        split_line = tf.string_split([line]).values

        if (mode == 'training' or mode == 'validation'):
            split_line = tf.string_split([line]).values

            rgb_image_path = split_line[0]
            label_image_path = split_line[1]

            self.image_o = self.read_image(rgb_image_path, 0)

            self.label_image_o = self.read_image(label_image_path, 1)

            do_flip = tf.random_uniform([], 0, 1)
            self.image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(self.image_o), lambda: self.image_o)
            self.label_image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(self.label_image_o),
                                       lambda: self.label_image_o)

            self.image.set_shape((self.args.imageHeight, self.args.imageWidth, 3))
            self.label_image.set_shape((self.args.imageHeight, self.args.imageWidth, 1))

            self.img_batch, self.label_batch = tf.train.shuffle_batch([self.image, self.label_image],
                                                                      batch_size=batch_size,
                                                                      num_threads=num_preprocess_threads,
                                                                      capacity=min_queue_examples + 3 * batch_size,
                                                                      min_after_dequeue=min_queue_examples)

        elif (mode == 'test'):
            print('Generating test Image Batch')
            split_line = tf.string_split([line]).values

            rgb_image_path = split_line[0]
            self.image = self.read_image(rgb_image_path, 0)

            self.image.set_shape((self.args.imageHeight, self.args.imageWidth, 3))

            self.img_batch = tf.train.batch([self.image],
                                            batch_size=batch_size,
                                            num_threads=num_preprocess_threads,
                                            capacity=min_queue_examples + 1 * batch_size,
                                            )
Author: ruyi345 | Project: Fully-convolutional-networks-TF | Lines: 55 | Source: dataGenerator.py


Example 5: _parse_line

def _parse_line(line):
    """
    _parse_line
    """
    line_arr = tf.string_split([line], '\t').values
    #print(line_arr[2]) Tensor("strided_slice:0", shape=(), dtype=string)
    user = line_arr[0]
    label = tf.string_to_number(line_arr[1], out_type=tf.int32)
    #print(tf.string_split([line_arr[2]]).values)  Tensor("StringSplit_1:1", shape=(?,), dtype=string)
    features = {}
    features["words"] = tf.string_to_number(tf.string_split([line_arr[2]], ",").values, tf.int32)
    features["id"] = user
    return features, label
Author: UranusZS | Project: machine_learning_study | Lines: 13 | Source: data.py
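A hedged pipeline sketch around _parse_line, assuming a tab-separated file (user, label, comma-joined word ids); "words" is variable-length, so padded_batch pads it per batch.

# Sketch only: "train.txt" is a hypothetical path.
dataset = tf.data.TextLineDataset("train.txt")
dataset = dataset.map(_parse_line)
dataset = dataset.padded_batch(
    32,
    padded_shapes=({"words": [None], "id": []}, []))  # pad only "words"
features, labels = dataset.make_one_shot_iterator().get_next()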


Example 6: _get_labels_builder

  def _get_labels_builder(self, labels_file):
    labels_vocabulary = tf.contrib.lookup.index_table_from_file(
        self.labels_vocabulary_file,
        vocab_size=self.num_labels)

    dataset = tf.data.TextLineDataset(labels_file)
    process_fn = lambda x: {
        "tags": tf.string_split([x]).values,
        "tags_id": labels_vocabulary.lookup(tf.string_split([x]).values)
    }
    padded_shapes_fn = lambda: {
        "tags": [None],
        "tags_id": [None]
    }
    return dataset, process_fn, padded_shapes_fn
Author: yhgon | Project: OpenNMT-tf | Lines: 15 | Source: sequence_tagger.py


Example 7: lowercase

 def lowercase(self, raw_post):
     split_chars = tf.string_split(tf.reshape(raw_post, [-1]), delimiter="").values
     upchar_inds = self.upchars_lut.lookup(split_chars)
     return tf.reduce_join(tf.map_fn(lambda x: tf.cond(x[0] > 25,
                                                       lambda: x[1],
                                                       lambda: self.lchars[x[0]]),
                                     (upchar_inds, split_chars), dtype=tf.string))
Author: dpressel | Project: baseline | Lines: 7 | Source: preprocessors.py


Example 8: get_predict_iterator

def get_predict_iterator(src_vocab_table, vocab_size, batch_size, max_len=max_sequence):
    pred_dataset = tf.contrib.data.TextLineDataset(pred_file)
    pred_dataset = pred_dataset.map(
        lambda src: tf.string_split([src]).values)
    if max_len:
        pred_dataset = pred_dataset.map(lambda src: src[:max_len])

    pred_dataset = pred_dataset.map(
        lambda src: tf.cast(src_vocab_table.lookup(src), tf.int32))

    pred_dataset = pred_dataset.map(lambda src: (src, tf.size(src)))

    def batching_func(x):
        return x.padded_batch(
            batch_size,
            padded_shapes=(tf.TensorShape([None]),  # src
                           tf.TensorShape([])),  # src_len
            padding_values=(vocab_size+1,  # src
                            0))  # src_len -- unused

    batched_dataset = batching_func(pred_dataset)
    batched_iter = batched_dataset.make_initializable_iterator()
    (src_ids, src_seq_len) = batched_iter.get_next()

    # target_input is not needed at prediction time, but returning None raises
    # an error, so an unused placeholder stands in for it.
    WHATEVER = 10
    fake_tag = tf.placeholder(tf.int32, [None, WHATEVER])
    return BatchedInput(
        initializer=batched_iter.initializer,
        source=src_ids,
        target_input=fake_tag,
        source_sequence_length=src_seq_len,
        target_sequence_length=src_seq_len)
Author: luluyouyue | Project: NER | Lines: 33 | Source: utils.py


Example 9: _decode_and_resize

    def _decode_and_resize(image_tensor):
      """Decodes jpeg string, resizes it and returns a uint8 tensor."""

      # These constants are set by Inception v3's expectations.
      height = 299
      width = 299
      channels = 3

      image_tensor = tf.where(tf.equal(image_tensor, ''), IMAGE_DEFAULT_STRING, image_tensor)

      # Fork by whether image_tensor value is a file path, or a base64 encoded string.
      slash_positions = tf.equal(tf.string_split([image_tensor], delimiter="").values, '/')
      is_file_path = tf.cast(tf.count_nonzero(slash_positions), tf.bool)

      # The following two functions are required for tf.cond. Note that we can not replace them
      # with lambda. According to TF docs, if using inline lambda, both branches of condition
      # will be executed. The workaround is to use a function call.
      def _read_file():
        return tf.read_file(image_tensor)

      def _decode_base64():
        return tf.decode_base64(image_tensor)

      image = tf.cond(is_file_path, lambda: _read_file(), lambda: _decode_base64())
      image = tf.image.decode_jpeg(image, channels=channels)
      image = tf.expand_dims(image, 0)
      image = tf.image.resize_bilinear(image, [height, width], align_corners=False)
      image = tf.squeeze(image, squeeze_dims=[0])
      image = tf.cast(image, dtype=tf.uint8)
      return image
Author: googledatalab | Project: pydatalab | Lines: 30 | Source: feature_transforms.py


Example 10: get_test_iterator

def get_test_iterator(src_dataset, src_vocab_table, batch_size, config):
    src_eos_id = tf.cast(src_vocab_table.lookup(tf.constant(config.eos)), tf.int32)
    src_dataset = src_dataset.map(lambda src: tf.string_split([src]).values)

    src_dataset = src_dataset.map(lambda src: src[:config.src_max_len])

    src_dataset = src_dataset.map(
        lambda src: tf.cast(src_vocab_table.lookup(src), tf.int32))

    if config.reverse_src:
        src_dataset = src_dataset.map(lambda src: tf.reverse(src, axis=[0]))

    src_dataset = src_dataset.map(lambda src: (src, tf.size(src)))

    def batching_func(x):
        return x.padded_batch(
            config.batch_size,
            padded_shapes=(tf.TensorShape([None]),
                           tf.TensorShape([])),
            padding_values=(src_eos_id,
                            0))

    batched_dataset = batching_func(src_dataset)
    batched_iter = batched_dataset.make_initializable_iterator()
    src_ids, src_seq_len = batched_iter.get_next()
    return BatchedInput(
        initializer=batched_iter.initializer,
        source=src_ids,
        target_input=None,
        target_output=None,
        source_sequence_length=src_seq_len,
        target_sequence_length=None)
Author: rpryzant | Project: code-doodles | Lines: 32 | Source: input_pipeline.py


Example 11: custom_fast_text

def custom_fast_text(features, labels, mode, params):
    vocab_table = lookup.index_table_from_file(vocabulary_file='data/vocab.csv', num_oov_buckets=1, default_value=-1)
    text = features[commons.FEATURE_COL]
    words = tf.string_split(text)
    dense_words = tf.sparse_tensor_to_dense(words, default_value=commons.PAD_WORD)
    word_ids = vocab_table.lookup(dense_words)

    padding = tf.constant([[0, 0], [0, commons.CNN_MAX_DOCUMENT_LENGTH]])
    # Pad all the word_ids entries to the maximum document length
    word_ids_padded = tf.pad(word_ids, padding)
    word_id_vector = tf.slice(word_ids_padded, [0, 0], [-1, commons.CNN_MAX_DOCUMENT_LENGTH])

    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.keras.backend.set_learning_phase(True)
    else:
        tf.keras.backend.set_learning_phase(False)

    embedded_sequences = tf.keras.layers.Embedding(params.N_WORDS, 20, input_length=commons.CNN_MAX_DOCUMENT_LENGTH)(
        word_id_vector)
    f1 = tf.keras.layers.GlobalMaxPooling1D()(embedded_sequences)
    logits = tf.keras.layers.Dense(commons.TARGET_SIZE, activation=None)(f1)

    predictions = tf.nn.sigmoid(logits)

    if mode == tf.estimator.ModeKeys.PREDICT:
        prediction_dict = {
            'class': tf.cast(tf.map_fn(lambda x: tf.cond(x > 0.30, lambda: 1.0, lambda: 0.0),
                                       tf.squeeze(predictions)), dtype=tf.int32),


        }

        export_outputs = {
            'predictions': tf.estimator.export.PredictOutput(prediction_dict)
        }

        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, export_outputs=export_outputs)

    loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=labels, logits=logits)

    tf.summary.scalar('loss', loss)

    # round probabilities before comparing with integer labels
    acc = tf.equal(tf.cast(tf.round(predictions), dtype=tf.int32), labels)
    acc = tf.reduce_mean(tf.cast(acc, tf.float32))

    tf.summary.scalar('acc', acc)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer()

        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())

        return tf.estimator.EstimatorSpec(mode=mode, train_op=train_op, loss=loss)

    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics_ops = {
            'accuracy': tf.metrics.accuracy(labels=labels, predictions=predictions)
        }
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics_ops)
Author: Aurora11111 | Project: CloudML-Serving | Lines: 59 | Source: custom_model.py


Example 12: testStringSplit

  def testStringSplit(self):
    strings = ["pigs on the wing", "animals"]

    with self.test_session() as sess:
      tokens = tf.string_split(strings)
      indices, values, shape = sess.run(tokens)
      self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]])
      self.assertAllEqual(values, [b"pigs", b"on", b"the", b"wing", b"animals"])
      self.assertAllEqual(shape, [2, 4])
Author: 821760408-sp | Project: tensorflow | Lines: 9 | Source: string_split_op_test.py
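Reading the assertions: dense_shape [2, 4] is [number of input strings, maximum tokens in any single string], and each entry of indices pairs (row, token position), which is why "animals" lands at [1, 0].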


Example 13: sparse_from_csv

def sparse_from_csv(csv):
  ids, post_tags_str = tf.decode_csv(csv, [[-1], [""]])
  table = tf.contrib.lookup.index_table_from_tensor(
      mapping=TAG_SET, default_value=-1)  # build a tag-to-index lookup table
  split_tags = tf.string_split(post_tags_str, "|")
  return tf.SparseTensor(
      indices=split_tags.indices,
      values=table.lookup(split_tags.values),  # map each tag string to its table index
      dense_shape=split_tags.dense_shape)
Author: huyuxiang | Project: tensorflow_practice | Lines: 9 | Source: multi-value-feature.py
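A hedged call sketch: TAG_SET below is a stand-in vocabulary (the original script defines its own), and the lookup table must be initialized with tf.tables_initializer before evaluation.

# Sketch only: TAG_SET here is a placeholder vocabulary.
TAG_SET = ["cat", "dog", "fish"]
csv_lines = tf.constant(["1,cat|dog", "2,fish"])
sparse_tags = sparse_from_csv(csv_lines)

with tf.Session() as sess:
    sess.run(tf.tables_initializer())
    print(sess.run(sparse_tags))  # SparseTensorValue of tag indices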


Example 14: resize_sen

 def resize_sen(self, raw, mxlen):
     """
     Splits and rejoins a string to ensure that tokens meet
     the required max len.
     """
     raw_tokens = tf.string_split(tf.reshape(raw, [-1])).values
     # sentence length > mxlen
     raw_post = tf.reduce_join(raw_tokens[:mxlen], separator=" ")
     return raw_post
Author: dpressel | Project: baseline | Lines: 9 | Source: preprocessors.py


Example 15: testStringSplitEmptyToken

  def testStringSplitEmptyToken(self):
    strings = [" hello ", "", "world "]

    with self.test_session() as sess:
      tokens = tf.string_split(strings)
      indices, values, shape = sess.run(tokens)
      self.assertAllEqual(indices, [[0, 0], [2, 0]])
      self.assertAllEqual(values, [b"hello", b"world"])
      self.assertAllEqual(shape, [3, 1])
Author: 821760408-sp | Project: tensorflow | Lines: 9 | Source: string_split_op_test.py


Example 16: create_word_vectors_from_post

 def create_word_vectors_from_post(self, raw_post, mxlen):
     # vocab has only lowercase words
     word2index = self.index
     if self.do_lowercase:
         raw_post = self.lowercase(raw_post)
     word_tokens = tf.string_split(tf.reshape(raw_post, [-1]))
     word_indices = word2index.lookup(word_tokens)
     # Reshape them out to the proper length
     reshaped_words = tf.sparse_reshape(word_indices, shape=[-1])
     return self.reshape_indices(reshaped_words, [mxlen])
Author: dpressel | Project: baseline | Lines: 10 | Source: preprocessors.py


Example 17: has_no_question_marks

  def has_no_question_marks(line):
    """Returns True if the line of text has no question marks."""
    # split the line into an array of characters
    chars = tf.string_split(line[tf.newaxis], "").values
    # for each character check if it is a question mark
    is_question = tf.equal(chars, "?")
    any_question = tf.reduce_any(is_question)
    no_question = ~any_question

    return no_question
Author: dananjayamahesh | Project: tensorflow | Lines: 10 | Source: imports85.py
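A hedged sketch of how such a predicate is typically used, filtering a line dataset (the data path is an assumption):

# Sketch only: "imports-85.data" is a hypothetical path.
dataset = tf.data.TextLineDataset("imports-85.data")
dataset = dataset.filter(has_no_question_marks)  # drop lines containing '?'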


Example 18: testStringSplitEmptyDelimiter

  def testStringSplitEmptyDelimiter(self):
    strings = ["hello", "hola"]

    with self.test_session() as sess:
      tokens = tf.string_split(strings, delimiter="")
      indices, values, shape = sess.run(tokens)
      self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4],
                                    [1, 0], [1, 1], [1, 2], [1, 3]])
      self.assertAllEqual(values, [b"h", b"e", b"l", b"l", b"o", b"h", b"o",
                                   b"l", b"a"])
      self.assertAllEqual(shape, [2, 5])
Author: 821760408-sp | Project: tensorflow | Lines: 11 | Source: string_split_op_test.py


Example 19: _create_word_vectors_from_post_mixed_case

    def _create_word_vectors_from_post_mixed_case(self, nraw_post, mxlen):
        # vocab has only lowercase words
        word_tokens = tf.string_split(tf.reshape(nraw_post, [-1]))

        word_indices = self.word2index.lookup(word_tokens)

        # Reshape them out to the proper length
        reshaped_words = tf.sparse_reshape(word_indices, shape=[-1])
        x = self._reshape_indices(reshaped_words, [mxlen])

        return x
Author: dpressel | Project: baseline | Lines: 11 | Source: exporter_elmo.py


Example 20: testStringSplitWithDelimiter

  def testStringSplitWithDelimiter(self):
    strings = ["hello|world", "hello world"]

    with self.test_session() as sess:
      self.assertRaises(
          ValueError, tf.string_split, strings, delimiter=["|", ""])

      self.assertRaises(ValueError, tf.string_split, strings, delimiter=["a"])

      tokens = tf.string_split(strings, delimiter="|")
      indices, values, shape = sess.run(tokens)
      self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0]])
      self.assertAllEqual(values, [b"hello", b"world", b"hello world"])
      self.assertAllEqual(shape, [2, 2])

      tokens = tf.string_split(strings, delimiter="| ")
      indices, values, shape = sess.run(tokens)
      self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0], [1, 1]])
      self.assertAllEqual(values, [b"hello", b"world", b"hello", b"world"])
      self.assertAllEqual(shape, [2, 2])
Author: BloodD | Project: tensorflow | Lines: 20 | Source: string_split_op_test.py
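Note the delimiter semantics exercised by the last case: every character of the delimiter string acts as a separate delimiter, so delimiter="| " splits on both '|' and ' ', and both inputs tokenize to ["hello", "world"].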



Note: the tensorflow.string_split examples above were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are taken from open-source projects and copyright remains with their original authors; consult each project's license before redistributing or reusing the code, and do not repost without permission.

