• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python string_ops.unicode_transcode函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中tensorflow.python.ops.string_ops.unicode_transcode函数的典型用法代码示例。如果您正苦于以下问题:Python unicode_transcode函数的具体用法?Python unicode_transcode怎么用?Python unicode_transcode使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了unicode_transcode函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_transcode_utf8_simple

  def test_transcode_utf8_simple(self):
    """ASCII-only byte strings come out of a transcode to UTF-8 unchanged.

    The same 2x2 input is declared as UTF-8, ISO-8859-1 and US-ASCII in turn;
    each transcoded result is compared against the untouched input.
    """
    strings = [[b"a", b"abc"], [b"ABC", b"DEF"]]

    with self.cached_session():
      for source_encoding in ("UTF-8", "ISO-8859-1", "US-ASCII"):
        transcoded = string_ops.unicode_transcode(
            strings,
            input_encoding=source_encoding,
            output_encoding="UTF-8",
            errors="replace",
            replacement_char=ord(" "),
            replace_control_characters=False)
        self.assertAllEqual(self.evaluate(transcoded), strings)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:33,代码来源:unicode_transcode_op_test.py


示例2: test_invalid_encoding_causes_errors

  def test_invalid_encoding_causes_errors(self):
    """An unknown encoding name is rejected for both input and output.

    The two cases are expected to fail differently: a bad input encoding is
    expected to raise an op error when the tensor is evaluated, while a bad
    output encoding is expected to raise ValueError.
    """
    strings = [[b"a", b"abc"], [b"ABC", b"DEF"]]
    shared_kwargs = dict(
        errors="replace",
        replacement_char=ord(" "),
        replace_control_characters=False)

    with self.cached_session():
      bad_input = string_ops.unicode_transcode(
          strings,
          input_encoding="invalid",
          output_encoding="UTF-8",
          **shared_kwargs)
      expected_message = "Could not create converter for input encoding: invalid"
      with self.assertRaisesOpError(expected_message):
        self.evaluate(bad_input)

    # NOTE(review): assertRaisesRegexp is a deprecated unittest alias of
    # assertRaisesRegex -- confirm which spelling the supported Python
    # versions of this test suite require before changing it.
    with self.assertRaisesRegexp(ValueError, "Op passed string 'invalid'"):
      with self.cached_session():
        bad_output = string_ops.unicode_transcode(
            strings,
            input_encoding="UTF-8",
            output_encoding="invalid",
            **shared_kwargs)
        self.evaluate(bad_output)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:25,代码来源:unicode_transcode_op_test.py


示例3: test_cjk_encodings

  def test_cjk_encodings(self):
    """Legacy CJK encodings transcode to the UTF-8 bytes Python produces.

    Expected values are built with Python's own codecs (decode from the
    legacy encoding, re-encode as UTF-8). The four transcode ops are fetched
    together in a single `sess.run` call.
    """
    strings_ja = [
        b"\x5c\x5c",  # Yen sign
        b"\x8f\x70",  # kanji character "waza"
        b"\x83\x4f",  # katakana character "gu"
    ]
    strings_zh_cn = [b"\xca\xf5"]  # simplified "shu4"
    strings_zh_tw = [b"\xb3\x4e"]  # traditional "shu4"
    strings_ko = [b"\xc7\xd1\xb9\xce"]  # hangul "hanmin"

    def python_utf8(byte_strings, codec):
      # Reference conversion done by Python's codecs.
      return [raw.decode(codec).encode("UTF-8") for raw in byte_strings]

    def transcode_op(byte_strings, codec):
      # Builds the op under test with the settings shared by all four cases.
      return string_ops.unicode_transcode(
          byte_strings,
          input_encoding=codec,
          output_encoding="UTF-8",
          replacement_char=ord(" "),
          replace_control_characters=False)

    expected_ja = python_utf8(strings_ja, "shift_jis")
    expected_zh_cn = python_utf8(strings_zh_cn, "gb18030")
    expected_zh_tw = python_utf8(strings_zh_tw, "big5")
    expected_ko = python_utf8(strings_ko, "euc_kr")

    with self.cached_session() as sess:
      fetches = [
          transcode_op(strings_ja, "shift_jis"),
          transcode_op(strings_zh_cn, "gb18030"),
          transcode_op(strings_zh_tw, "big5"),
          transcode_op(strings_ko, "euc_kr"),
      ]
      result_ja, result_zh_cn, result_zh_tw, result_ko = sess.run(fetches)

      self.assertAllEqual(result_ja, expected_ja)
      self.assertAllEqual(result_zh_cn, expected_zh_cn)
      self.assertAllEqual(result_zh_tw, expected_zh_tw)
      self.assertAllEqual(result_ko, expected_ko)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:53,代码来源:unicode_transcode_op_test.py


示例4: test_transcode_utf8_with_bom

  def test_transcode_utf8_with_bom(self):
    """A leading UTF-8 BOM is kept when transcoding to UTF-8 or UTF-16-BE."""
    bom_string = b"\xef\xbb\xbfabcdefg"
    with self.cached_session():
      same_encoding = string_ops.unicode_transcode(
          bom_string, input_encoding="UTF-8", output_encoding="UTF-8")
      utf8_values = self.evaluate(same_encoding)
      # BOM preserved
      self.assertAllEqual(utf8_values, b"\xef\xbb\xbfabcdefg")

      as_utf16 = string_ops.unicode_transcode(
          bom_string, input_encoding="UTF-8", output_encoding="UTF-16-BE")
      utf16_values = self.evaluate(as_utf16)
      # Reference value: the same conversion done with Python's codecs.
      utf16expected = bom_string.decode("UTF-8").encode("UTF-16-BE")
      self.assertAllEqual(utf16_values, utf16expected)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:13,代码来源:unicode_transcode_op_test.py


示例5: test_transcode_utf8_with_replacement_char

  def test_transcode_utf8_with_replacement_char(self):
    """Input that already contains U+FFFD passes through unchanged.

    Checked under errors="strict" and under errors="replace" with "?" as the
    replacement character; in both cases the output must equal the input.
    """
    strings = [b"a\xef\xbf\xbd"]
    error_policies = (
        {"errors": "strict"},
        {"errors": "replace", "replacement_char": ord("?")},
    )
    with self.cached_session():
      for policy_kwargs in error_policies:
        transcoded = string_ops.unicode_transcode(
            strings, input_encoding="UTF-8", output_encoding="UTF-8",
            **policy_kwargs)
        self.assertAllEqual(self.evaluate(transcoded), [b"a\xef\xbf\xbd"])
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:14,代码来源:unicode_transcode_op_test.py


示例6: test_transcode_bad_utf8_termination_with_defaults

 def test_transcode_bad_utf8_termination_with_defaults(self):
   """An incomplete UTF-8 sequence at end of input becomes U+FFFD by default."""
   bad_string = b"a\xf0"
   with self.cached_session():
     transcoded = string_ops.unicode_transcode(
         bad_string, input_encoding="UTF-8", output_encoding="UTF-8")
     result = self.evaluate(transcoded)
     # The trailing three bytes are the UTF-8 encoding of 0xFFFD.
     self.assertAllEqual(result, b"a\xef\xbf\xbd")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:7,代码来源:unicode_transcode_op_test.py


示例7: test_transcode_bad_utf8_with_defaults

 def test_transcode_bad_utf8_with_defaults(self):
   """With default arguments an invalid byte is replaced by U+FFFD.

   The NUL byte in front of the invalid byte is expected to be kept as is.
   """
   bad_string = b"\x00\xff"
   with self.cached_session():
     outputs = string_ops.unicode_transcode(
         bad_string, input_encoding="UTF-8", output_encoding="UTF-8")
     # Use self.evaluate rather than sess.run, like the other tests in this
     # file, so the test does not depend on holding a session handle.
     values = self.evaluate(outputs)
     self.assertAllEqual(values, b"\x00\xef\xbf\xbd")
开发者ID:abhinav-upadhyay,项目名称:tensorflow,代码行数:7,代码来源:unicode_transcode_op_test.py


示例8: test_transcode_bad_utf8_with_space_replacement

 def test_transcode_bad_utf8_with_space_replacement(self):
   """An invalid byte is replaced by the requested replacement char (space)."""
   bad_string = b"\x00\xff"
   with self.cached_session():
     transcoded = string_ops.unicode_transcode(
         bad_string,
         input_encoding="UTF-8",
         output_encoding="UTF-8",
         replacement_char=ord(" "))
     result = self.evaluate(transcoded)
     # The leading NUL byte is expected to be left untouched.
     self.assertAllEqual(result, b"\x00 ")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:8,代码来源:unicode_transcode_op_test.py


示例9: test_transcode_bad_utf8_with_elision_of_malformatting

 def test_transcode_bad_utf8_with_elision_of_malformatting(self):
   """With errors="ignore" the invalid byte is dropped from the output."""
   bad_string = b"\x00\xff"
   with self.cached_session():
     transcoded = string_ops.unicode_transcode(
         bad_string, input_encoding="UTF-8", output_encoding="UTF-8",
         errors="ignore")
     result = self.evaluate(transcoded)
     # Only the valid leading NUL byte is expected to remain.
     self.assertAllEqual(result, b"\x00")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:10,代码来源:unicode_transcode_op_test.py


示例10: test_transcode_bad_utf8_with_elision_including_control_chars

 def test_transcode_bad_utf8_with_elision_including_control_chars(self):
   """errors="ignore" plus replace_control_characters=True empties the input.

   Both bytes of the input (a NUL and an invalid byte) are expected to be
   dropped, leaving an empty string.
   """
   bad_string = b"\x00\xff"
   with self.cached_session():
     transcoded = string_ops.unicode_transcode(
         bad_string, input_encoding="UTF-8", output_encoding="UTF-8",
         errors="ignore", replace_control_characters=True)
     result = self.evaluate(transcoded)
     self.assertAllEqual(result, b"")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:11,代码来源:unicode_transcode_op_test.py


示例11: test_transcode_bad_utf8_start_with_strict_errors

 def test_transcode_bad_utf8_start_with_strict_errors(self):
   """With errors="strict" an invalid leading byte raises an op error."""
   bad_string = b"\xffabcd"
   with self.cached_session():
     strict_output = string_ops.unicode_transcode(
         bad_string, input_encoding="UTF-8", output_encoding="UTF-8",
         errors="strict")
     expected_message = "Invalid formatting on input string"
     # The error is expected when the tensor is evaluated.
     with self.assertRaisesOpError(expected_message):
       self.evaluate(strict_output)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:11,代码来源:unicode_transcode_op_test.py


示例12: test_transcode_utf16_le_be_with_bom

  def test_transcode_utf16_le_be_with_bom(self):
    """UTF-16 BOMs are carried into UTF-8 output for both byte orders.

    Also covers reading big-endian data as little-endian, where the output is
    expected to be the byte-swapped (wrong) code points rather than an error.
    """
    cases = (
        # Big-endian BOM with 'a' encoded; BOM is preserved in output.
        (b"\xfe\xff\x00\x61", "UTF-16-BE", b"\xef\xbb\xbfa"),
        # Same bytes read as little-endian: mangled BOM and value from the
        # (incorrect) LE encoding.
        (b"\xfe\xff\x00\x61", "UTF-16-LE", b"\xef\xbf\xbe\xe6\x84\x80"),
        # Little-endian BOM with 'a' encoded.
        (b"\xff\xfe\x61\x00", "UTF-16-LE", b"\xef\xbb\xbfa"),
    )
    with self.cached_session():
      for bom_string, source_encoding, expected in cases:
        transcoded = string_ops.unicode_transcode(
            bom_string,
            input_encoding=source_encoding,
            output_encoding="UTF-8")
        self.assertAllEqual(self.evaluate(transcoded), expected)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:20,代码来源:unicode_transcode_op_test.py


示例13: test_transcode_ascii_with_shift_chars

 def test_transcode_ascii_with_shift_chars(self):
   """Bytes 0x0e and 0x0f in US-ASCII input reach the UTF-8 output unchanged.

   Control-character replacement is switched off for this check.
   """
   strings = [b"\x0e\x0e", b"\x0f\x0f"]
   with self.cached_session():
     transcoded = string_ops.unicode_transcode(
         strings,
         input_encoding="US-ASCII",
         output_encoding="UTF-8",
         replacement_char=ord(" "),
         replace_control_characters=False)
     self.assertAllEqual(self.evaluate(transcoded), strings)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:11,代码来源:unicode_transcode_op_test.py


示例14: test_transcode_bad_utf8_with_some_good

 def test_transcode_bad_utf8_with_some_good(self):
   """One invalid byte inside valid text is replaced; the rest is kept."""
   bad_string = b"abc\xffabcdefg"
   transcode_kwargs = dict(
       input_encoding="UTF-8",
       output_encoding="UTF-8",
       errors="replace",
       replacement_char=ord(" "),
       replace_control_characters=False)
   with self.cached_session():
     transcoded = string_ops.unicode_transcode(bad_string, **transcode_kwargs)
     result = self.evaluate(transcoded)
     # The invalid byte is expected to turn into a single space.
     self.assertAllEqual(result, b"abc abcdefg")
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:12,代码来源:unicode_transcode_op_test.py


示例15: test_transcode_utf8_to_utf32

 def test_transcode_utf8_to_utf32(self):
   """UTF-8 input converts to the UTF-32-BE bytes Python's codecs produce.

   The inputs include a three-byte and a four-byte UTF-8 sequence.
   """
   strings = [b"ab\xe2\x82\xac", b"\xf0\x90\x90\xb7"]
   expected = []
   for utf8_bytes in strings:
     expected.append(utf8_bytes.decode("UTF-8").encode("UTF-32-BE"))
   with self.cached_session():
     transcoded = string_ops.unicode_transcode(
         strings,
         input_encoding="UTF-8",
         output_encoding="UTF-32-BE",
         replacement_char=ord(" "),
         replace_control_characters=False)
     self.assertAllEqual(self.evaluate(transcoded), expected)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:12,代码来源:unicode_transcode_op_test.py


示例16: test_forwarding

  def test_forwarding(self):
    """Transcoding the output of another op yields the expected strings.

    The transcode input is a `substr` result that nothing else consumes.
    """
    with self.cached_session():
      source = constant_op.constant([b"AbCdEfG", b"HiJkLmN"], dtypes.string)
      # Generate an input that is uniquely consumed by the transcode op.
      # This exercises code paths which are optimized for this case
      # (e.g., using forwarding).
      inp = string_ops.substr(source, pos=0, len=5)
      transcoded = string_ops.unicode_transcode(
          inp, input_encoding="UTF-8", output_encoding="UTF-8")

      # The tensor itself is handed to assertAllEqual here, not an
      # evaluated value.
      self.assertAllEqual([b"AbCdE", b"HiJkL"], transcoded)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:13,代码来源:unicode_transcode_op_test.py


示例17: test_transcode_bad_utf8

  def test_transcode_bad_utf8(self):
    """errors="replace" substitutes a space for the invalid byte.

    Run twice: with replace_control_characters=True the leading NUL byte is
    expected to be replaced as well; with False it is expected to be kept.
    """
    bad_string = b"\x00\xff"
    # (replace_control_characters, expected output)
    cases = (
        (True, b"  "),
        (False, b"\x00 "),
    )
    with self.cached_session():
      for replace_control, expected in cases:
        outputs = string_ops.unicode_transcode(
            bad_string,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="replace",
            replacement_char=ord(" "),
            replace_control_characters=replace_control)
        # Use self.evaluate rather than sess.run, like the other tests in
        # this file, so the test does not depend on holding a session handle.
        values = self.evaluate(outputs)
        self.assertAllEqual(values, expected)
开发者ID:abhinav-upadhyay,项目名称:tensorflow,代码行数:22,代码来源:unicode_transcode_op_test.py


示例18: test_transcode_utf16_to_utf8

  def test_transcode_utf16_to_utf8(self):
    """UTF-16 input converts to the UTF-8 bytes Python's codecs produce.

    The op is given input_encoding="UTF-16" (no byte-order suffix) while the
    expected values are computed by decoding the same bytes as UTF-16-BE.
    NOTE(review): presumably the op reads BOM-less UTF-16 as big-endian --
    confirm against the op's documentation.
    """
    strings = [b"\x00a\x00b\x20\xAC", b"\xD8\x01\xDC\x37"]  # U+10437
    expected = []
    for utf16_bytes in strings:
      expected.append(utf16_bytes.decode("UTF-16-BE").encode("UTF-8"))

    with self.cached_session():
      transcoded = string_ops.unicode_transcode(
          strings,
          input_encoding="UTF-16",
          output_encoding="UTF-8",
          errors="replace",
          replacement_char=ord(" "),
          replace_control_characters=False)
      self.assertAllEqual(self.evaluate(transcoded), expected)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:14,代码来源:unicode_transcode_op_test.py


示例19: test_invalid_error_policy_causes_errors

  def test_invalid_error_policy_causes_errors(self):
    """An unknown `errors` policy is expected to raise ValueError."""
    strings = [[b"a", b"abc"], [b"ABC", b"DEF"]]
    expected_message = "'invalid' not in: \"strict\", \"replace\", \"ignore\"."

    # NOTE(review): assertRaisesRegexp is a deprecated unittest alias of
    # assertRaisesRegex -- confirm which spelling the supported Python
    # versions of this test suite require before changing it.
    with self.assertRaisesRegexp(ValueError, expected_message):
      with self.cached_session():
        transcoded = string_ops.unicode_transcode(
            strings,
            input_encoding="UTF-8",
            output_encoding="UTF-8",
            errors="invalid",
            replacement_char=ord(" "),
            replace_control_characters=False)
        self.evaluate(transcoded)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:14,代码来源:unicode_transcode_op_test.py


示例20: test_bom_handling

 def test_bom_handling(self, string, input_encoding, expected):
   """Transcodes `string` to UTF-8 and compares it with `expected`.

   Args:
     string: input passed to `unicode_transcode` as the value to convert.
     input_encoding: encoding name the input is declared to be in.
     expected: value the evaluated UTF-8 output must equal.

   NOTE(review): the arguments are presumably supplied by a parameterized
   test decorator that is not visible in this snippet -- confirm.
   """
   # cached_session() and self.evaluate replace the deprecated
   # test_session() and Tensor.eval(), matching the other tests in this file.
   with self.cached_session():
     output = string_ops.unicode_transcode(
         string, input_encoding=input_encoding, output_encoding="UTF-8")
     self.assertAllEqual(self.evaluate(output), expected)
开发者ID:Wajih-O,项目名称:tensorflow,代码行数:5,代码来源:unicode_transcode_op_test.py



注:本文中的tensorflow.python.ops.string_ops.unicode_transcode函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python summary_op_util.collect函数代码示例发布时间:2022-05-27
下一篇:
Python string_ops.substr函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap