• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python smart_open.smart_open函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中smart_open.smart_open函数的典型用法代码示例。如果您正苦于以下问题:Python smart_open函数的具体用法?Python smart_open怎么用?Python smart_open使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了smart_open函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: get_lines

def get_lines(glove_file_name):
    """Return the number of vectors and dimensions in a file in GloVe format.

    The file is opened and scanned exactly once; the first line is inspected
    for the dimensionality while every line is counted.

    Parameters
    ----------
    glove_file_name : str
        Location of the GloVe file, passed unchanged to `smart_open.smart_open`.

    Returns
    -------
    (int, int)
        `(num_lines, num_dims)`, where `num_dims` is the number of
        whitespace-separated tokens on the first line minus one.
        An empty file yields `(0, -1)`.

    """
    num_lines = 0
    # Value produced for an empty file: zero tokens on the first line, minus one.
    num_dims = -1
    with smart_open.smart_open(glove_file_name, 'r') as f:
        for num_lines, line in enumerate(f, start=1):
            if num_lines == 1:
                num_dims = len(line.split()) - 1
    return num_lines, num_dims
开发者ID:jroakes,项目名称:glove-to-word2vec,代码行数:7,代码来源:convert.py


示例2: testConversion

    def testConversion(self):
        """Check that the files written by word2vec2tensor agree with the source model.

        The row counts of the metadata and tensor files must equal the word
        count in the word2vec header, and each written vector must match the
        vector stored under the same word in the loaded KeyedVectors model.
        """
        word2vec2tensor(word2vec_model_path=self.datapath, tensor_filename=self.output_folder)

        with smart_open(self.metadata_file, 'rb') as fin:
            metadata = fin.readlines()

        with smart_open(self.tensor_file, 'rb') as fin:
            vectors = fin.readlines()

        # the first line of the word2vec file is parsed as two integers
        with smart_open(self.datapath, 'rb') as fin:
            header = fin.readline().strip()

        number_words, vector_size = (int(field) for field in header.split(b' '))
        row_counts_agree = len(metadata) == len(vectors) == number_words
        mismatch_message = (
            'Metadata file %s and tensor file %s imply different number of rows.'
            % (self.metadata_file, self.tensor_file)
        )
        self.assertTrue(row_counts_agree, mismatch_message)

        # normalise what was read back from the written files
        words = [raw_word.strip() for raw_word in metadata]
        rows = [raw_row.replace(b'\t', b' ') for raw_row in vectors]

        # load the original vectors as a KeyedVectors model
        orig_model = KeyedVectors.load_word2vec_format(self.datapath, binary=False)

        # compare word by word: model vector against the written row
        for word_bytes, row_bytes in zip(words, rows):
            key = word_bytes.decode("utf8")
            row_text = row_bytes.decode("utf8")
            written_vector = np.array([float(value) for value in row_text.split()])
            np.testing.assert_almost_equal(orig_model[key], written_vector, decimal=5)


示例3: word2vec2tensor

def word2vec2tensor(word2vec_model_path, tensor_filename, binary=False):
    """Convert a file in Word2Vec format into two TSV files.

    `tensor_filename` + '_tensor.tsv' receives one tab-separated vector per
    line; `tensor_filename` + '_metadata.tsv' receives the matching word per
    line, in the same order.

    Parameters
    ----------
    word2vec_model_path : str
        Path to file in Word2Vec format.
    tensor_filename : str
        Prefix for output files.
    binary : bool, optional
        True if input file in binary format.

    """
    model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_model_path, binary=binary)
    vectors_path = tensor_filename + '_tensor.tsv'
    metadata_path = tensor_filename + '_metadata.tsv'

    newline = gensim.utils.to_utf8('\n')
    with smart_open(vectors_path, 'wb') as file_vector:
        with smart_open(metadata_path, 'wb') as file_metadata:
            for word in model.index2word:
                # one word per metadata line ...
                file_metadata.write(gensim.utils.to_utf8(word) + newline)
                # ... and its vector, tab-separated, on the matching tensor line
                vector_row = '\t'.join(map(str, model[word]))
                file_vector.write(gensim.utils.to_utf8(vector_row) + newline)

    logger.info("2D tensor file saved to %s", vectors_path)
    logger.info("Tensor metadata file saved to %s", metadata_path)


示例4: test_s3_iter_moto

    def test_s3_iter_moto(self):
        """Are S3 files iterated over correctly?"""
        # the lines to round-trip: one huge line, many small ones, one final line
        huge_line = b"*" * 5 * 1024**2
        small_lines = [b'0123456789'] * 1024
        final_line = b"test"
        expected = [huge_line] + small_lines + [final_line]

        # the bucket has to exist before the key can be written
        s3 = boto3.resource('s3')
        s3.create_bucket(Bucket='mybucket')

        with smart_open.smart_open("s3://mybucket/mykey", "wb", s3_min_part_size=5 * 1024**2) as fout:
            # the huge line comes first (=full multipart upload)
            fout.write(huge_line + b'\n')

            # then all the small lines
            for small_line in small_lines:
                fout.write(small_line + b'\n')

            # the final line deliberately has no trailing newline
            fout.write(final_line)

        # read the key back by iterating over a plain reader object
        reader = smart_open.smart_open("s3://mybucket/mykey")
        output = [line.rstrip(b'\n') for line in reader]
        self.assertEqual(output, expected)

        # and again with the reader used as a context manager
        with smart_open.smart_open("s3://mybucket/mykey") as reader:
            output = [line.rstrip(b'\n') for line in reader]
            self.assertEqual(output, expected)


示例5: test_s3_boto

    def test_s3_boto(self, mock_s3_open_read, mock_boto):
        """Is S3 line iterator called correctly?

        Opens several ``s3://`` URIs and checks the keyword arguments that
        reach ``mock_boto.connect_s3`` in each case: no credentials,
        credentials embedded in the URI, a named credential profile, and a
        user-specified host. It also checks the bucket and key that are
        looked up.
        """
        # Configure the mock boto.config.get to return default host
        smart_open.smart_open_lib.boto.config.get.return_value = 's3.amazonaws.com'

        # no credentials
        smart_open_object = smart_open.smart_open("s3://mybucket/mykey")
        smart_open_object.__iter__()
        mock_boto.connect_s3.assert_called_with(aws_access_key_id=None, aws_secret_access_key=None, profile_name=None, host='s3.amazonaws.com')

        # with credentials embedded in the URI as access_id:access_secret@bucket;
        # the URI must carry exactly the values asserted below
        smart_open_object = smart_open.smart_open("s3://access_id:access_secret@mybucket/mykey")
        smart_open_object.__iter__()
        mock_boto.connect_s3.assert_called_with(aws_access_key_id="access_id", aws_secret_access_key="access_secret", profile_name=None, host='s3.amazonaws.com')

        # with credential profile
        smart_open_object = smart_open.smart_open("s3://mybucket/mykey", profile_name="my_credentials")
        smart_open_object.__iter__()
        mock_boto.connect_s3.assert_called_with(aws_access_key_id=None, aws_secret_access_key=None, profile_name="my_credentials", host='s3.amazonaws.com')

        # lookup bucket, key; call s3_iter_lines
        smart_open_object = smart_open.smart_open("s3://access_id:access_secret@mybucket/mykey")
        smart_open_object.__iter__()
        mock_boto.connect_s3().get_bucket.assert_called_with("mybucket")
        mock_boto.connect_s3().get_bucket().get_key.assert_called_with("mykey")
        #
        # TODO: this is kind of a useless assertion...
        #
        self.assertTrue(smart_open_object.__iter__.called)

        # with user-specified host
        smart_open_object = smart_open.smart_open("s3://access_id:access_secret@mybucket/mykey", host='aa.domain.com')
        smart_open_object.__iter__()
        mock_boto.connect_s3.assert_called_with(aws_access_key_id="access_id", aws_secret_access_key="access_secret", profile_name=None, host='aa.domain.com')


示例6: test_http_bz2

    def test_http_bz2(self):
        """Can open bz2 via http?"""
        payload = b'Hello World Compressed.'
        #
        # TODO: why are these tests writing to temporary files?  We can do the
        # bz2 compression in memory.
        #
        # reserve a temporary ``.bz2`` path; only its name is needed here
        with tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False) as tmp:
            bz2_path = tmp.name

        # write the payload through smart_open (presumably bz2-compressed
        # because of the suffix -- confirm against smart_open's codec handling)
        with smart_open.smart_open(bz2_path, 'wb') as writer:
            writer.write(payload)

        # fetch the raw on-disk bytes to serve as the HTTP response body
        with open(bz2_path, 'rb') as raw:
            served_body = raw.read()

        if os.path.isfile(bz2_path):
            os.unlink(bz2_path)

        url = "http://127.0.0.1/data.bz2"
        responses.add(responses.GET, url,
                      body=served_body, stream=True)
        reader = smart_open.smart_open(url)

        # reading over HTTP must give back the original payload
        self.assertEqual(reader.read(), payload)


示例7: test_s3_iter_moto

    def test_s3_iter_moto(self):
        """Are S3 files iterated over correctly?"""
        # the lines to round-trip: one huge line, many small ones, one final line
        huge_line = b"*" * 5 * 1024**2
        small_lines = [b'0123456789'] * 1024
        final_line = b"test"
        expected = [huge_line] + small_lines + [final_line]

        # the bucket has to exist before the key can be written
        connection = boto.connect_s3()
        connection.create_bucket("mybucket")
        # lower the multipart upload size, to speed up these tests
        # NOTE(review): this module-level override is not restored by this test --
        # confirm no other test depends on the default value
        smart_open_lib.S3_MIN_PART_SIZE = 5 * 1024**2
        with smart_open.smart_open("s3://mybucket/mykey", "wb") as fout:
            # the huge line comes first (=full multipart upload)
            fout.write(huge_line + b'\n')

            # then all the small lines
            for small_line in small_lines:
                fout.write(small_line + b'\n')

            # the final line deliberately has no trailing newline
            fout.write(final_line)

        # read the key back by iterating over a plain reader object
        reader = smart_open.smart_open("s3://mybucket/mykey")
        output = [line.rstrip(b'\n') for line in reader]
        self.assertEqual(output, expected)

        # and again with the reader used as a context manager
        with smart_open.smart_open("s3://mybucket/mykey") as reader:
            output = [line.rstrip(b'\n') for line in reader]
            self.assertEqual(output, expected)


示例8: test_file

    def test_file(self, mock_smart_open):
        """Is file:// line iterator called correctly?

        Each case opens a URI and checks that ``mock_smart_open`` was last
        called with the expected local path, the read mode and
        ``buffering=-1``.
        """
        prefix = "file://"
        read_mode = "rb"
        # (URI handed to smart_open, path expected to reach the mock)
        cases = [
            (prefix + '/tmp/test.txt', '/tmp/test.txt'),
            (prefix + '/tmp/test#hash##more.txt', '/tmp/test#hash##more.txt'),
            ('aa#aa', 'aa#aa'),
        ]
        for uri, expected_path in cases:
            reader = smart_open.smart_open(uri, read_mode)
            reader.__iter__()
            # called with the correct path?
            mock_smart_open.assert_called_with(expected_path, read_mode, buffering=-1)

        # NOTE(review): these values are computed but never used in the visible
        # snippet -- the example looks truncated here
        short_path = "~/tmp/test.txt"
        full_path = os.path.expanduser(short_path)


示例9: test_s3_metadata_write

    def test_s3_metadata_write(self):
        """Write a local fixture to S3 with ``s3_upload`` options and check the stored metadata."""
        # load the fixture content from disk
        fixture_path = os.path.join(CURR_DIR, 'test_data/crime-and-punishment.txt.gz')
        with smart_open.smart_open(fixture_path, 'rb') as fixture:
            payload = fixture.read()

        # the bucket has to exist before writing
        s3 = boto3.resource('s3')
        s3.create_bucket(Bucket='mybucket')

        # upload, passing the content type and encoding via s3_upload
        upload_options = {
            'ContentType': 'text/plain',
            'ContentEncoding': 'gzip'
        }
        with smart_open.smart_open(
            's3://mybucket/crime-and-punishment.txt.gz', 'wb',
            s3_upload=upload_options
        ) as fout:
            fout.write(payload)

        # the stored object must carry the requested metadata
        stored = s3.Object('mybucket', 'crime-and-punishment.txt.gz')
        self.assertIn('text/plain', stored.content_type)
        self.assertEqual(stored.content_encoding, 'gzip')


示例10: test_s3_mode_mock

    def test_s3_mode_mock(self, mock_session):
        """Are s3:// open modes passed correctly?

        Opening an s3 URI for writing with an explicit ``host`` must request
        the 's3' resource with the matching ``endpoint_url``.
        """
        smart_open.smart_open("s3://mybucket/mykey", "w", host='s3.amazonaws.com')

        resource_factory = mock_session.return_value.resource
        resource_factory.assert_called_with('s3', endpoint_url='http://s3.amazonaws.com')


示例11: test_s3_mode_mock

 def test_s3_mode_mock(self, mock_write, mock_boto):
     """Are s3:// open modes passed correctly?"""
     # open a valid s3 URI in write mode
     smart_open.smart_open("s3://mybucket/mykey", "w")

     # the connection must have been requested without credentials
     mock_boto.connect_s3.assert_called_with(aws_access_key_id=None, aws_secret_access_key=None)

     connection = mock_boto.connect_s3()
     connection.lookup.return_value = True
     connection.get_bucket.assert_called_with("mybucket")

     # the mocked writer must have been used
     self.assertTrue(mock_write.called)


示例12: test_session_write_mode

    def test_session_write_mode(self):
        """Write stream should use a custom boto3.Session."""
        custom_session = boto3.Session()
        resource_mock = mock.MagicMock()
        custom_session.resource = resource_mock

        smart_open.smart_open('s3://bucket/key', 'wb', s3_session=custom_session)

        # the supplied session must have been asked for the 's3' resource
        resource_mock.assert_called_with('s3')


示例13: write_read_assertion

    def write_read_assertion(self, test_file):
        """Write ``self.TEXT`` to `test_file`, read it back and assert it round-trips.

        Parameters
        ----------
        test_file : str
            Location to write to and read from via `smart_open.smart_open`.

        The file is removed afterwards if it exists on the local filesystem,
        including when the write, the read or the assertion fails, so a
        failing run does not leave the file behind.
        """
        try:
            with smart_open.smart_open(test_file, 'wb') as fout:  # 'b' for binary, needed on Windows
                fout.write(self.TEXT.encode('utf8'))

            with smart_open.smart_open(test_file, 'rb') as fin:
                self.assertEqual(fin.read().decode('utf8'), self.TEXT)
        finally:
            # clean up regardless of the outcome above
            if os.path.isfile(test_file):
                os.unlink(test_file)


示例14: test_gzip_write_mode

    def test_gzip_write_mode(self):
        """Should always open in binary mode when writing through a codec."""
        boto3.resource('s3').create_bucket(Bucket='bucket')
        # the parse result is not used below; the call is kept as in the original test
        smart_open_lib._parse_uri("s3://bucket/key.gz")

        with mock.patch('smart_open.s3.open') as patched_open:
            smart_open.smart_open("s3://bucket/key.gz", "wb")
            # the s3 layer must have been opened with bucket, key and binary write mode
            patched_open.assert_called_with('bucket', 'key.gz', 'wb')


示例15: test_readline

    def test_readline(self):
        """Does readline() return the correct file content?"""
        boto3.resource('s3').create_bucket(Bucket='mybucket')

        # two lines of UTF-8 text; only the first one is read back below
        payload = u"hello žluťoučký world!\nhow are you?".encode('utf8')
        with smart_open.smart_open("s3://mybucket/mykey", "wb") as writer:
            writer.write(payload)

        expected_first_line = u"hello žluťoučký world!\n".encode("utf-8")
        reader = smart_open.smart_open("s3://mybucket/mykey", "rb")
        self.assertEqual(reader.readline(), expected_first_line)


示例16: prepend_line

 def prepend_line(infile, outfile, line):
     """Write `line` followed by the full content of `infile` to `outfile`.

     Both files are opened through `smart_open.smart_open` in binary mode.

     Parameters
     ----------
     infile : str
         Location of the existing file whose content is copied.
     outfile : str
         Location of the file to create.
     line : object
         Content of the first output line. Bytes are written as-is, anything
         else is converted with `str` and encoded as UTF-8. A newline is
         appended in both cases.

     Returns
     -------
     str
         `outfile`, unchanged.
     """
     # The output stream is binary, so the first line has to be bytes.
     if isinstance(line, bytes):
         header = line + b"\n"
     else:
         header = (str(line) + "\n").encode('utf-8')

     with smart_open.smart_open(infile, 'rb') as old:
         with smart_open.smart_open(outfile, 'wb') as new:
             new.write(header)
             # distinct loop name so the `line` argument is not shadowed
             for old_line in old:
                 new.write(old_line)
     return outfile


示例17: glove2word2vec

def glove2word2vec(glove_input_file, word2vec_output_file):
    """Convert `glove_input_file` in GloVe format into `word2vec_output_file` in word2vec format.

    The output starts with a "<num_lines> <num_dims>" header line, followed by
    every line of the input copied unchanged.

    Returns the `(num_lines, num_dims)` pair obtained from `get_glove_info`.
    """
    num_lines, num_dims = get_glove_info(glove_input_file)
    logger.info("converting %i vectors from %s to %s", num_lines, glove_input_file, word2vec_output_file)

    with smart_open(word2vec_output_file, 'wb') as target:
        # header line first ...
        header = "{0} {1}\n".format(num_lines, num_dims)
        target.write(header.encode('utf-8'))
        # ... then the GloVe lines as they are
        with smart_open(glove_input_file, 'rb') as source:
            for glove_line in source:
                target.write(glove_line)

    return num_lines, num_dims


示例18: test_read_encoding_implicit_text

 def test_read_encoding_implicit_text(self):
     """Should open the file with the correct encoding, implicit text read."""
     boto3.resource('s3').create_bucket(Bucket='bucket')
     uri = "s3://bucket/key.txt"
     expected = u'это знала ева, это знал адам, колеса любви едут прямо по нам'

     # store the text as raw koi8-r bytes
     with smart_open.smart_open(uri, 'wb') as writer:
         writer.write(expected.encode('koi8-r'))

     # no mode given: only the encoding is passed when reading back
     with smart_open.smart_open(uri, encoding='koi8-r') as reader:
         decoded = reader.read()

     self.assertEqual(expected, decoded)


示例19: test_s3_mode_mock

    def test_s3_mode_mock(self, mock_write, mock_boto):
        """Are s3:// open modes passed correctly?"""
        # make the mocked boto.config.get report the default host
        smart_open.smart_open_lib.boto.config.get.return_value = 's3.amazonaws.com'

        # open a valid s3 URI in write mode
        smart_open.smart_open("s3://mybucket/mykey", "w")

        # the connection must have been requested without credentials, on the default host
        mock_boto.connect_s3.assert_called_with(
            aws_access_key_id=None,
            aws_secret_access_key=None,
            profile_name=None,
            host='s3.amazonaws.com',
        )

        connection = mock_boto.connect_s3()
        connection.lookup.return_value = True
        connection.get_bucket.assert_called_with("mybucket")

        # the mocked writer must have been used
        self.assertTrue(mock_write.called)


示例20: test_write_encoding

    def test_write_encoding(self):
        """Should open the file for writing with the correct encoding."""
        boto3.resource('s3').create_bucket(Bucket='bucket')
        uri = "s3://bucket/key.txt"
        expected = u'какая боль, какая боль, аргентина - ямайка, 5-0'

        # write text, passing the encoding to smart_open
        with smart_open.smart_open(uri, 'w', encoding='koi8-r') as writer:
            writer.write(expected)

        # read it back with the same encoding
        with smart_open.smart_open(uri, encoding='koi8-r') as reader:
            decoded = reader.read()

        self.assertEqual(expected, decoded)



注:本文中的smart_open.smart_open函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python logger.debug函数代码示例发布时间:2022-05-27
下一篇:
Python sysconf.get函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap