本文整理汇总了Python中smart_open.smart_open函数的典型用法代码示例。如果您正苦于以下问题:Python smart_open函数的具体用法?Python smart_open怎么用?Python smart_open使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了smart_open函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: get_lines
def get_lines(glove_file_name):
    """Return the number of vectors and dimensions in a file in GloVe format.

    The file is opened and streamed exactly once; the first line is inspected
    for the dimension count while the remaining lines are merely counted.
    (The previous implementation opened and read the source twice.)

    Parameters
    ----------
    glove_file_name : str
        Location handed to ``smart_open.smart_open`` in ``'r'`` mode.

    Returns
    -------
    tuple
        ``(num_lines, num_dims)`` where ``num_lines`` is the number of lines
        in the file and ``num_dims`` is the number of whitespace-separated
        fields on the first line minus one. For an empty file this is
        ``(0, -1)``, matching the earlier two-pass behaviour.
    """
    with smart_open.smart_open(glove_file_name, 'r') as f:
        lines = iter(f)
        first_line = next(lines, None)
        if first_line is None:
            # Empty input: no lines, and len([]) - 1 == -1 as before.
            return 0, -1
        num_dims = len(first_line.split()) - 1
        # One line already consumed, plus whatever is left in the stream.
        num_lines = 1 + sum(1 for _ in lines)
    return num_lines, num_dims
开发者ID:jroakes,项目名称:glove-to-word2vec,代码行数:7,代码来源:convert.py
示例2: testConversion
def testConversion(self):
    """Check that the files written by ``word2vec2tensor`` match the source model.

    Runs the conversion, verifies that the metadata file, the tensor file and
    the header of the word2vec file agree on the number of rows, then compares
    every written vector with the one loaded through ``KeyedVectors``.
    """
    word2vec2tensor(word2vec_model_path=self.datapath, tensor_filename=self.output_folder)

    with smart_open(self.metadata_file, 'rb') as meta_in:
        metadata_lines = meta_in.readlines()
    with smart_open(self.tensor_file, 'rb') as tensor_in:
        vector_lines = tensor_in.readlines()

    # The first line of the word2vec file carries "<number of words> <vector size>".
    with smart_open(self.datapath, 'rb') as model_in:
        header = model_in.readline().strip()
    number_words, _vector_size = map(int, header.split(b' '))

    row_mismatch = (
        'Metadata file %s and tensor file %s imply different number of rows.'
        % (self.metadata_file, self.tensor_file)
    )
    self.assertTrue(len(metadata_lines) == len(vector_lines) == number_words, row_mismatch)

    # Reference model loaded from the original text-format file.
    orig_model = KeyedVectors.load_word2vec_format(self.datapath, binary=False)

    # Compare the written rows with the reference model, word by word.
    for raw_word, raw_vector in zip(metadata_lines, vector_lines):
        word_string = raw_word.strip().decode("utf8")
        vector_string = raw_vector.replace(b'\t', b' ').decode("utf8")
        vector_array = np.array([float(value) for value in vector_string.split()])
        np.testing.assert_almost_equal(orig_model[word_string], vector_array, decimal=5)
开发者ID:RaRe-Technologies,项目名称:gensim,代码行数:31,代码来源:test_scripts.py
示例3: word2vec2tensor
def word2vec2tensor(word2vec_model_path, tensor_filename, binary=False):
    """Convert a Word2Vec-format file into two TSV files.

    ``<tensor_filename>_tensor.tsv`` receives one tab-separated vector per
    line and ``<tensor_filename>_metadata.tsv`` receives the matching word on
    the same line number.

    Parameters
    ----------
    word2vec_model_path : str
        Path to file in Word2Vec format.
    tensor_filename : str
        Prefix for output files.
    binary : bool, optional
        True if input file in binary format.
    """
    model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_model_path, binary=binary)
    vectors_path = tensor_filename + '_tensor.tsv'
    metadata_path = tensor_filename + '_metadata.tsv'
    # Both outputs are opened in binary mode, so the line terminator is encoded once up front.
    newline = gensim.utils.to_utf8('\n')

    with smart_open(vectors_path, 'wb') as file_vector, smart_open(metadata_path, 'wb') as file_metadata:
        for word in model.index2word:
            file_metadata.write(gensim.utils.to_utf8(word) + newline)
            vector_row = '\t'.join(map(str, model[word]))
            file_vector.write(gensim.utils.to_utf8(vector_row) + newline)

    logger.info("2D tensor file saved to %s", vectors_path)
    logger.info("Tensor metadata file saved to %s", metadata_path)
开发者ID:RaRe-Technologies,项目名称:gensim,代码行数:27,代码来源:word2vec2tensor.py
示例4: test_s3_iter_moto
def test_s3_iter_moto(self):
    """Are S3 files iterated over correctly?"""
    # Lines to round-trip: one 5 MiB line, 1024 short lines, and a final
    # line that is written without a trailing newline.
    huge_line = b"*" * 5 * 1024**2
    expected = [huge_line] + [b'0123456789'] * 1024 + [b"test"]

    # create fake bucket and fake key
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='mybucket')

    with smart_open.smart_open("s3://mybucket/mykey", "wb", s3_min_part_size=5 * 1024**2) as fout:
        # the single huge line (=full multipart upload)
        fout.write(expected[0] + b'\n')
        # the many small lines
        for small_line in expected[1:-1]:
            fout.write(small_line + b'\n')
        # the last line, deliberately without a newline at the end
        fout.write(expected[-1])

    # read back from the key filled above, iterating the object directly
    smart_open_object = smart_open.smart_open("s3://mybucket/mykey")
    output = [line.rstrip(b'\n') for line in smart_open_object]
    self.assertEqual(output, expected)

    # same check, this time going through the context-manager protocol
    with smart_open.smart_open("s3://mybucket/mykey") as smart_open_object:
        output = [line.rstrip(b'\n') for line in smart_open_object]
        self.assertEqual(output, expected)
开发者ID:mpenkov,项目名称:smart_open,代码行数:29,代码来源:test_smart_open_old.py
示例5: test_s3_boto
def test_s3_boto(self, mock_s3_open_read, mock_boto):
    """Is S3 line iterator called correctly?

    Each scenario opens an ``s3://`` URI, triggers iteration, and checks the
    keyword arguments that reached ``mock_boto.connect_s3``.

    The credential-bearing URIs are spelled ``access_id:access_secret@mybucket``
    so that they match the ``aws_access_key_id`` / ``aws_secret_access_key``
    and bucket name asserted below (the previous text contained an e-mail
    obfuscation placeholder instead of the secret and bucket).

    ``mock_s3_open_read`` is not referenced in the body; both mock arguments
    are presumably injected by patch decorators outside this excerpt.
    """
    # Configure the mock boto.config.get to return default host
    smart_open.smart_open_lib.boto.config.get.return_value = 's3.amazonaws.com'

    # no credentials
    smart_open_object = smart_open.smart_open("s3://mybucket/mykey")
    smart_open_object.__iter__()
    mock_boto.connect_s3.assert_called_with(aws_access_key_id=None, aws_secret_access_key=None, profile_name=None, host='s3.amazonaws.com')

    # with credential
    smart_open_object = smart_open.smart_open("s3://access_id:access_secret@mybucket/mykey")
    smart_open_object.__iter__()
    mock_boto.connect_s3.assert_called_with(aws_access_key_id="access_id", aws_secret_access_key="access_secret", profile_name=None, host='s3.amazonaws.com')

    # with credential profile
    smart_open_object = smart_open.smart_open("s3://mybucket/mykey", profile_name="my_credentials")
    smart_open_object.__iter__()
    mock_boto.connect_s3.assert_called_with(aws_access_key_id=None, aws_secret_access_key=None, profile_name="my_credentials", host='s3.amazonaws.com')

    # lookup bucket, key; call s3_iter_lines
    smart_open_object = smart_open.smart_open("s3://access_id:access_secret@mybucket/mykey")
    smart_open_object.__iter__()
    mock_boto.connect_s3().get_bucket.assert_called_with("mybucket")
    mock_boto.connect_s3().get_bucket().get_key.assert_called_with("mykey")
    #
    # TODO: this is kind of a useless assertion...
    #
    self.assertTrue(smart_open_object.__iter__.called)

    # with user-specified host
    smart_open_object = smart_open.smart_open("s3://access_id:access_secret@mybucket/mykey", host='aa.domain.com')
    smart_open_object.__iter__()
    mock_boto.connect_s3.assert_called_with(aws_access_key_id="access_id", aws_secret_access_key="access_secret", profile_name=None, host='aa.domain.com')
开发者ID:move-fast,项目名称:smart_open,代码行数:34,代码来源:test_smart_open.py
示例6: test_http_bz2
def test_http_bz2(self):
    """Can open bz2 via http?

    Writes a payload to a temporary ``.bz2`` file through ``smart_open``,
    serves the resulting bytes from a stubbed HTTP endpoint, and checks that
    reading the URL through ``smart_open`` yields the original payload.
    """
    test_string = b'Hello World Compressed.'
    #
    # TODO: why are these tests writing to temporary files? We can do the
    # bz2 compression in memory.
    #
    with tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False) as infile:
        test_file = infile.name

    # The temporary file was created with delete=False, so remove it ourselves
    # even when writing or reading it back fails.
    try:
        with smart_open.smart_open(test_file, 'wb') as outfile:
            outfile.write(test_string)
        with open(test_file, 'rb') as infile:
            compressed_data = infile.read()
    finally:
        if os.path.isfile(test_file):
            os.unlink(test_file)

    responses.add(responses.GET, "http://127.0.0.1/data.bz2",
                  body=compressed_data, stream=True)
    smart_open_object = smart_open.smart_open("http://127.0.0.1/data.bz2")

    # reading the .bz2 URL must give back exactly the bytes written above
    self.assertEqual(smart_open_object.read(), test_string)
开发者ID:mpenkov,项目名称:smart_open,代码行数:25,代码来源:test_smart_open_old.py
示例7: test_s3_iter_moto
def test_s3_iter_moto(self):
    """Are S3 files iterated over correctly?"""
    # Lines to round-trip: one 5 MiB line, 1024 short lines, and a final
    # line that is written without a trailing newline.
    huge_line = b"*" * 5 * 1024**2
    expected = [huge_line] + [b'0123456789'] * 1024 + [b"test"]

    # create fake bucket and fake key
    conn = boto.connect_s3()
    conn.create_bucket("mybucket")

    # lower the multipart upload size, to speed up these tests
    # NOTE(review): this module-level setting is not restored afterwards.
    smart_open_lib.S3_MIN_PART_SIZE = 5 * 1024**2

    with smart_open.smart_open("s3://mybucket/mykey", "wb") as fout:
        # the single huge line (=full multipart upload)
        fout.write(expected[0] + b'\n')
        # the many small lines
        for small_line in expected[1:-1]:
            fout.write(small_line + b'\n')
        # the last line, deliberately without a newline at the end
        fout.write(expected[-1])

    # read back from the key filled above, iterating the object directly
    smart_open_object = smart_open.smart_open("s3://mybucket/mykey")
    output = [line.rstrip(b'\n') for line in smart_open_object]
    self.assertEqual(output, expected)

    # same check, this time going through the context-manager protocol
    with smart_open.smart_open("s3://mybucket/mykey") as smart_open_object:
        output = [line.rstrip(b'\n') for line in smart_open_object]
        self.assertEqual(output, expected)
开发者ID:move-fast,项目名称:smart_open,代码行数:30,代码来源:test_smart_open.py
示例8: test_file
def test_file(self, mock_smart_open):
    """Is file:// line iterator called correctly?"""
    prefix = "file://"
    read_mode = "rb"

    # (URI handed to smart_open, path expected to reach the mocked opener)
    cases = [
        (prefix + '/tmp/test.txt', '/tmp/test.txt'),
        (prefix + '/tmp/test#hash##more.txt', '/tmp/test#hash##more.txt'),
        ('aa#aa', 'aa#aa'),
    ]
    for uri, full_path in cases:
        smart_open_object = smart_open.smart_open(uri, read_mode)
        smart_open_object.__iter__()
        # called with the correct path?
        mock_smart_open.assert_called_with(full_path, read_mode, buffering=-1)

    # NOTE(review): the visible excerpt stops here; the expanded path is not
    # used by anything shown in this view.
    short_path = "~/tmp/test.txt"
    full_path = os.path.expanduser(short_path)
开发者ID:mpenkov,项目名称:smart_open,代码行数:26,代码来源:test_smart_open_old.py
示例9: test_s3_metadata_write
def test_s3_metadata_write(self):
    """Upload options passed via ``s3_upload`` should show up on the stored S3 object."""
    # Load the local fixture as raw bytes.
    fixture_path = os.path.join(CURR_DIR, 'test_data/crime-and-punishment.txt.gz')
    with smart_open.smart_open(fixture_path, 'rb') as fd:
        data = fd.read()

    # Create a test bucket
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='mybucket')

    # Write the fixture to the bucket, passing the upload options along.
    upload_options = {
        'ContentType': 'text/plain',
        'ContentEncoding': 'gzip'
    }
    with smart_open.smart_open(
        's3://mybucket/crime-and-punishment.txt.gz', 'wb',
        s3_upload=upload_options
    ) as fout:
        fout.write(data)

    # The stored object must report the content type and encoding given above.
    key = s3.Object('mybucket', 'crime-and-punishment.txt.gz')
    self.assertIn('text/plain', key.content_type)
    self.assertEqual(key.content_encoding, 'gzip')
开发者ID:mpenkov,项目名称:smart_open,代码行数:25,代码来源:test_smart_open_old.py
示例10: test_s3_mode_mock
def test_s3_mode_mock(self, mock_session):
    """Are s3:// open modes passed correctly?"""
    # Open a valid s3 URI for writing with an explicit host.
    smart_open.smart_open("s3://mybucket/mykey", "w", host='s3.amazonaws.com')

    # The host must have been turned into the endpoint URL of the s3 resource.
    resource_factory = mock_session.return_value.resource
    resource_factory.assert_called_with('s3', endpoint_url='http://s3.amazonaws.com')
开发者ID:mpenkov,项目名称:smart_open,代码行数:8,代码来源:test_smart_open_old.py
示例11: test_s3_mode_mock
def test_s3_mode_mock(self, mock_write, mock_boto):
    """Are s3:// open modes passed correctly?"""
    # Open a valid s3 URI in write mode.
    smart_open.smart_open("s3://mybucket/mykey", "w")

    # Check the connection arguments first: calling the mock again below
    # would replace the recorded "last call".
    connect = mock_boto.connect_s3
    connect.assert_called_with(aws_access_key_id=None, aws_secret_access_key=None)

    connection = connect()
    connection.lookup.return_value = True
    connection.get_bucket.assert_called_with("mybucket")

    self.assertTrue(mock_write.called)
开发者ID:salilb,项目名称:smart_open,代码行数:8,代码来源:test_smart_open.py
示例12: test_session_write_mode
def test_session_write_mode(self):
    """Write stream should use a custom boto3.Session."""
    custom_session = boto3.Session()
    # Replace the resource factory so the call made through the session can be inspected.
    custom_session.resource = mock.MagicMock()

    smart_open.smart_open('s3://bucket/key', 'wb', s3_session=custom_session)

    custom_session.resource.assert_called_with('s3')
开发者ID:mpenkov,项目名称:smart_open,代码行数:9,代码来源:test_smart_open_old.py
示例13: write_read_assertion
def write_read_assertion(self, test_file):
    """Write ``self.TEXT`` to ``test_file``, read it back, and assert equality.

    The text is encoded as UTF-8 on write and decoded as UTF-8 on read.
    ``test_file`` is removed afterwards, including when the write, the read
    or the assertion fails, so a failing run does not leave the file behind.

    Parameters
    ----------
    test_file : str
        Location passed to ``smart_open.smart_open`` and later to ``os.unlink``.
    """
    try:
        with smart_open.smart_open(test_file, 'wb') as fout:  # 'b' for binary, needed on Windows
            fout.write(self.TEXT.encode('utf8'))
        with smart_open.smart_open(test_file, 'rb') as fin:
            self.assertEqual(fin.read().decode('utf8'), self.TEXT)
    finally:
        if os.path.isfile(test_file):
            os.unlink(test_file)
开发者ID:mpenkov,项目名称:smart_open,代码行数:9,代码来源:test_smart_open_old.py
示例14: test_gzip_write_mode
def test_gzip_write_mode(self):
    """Should always open in binary mode when writing through a codec."""
    s3_resource = boto3.resource('s3')
    s3_resource.create_bucket(Bucket='bucket')

    # The parsed URI is not inspected below; the call is kept as in the original test.
    parsed_uri = smart_open_lib._parse_uri("s3://bucket/key.gz")

    with mock.patch('smart_open.s3.open') as patched_open:
        smart_open.smart_open("s3://bucket/key.gz", "wb")
        patched_open.assert_called_with('bucket', 'key.gz', 'wb')
开发者ID:mpenkov,项目名称:smart_open,代码行数:9,代码来源:test_smart_open_old.py
示例15: test_readline
def test_readline(self):
    """Does readline() return the correct file content?"""
    bucket_resource = boto3.resource('s3')
    bucket_resource.create_bucket(Bucket='mybucket')

    # Two lines of UTF-8 text; only the first one ends with a newline.
    payload = u"hello žluťoučký world!\nhow are you?".encode('utf8')
    with smart_open.smart_open("s3://mybucket/mykey", "wb") as fout:
        fout.write(payload)

    reader = smart_open.smart_open("s3://mybucket/mykey", "rb")
    first_line = reader.readline()
    self.assertEqual(first_line, u"hello žluťoučký world!\n".encode("utf-8"))
开发者ID:mpenkov,项目名称:smart_open,代码行数:10,代码来源:test_smart_open_old.py
示例16: prepend_line
def prepend_line(infile, outfile, line):
    """Copy ``infile`` to ``outfile`` with ``line`` inserted as the first line.

    Both files are opened through ``smart_open`` in binary mode. The mode
    strings and the newline no longer carry stray surrounding spaces, and the
    header is converted to bytes before being written to the binary stream.

    Parameters
    ----------
    infile : str
        Location of the existing file to copy from.
    outfile : str
        Location of the file to create.
    line : object
        Header to prepend. ``bytes`` are written as-is; anything else is
        converted with ``str()`` and encoded as UTF-8. A single newline is
        appended.

    Returns
    -------
    str
        ``outfile``, unchanged.
    """
    # Build the header up front so a conversion error happens before any file is opened.
    header = line if isinstance(line, bytes) else str(line).encode('utf-8')
    with smart_open.smart_open(infile, 'rb') as old:
        with smart_open.smart_open(outfile, 'wb') as new:
            new.write(header + b"\n")
            # A separate loop name keeps the ``line`` parameter from being shadowed.
            for existing_line in old:
                new.write(existing_line)
    return outfile
开发者ID:sqxiang,项目名称:glove-gensim,代码行数:10,代码来源:glove2word2vec.py
示例17: glove2word2vec
def glove2word2vec(glove_input_file, word2vec_output_file):
    """Convert `glove_input_file` in GloVe format into `word2vec_output_file` in word2vec format.

    A header line ``"<num_lines> <num_dims>"`` is written first, followed by
    every line of the input copied through unchanged.

    Returns
    -------
    tuple
        ``(num_lines, num_dims)`` as reported by ``get_glove_info``.
    """
    num_lines, num_dims = get_glove_info(glove_input_file)
    logger.info("converting %i vectors from %s to %s", num_lines, glove_input_file, word2vec_output_file)

    header = "{0} {1}\n".format(num_lines, num_dims).encode('utf-8')
    with smart_open(word2vec_output_file, 'wb') as fout:
        fout.write(header)
        # The input is opened only after the header has been written.
        with smart_open(glove_input_file, 'rb') as fin:
            for vector_line in fin:
                fout.write(vector_line)

    return num_lines, num_dims
开发者ID:abs51295,项目名称:gensim,代码行数:10,代码来源:glove2word2vec.py
示例18: test_read_encoding_implicit_text
def test_read_encoding_implicit_text(self):
    """Should open the file with the correct encoding, implicit text read."""
    bucket_resource = boto3.resource('s3')
    bucket_resource.create_bucket(Bucket='bucket')

    key = "s3://bucket/key.txt"
    text = u'это знала ева, это знал адам, колеса любви едут прямо по нам'

    # Store the text as raw koi8-r bytes ...
    encoded = text.encode('koi8-r')
    with smart_open.smart_open(key, 'wb') as fout:
        fout.write(encoded)

    # ... and read it back without a mode argument, giving only the encoding.
    with smart_open.smart_open(key, encoding='koi8-r') as fin:
        actual = fin.read()

    self.assertEqual(text, actual)
开发者ID:mpenkov,项目名称:smart_open,代码行数:11,代码来源:test_smart_open_old.py
示例19: test_s3_mode_mock
def test_s3_mode_mock(self, mock_write, mock_boto):
    """Are s3:// open modes passed correctly?"""
    # Make the mocked boto.config.get report the default host.
    default_host = 's3.amazonaws.com'
    smart_open.smart_open_lib.boto.config.get.return_value = default_host

    # Open a valid s3 URI in write mode.
    smart_open.smart_open("s3://mybucket/mykey", "w")

    # Check the connection arguments first: calling the mock again below
    # would replace the recorded "last call".
    connect = mock_boto.connect_s3
    connect.assert_called_with(aws_access_key_id=None, aws_secret_access_key=None, profile_name=None, host=default_host)

    connection = connect()
    connection.lookup.return_value = True
    connection.get_bucket.assert_called_with("mybucket")

    self.assertTrue(mock_write.called)
开发者ID:move-fast,项目名称:smart_open,代码行数:11,代码来源:test_smart_open.py
示例20: test_write_encoding
def test_write_encoding(self):
    """Should open the file for writing with the correct encoding."""
    bucket_resource = boto3.resource('s3')
    bucket_resource.create_bucket(Bucket='bucket')

    key = "s3://bucket/key.txt"
    text = u'какая боль, какая боль, аргентина - ямайка, 5-0'

    # Write the text through a koi8-r text-mode stream ...
    with smart_open.smart_open(key, 'w', encoding='koi8-r') as fout:
        fout.write(text)

    # ... and read it back with the same encoding.
    with smart_open.smart_open(key, encoding='koi8-r') as fin:
        actual = fin.read()

    self.assertEqual(text, actual)
开发者ID:mpenkov,项目名称:smart_open,代码行数:12,代码来源:test_smart_open_old.py
注:本文中的smart_open.smart_open函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论