• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python utils.to_unicode函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中utils.to_unicode函数的典型用法代码示例。如果您正苦于以下问题：Python to_unicode函数的具体用法？Python to_unicode怎么用？Python to_unicode使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了to_unicode函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: __init__

    def __init__(self, input, transposed=True):
        """
        Set up the reader and parse the header of a Matrix Market file.

        `input` is passed to `utils.file_or_filename`, so it is either a file
        path or a file-like object supporting `seek()` (e.g. gzip.GzipFile,
        bz2.BZ2File). The data must be in the sparse (coordinate) Matrix
        Market format, with documents as rows and features as columns.

        When `transposed` is false, the document and term counts read from the
        size line are swapped.

        Raises ValueError if the first line is not the expected
        '%%MatrixMarket matrix coordinate real general' banner.
        """
        logger.info("initializing corpus reader from %s" % input)
        self.input = input
        self.transposed = transposed
        with utils.file_or_filename(self.input) as lines:
            try:
                header = utils.to_unicode(next(lines)).strip()
            except StopIteration:
                # An empty input has no banner to validate; counts stay at zero.
                pass
            else:
                if not header.lower().startswith('%%matrixmarket matrix coordinate real general'):
                    raise ValueError("File %s not in Matrix Market format with coordinate real general; instead found: \n%s" %
                                    (self.input, header))

            self.num_docs = self.num_terms = self.num_nnz = 0
            for raw in lines:
                text = utils.to_unicode(raw)
                if text.startswith('%'):
                    # Comment lines may precede the size line.
                    continue
                # First non-comment line holds the three matrix dimensions.
                rows, cols, nnz = [int(field) for field in text.split()]
                if self.transposed:
                    self.num_docs, self.num_terms = rows, cols
                else:
                    self.num_docs, self.num_terms = cols, rows
                self.num_nnz = nnz
                break

        logger.info("accepted corpus with %i documents, %i features, %i non-zero entries" %
                     (self.num_docs, self.num_terms, self.num_nnz))
开发者ID:nathan2718,项目名称:category2vec,代码行数:33,代码来源:matutils.py


示例2: single_picky

def _meta_value(line):
    """Return the second '=>'-separated field of `line`, or the whole line if it has no '=>'."""
    parts = line.split("=>")
    return parts[1] if len(parts) > 1 else parts[0]


def single_picky(slug='test'):
    """
    Render the page stored in the file ``PICKY_DIR + slug + '.md'``.

    The file must start with a metadata section fenced by lines of three or
    more '-' or '=' characters. The first metadata line supplies the title and
    the second the date, each optionally written as ``key=>value``. Everything
    after the closing fence is converted with `to_markdown`.

    Calls ``abort(404)`` when the file cannot be opened or has no metadata
    section. Raises IndexError when the metadata section has only one line.
    """
    # NOTE(review): `slug` is concatenated into a filesystem path unchecked;
    # if it can contain path separators this allows reading files outside
    # PICKY_DIR -- confirm the route restricts it.
    try:
        f = open(PICKY_DIR + slug + '.md')
    except IOError:
        abort(404)
    # Context manager guarantees the handle is closed even if read() fails.
    with f:
        picky = f.read()

    meta_regex = re.compile(
            r"^\s*(?:-|=){3,}\s*\n((?:.|\n)+?)\n\s*(?:-|=){3,}\s*\n*",
            re.MULTILINE
        )
    match = meta_regex.match(picky)
    if not match:
        abort(404)

    meta = match.group(1).split("\n")
    title = _meta_value(meta[0])
    date = _meta_value(meta[1])

    # The body is whatever follows the closing metadata fence.
    content = to_markdown(to_unicode(picky[match.end():]))
    return template('picky.html', content=content, title=to_unicode(title),
                                 date=to_unicode(date), slug=slug)
开发者ID:iTriumph,项目名称:MiniAkio,代码行数:30,代码来源:picky.py


示例3: load_cat2vec_format

 def load_cat2vec_format(cls, cat_model=None, sent_model=None, word_model=None):
     """
     Build a Category2Vec model from previously exported files.

     `cat_model` and `sent_model`, when given, are paths to text files whose
     first line is a whitespace-separated header and whose remaining lines
     each hold an id, a tab, and the space-separated vector values.
     The category header is ``cat_len layer1_size sg hs negative cbow_mean``;
     the sentence header starts with ``sents_len``.
     `word_model`, when given, is loaded with `Word2Vec.load` and also
     provides `model.vocab`.

     Returns the populated model.
     """
     model = Category2Vec(None)
     if cat_model:
         logger.info("loading %s object(cat) from %s" % (cls.__name__, cat_model))
         # `with` closes the file; the original left the handle open.
         with open(cat_model, "r") as fin:
             for count, line in enumerate(fin):
                 line = line.rstrip()
                 if count == 0:
                     # Header line: model hyper-parameters and matrix shape.
                     info = line.split()
                     model.cat_len = int(info[0])
                     model.layer1_size = int(info[1])
                     model.sg = int(info[2])
                     model.hs = int(info[3])
                     model.negative = int(info[4])
                     model.cbow_mean = int(info[5])
                     model.cats = empty((model.cat_len, model.layer1_size), dtype=REAL)
                     model.cat_no_hash = {}
                     model.cat_id_list = []
                 else:
                     # Data lines are numbered from 0 after the header.
                     idx = count - 1
                     row = line.split("\t")
                     cat_id = utils.to_unicode(row[0])
                     model.cat_no_hash[cat_id] = idx
                     model.cat_id_list.append(cat_id)
                     vals = row[1].split()
                     for j in xrange(model.layer1_size):
                         model.cats[idx][j] = float(vals[j])
     if sent_model:
         logger.info("loading %s object(sentence) from %s" % (cls.__name__, sent_model))
         # NOTE(review): this section reads model.layer1_size, which this
         # function only sets while loading cat_model; otherwise it relies on
         # whatever Category2Vec(None) provides -- confirm.
         with open(sent_model, "r") as fin:
             for count, line in enumerate(fin):
                 line = line.rstrip()
                 if count == 0:
                     info = line.split()
                     model.sents_len = int(info[0])
                     model.sents = empty((model.sents_len, model.layer1_size), dtype=REAL)
                     model.sent_no_hash = {}
                     model.sent_id_list = []
                 else:
                     idx = count - 1
                     row = line.split("\t")
                     sent_id = utils.to_unicode(row[0])
                     model.sent_no_hash[sent_id] = idx
                     model.sent_id_list.append(sent_id)
                     vals = row[1].split()
                     for j in xrange(model.layer1_size):
                         model.sents[idx][j] = float(vals[j])
     if word_model:
         logger.info("loading word2vec from %s" % word_model)
         model.w2v = Word2Vec.load(word_model)
         model.vocab = model.w2v.vocab
     return model
开发者ID:nathan2718,项目名称:category2vec,代码行数:57,代码来源:cat2vec.py


示例4: __init__

    def __init__(self, id, uri, name, type):
        """
        Create a repository record.

        When `id` is None the next value of the class-wide
        `DBRepository.id_counter` is used and the counter is advanced;
        otherwise the given `id` is stored as is. `uri`, `name` and `type`
        are stored after conversion with `to_unicode`.
        """
        if id is not None:
            self.id = id
        else:
            self.id = DBRepository.id_counter
            DBRepository.id_counter += 1

        for attr, raw in (("uri", uri), ("name", name), ("type", type)):
            setattr(self, attr, to_unicode(raw))
开发者ID:achernet,项目名称:CVSAnalY,代码行数:10,代码来源:Database.py


示例5: __init__

 def __init__(self, id, commit):
     """
     Create a log record from `commit`.

     When `id` is None the next value of the class-wide `DBLog.id_counter`
     is used and the counter is advanced; otherwise the given `id` is kept.
     The revision and message of `commit` are converted with `to_unicode`;
     `committer` and `author` start out as None.
     """
     if id is not None:
         self.id = id
     else:
         self.id = DBLog.id_counter
         DBLog.id_counter += 1

     revision = to_unicode(commit.revision)
     self.rev = revision
     self.committer = None
     self.author = None
     self.date = commit.date
     message = to_unicode(commit.message)
     self.message = message
     self.composed_rev = commit.composed_rev
开发者ID:AlertProject,项目名称:CVSAnalY,代码行数:13,代码来源:Database.py


示例6: __iter__

 def __iter__(self):
     """Yield every line of the source as a list of whitespace-separated tokens."""
     def tokenized(stream):
         # Decode each raw line and split it on whitespace.
         for raw in stream:
             yield utils.to_unicode(raw).split()

     try:
         # First treat the source as a file-like object: rewind, then read.
         # Objects without `seek` raise AttributeError and take the path below.
         self.source.seek(0)
         for tokens in tokenized(self.source):
             yield tokens
     except AttributeError:
         # Not file-like, so interpret the source as a filename.
         with utils.smart_open(self.source) as handle:
             for tokens in tokenized(handle):
                 yield tokens
开发者ID:nathan2718,项目名称:category2vec,代码行数:13,代码来源:word2vec.py


示例7: response

    def response(self,msg,**kwargs):
        '''
        Build the reply for a parsed message by filling in the matching template.

        ex: response(message,type='yourType')

        `msg` is the parsed message dict. Its 'sender' and 'receiver' entries
        are swapped and its 'type' (and possibly 'content' / 'hq_musurl')
        entries are updated in place.

        optional kwargs:
        type    -- reply type overriding msg['type']; one of
                   text,image,voice,video,music,news
        content -- static reply text; forces the type to 'text'
        handler -- function that accepts the msg dict and returns the dict
                   used to fill the template
        count   -- required when the type is news; passed to Template().news()

        Returns the formatted template string.
        Raises Exception for an illegal type, for a news reply without
        count, or when the template cannot be filled from msg.
        '''
        # The reply travels in the opposite direction of the incoming message.
        msg['receiver'],msg['sender'] = msg['sender'],msg['receiver']
        legal_types = ['text','music','image','voice','video','news']

        # An explicit type kwarg wins over the type of the incoming message.
        msg_type = kwargs.get('type') or msg['type']
        if msg_type == 'music' and not msg['hq_musurl']:
            # Fall back to the normal music URL when no HQ URL is present.
            msg['hq_musurl'] = msg['musurl']

        # A static content kwarg always produces a text reply.
        content = kwargs.get('content')
        if content:
            msg['type'] = msg_type = 'text'
            msg['content'] = to_unicode(content)

        if msg_type not in legal_types:
            raise Exception("Illgal type!You could only choose one type from legal_types!")
        msg['type'] = msg_type

        # The handler may replace the dict that is rendered into the template.
        handler = kwargs.get('handler')
        if handler:
            msg = handler(msg)

        if msg_type != 'news':
            template = to_unicode(getattr(Template(),msg_type))
        else:
            count = kwargs.get('count')
            if not count:
                raise Exception('When type is set to news,the count kwarg is necessary!')
            template = to_unicode(Template().news(count))

        # Format once, inside the guard, so a missing or wrong template
        # argument produces the explanatory error below instead of escaping
        # from the logging call as a raw KeyError.
        try:
            retdata = template.format(**msg)
        except Exception:
            raise Exception("You did't pass enough args or pass wrong args,please check args which template needed.Read template.py maybe inspire your mind")
        logging.info(retdata)
        return retdata
开发者ID:SeavantUUz,项目名称:lolibot,代码行数:51,代码来源:loli.py


示例8: add_header

 def add_header(self):
     """
     Write the trace header to `self.file`; do nothing when no file is set.

     The header holds the script name (``sys.argv[0]``), the start date taken
     from `self.gtime.start_date`, and the column titles rendered by
     `self.format`.
     """
     if self.file is None:
         return
     parts = [
         "Script file    : %s\n" % sys.argv[0],
         "Date           : %s\n" % time.strftime("%d/%m/%Y %H:%M:%S", self.gtime.start_date),
         "\n%s\n" % self.format("Time(s)", "Scope", "Info"),
     ]
     self.file.write(utils.to_unicode("".join(parts)))
开发者ID:BackupTheBerlios,项目名称:pytestemb,代码行数:7,代码来源:trace.py


示例9: __init__

    def __init__(self, unique_name, base_filepath, parameters):
        """
        Load the word-to-cluster mapping and build the reverse index.

        Arguments
        ---------
            unique_name : feature unique name
            base_filepath : filepath of feature config
            parameters : lexicon parameters, presented by dictionary

        When the PARAM_ENABLED parameter equals 'false', nothing is loaded and
        neither `clustered_words` nor `clusters` is set.
        """
        self.unique_name = unique_name
        self.parameters = parameters

        filepath = os.path.join(
            base_filepath,
            parameters[BagOfClustersFeature.PARAM_CLUSTERED_WORDS_FILEPATH])

        if parameters[BagOfClustersFeature.PARAM_ENABLED] == 'false':
            return

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Loading file with clusters of words: {}".format(filepath))
        with io.open(filepath, 'r', encoding='utf-8') as f:
            # io.open already decodes the text; json.load's `encoding`
            # keyword is rejected on Python 3.9+, so it is not passed.
            self.clustered_words = json.load(f)

        print("Create dictionary with all clusters, accessed by cluster_id ...")
        # Reverse index: cluster_id -> list of words in that cluster.
        self.clusters = {}
        for word in self.clustered_words:
            cluster_id = self.clustered_words[word]
            self.clusters.setdefault(cluster_id, []).append(utils.to_unicode(word))
开发者ID:nicolay-r,项目名称:tone-classifier,代码行数:29,代码来源:clustered_words.py


示例10: _create_des_

def _create_des_(msg):
    """
    Wrap `msg` in a description dictionary.

    Returns an empty dict when `msg` is None, otherwise ``{"msg": <text>}``
    with the text converted by `utils.to_unicode`.
    Raises pexception.PytestembError when `msg` is neither None nor a string.
    """
    if msg is None:
        return {}
    if not isinstance(msg, types.StringTypes):
        raise pexception.PytestembError("Msg must be a string")
    return {"msg": "%s" % utils.to_unicode(msg)}
开发者ID:BackupTheBerlios,项目名称:pytestemb,代码行数:7,代码来源:__init__.py


示例11: get_cluster_id

 def get_cluster_id(self, word):
     """
     Look up the cluster of a word.

     Returns
     -------
        The entry of `self.clustered_words` stored under the unicode form
        of 'word'.
     """
     key = utils.to_unicode(word)
     return self.clustered_words[key]
开发者ID:nicolay-r,项目名称:tone-classifier,代码行数:7,代码来源:clustered_words.py


示例12: get_local_features

def get_local_features(token, word_freq=None):
    """
    Return the list of feature strings describing a single token.

    Only features whose switch is present in the module-level `features_on`
    collection are produced. Purely alphabetic tokens can get case, frequency,
    rarity and word features; purely numeric, alphanumeric and
    single-character tokens get their own markers. The 'Word' and 'Length'
    features are computed from the normalized token.

    `word_freq` is indexed by the normalized token whenever the 'Freq' or
    'Rare' switch is on and the token is alphabetic.
    """
    assert len(token) >= 1

    def enabled(switch):
        return switch in features_on

    features = []
    ntoken = normalize(token, lowercase=False)

    if token.isalpha():
        if enabled('UpperCase') and first_upper_case(ntoken):
            features.append('IsUpperCase')
        if enabled('AllUpperCase') and all_upper_case(ntoken):
            features.append('IsAllUpperCase')
        if enabled('AllLowerCase') and all_lower_case(ntoken):
            features.append('IsAllLowerCase')
        if enabled('Freq'):
            features.append('Freq:%s' % str(word_freq[ntoken]))
        if enabled('Rare') and word_freq[ntoken] <= rare_thr:
            features.append('IsRare')
        if enabled('IsWord'):
            features.append('IsWord')
    elif token.isdigit():
        if enabled('Number'):
            features.append('IsNumber')
    elif token.isalnum():
        if enabled('AlphaNum'):
            features.append('IsAlphaNum')
    elif len(to_unicode(token)) == 1:
        # A single character is either a separator or another symbol.
        if is_punct(token):
            switch, marker = 'Separator', 'IsSeparator'
        else:
            switch, marker = 'NonAlphanum', 'IsNonAlphanum'
        if enabled(switch):
            features.append(marker)

    # The literal word is skipped for tokens already marked as numbers
    # or alphanumerics.
    if enabled('Word') and 'IsNumber' not in features and 'IsAlphaNum' not in features:
        features.append('W=%s' % ntoken)

    if enabled('Length'):
        features.append('Length:%s' % str(len(ntoken)))

    return features
开发者ID:donvel,项目名称:affiliations,代码行数:59,代码来源:export.py


示例13: get_terms_info

 def get_terms_info(self, term):
     """
     Look up the stored information for 'term'.

     returns: dict
         the entry of `self.terms_info` for the unicode form of 'term';
         per the original documentation this is the number of documents
         containing the term per sentiment class and overall
         (DocVocabulary.ALL)
     """
     return self.terms_info[to_unicode(term)]
开发者ID:nicolay-r,项目名称:tone-classifier,代码行数:8,代码来源:DocVocabulary.py


示例14: _check_same_origin

 def _check_same_origin(self, current_url):
     """
     Check whether `current_url` has the same origin as `self.origin`.

     The origin of the URL is its (scheme, netloc) pair.
     """
     parsed = urlparse.urlparse(to_unicode(current_url))
     return (parsed.scheme, parsed.netloc) == self.origin
开发者ID:BoyceYang,项目名称:wsbs,代码行数:8,代码来源:spider.py


示例15: __getitem__

    def __getitem__(self, name):
        '''Return the decoded value of header `name` as a unicode string.

        Each part produced by `decode_header` is converted with `to_unicode`
        and the parts are concatenated.

        Returns None when the message has no such header.'''
        raw = self._msg[name]
        if raw is None:
            return None
        pieces = [to_unicode(*chunk) for chunk in decode_header(raw)]
        return u''.join(pieces)
开发者ID:sirech,项目名称:deliver,代码行数:9,代码来源:simple.py


示例16: ReplyText

def ReplyText(ToUserName,FromUserName,Content):
    """
    Build the XML document for a text reply.

    The recipient, the sender and the unicode form of `Content` are placed in
    CDATA sections; CreateTime is the current time in whole seconds.
    """
    reply_template = """<xml>
 <ToUserName><![CDATA[%s]]></ToUserName>
 <FromUserName><![CDATA[%s]]></FromUserName>
 <CreateTime>%s</CreateTime>
 <MsgType><![CDATA[text]]></MsgType>
 <Content><![CDATA[%s]]></Content>
 </xml>"""
    created = str(int(time.time()))
    return reply_template % (ToUserName, FromUserName, created, to_unicode(Content))
开发者ID:luckfu,项目名称:werobot,代码行数:9,代码来源:reply.py


示例17: get_release_note

 def get_release_note(self):
     """
     Return the release note text, computing and caching it on first use.

     The 'Release Note' field is used when the issue has it. Otherwise the
     description is used for incompatible or important changes, and an empty
     string for everything else.
     """
     if self.notes is not None:
         # Already computed by an earlier call.
         return self.notes

     field = self.parent.field_id_map['Release Note']
     if field in self.fields:
         self.notes = to_unicode(self.fields[field])
     elif self.get_incompatible_change() or self.get_important():
         self.notes = self.get_description()
     else:
         self.notes = ""
     return self.notes
开发者ID:ajayyadava,项目名称:yetus,代码行数:10,代码来源:releasedocmaker.py


示例18: __iter__

 def __iter__(self):
     """
     Yield ``(words, second_column, category)`` for each category of each line.

     Lines are tab-separated: column 1 is yielded unchanged, column 3 holds
     space-separated categories, and columns 4 onward are the words.
     Categories containing '/' are skipped.
     """
     def triples(stream):
         for raw in stream:
             cols = utils.to_unicode(raw.rstrip()).split("\t")
             for cat in cols[3].split(" "):
                 if "/" not in cat:
                     yield cols[4:], cols[1], cat

     try:
         # File-like sources are rewound and read directly; anything without
         # `seek` raises AttributeError and is opened as a filename instead.
         self.source.seek(0)
         for item in triples(self.source):
             yield item
     except AttributeError:
         with utils.smart_open(self.source) as fin:
             for item in triples(fin):
                 yield item
开发者ID:rakutentech,项目名称:category2vec,代码行数:19,代码来源:sentences.py


示例19: filter_wiki

def filter_wiki(raw):
    """
    Strip wiki mark-up from `raw` and return the remaining text.

    `raw` is either unicode or a utf-8 encoded string; undecodable bytes are
    ignored. HTML entities are decoded before the mark-up is removed.
    """
    # The mark-up parsing is not perfect, but it is good enough for this use.
    decoded = utils.to_unicode(raw, 'utf8', errors='ignore')
    return remove_markup(utils.decode_htmlentities(decoded))
开发者ID:panx27,项目名称:src,代码行数:10,代码来源:gensimplaintext.py


示例20: get_all_tones_from_table

    def get_all_tones_from_table(self):
        """
        Fill `self.cache` with the term values read from the lexicon CSV.

        The table at `self.table_filepath` is read as comma-separated values.
        For every row, the term column (converted with `utils.to_unicode`)
        becomes the key and the value column (converted to float) the value.
        """
        logging.info("Loading lexicon '%s': %s ..." % (self.unique_name,
                                                       self.table_filepath))

        table = pd.read_csv(self.table_filepath, sep=',')

        for label in table.index:
            term = table[self.term_column_name][label]
            tone = table[self.value_column_name][label]
            self.cache[utils.to_unicode(term)] = float(tone)
开发者ID:nicolay-r,项目名称:tone-classifier,代码行数:10,代码来源:lexicon.py



注：本文中的utils.to_unicode函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python utils.to_utf8函数代码示例发布时间:2022-05-26
下一篇:
Python utils.to_timestamp函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap