Python utils.in_chunks Function Code Examples


This article collects typical usage examples of the Python function r2.lib.utils.in_chunks. If you are wondering what exactly in_chunks does, how to call it, or what real-world uses of it look like, the hand-picked code examples below should help.



A total of 20 code examples of the in_chunks function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
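
All of the examples share one idiom: in_chunks takes an arbitrary iterable and yields lists of at most `size` items, which lets callers batch database reads, cache writes, and uploads. As a quick orientation before the examples, here is a minimal, self-contained sketch of that behaviour. It is a stand-in, not the reddit implementation itself, and the default size of 25 is an assumption based on the call sites below that omit the argument.

def in_chunks(iterable, size=25):
    """Yield successive lists of at most `size` items from `iterable`."""
    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) == size:
            yield chunk
            chunk = []
    if chunk:
        # emit the final, possibly shorter, chunk
        yield chunk

# Usage: list(in_chunks(range(7), size=3)) == [[0, 1, 2], [3, 4, 5], [6]]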

Example 1: activate_names_requested_in

def activate_names_requested_in(link):
    tree = get_comment_tree(link)
    acceptable_names = []
    if tree.tree:
        top_level_cids = tree.tree[None]
        comments = chain.from_iterable(Comment._byID(chunk, return_dict=False,
                                                     data=True)
                                       for chunk in in_chunks(top_level_cids))

        for comment in sorted(comments, key=lambda c: c._ups, reverse=True):
            if comment._spam or comment._deleted:
                continue

            sanitized = comment.body.strip()
            match = valid_name_re.search(sanitized)
            if match:
                acceptable_names.append((comment, match.group(1)))

    # we activate one name for each 100% of rev goal met
    names = acceptable_names[:link.revenue_bucket]
    activate_names(link, names)

    activated_names = [name for comment, name in names]
    link.server_names = activated_names
    link.flair_text = ", ".join(activated_names) if names else "/dev/null"
    link.flair_css_class = "goal-bucket-%d" % link.revenue_bucket
    link._commit()
Developer: GodOfConquest, Project: reddit-plugin-gold, Lines: 27, Source: gold_end_of_day.py


Example 2: rebuild_link_index

def rebuild_link_index(start_at=None, sleeptime=1, cls=Link,
                       uploader=LinkUploader, doc_api='CLOUDSEARCH_DOC_API',
                       estimate=50000000, chunk_size=1000):
    doc_api = getattr(g, doc_api)
    uploader = uploader(doc_api)

    q = cls._query(cls.c._deleted == (True, False), sort=desc('_date'))

    if start_at:
        after = cls._by_fullname(start_at)
        assert isinstance(after, cls)
        q._after(after)

    q = r2utils.fetch_things2(q, chunk_size=chunk_size)
    q = r2utils.progress(q, verbosity=1000, estimate=estimate, persec=True,
                         key=_progress_key)
    for chunk in r2utils.in_chunks(q, size=chunk_size):
        uploader.things = chunk
        for x in range(5):
            try:
                uploader.inject()
            except httplib.HTTPException as err:
                print "Got %s, sleeping %s secs" % (err, x)
                time.sleep(x)
                continue
            else:
                break
        else:
            raise err
        last_update = chunk[-1]
        print "last updated %s" % last_update._fullname
        time.sleep(sleeptime)
Developer: KeyserSosa, Project: reddit, Lines: 32, Source: cloudsearch.py


Example 3: update_activity

def update_activity():
    events = {}
    event_counts = collections.Counter()

    query = (ev for ev in LiveUpdateEvent._all()
             if ev.state == "live" and not ev.banned)
    for chunk in utils.in_chunks(query, size=100):
        context_ids = {ev._fullname: ev._id for ev in chunk}

        view_countable = [ev._fullname for ev in chunk
                          if ev._date >= g.liveupdate_min_date_viewcounts]
        view_counts_query = ViewCountsQuery.execute_async(view_countable)

        try:
            with c.activity_service.retrying(attempts=4) as svc:
                infos = svc.count_activity_multi(context_ids.keys())
        except TTransportException:
            continue

        view_counts = view_counts_query.result()

        for context_id, info in infos.iteritems():
            event_id = context_ids[context_id]

            try:
                LiveUpdateActivityHistoryByEvent.record_activity(
                    event_id, info.count)
            except tdb_cassandra.TRANSIENT_EXCEPTIONS as e:
                g.log.warning("Failed to update activity history for %r: %s",
                              event_id, e)

            try:
                event = LiveUpdateEvent.update_activity(
                    event_id, info.count, info.is_fuzzed)
            except tdb_cassandra.TRANSIENT_EXCEPTIONS as e:
                g.log.warning("Failed to update event activity for %r: %s",
                              event_id, e)
            else:
                events[event_id] = event
                event_counts[event_id] = info.count

            websockets.send_broadcast(
                "/live/" + event_id,
                type="activity",
                payload={
                    "count": info.count,
                    "fuzzed": info.is_fuzzed,
                    "total_views": view_counts.get(context_id),
                },
            )

    top_event_ids = [event_id for event_id, count in event_counts.most_common(1000)]
    top_events = [events[event_id] for event_id in top_event_ids]
    query_ttl = datetime.timedelta(days=3)
    with CachedQueryMutator() as m:
        m.replace(get_active_events(), top_events, ttl=query_ttl)

    # ensure that all the amqp messages we've put on the worker's queue are
    # sent before we allow this script to exit.
    amqp.worker.join()
Developer: reddit, Project: reddit-plugin-liveupdate, Lines: 60, Source: activity.py


Example 4: update_flair_counts

def update_flair_counts():
    flairs = Counter()
    user_ids = []

    sr = Subreddit._byID(g.live_config["thebutton_srid"], data=True)
    raw = AccountsActiveBySR._cf.xget(sr._id36)
    for uid, _ in raw:
        user_ids.append(uid)

    for user_chunk in in_chunks(user_ids, size=100):
        users = Account._byID36(user_chunk, data=True, return_dict=False)
        for user in users:
            flair = user.flair_css_class(sr._id)
            if not flair:
                if user._date < ACCOUNT_CREATION_CUTOFF:
                    flair = "no-press"
                else:
                    flair = "cant-press"

            flairs[flair] += 1

    if 'cheater' in flairs:
        del flairs['cheater']

    sr.flair_counts = sorted(
        flairs.iteritems(),
        key=lambda x: 'z' if x[0] == 'no-press' else x[0],
        reverse=True)
    sr._commit()
Developer: imclab, Project: reddit-plugin-thebutton, Lines: 29, Source: scripts.py


Example 5: rebuild_index

def rebuild_index(start_at=None, sleeptime=1, cls=Link, estimate=50000000,
                  chunk_size=1000):
    if start_at is _REBUILD_INDEX_CACHE_KEY:
        start_at = g.cache.get(start_at)
        if not start_at:
            raise ValueError("Told me to use '%s' key, but it's not set" %
                             _REBUILD_INDEX_CACHE_KEY)
    
    q = cls._query(cls.c._deleted == (True, False),
                   sort=desc('_date'), data=True)
    if start_at:
        after = cls._by_fullname(start_at)
        assert isinstance(after, cls)
        q._after(after)
    q = r2utils.fetch_things2(q, chunk_size=chunk_size)
    q = r2utils.progress(q, verbosity=1000, estimate=estimate, persec=True,
                         key=_progress_key)
    for chunk in r2utils.in_chunks(q, size=chunk_size):
        for x in range(5):
            try:
                inject(chunk)
            except httplib.HTTPException as err:
                print "Got  %s, sleeping %s secs" % (err, x)
                time.sleep(x)
                continue
            else:
                break
        else:
            raise err
        last_update = chunk[-1]
        g.cache.set(_REBUILD_INDEX_CACHE_KEY, last_update._fullname)
        time.sleep(sleeptime)
Developer: ProfNandaa, Project: reddit, Lines: 32, Source: cloudsearch.py


Example 6: _generate_sitemaps

def _generate_sitemaps(links, set_lastmod=True):
    """Create an iterator of sitemaps.

    Each sitemap has up to 50000 links, being the maximum allowable number of
    links according to the sitemap standard.
    """
    for subreddit_chunks in in_chunks(links, LINKS_PER_SITEMAP):
        yield generate_sitemap_from_links(subreddit_chunks, set_lastmod=set_lastmod)
Developer: zeantsoi, Project: reddit, Lines: 8, Source: generate.py


Example 7: subreddit_sitemaps

def subreddit_sitemaps(subreddits):
    """Create an array of sitemaps.

    Each sitemap has up to 50000 links, being the maximum allowable number of
    links according to the sitemap standard.
    """
    for subreddit_chunks in in_chunks(subreddits, LINKS_PER_SITEMAP):
        yield _subreddit_sitemap(subreddit_chunks)
Developer: AHAMED750, Project: reddit, Lines: 8, Source: generate.py


Example 8: get_details

    def get_details(cls, thing, voters=None):
        from r2.models import Comment, Link
        if isinstance(thing, Link):
            details_cls = VoteDetailsByLink
        elif isinstance(thing, Comment):
            details_cls = VoteDetailsByComment
        else:
            raise ValueError

        voter_id36s = None
        if voters:
            voter_id36s = [voter._id36 for voter in voters]

        try:
            row = details_cls._byID(thing._id36, properties=voter_id36s)
            raw_details = row._values()
        except tdb_cassandra.NotFound:
            return []

        try:
            row = VoterIPByThing._byID(thing._fullname, properties=voter_id36s)
            ips = row._values()
        except tdb_cassandra.NotFound:
            ips = {}

        # look up all the accounts in batches of 100
        account_id36s = set(raw_details.keys())
        accounts = {}
        for id_chunk in in_chunks(account_id36s, size=100):
            accounts.update(Account._byID36(id_chunk, data=True))

        details = []
        for voter_id36, json_data in raw_details.iteritems():
            vote_data = json.loads(json_data)
            vote_data = cls.convert_old_details(vote_data)

            extra_data = vote_data["data"]
            extra_data["ip"] = ips.get(voter_id36)

            vote = Vote(
                user=accounts[voter_id36],
                thing=thing,
                direction=Vote.deserialize_direction(vote_data["direction"]),
                date=datetime.utcfromtimestamp(vote_data["date"]),
                data=extra_data,
                effects=vote_data["effects"],
                get_previous_vote=False,
            )

            details.append(vote)

        details.sort(key=lambda d: d.date)

        return details
Developer: zeantsoi, Project: reddit, Lines: 54, Source: vote.py


Example 9: rebuild_index

def rebuild_index(after_id = None):
    cls = Link

    # don't pull spam/deleted
    q = cls._query(sort=desc('_date'), data=True)

    if after_id:
        q._after(cls._byID(after_id))

    q = fetch_things2(q)

    q = progress(q, verbosity=1000, estimate=10000000, persec=True)
    for chunk in in_chunks(q):
        inject(chunk)
Developer: codetripping, Project: reddit, Lines: 14, Source: indextankupdate.py


Example 10: simple_get_multi

    def simple_get_multi(self, keys):
        results = {}
        category_bundles = {}
        for key in keys:
            category, ids = self._split_key(key)
            category_bundles.setdefault(category, []).append(ids)

        for category in category_bundles:
            idses = category_bundles[category]
            chunks = in_chunks(idses, size=50)
            for chunk in chunks:
                new_results = self.backend.get_multi(category, chunk)
                results.update(new_results)

        return results
Developer: 0xcd03, Project: reddit, Lines: 15, Source: cache.py


Example 11: give_trophies

def give_trophies(users):
    for fullnames in in_chunks(progress(users, verbosity=50), size=50):
        users = Account._by_fullname(fullnames, return_dict=False)

        for user in users:
            team = get_user_team(user)

            trophy = Award.give_if_needed(
                codename="f2p_orangered" if team == "red" else "f2p_periwinkle",
                user=user,
            )
            if trophy:
                trophy._commit()

        sleep(.5)
Developer: 13steinj, Project: reddit-plugin-f2p, Lines: 15, Source: trophies.py


Example 12: get_recent_name_submissions

def get_recent_name_submissions():
    link_fullnames = list(queries.get_links(SERVERNAME_SR, "new", "all"))
    links = chain.from_iterable(Thing._by_fullname(chunk, return_dict=False)
                                for chunk in in_chunks(link_fullnames))

    for link in links:
        if link._deleted or link._spam:
            continue

        # OH GOD WHAT HAVE YOU POSTED IN MY LOVELY AUTOMATED SUBREDDIT!?
        if (not hasattr(link, "revenue_date") or
            not hasattr(link, "revenue_bucket") or
            not hasattr(link, "server_names")):
            continue

        yield link
Developer: GodOfConquest, Project: reddit-plugin-gold, Lines: 16, Source: gold_end_of_day.py


Example 13: port_deleted_links

def port_deleted_links(after_id=None):
    from r2.models import Link
    from r2.lib.db.operators import desc
    from r2.models.query_cache import CachedQueryMutator
    from r2.lib.db.queries import get_deleted_links
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._deleted == True, Link.c._spam == (True, False), sort=desc("_date"), data=True)
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, verbosity=1000)

    for chunk in in_chunks(q):
        with CachedQueryMutator() as m:
            for link in chunk:
                query = get_deleted_links(link.author_id)
                m.insert(query, [link])
Developer: Shilohtd, Project: reddit, Lines: 16, Source: migrate.py


Example 14: bulk_load

    def bulk_load(self, start='', end='', chunk_size = 100):
        """Try to load everything out of Cassandra and put it into
           memcached"""
        cf = self.cassa.cf
        for rows in in_chunks(cf.get_range(start=start,
                                           finish=end,
                                           columns=['value']),
                              chunk_size):
            print rows[0][0]
            rows = dict((key, pickle.loads(cols['value']))
                        for (key, cols)
                        in rows
                        if (cols
                            # hack
                            and len(key) < 250))
            self.memcache.set_multi(rows)
Developer: 0xcd03, Project: reddit, Lines: 16, Source: cache.py


Example 15: rebuild_index

def rebuild_index(after_id = None, estimate=10000000):
    cls = Link

    # don't pull spam/deleted
    q = cls._query(sort=desc('_date'), data=True)

    if after_id:
        q._after(cls._byID(after_id))

    q = fetch_things2(q)

    def key(link):
        # we're going back in time, so this will give us a good idea
        # of how far we've gone
        return "%s/%s" % (link._id, link._date)

    q = progress(q, verbosity=1000, estimate=estimate, persec=True, key=key)
    for chunk in in_chunks(q):
        inject(chunk)
Developer: JediWatchman, Project: reddit, Lines: 19, Source: indextank.py


Example 16: _populate

def _populate(after_id=None, estimate=54301242):
    from r2.models import desc
    from r2.lib.db import tdb_cassandra
    from r2.lib import utils

    # larger has a chance to decrease the number of Cassandra writes,
    # but the probability is low
    chunk_size = 5000

    q = Comment._query(Comment.c._spam == (True, False), Comment.c._deleted == (True, False), sort=desc("_date"))

    if after_id is not None:
        q._after(Comment._byID(after_id))

    q = utils.fetch_things2(q, chunk_size=chunk_size)
    q = utils.progress(q, verbosity=chunk_size, estimate=estimate)

    for chunk in utils.in_chunks(q, chunk_size):
        chunk = filter(lambda x: hasattr(x, "link_id"), chunk)
        update_comment_votes(chunk)
Developer: druska, Project: reddit, Lines: 20, Source: comment_tree.py


Example 17: rebuild_link_index

def rebuild_link_index(
    start_at=None,
    sleeptime=1,
    cls=Link,
    uploader=LinkUploader,
    doc_api="CLOUDSEARCH_DOC_API",
    estimate=50000000,
    chunk_size=1000,
):
    cache_key = _REBUILD_INDEX_CACHE_KEY % uploader.__name__.lower()
    doc_api = getattr(g, doc_api)
    uploader = uploader(doc_api)

    if start_at is _REBUILD_INDEX_CACHE_KEY:
        start_at = g.cache.get(cache_key)
        if not start_at:
            raise ValueError("Told me to use '%s' key, but it's not set" % cache_key)

    q = cls._query(cls.c._deleted == (True, False), sort=desc("_date"), data=True)
    if start_at:
        after = cls._by_fullname(start_at)
        assert isinstance(after, cls)
        q._after(after)
    q = r2utils.fetch_things2(q, chunk_size=chunk_size)
    q = r2utils.progress(q, verbosity=1000, estimate=estimate, persec=True, key=_progress_key)
    for chunk in r2utils.in_chunks(q, size=chunk_size):
        uploader.things = chunk
        for x in range(5):
            try:
                uploader.inject()
            except httplib.HTTPException as err:
                print "Got %s, sleeping %s secs" % (err, x)
                time.sleep(x)
                continue
            else:
                break
        else:
            raise err
        last_update = chunk[-1]
        g.cache.set(cache_key, last_update._fullname)
        time.sleep(sleeptime)
Developer: jzplusplus, Project: reddit, Lines: 41, Source: cloudsearch.py


Example 18: port_cassaurls

def port_cassaurls(after_id=None, estimate=15231317):
    from r2.models import Link, LinksByUrlAndSubreddit
    from r2.lib.db import tdb_cassandra
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._spam == (True, False),
                    sort=desc('_date'), data=True)
    if after_id:
        q._after(Link._byID(after_id,data=True))
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, estimate=estimate)
    q = (l for l in q
         if getattr(l, 'url', 'self') != 'self'
         and not getattr(l, 'is_self', False))
    chunks = in_chunks(q, 500)

    for chunk in chunks:
        for l in chunk:
            LinksByUrlAndSubreddit.add_link(l)
Developer: AHAMED750, Project: reddit, Lines: 21, Source: migrate.py


Example 19: port_cassaurls

def port_cassaurls(after_id=None, estimate=15231317):
    from r2.models import Link, LinksByUrl
    from r2.lib.db import tdb_cassandra
    from r2.lib.db.operators import desc
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.utils import fetch_things2, in_chunks, progress

    q = Link._query(Link.c._spam == (True, False), sort=desc("_date"), data=True)
    if after_id:
        q._after(Link._byID(after_id, data=True))
    q = fetch_things2(q, chunk_size=500)
    q = progress(q, estimate=estimate)
    q = (l for l in q if getattr(l, "url", "self") != "self" and not getattr(l, "is_self", False))
    chunks = in_chunks(q, 500)

    for chunk in chunks:
        with LinksByUrl._cf.batch(write_consistency_level=CL.ONE) as b:
            for l in chunk:
                k = LinksByUrl._key_from_url(l.url)
                if k:
                    b.insert(k, {l._id36: l._id36})
Developer: Shilohtd, Project: reddit, Lines: 21, Source: migrate.py


Example 20: _location_by_ips

def _location_by_ips(ips):
    if not hasattr(g, 'geoip_location'):
        g.log.warning("g.geoip_location not set. skipping GeoIP lookup.")
        return {}

    ret = {}
    for batch in in_chunks(ips, MAX_IPS_PER_GROUP):
        ip_string = '+'.join(batch)
        url = os.path.join(g.geoip_location, 'geoip', ip_string)

        try:
            response = urllib2.urlopen(url=url, timeout=3)
            json_data = response.read()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
            g.log.warning("Failed to fetch GeoIP information: %r" % e)
            continue

        try:
            ret.update(json.loads(json_data))
        except ValueError, e:
            g.log.warning("Invalid JSON response for GeoIP lookup: %r" % e)
            continue
Developer: APerson241, Project: reddit, Lines: 22, Source: geoip.py



Note: The r2.lib.utils.in_chunks examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are taken from open-source projects contributed by their respective developers; copyright remains with the original authors, and any redistribution or use should follow the corresponding project's license. Please do not repost without permission.

