• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python database.connect_database函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中pyspider.database.connect_database函数的典型用法代码示例。如果您正苦于以下问题：Python connect_database函数的具体用法？Python connect_database怎么用？Python connect_database使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了connect_database函数的17个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: resultdb_migrating

def resultdb_migrating(project, from_connection, to_connection):
    """Copy all results of ``project`` from one result database to another.

    Anything already stored for ``project`` in the destination is dropped
    first; every record selected from the source is then saved to it.

    :param project: name of the project whose results are migrated
    :param from_connection: value handed to ``connect_database`` for the source
    :param to_connection: value handed to ``connect_database`` for the destination
    """
    logging.info("resultdb: %s", project)
    source = connect_database(from_connection)
    target = connect_database(to_connection)
    # Start from a clean destination so stale rows do not survive the copy.
    target.drop(project)
    for record in source.select(project):
        taskid, url, payload = record['taskid'], record['url'], record['result']
        target.save(project, taskid, url, payload)
开发者ID:01jiagnwei01,项目名称:pyspider,代码行数:7,代码来源:migrate.py


示例2: taskdb_migrating

def taskdb_migrating(project, from_connection, to_connection):
    """Copy all tasks of ``project`` from one task database to another.

    The destination's data for ``project`` is dropped, then the tasks loaded
    from the source for each status value 1 through 4 are inserted into it.

    :param project: name of the project whose tasks are migrated
    :param from_connection: value handed to ``connect_database`` for the source
    :param to_connection: value handed to ``connect_database`` for the destination
    """
    logging.info("taskdb: %s", project)
    source = connect_database(from_connection)
    target = connect_database(to_connection)
    target.drop(project)
    # Status values are walked in ascending order, one query per status.
    for status in (1, 2, 3, 4):
        pending = source.load_tasks(status, project=project)
        for task in pending:
            target.insert(project, task['taskid'], task)
开发者ID:01jiagnwei01,项目名称:pyspider,代码行数:8,代码来源:migrate.py


示例3: cli

def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # shown as the command's help text by the CLI framework -- confirm.
    #
    # Configures logging, fills in every database / queue / phantomjs setting
    # that was not supplied in ``kwargs`` (from environment variables or local
    # defaults), stores the result on ``ctx.obj`` and returns ``ctx``.
    logging.config.fileConfig(os.path.join(os.path.dirname(__file__), "logging.conf"))

    # get db from env
    for db in ('taskdb', 'projectdb', 'resultdb'):
        if kwargs[db] is not None:
            continue
        # ``db=db`` binds the current loop value as a default argument so each
        # lazily-evaluated lambda connects to its own database.
        if os.environ.get('MYSQL_NAME'):
            kwargs[db] = Get(lambda db=db: connect_database('mysql+%s://%s:%s/%s' % (
                db, os.environ['MYSQL_PORT_3306_TCP_ADDR'],
                os.environ['MYSQL_PORT_3306_TCP_PORT'], db)))
        elif os.environ.get('MONGODB_NAME'):
            kwargs[db] = Get(lambda db=db: connect_database('mongodb+%s://%s:%s/%s' % (
                db, os.environ['MONGODB_PORT_27017_TCP_ADDR'],
                os.environ['MONGODB_PORT_27017_TCP_PORT'], db)))
        else:
            if not os.path.exists(kwargs['data_path']):
                # makedirs (not mkdir) so a nested data_path whose parent is
                # missing can still be created.
                os.makedirs(kwargs['data_path'])
            # ``db[:-2]`` strips the trailing "db", e.g. "taskdb" -> "task.db".
            kwargs[db] = Get(lambda db=db: connect_database('sqlite+%s:///%s/%s.db' % (
                db, kwargs['data_path'], db[:-2])))

    # queue
    queue_names = ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                   'fetcher2processor', 'processor2result')
    if kwargs.get('amqp_url'):
        from pyspider.libs.rabbitmq import Queue
        for name in queue_names:
            kwargs[name] = Get(lambda name=name: Queue(name, amqp_url=kwargs['amqp_url'],
                                                       maxsize=kwargs['queue_maxsize']))
    elif os.environ.get('RABBITMQ_NAME'):
        from pyspider.libs.rabbitmq import Queue
        # The user-info part must be "guest:guest@"; the previous literal held
        # an e-mail-obfuscation placeholder ("[email protected]") that yielded
        # an unparseable AMQP URL.
        amqp_url = ("amqp://guest:guest@%(RABBITMQ_PORT_5672_TCP_ADDR)s"
                    ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ)
        for name in queue_names:
            kwargs[name] = Get(lambda name=name: Queue(name, amqp_url=amqp_url,
                                                       maxsize=kwargs['queue_maxsize']))
    else:
        from multiprocessing import Queue
        for name in queue_names:
            kwargs[name] = Queue(kwargs['queue_maxsize'])

    # phantomjs-proxy
    if kwargs.get('phantomjs_proxy'):
        pass
    elif os.environ.get('PHANTOMJS_NAME'):
        # Drop the "tcp://" scheme prefix, keeping only what follows it.
        kwargs['phantomjs_proxy'] = os.environ['PHANTOMJS_PORT'][len('tcp://'):]

    ctx.obj['instances'] = []
    ctx.obj.update(kwargs)

    # With no subcommand (and outside testing mode) fall back to ``all``.
    if ctx.invoked_subcommand is None and not ctx.obj.get('testing_mode'):
        ctx.invoke(all)
    return ctx
开发者ID:Debug-Orz,项目名称:pyspider,代码行数:57,代码来源:run.py


示例4: migrate

def migrate(pool, from_connection, to_connection):
    """
    Migrate tool for pyspider
    """
    # NOTE(review): docstring kept verbatim; presumably used as CLI help text.
    #
    # ``pool`` is forwarded to ``Pool(...)``; the two connection arguments are
    # forwarded to ``connect_database``. The kind of the source database
    # decides how the copy is performed.
    source = connect_database(from_connection)
    target = connect_database(to_connection)

    if isinstance(source, ProjectDB):
        # Project records are copied inline, one at a time.
        for info in source.get_all():
            info = unicode_obj(info)
            logging.info("projectdb: %s", info['name'])
            target.drop(info['name'])
            target.insert(info['name'], info)
        return

    # Task and result databases are migrated per project through a pool; the
    # workers receive the connection arguments and reconnect themselves.
    if isinstance(source, TaskDB):
        workers = Pool(pool)
        workers.map(
            lambda name, src=from_connection, dst=to_connection: taskdb_migrating(name, src, dst),
            source.projects)
    elif isinstance(source, ResultDB):
        workers = Pool(pool)
        workers.map(
            lambda name, src=from_connection, dst=to_connection: resultdb_migrating(name, src, dst),
            source.projects)
开发者ID:01jiagnwei01,项目名称:pyspider,代码行数:23,代码来源:migrate.py


示例5: setUpClass

 def setUpClass(self):
     # Connect the result database used by this test case.
     # NOTE(review): "[email protected]" in the URL looks like an e-mail
     # obfuscation placeholder left by a web scraper rather than a real
     # user@host pair -- confirm the intended credentials and host.
     resultdb_url = (
         'sqlalchemy+mysql+mysqlconnector+resultdb://[email protected]/pyspider_test_resultdb'
     )
     self.resultdb = database.connect_database(resultdb_url)
开发者ID:bartqiao,项目名称:pyspider,代码行数:4,代码来源:test_database.py


示例6: cli

def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # shown as the command's help text by the CLI framework -- confirm.
    #
    # Configures logging, fills in every database / queue / phantomjs setting
    # that was not supplied in ``kwargs`` (from environment variables or local
    # defaults), stores the result on ``ctx.obj`` and returns ``ctx``.
    logging.config.fileConfig(os.path.join(os.path.dirname(__file__), "logging.conf"))

    # get db from env
    for db in ("taskdb", "projectdb", "resultdb"):
        if kwargs[db] is not None:
            continue
        # ``db=db`` binds the current loop value as a default argument so each
        # lazily-evaluated lambda connects to its own database.
        if os.environ.get("MYSQL_NAME"):
            kwargs[db] = utils.Get(
                lambda db=db: connect_database(
                    "mysql+%s://%s:%s/%s"
                    % (db, os.environ["MYSQL_PORT_3306_TCP_ADDR"], os.environ["MYSQL_PORT_3306_TCP_PORT"], db)
                )
            )
        elif os.environ.get("MONGODB_NAME"):
            kwargs[db] = utils.Get(
                lambda db=db: connect_database(
                    "mongodb+%s://%s:%s/%s"
                    % (db, os.environ["MONGODB_PORT_27017_TCP_ADDR"], os.environ["MONGODB_PORT_27017_TCP_PORT"], db)
                )
            )
        elif ctx.invoked_subcommand == "bench":
            # Only the default data path is redirected and wiped for a bench run.
            if kwargs["data_path"] == "./data":
                kwargs["data_path"] += "/bench"
                shutil.rmtree(kwargs["data_path"], ignore_errors=True)
                # makedirs (not mkdir): "./data" itself may not exist yet.
                os.makedirs(kwargs["data_path"])
            if db in ("taskdb", "resultdb"):
                # No file path in the URL for task/result databases here.
                kwargs[db] = utils.Get(lambda db=db: connect_database("sqlite+%s://" % (db)))
            else:
                kwargs[db] = utils.Get(
                    lambda db=db: connect_database("sqlite+%s:///%s/%s.db" % (db, kwargs["data_path"], db[:-2]))
                )
        else:
            if not os.path.exists(kwargs["data_path"]):
                os.makedirs(kwargs["data_path"])
            # ``db[:-2]`` strips the trailing "db", e.g. "taskdb" -> "task.db".
            kwargs[db] = utils.Get(
                lambda db=db: connect_database("sqlite+%s:///%s/%s.db" % (db, kwargs["data_path"], db[:-2]))
            )

    # queue
    queue_names = ("newtask_queue", "status_queue", "scheduler2fetcher", "fetcher2processor", "processor2result")
    if kwargs.get("amqp_url"):
        from pyspider.libs.rabbitmq import Queue

        for name in queue_names:
            kwargs[name] = utils.Get(
                lambda name=name: Queue(name, amqp_url=kwargs["amqp_url"], maxsize=kwargs["queue_maxsize"])
            )
    elif os.environ.get("RABBITMQ_NAME"):
        from pyspider.libs.rabbitmq import Queue

        # The user-info part must be "guest:guest@"; the previous literal held
        # an e-mail-obfuscation placeholder ("[email protected]") that yielded
        # an unparseable AMQP URL.
        amqp_url = (
            "amqp://guest:guest@%(RABBITMQ_PORT_5672_TCP_ADDR)s" ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ
        )
        for name in queue_names:
            kwargs[name] = utils.Get(lambda name=name: Queue(name, amqp_url=amqp_url, maxsize=kwargs["queue_maxsize"]))
    else:
        from multiprocessing import Queue

        for name in queue_names:
            kwargs[name] = Queue(kwargs["queue_maxsize"])

    # phantomjs-proxy
    if kwargs.get("phantomjs_proxy"):
        pass
    elif os.environ.get("PHANTOMJS_NAME"):
        # Drop the "tcp://" scheme prefix, keeping only what follows it.
        kwargs["phantomjs_proxy"] = os.environ["PHANTOMJS_PORT"][len("tcp://") :]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj["instances"] = []
    ctx.obj.update(kwargs)

    # With no subcommand (and outside testing mode) fall back to ``all``.
    if ctx.invoked_subcommand is None and not ctx.obj.get("testing_mode"):
        ctx.invoke(all)
    return ctx
开发者ID:0xa-cc,项目名称:pyspider,代码行数:77,代码来源:run.py


示例7: one

def one(ctx, interactive, enable_phantomjs, scripts):
    """
    One mode not only means all-in-one, it runs every thing in one process over
    tornado.ioloop, for debug purpose
    """
    # NOTE(review): docstring kept verbatim; presumably used as CLI help text.

    env = ctx.obj
    env['debug'] = False
    env['testing_mode'] = True

    if scripts:
        from pyspider.database.local.projectdb import ProjectDB
        # Projects come from the given script files instead of a stored projectdb.
        env['projectdb'] = ProjectDB(scripts)
        if env.get('is_taskdb_default'):
            env['taskdb'] = connect_database('sqlite+taskdb://')
        if env.get('is_resultdb_default'):
            env['resultdb'] = None

    # Optional phantomjs component; stays None when not requested.
    phantom = None
    if enable_phantomjs:
        phantom_cfg = env.config.get('phantomjs', {})
        phantom = ctx.invoke(phantomjs, **phantom_cfg)
        if phantom:
            env.setdefault('phantomjs_proxy', 'localhost:%s' % phantom.port)

    # Result worker: without a resultdb, default to the "One" worker class.
    worker_cfg = env.config.get('result_worker', {})
    if env.resultdb is None:
        worker_cfg.setdefault('result_cls',
                              'pyspider.result.OneResultWorker')
    worker = ctx.invoke(result_worker, **worker_cfg)

    proc_cfg = env.config.get('processor', {})
    proc_cfg.setdefault('enable_stdout_capture', False)
    proc = ctx.invoke(processor, **proc_cfg)

    fetch_cfg = env.config.get('fetcher', {})
    fetch_cfg.setdefault('xmlrpc', False)
    fetch = ctx.invoke(fetcher, **fetch_cfg)

    sched_cfg = env.config.get('scheduler', {})
    sched_cfg.setdefault('xmlrpc', False)
    sched_cfg.setdefault('scheduler_cls',
                         'pyspider.scheduler.OneScheduler')
    sched = ctx.invoke(scheduler, **sched_cfg)

    # Hand the other components to the scheduler, which drives them.
    sched.init_one(ioloop=fetch.ioloop,
                   fetcher=fetch,
                   processor=proc,
                   result_worker=worker,
                   interactive=interactive)
    if scripts:
        for name in env.projectdb.projects:
            sched.trigger_on_start(name)

    try:
        sched.run()
    finally:
        # Always shut down, however run() ends.
        sched.quit()
        if phantom:
            phantom.quit()
开发者ID:ConnorDFlynn,项目名称:Group1PySpider,代码行数:61,代码来源:run.py


示例8: cli

def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # shown as the command's help text by the CLI framework -- confirm.
    #
    # Configures logging, fills in every database / message-queue / phantomjs
    # setting that was not supplied in ``kwargs`` (from environment variables
    # or local defaults), stores the result on ``ctx.obj`` and returns ``ctx``.
    if kwargs['add_sys_path']:
        sys.path.append(os.getcwd())

    logging.config.fileConfig(kwargs['logging_config'])

    # get db from env
    for db in ('taskdb', 'projectdb', 'resultdb'):
        if kwargs[db] is not None:
            continue
        # ``db=db`` binds the current loop value as a default argument so each
        # lazily-evaluated lambda connects to its own database.
        if os.environ.get('MYSQL_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'sqlalchemy+mysql+{0!s}://{1!s}:{2!s}/{3!s}'.format(
                    db, os.environ['MYSQL_PORT_3306_TCP_ADDR'],
                    os.environ['MYSQL_PORT_3306_TCP_PORT'], db)))
        elif os.environ.get('MONGODB_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'mongodb+{0!s}://{1!s}:{2!s}/{3!s}'.format(
                    db, os.environ['MONGODB_PORT_27017_TCP_ADDR'],
                    os.environ['MONGODB_PORT_27017_TCP_PORT'], db)))
        elif ctx.invoked_subcommand == 'bench':
            # Only the default data path is redirected and wiped for a bench run.
            if kwargs['data_path'] == './data':
                kwargs['data_path'] += '/bench'
                shutil.rmtree(kwargs['data_path'], ignore_errors=True)
                # makedirs (not mkdir): "./data" itself may not exist yet.
                os.makedirs(kwargs['data_path'])
            if db in ('taskdb', 'resultdb'):
                # No file path in the URL for task/result databases here.
                kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+{0!s}://'.format((db))))
            else:
                kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+{0!s}:///{1!s}/{2!s}.db'.format(
                    db, kwargs['data_path'], db[:-2])))
        else:
            if not os.path.exists(kwargs['data_path']):
                os.makedirs(kwargs['data_path'])
            # ``db[:-2]`` strips the trailing "db", e.g. "taskdb" -> "task.db".
            kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+{0!s}:///{1!s}/{2!s}.db'.format(
                db, kwargs['data_path'], db[:-2])))
            # Record that this database fell back to the local default.
            kwargs['is_{0!s}_default'.format(db)] = True

    # create folder for counter.dump
    if not os.path.exists(kwargs['data_path']):
        os.makedirs(kwargs['data_path'])

    # message queue, compatible with old version
    if kwargs.get('message_queue'):
        pass
    elif kwargs.get('amqp_url'):
        kwargs['message_queue'] = kwargs['amqp_url']
    elif os.environ.get('RABBITMQ_NAME'):
        # The user-info part must be "guest:guest@"; the previous literal held
        # an e-mail-obfuscation placeholder ("[email protected]") that yielded
        # an unparseable AMQP URL.
        kwargs['message_queue'] = ("amqp://guest:guest@%(RABBITMQ_PORT_5672_TCP_ADDR)s"
                                   ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ)
    elif kwargs.get('beanstalk'):
        kwargs['message_queue'] = "beanstalk://{0!s}/".format(kwargs['beanstalk'])

    for name in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                 'fetcher2processor', 'processor2result'):
        if kwargs.get('message_queue'):
            # Configured queue: connect lazily through utils.Get.
            kwargs[name] = utils.Get(lambda name=name: connect_message_queue(
                name, kwargs.get('message_queue'), kwargs['queue_maxsize']))
        else:
            # No queue URL configured: create the queue immediately.
            kwargs[name] = connect_message_queue(name, kwargs.get('message_queue'),
                                                 kwargs['queue_maxsize'])

    # phantomjs-proxy
    if kwargs.get('phantomjs_proxy'):
        pass
    elif os.environ.get('PHANTOMJS_NAME'):
        # Drop the "tcp://" scheme prefix, keeping only what follows it.
        kwargs['phantomjs_proxy'] = os.environ['PHANTOMJS_PORT_25555_TCP'][len('tcp://'):]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj['instances'] = []
    ctx.obj.update(kwargs)

    # With no subcommand (and outside testing mode) fall back to ``all``.
    if ctx.invoked_subcommand is None and not ctx.obj.get('testing_mode'):
        ctx.invoke(all)
    return ctx
开发者ID:runt18,项目名称:pyspider,代码行数:77,代码来源:run.py


示例9: connect_db

def connect_db(ctx, param, value):
    """Wrap a database connection value in a lazy ``Get``.

    :param ctx: unused here
    :param param: unused here
    :param value: value later handed to ``connect_database``; ``None`` means
        nothing was supplied
    :return: ``None`` when ``value`` is ``None``, otherwise a ``Get`` whose
        callable calls ``connect_database(value)``
    """
    if value is not None:
        # Connection is deferred: the lambda is only stored here, not called.
        return Get(lambda: connect_database(value))
    return None
开发者ID:Debug-Orz,项目名称:pyspider,代码行数:4,代码来源:run.py


示例10: one

def one(ctx, interactive, enable_phantomjs, scripts):
    """
    One mode not only means all-in-one, it runs every thing in one process over
    tornado.ioloop, for debug purpose
    """
    # NOTE(review): docstring kept verbatim; presumably used as CLI help text.

    env = ctx.obj
    env["debug"] = False
    env["testing_mode"] = True

    if scripts:
        from pyspider.database.local.projectdb import ProjectDB

        # Projects come from the given script files instead of a stored projectdb.
        env["projectdb"] = ProjectDB(scripts)
        if env.get("is_taskdb_default"):
            env["taskdb"] = connect_database("sqlite+taskdb://")
        if env.get("is_resultdb_default"):
            env["resultdb"] = None

    # Optional phantomjs component; stays None when not requested.
    phantom = None
    if enable_phantomjs:
        phantom_cfg = env.config.get("phantomjs", {})
        phantom = ctx.invoke(phantomjs, **phantom_cfg)
        if phantom:
            env.setdefault("phantomjs_proxy", "127.0.0.1:%s" % phantom.port)

    # Result worker: without a resultdb, default to the "One" worker class.
    worker_cfg = env.config.get("result_worker", {})
    if env.resultdb is None:
        worker_cfg.setdefault("result_cls", "pyspider.result.OneResultWorker")
    worker = ctx.invoke(result_worker, **worker_cfg)

    proc_cfg = env.config.get("processor", {})
    proc_cfg.setdefault("enable_stdout_capture", False)
    proc = ctx.invoke(processor, **proc_cfg)

    fetch_cfg = env.config.get("fetcher", {})
    fetch_cfg.setdefault("xmlrpc", False)
    fetch = ctx.invoke(fetcher, **fetch_cfg)

    sched_cfg = env.config.get("scheduler", {})
    sched_cfg.setdefault("xmlrpc", False)
    sched_cfg.setdefault("scheduler_cls", "pyspider.scheduler.OneScheduler")
    sched = ctx.invoke(scheduler, **sched_cfg)

    # Hand the other components to the scheduler, which drives them.
    sched.init_one(
        ioloop=fetch.ioloop,
        fetcher=fetch,
        processor=proc,
        result_worker=worker,
        interactive=interactive,
    )
    if scripts:
        for name in env.projectdb.projects:
            sched.trigger_on_start(name)

    try:
        sched.run()
    finally:
        # Always shut down, however run() ends.
        sched.quit()
        if phantom:
            phantom.quit()
开发者ID:RacoonBattle,项目名称:pyspider,代码行数:62,代码来源:run.py


示例11: cli

def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # shown as the command's help text by the CLI framework -- confirm.
    #
    # Configures logging, fills in every database / message-queue / phantomjs
    # setting that was not supplied in ``kwargs`` (from environment variables
    # or local defaults), stores the result on ``ctx.obj`` and returns ``ctx``.
    if kwargs["add_sys_path"]:
        sys.path.append(os.getcwd())

    logging.config.fileConfig(kwargs["logging_config"])

    # get db from env
    for db in ("taskdb", "projectdb", "resultdb"):
        if kwargs[db] is not None:
            continue
        # ``db=db`` binds the current loop value as a default argument so each
        # lazily-evaluated lambda connects to its own database.
        if os.environ.get("MYSQL_NAME"):
            kwargs[db] = utils.Get(
                lambda db=db: connect_database(
                    "sqlalchemy+mysql+%s://%s:%s/%s"
                    % (db, os.environ["MYSQL_PORT_3306_TCP_ADDR"], os.environ["MYSQL_PORT_3306_TCP_PORT"], db)
                )
            )
        elif os.environ.get("MONGODB_NAME"):
            kwargs[db] = utils.Get(
                lambda db=db: connect_database(
                    "mongodb+%s://%s:%s/%s"
                    % (db, os.environ["MONGODB_PORT_27017_TCP_ADDR"], os.environ["MONGODB_PORT_27017_TCP_PORT"], db)
                )
            )
        elif ctx.invoked_subcommand == "bench":
            # Only the default data path is redirected and wiped for a bench run.
            if kwargs["data_path"] == "./data":
                kwargs["data_path"] += "/bench"
                shutil.rmtree(kwargs["data_path"], ignore_errors=True)
                # makedirs (not mkdir): "./data" itself may not exist yet.
                os.makedirs(kwargs["data_path"])
            if db in ("taskdb", "resultdb"):
                # No file path in the URL for task/result databases here.
                kwargs[db] = utils.Get(lambda db=db: connect_database("sqlite+%s://" % (db)))
            else:
                kwargs[db] = utils.Get(
                    lambda db=db: connect_database("sqlite+%s:///%s/%s.db" % (db, kwargs["data_path"], db[:-2]))
                )
        else:
            if not os.path.exists(kwargs["data_path"]):
                os.makedirs(kwargs["data_path"])
            # ``db[:-2]`` strips the trailing "db", e.g. "taskdb" -> "task.db".
            kwargs[db] = utils.Get(
                lambda db=db: connect_database("sqlite+%s:///%s/%s.db" % (db, kwargs["data_path"], db[:-2]))
            )
            # Record that this database fell back to the local default.
            kwargs["is_%s_default" % db] = True

    # create folder for counter.dump
    if not os.path.exists(kwargs["data_path"]):
        os.makedirs(kwargs["data_path"])

    # message queue, compatible with old version
    if kwargs.get("message_queue"):
        pass
    elif kwargs.get("amqp_url"):
        kwargs["message_queue"] = kwargs["amqp_url"]
    elif os.environ.get("RABBITMQ_NAME"):
        # The user-info part must be "guest:guest@"; the previous literal held
        # an e-mail-obfuscation placeholder ("[email protected]") that yielded
        # an unparseable AMQP URL.
        kwargs["message_queue"] = (
            "amqp://guest:guest@%(RABBITMQ_PORT_5672_TCP_ADDR)s" ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ
        )
    elif kwargs.get("beanstalk"):
        kwargs["message_queue"] = "beanstalk://%s/" % kwargs["beanstalk"]

    for name in ("newtask_queue", "status_queue", "scheduler2fetcher", "fetcher2processor", "processor2result"):
        if kwargs.get("message_queue"):
            # Configured queue: connect lazily through utils.Get.
            kwargs[name] = utils.Get(
                lambda name=name: connect_message_queue(name, kwargs.get("message_queue"), kwargs["queue_maxsize"])
            )
        else:
            # No queue URL configured: create the queue immediately.
            kwargs[name] = connect_message_queue(name, kwargs.get("message_queue"), kwargs["queue_maxsize"])

    # phantomjs-proxy
    if kwargs.get("phantomjs_proxy"):
        pass
    elif os.environ.get("PHANTOMJS_NAME"):
        # Drop the "tcp://" scheme prefix, keeping only what follows it.
        kwargs["phantomjs_proxy"] = os.environ["PHANTOMJS_PORT_25555_TCP"][len("tcp://") :]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj["instances"] = []
    ctx.obj.update(kwargs)

    # With no subcommand (and outside testing mode) fall back to ``all``.
    if ctx.invoked_subcommand is None and not ctx.obj.get("testing_mode"):
        ctx.invoke(all)
    return ctx
开发者ID:RacoonBattle,项目名称:pyspider,代码行数:83,代码来源:run.py


示例12: connect_database

# Script setup: open a direct psycopg2 connection plus a cursor, and a
# pyspider result database handle through connect_database.
conn=psycopg2.connect(database="resultdb", user="postgres",password="", host="", port="")
cur = conn.cursor()
from pyspider.database import connect_database
resultdb = connect_database("sqlalchemy+postgresql+resultdb://postgres:@10.1.36.183:5432/resultdb")
#result=resultdb.select('test6').next()
#row_result = result['result']
#url=row_result['wages_and_employment_content']
#print type(url),url

## Scrape data


## Scrape the related data and store it in the database

## Get the further links and return a list; the list is a dictionary (with the related content)
def get_more_touchs(list_content,types):
    for each in list_content:
        url=each[0]
        try:
            r=requests.get(url,headers=header)
            soup=BeautifulSoup(r.text)
            websites={
            "detailed_work_activities" :"/search/dwa/compare/.*?g=Continue",
               "work_context":"^/find/descriptor/result/.*?",
               "work_values_content":"^/explore/workvalues/.*?",
               "work_styles_content":"^/find/descriptor/result/.*?",
               "work_activities":"^/find/descriptor/result/.*?",
               "skills_content":"^/find/descriptor/result/.*?",
                "knowledge_content":"^/find/descriptor/result/.*?",
                "interests":"^/explore/interests/.*?",
                  "abilities":"^/explore/interests/.*?"
开发者ID:zx403413599,项目名称:codes,代码行数:31,代码来源:backup.py


示例13: one

def one(ctx, interactive, enable_phantomjs, scripts):
    """
    One mode not only means all-in-one, it runs every thing in one process over
    tornado.ioloop, for debug purpose

    * webui is not running in one mode.
    * SCRIPTS is the script file path of project
        - when set, taskdb and resultdb will use a in-memery sqlite db by default
        - when set, on_start callback will be triggered on start
    * the status of project is always RUNNING.
    * rate and burst can be set in script with comments like:
        # rate: 1.0
        # burst: 3
    """
    # NOTE(review): docstring kept verbatim; presumably used as CLI help text.

    ctx.obj['debug'] = False
    g = ctx.obj
    g['testing_mode'] = True

    if scripts:
        from pyspider.database.local.projectdb import ProjectDB
        # Projects come from the given script files instead of a stored projectdb.
        g['projectdb'] = ProjectDB(scripts)
        if g.get('is_taskdb_default'):
            g['taskdb'] = connect_database('sqlite+taskdb://')
        if g.get('is_resultdb_default'):
            g['resultdb'] = connect_database('sqlite+resultdb://')

    if enable_phantomjs:
        phantomjs_config = g.config.get('phantomjs', {})
        phantomjs_obj = ctx.invoke(phantomjs, **phantomjs_config)
        if phantomjs_obj:
            g.setdefault('phantomjs_proxy', 'localhost:%s' % phantomjs_obj.port)
    else:
        phantomjs_obj = None

    result_worker_config = g.config.get('result_worker', {})
    result_worker_obj = ctx.invoke(result_worker, **result_worker_config)

    processor_config = g.config.get('processor', {})
    processor_obj = ctx.invoke(processor, **processor_config)

    fetcher_config = g.config.get('fetcher', {})
    fetcher_config.setdefault('xmlrpc', False)
    fetcher_obj = ctx.invoke(fetcher, **fetcher_config)

    scheduler_config = g.config.get('scheduler', {})
    scheduler_config.setdefault('xmlrpc', False)
    scheduler_config.setdefault('scheduler_cls',
                                'pyspider.scheduler.scheduler.OneScheduler')
    scheduler_obj = ctx.invoke(scheduler, **scheduler_config)

    # Hand the other components to the scheduler, which drives them.
    scheduler_obj.init_one(ioloop=fetcher_obj.ioloop,
                           fetcher=fetcher_obj,
                           processor=processor_obj,
                           result_worker=result_worker_obj,
                           interactive=interactive)
    if scripts:
        for project in g.projectdb.projects:
            scheduler_obj.trigger_on_start(project)
    try:
        scheduler_obj.run()
    finally:
        # Clean up on every exit path, not only on KeyboardInterrupt, so the
        # scheduler and the phantomjs helper are never left un-quit; any
        # exception (KeyboardInterrupt included) still propagates afterwards.
        scheduler_obj.quit()
        if phantomjs_obj:
            phantomjs_obj.quit()
开发者ID:zhaoxiaojun,项目名称:pyspider,代码行数:66,代码来源:run.py


示例14: setUpClass

 def setUpClass(self):
     # Connect the task database used by this test case, then run the
     # class-level teardown once straight away.
     # NOTE(review): "[email protected]" in the URL looks like an e-mail
     # obfuscation placeholder left by a web scraper rather than a real
     # user@host pair -- confirm the intended credentials and host.
     taskdb_url = 'sqlalchemy+postgresql+taskdb://[email protected]:5432/pyspider_test_taskdb'
     self.taskdb = database.connect_database(taskdb_url)
     self.tearDownClass()
开发者ID:01jiagnwei01,项目名称:pyspider,代码行数:5,代码来源:test_database.py


示例15: connect_db

def connect_db(ctx, param, value):
    """Wrap a database connection value in a lazy ``utils.Get``.

    :param ctx: unused here
    :param param: unused here
    :param value: value later handed to ``connect_database``; any falsy value
        is treated as "not supplied"
    :return: ``None`` when ``value`` is falsy, otherwise a ``utils.Get`` whose
        callable calls ``connect_database(value)``
    """
    if value:
        # Connection is deferred: the lambda is only stored here, not called.
        return utils.Get(lambda: connect_database(value))
    return None
开发者ID:ConnorDFlynn,项目名称:Group1PySpider,代码行数:4,代码来源:run.py


示例16: cli

def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # shown as the command's help text by the CLI framework -- confirm.
    #
    # Configures logging, fills in every database / queue / phantomjs setting
    # that was not supplied in ``kwargs`` (from environment variables or local
    # defaults), stores the result on ``ctx.obj`` and returns ``ctx``.
    logging.config.fileConfig(kwargs['logging_config'])

    # get db from env
    for db in ('taskdb', 'projectdb', 'resultdb'):
        if kwargs[db] is not None:
            continue
        # ``db=db`` binds the current loop value as a default argument so each
        # lazily-evaluated lambda connects to its own database.
        if os.environ.get('MYSQL_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database('mysql+%s://%s:%s/%s' % (
                db, os.environ['MYSQL_PORT_3306_TCP_ADDR'],
                os.environ['MYSQL_PORT_3306_TCP_PORT'], db)))
        elif os.environ.get('MONGODB_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database('mongodb+%s://%s:%s/%s' % (
                db, os.environ['MONGODB_PORT_27017_TCP_ADDR'],
                os.environ['MONGODB_PORT_27017_TCP_PORT'], db)))
        elif ctx.invoked_subcommand == 'bench':
            # Only the default data path is redirected and wiped for a bench run.
            if kwargs['data_path'] == './data':
                kwargs['data_path'] += '/bench'
                shutil.rmtree(kwargs['data_path'], ignore_errors=True)
                # makedirs (not mkdir): "./data" itself may not exist yet.
                os.makedirs(kwargs['data_path'])
            if db in ('taskdb', 'resultdb'):
                # No file path in the URL for task/result databases here.
                kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s://' % (db)))
            else:
                kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s:///%s/%s.db' % (
                    db, kwargs['data_path'], db[:-2])))
        else:
            if not os.path.exists(kwargs['data_path']):
                os.makedirs(kwargs['data_path'])
            # ``db[:-2]`` strips the trailing "db", e.g. "taskdb" -> "task.db".
            kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s:///%s/%s.db' % (
                db, kwargs['data_path'], db[:-2])))
            # Record that this database fell back to the local default.
            kwargs['is_%s_default' % db] = True

    # create folder for counter.dump
    if not os.path.exists(kwargs['data_path']):
        os.makedirs(kwargs['data_path'])

    # queue
    queue_names = ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                   'fetcher2processor', 'processor2result')
    if kwargs.get('amqp_url'):
        from pyspider.libs.rabbitmq import Queue
        for name in queue_names:
            kwargs[name] = utils.Get(lambda name=name: Queue(name, amqp_url=kwargs['amqp_url'],
                                                             maxsize=kwargs['queue_maxsize']))
    elif os.environ.get('RABBITMQ_NAME'):
        from pyspider.libs.rabbitmq import Queue
        # The user-info part must be "guest:guest@"; the previous literal held
        # an e-mail-obfuscation placeholder ("[email protected]") that yielded
        # an unparseable AMQP URL.
        amqp_url = ("amqp://guest:guest@%(RABBITMQ_PORT_5672_TCP_ADDR)s"
                    ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ)
        for name in queue_names:
            kwargs[name] = utils.Get(lambda name=name: Queue(name, amqp_url=amqp_url,
                                                             maxsize=kwargs['queue_maxsize']))
    elif kwargs.get('beanstalk'):
        from pyspider.libs.beanstalk import Queue
        for name in queue_names:
            kwargs[name] = utils.Get(lambda name=name: Queue(name, host=kwargs.get('beanstalk'),
                                                             maxsize=kwargs['queue_maxsize']))
    else:
        from multiprocessing import Queue
        for name in queue_names:
            kwargs[name] = Queue(kwargs['queue_maxsize'])

    # phantomjs-proxy
    if kwargs.get('phantomjs_proxy'):
        pass
    elif os.environ.get('PHANTOMJS_NAME'):
        # Drop the "tcp://" scheme prefix, keeping only what follows it.
        kwargs['phantomjs_proxy'] = os.environ['PHANTOMJS_PORT_25555_TCP'][len('tcp://'):]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj['instances'] = []
    ctx.obj.update(kwargs)

    # With no subcommand (and outside testing mode) fall back to ``all``.
    if ctx.invoked_subcommand is None and not ctx.obj.get('testing_mode'):
        ctx.invoke(all)
    return ctx
开发者ID:ConnorDFlynn,项目名称:Group1PySpider,代码行数:79,代码来源:run.py


示例17: setUpClass

 def setUpClass(self):
     # Connect the result database used by this test case and check that the
     # index named in the URL's query string is the one the handle reports.
     resultdb_url = 'elasticsearch+resultdb://127.0.0.1:9200/?index=test_pyspider_resultdb'
     self.resultdb = database.connect_database(resultdb_url)
     assert self.resultdb.index == 'test_pyspider_resultdb'
开发者ID:Dmitry-Kucher,项目名称:pyspider,代码行数:5,代码来源:test_database.py



注:本文中的pyspider.database.connect_database函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python tornado_fetcher.Fetcher类代码示例发布时间:2022-05-27
下一篇:
Python vi_task.VITask类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap