本文整理汇总了Python中webpage.WebPage类的典型用法代码示例。如果您正苦于以下问题：Python WebPage类的具体用法？Python WebPage怎么用？Python WebPage使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了WebPage类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: set_data
def set_data(self):
    """Build the POST payload for the class-search form.

    Reads the term ``<option>`` elements out of ``self.htmldata1``, stores
    the label -> code mapping in ``self.semesters``, picks the code whose
    label mentions both ``self.season`` and ``self.year`` and stores the
    resulting form fields in ``self.data``.  When no label matches, a
    warning is printed and the term code sent is ``"0"``.
    """
    page = WebPage(htmldata=self.htmldata1)

    # Find all the codes for season/year in the first HTML form data.
    self.semesters = {}
    xpath = """//*[@id="CLASS_SRCH_WRK2_STRM$35$"]/option"""
    for option in page.get_from_xpath(xpath):
        # NOTE(review): the elements look lxml-like, where an empty
        # <option> reports ``text`` (or a missing attribute) as None --
        # treat that as blank instead of crashing on ``None.strip()``.
        label = option.text or ""
        value = option.get("value") or ""
        if label.strip() and value.strip():
            self.semesters[label] = value

    # Match up season/year to the codes we just found, if possible.
    code = 0
    season = self.season.lower()
    year = str(self.year)
    for label, value in self.semesters.items():
        if season in label.lower() and year in label:
            code = value
            break

    if not code:
        print_color("Warning: failed to find season/year in search options. season='%s' year='%s'" % (self.season, self.year), COLORS.RED)
        print_d("search options", self.semesters)

    self.data = {
        "ICFocus": "SSR_CLSRCH_WRK_ACAD_CAREER$2",
        "CLASS_SRCH_WRK2_STRM$35$": str(code),
        "SSR_CLSRCH_WRK_SUBJECT$0": self.department,
        "ICAction": "CLASS_SRCH_WRK2_SSR_PB_CLASS_SRCH",
        "SSR_CLSRCH_WRK_ACAD_CAREER$2": self.level,
    }
开发者ID:Zulban,项目名称:zconcordia,代码行数:27,代码来源:concordia_search.py
示例2: Crawler
class Crawler():
    """Sequential crawler driven by a URL queue and per-site link rules.

    URLs are popped from ``self.queue``, downloaded, stored in
    ``self.webpagedb`` and their ``<a>`` links are filtered with the
    patterns registered through ``add_rules`` before being queued again.
    """

    def __init__(self):
        self.downloader = DownloadManager()
        self.webpage = None
        self.init_database()
        self.rules = {}

    def init_database(self):
        """Open the queue, page-store and duplicate-check databases."""
        self.queue = QueueDB('queue.db')
        self.webpagedb = WebpageDB('webpage.db')
        self.duplcheck = DuplCheckDB('duplcheck.db')

    def add_seeds(self, links):
        """Queue the links that pass the duplicate filter."""
        new_links = self.duplcheck.filter_dupl_urls(links)
        self.duplcheck.add_urls(new_links)
        self.queue.push_urls(new_links)

    def add_rules(self, rules):
        """Replace the rule table.

        ``rules`` maps a URL regex string to an iterable of link regex
        strings; both sides are compiled here.
        """
        self.rules = {}
        for url, inurls in rules.items():
            self.rules[re.compile(url)] = [re.compile(u) for u in inurls]

    def get_patterns_from_rules(self, url):
        """Return the de-duplicated link patterns of every rule matching ``url``."""
        patns = []
        for purl, ru in self.rules.items():
            if purl.match(url) is not None:
                patns.extend(ru)
        return list(set(patns))

    def start(self):
        """Crawl until the queue returns ``None``."""
        while True:
            url = self.queue.pop_url()
            print(url)
            if url is None:
                print("crawling task is done.")
                break
            error_msg, url, redirected_url, html = self.downloader.download(url)
            if html is not None:
                self.webpagedb.html2db(url, html)
                self.webpage = WebPage(url, html)
                self.webpage.parse_links()
                ruptn = self.get_patterns_from_rules(url)
                print(ruptn)
                links = self.webpage.filter_links(tags=['a'], patterns=ruptn)
                self.add_seeds(links)
            # Pause between requests.
            self.mysleep(3)

    def mysleep(self, n):
        """Sleep ``n`` seconds, printing a progress line after each second."""
        for i in range(n):
            time.sleep(1)
            # Single formatted argument: same output under Python 2 and 3.
            print("sleep %s of %s" % (i, n))
开发者ID:ricocmc,项目名称:pythonTraining,代码行数:58,代码来源:crawler.py
示例3: Crawler
class Crawler():
    """Sequential crawler that stores pages and extracted articles via ``OperatorDB``."""

    def __init__(self):
        self.downloader = DownloadManager()
        self.webpage = None
        self.rules = {}
        self.dbop = OperatorDB()

    def add_seeds(self, links):
        """Hand new links to the database layer."""
        self.dbop.add_seeds(links)

    def add_rules(self, rules):
        """Replace the rule table.

        ``rules`` maps a URL regex string to an iterable of link regex
        strings; both sides are compiled here.
        """
        self.rules = {}
        for url, inurls in rules.items():
            self.rules[re.compile(url)] = [re.compile(u) for u in inurls]

    def get_patterns_from_rules(self, url):
        """Return the de-duplicated link patterns of every rule matching ``url``."""
        patns = []
        for purl, ru in self.rules.items():
            if purl.match(url) is not None:
                patns.extend(ru)
        return list(set(patns))

    def mysleep(self, n):
        """Pause for ``n`` seconds between requests.

        ``start`` calls this method, but the class as shown did not define
        it, so every successful iteration ended in a swallowed
        AttributeError.
        """
        import time
        time.sleep(n)

    def start(self):
        """Crawl until ``pop_url`` returns ``None``, then close the database.

        Any exception raised while handling one URL is printed and the loop
        moves on to the next URL.
        """
        while True:
            # Bound before the ``try`` so the handler below can always
            # report it, even when ``pop_url`` itself raises.
            url = None
            try:
                url = self.dbop.pop_url()
                print("url: %s" % url)
                if url is None:
                    print("crawling task is done.")
                    break
                error_msg, url, redirected_url, html = self.downloader.download(url)
                if html is not None:
                    self.webpage = WebPage(url, html)
                    article = self.webpage.extract()
                    if len(article) > 5:
                        addtime = "%s %s" % (article[1], article[2])
                        self.dbop.html2db(url, html,
                                          article[0],
                                          addtime,
                                          article[3],
                                          article[5])
                    else:
                        self.dbop.html2db(url, html)
                    print(self.webpage.parse_links())
                    ruptn = self.get_patterns_from_rules(url)
                    links = self.webpage.filter_links(tags=['a'],
                                                      str_patterns=ruptn)
                    self.add_seeds(links)
                self.mysleep(3)
            except Exception as err:
                print("!!error!! Exception happend! %s %s" % (url, err))
        self.dbop.close()
开发者ID:agentwx,项目名称:auCrawler,代码行数:57,代码来源:crawler.py
示例4: updateView
def updateView(self):
    """Install a fresh WebPage on the web view and load the generated HTML."""
    web_page = WebPage(logger=None, parent=self)
    web_page.setLinkDelegationPolicy(QWebPage.DelegateAllLinks)
    web_page.mainFrame().addToJavaScriptWindowObject("qtWindow", self)
    self.ui.webView.setPage(web_page)

    content = self.generateHtml()
    # The base URL has to keep its trailing '/', otherwise QWebView cannot
    # load files from that directory.
    static_dir = os.path.join(self.dataDir, "static/")
    self.ui.webView.setHtml(content, QUrl.fromLocalFile(static_dir))
开发者ID:agateau,项目名称:qyok,代码行数:11,代码来源:mainwindow.py
示例5: __init__
def __init__(self):
    """Create the Qt application (if needed), network stack, page and view."""
    logging.debug("-->")
    super(WebBrowser, self).__init__()

    # Reuse a running QApplication when there is one.
    self.app = QApplication.instance()
    if self.app is None:
        self.app = QApplication(sys.argv)
    self.app.setQuitOnLastWindowClosed(False)

    self.event_loop = QEventLoop()
    self.cookie_jar = CookieJar()
    # The proxy object is built but not installed on the network manager.
    self.proxy = QNetworkProxy(QNetworkProxy.HttpProxy, "127.0.1.1", 8888)
    self.network_manager = NetworkAccessManager()
    self.network_manager.setCookieJar(self.cookie_jar)

    self.web_page = WebPage()
    self.web_page.setNetworkAccessManager(self.network_manager)
    self.web_view = QWebView()
    self.web_view.setPage(self.web_page)

    view_settings = self.web_view.settings()
    view_settings.setAttribute(QWebSettings.AutoLoadImages, False)
    view_settings.setAttribute(QWebSettings.PluginsEnabled, True)
    view_settings.setAttribute(QWebSettings.JavascriptEnabled, True)
    view_settings.setAttribute(QWebSettings.LocalContentCanAccessRemoteUrls, True)

    # Every finished network reply is routed to network_reply_finished.
    manager = self.web_view.page().networkAccessManager()
    self.connect(manager, SIGNAL("finished(QNetworkReply*)"), self.network_reply_finished)

    # Per-load state, all unset initially.
    self.page_loaded_validator = None
    self.page_loaded_handler = None
    self.page_loaded_handler_kwargs = None
    self.timeout_message = None
    self.timer = None
    self.event_loop_exception = None
    logging.debug("<--")
开发者ID:mguillech,项目名称:tfb-scraper,代码行数:30,代码来源:webbrowser.py
示例6: getlinks
def getlinks(self, url, html):
    """Parse ``html`` and return its ``<a>`` links filtered by the patterns for ``url``.

    The parsed page is also kept on ``self.webpage``.
    """
    page = WebPage(url, html)
    self.webpage = page
    page.parse_links()
    url_patterns = self.get_patterns_from_rules(url)
    return page.filter_links(tags=['a'], patterns=url_patterns)
开发者ID:acha21,项目名称:python-crawler,代码行数:7,代码来源:crawler.py
示例7: start
def start(self):
    """Crawl until the queue returns ``None``.

    Each downloaded page is stored, its ``<a>`` links matching the URL
    rules are queued, and its ``<a>`` links matching ``self.file_rule`` are
    recorded in ``self.files`` and passed to ``download_files``.
    """
    while True:
        url = self.queue.pop_url()
        print(url)
        if url is None:
            print("crawling task is done.")
            break
        error_msg, url, redirected_url, html = self.downloader.download(url)
        if html is None:
            continue

        self.webpagedb.html2db(url, html)
        self.webpage = WebPage(url, html)
        self.webpage.parse_links()

        ruptn = self.get_patterns_from_rules(url)
        links = self.webpage.filter_links(tags=['a'], patterns=ruptn)
        print(links)
        self.add_seeds(links)

        file_pattern = [re.compile(self.file_rule)]
        files = self.webpage.filter_links(tags=['a'], patterns=file_pattern)
        # NOTE(review): this appends the whole result as ONE element of
        # self.files; confirm whether ``extend`` was intended.
        self.files.append(files)
        # TODO:
        self.download_files(files)
        print(files)
开发者ID:gsliu,项目名称:Cybertron,代码行数:26,代码来源:crawler.py
示例8: start
def start(self):
    """Crawl until ``pop_url`` returns ``None``, then close the database.

    Any exception raised while handling one URL is printed and the loop
    moves on to the next URL.
    """
    while True:
        # Bound before the ``try`` so the handler below can always report
        # it, even when ``pop_url`` itself raises (previously this was an
        # unbound or stale name).
        url = None
        try:
            url = self.dbop.pop_url()
            print("url: %s" % url)
            if url is None:
                print("crawling task is done.")
                break
            error_msg, url, redirected_url, html = self.downloader.download(url)
            if html is not None:
                self.webpage = WebPage(url, html)
                article = self.webpage.extract()
                if len(article) > 5:
                    addtime = "%s %s" % (article[1], article[2])
                    self.dbop.html2db(url, html,
                                      article[0],
                                      addtime,
                                      article[3],
                                      article[5])
                else:
                    self.dbop.html2db(url, html)
                print(self.webpage.parse_links())
                ruptn = self.get_patterns_from_rules(url)
                links = self.webpage.filter_links(tags=['a'],
                                                  str_patterns=ruptn)
                self.add_seeds(links)
            self.mysleep(3)
        except Exception as err:
            print("!!error!! Exception happend! %s %s" % (url, err))
    self.dbop.close()
开发者ID:agentwx,项目名称:auCrawler,代码行数:31,代码来源:crawler.py
示例9: __init__
def __init__(self, parent, args):
    """Wire up the main WebPage, network manager and default page settings.

    Values are taken from the parsed command-line ``args``; the bundled
    ``:/bootstrap.js`` is evaluated in the main frame at the end, and the
    process exits if that script cannot be opened or is empty.
    """
    QObject.__init__(self, parent)

    # variable declarations
    self.m_defaultPageSettings = {}
    self.m_pages = []
    self.m_verbose = args.verbose
    self.m_page = WebPage(self)
    self.m_returnValue = 0
    self.m_terminated = False

    # setup the values from args
    self.m_scriptFile = args.script
    self.m_args = args.script_args
    self.m_filesystem = FileSystem(self)
    self.m_pages.append(self.m_page)

    do_action('PhantomInitPre')

    if args.proxy is None:
        QNetworkProxyFactory.setUseSystemConfiguration(True)
    else:
        app_proxy = QNetworkProxy(QNetworkProxy.HttpProxy, args.proxy[0], int(args.proxy[1]))
        QNetworkProxy.setApplicationProxy(app_proxy)

    # Provide WebPage with a non-standard Network Access Manager
    self.m_netAccessMan = NetworkAccessManager(self, args.disk_cache, args.cookies, args.ignore_ssl_errors)
    self.m_page.setNetworkAccessManager(self.m_netAccessMan)
    self.m_page.javaScriptConsoleMessageSent.connect(self.printConsoleMessage)

    defaults = self.m_defaultPageSettings
    defaults['loadImages'] = args.load_images
    defaults['loadPlugins'] = args.load_plugins
    defaults['javascriptEnabled'] = True
    defaults['XSSAuditingEnabled'] = False
    defaults['userAgent'] = self.m_page.userAgent()
    defaults['localAccessRemote'] = args.local_access_remote
    self.m_page.applySettings(defaults)

    script_path = os.path.abspath(self.m_scriptFile)
    self.libraryPath = os.path.dirname(script_path)

    # inject our properties and slots into javascript
    self.m_page.mainFrame().addToJavaScriptWindowObject('phantom', self)
    self.m_page.mainFrame().addToJavaScriptWindowObject('fs', self.m_filesystem)

    resource = QFile(':/bootstrap.js')
    if not resource.open(QFile.ReadOnly):
        sys.exit('Can not bootstrap!')
    script_text = str(resource.readAll())
    resource.close()
    if not script_text:
        sys.exit('Can not bootstrap!')
    self.m_page.mainFrame().evaluateJavaScript(script_text)

    do_action('PhantomInitPost')
开发者ID:aivaturi,项目名称:phantomjs,代码行数:56,代码来源:phantom.py
示例10: __init__
def __init__(self, args, parent = None):
    """Set up the page, its web settings and signal hooks from ``args``.

    ``args.script`` is indexed as ``[open script file, *script arguments]``;
    the file is read and closed here.  If the script path has a directory
    part, the process changes into that directory.
    """
    QObject.__init__(self, parent)

    # variable declarations
    # Each attribute gets its OWN object.  The former chained assignments
    # (``a = b = c = {}``) bound all names to a single shared dict/QString,
    # so mutating one silently changed the others.
    self.m_loadStatus = QString()
    self.m_state = QString()
    self.m_var = {}
    self.m_paperSize = {}
    self.m_loadScript_cache = {}
    self.m_verbose = args.verbose
    self.m_page = WebPage(self)
    self.m_clipRect = QRect()

    # setup the values from args
    self.m_script = QString.fromUtf8(args.script[0].read())
    self.m_scriptFile = args.script[0].name
    self.m_args = args.script[1:]
    self.m_upload_file = args.upload_file
    # Images load unless explicitly 'no'; plugins load only on 'yes'.
    autoLoadImages = args.load_images != 'no'
    pluginsEnabled = args.load_plugins == 'yes'
    args.script[0].close()

    palette = self.m_page.palette()
    palette.setBrush(QPalette.Base, Qt.transparent)
    self.m_page.setPalette(palette)

    if not args.proxy:
        QNetworkProxyFactory.setUseSystemConfiguration(True)
    else:
        proxy = QNetworkProxy(QNetworkProxy.HttpProxy, args.proxy[0], int(args.proxy[1]))
        QNetworkProxy.setApplicationProxy(proxy)

    self.m_page.settings().setAttribute(QWebSettings.AutoLoadImages, autoLoadImages)
    self.m_page.settings().setAttribute(QWebSettings.PluginsEnabled, pluginsEnabled)
    self.m_page.settings().setAttribute(QWebSettings.FrameFlatteningEnabled, True)
    self.m_page.settings().setAttribute(QWebSettings.OfflineStorageDatabaseEnabled, True)
    self.m_page.settings().setAttribute(QWebSettings.LocalStorageEnabled, True)
    self.m_page.settings().setLocalStoragePath(QDesktopServices.storageLocation(QDesktopServices.DataLocation))
    self.m_page.settings().setOfflineStoragePath(QDesktopServices.storageLocation(QDesktopServices.DataLocation))

    # Ensure we have a document.body.
    self.m_page.mainFrame().setHtml('<html><body></body></html>')

    self.m_page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
    self.m_page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)

    # if our script was called in a different directory, change to it
    # to make any dealings with files be relative to the scripts directory
    if os.path.dirname(self.m_scriptFile):
        os.chdir(os.path.dirname(self.m_scriptFile))

    # The custom network access manager is only installed in verbose mode.
    if self.m_verbose:
        m_netAccessMan = NetworkAccessManager(self)
        self.m_page.setNetworkAccessManager(m_netAccessMan)

    # inject our properties and slots into javascript
    self.connect(self.m_page.mainFrame(), SIGNAL('javaScriptWindowObjectCleared()'), self.inject)
    self.connect(self.m_page, SIGNAL('loadFinished(bool)'), self.finish)
开发者ID:westonruter,项目名称:phantomjs,代码行数:55,代码来源:phantom.py
示例11: __init__
def __init__(self, args, parent = None):
    """Set up the page, its web settings and signal hooks from ``args``.

    ``args.script`` is an open file object; it is read and closed here, and
    ``self.m_scriptDir`` is its directory with a trailing path separator.
    """
    QObject.__init__(self, parent)

    # variable declarations
    self.m_loadStatus = self.m_state = ''
    # Each attribute gets its OWN dict.  The former chained assignment
    # (``a = b = c = {}``) bound all three names to one shared dict, so
    # mutating one silently changed the others.
    self.m_var = {}
    self.m_paperSize = {}
    self.m_loadScript_cache = {}
    self.m_verbose = args.verbose
    self.m_page = WebPage(self)
    self.m_clipRect = QRect()

    # setup the values from args
    self.m_script = args.script.read()
    self.m_scriptFile = args.script.name
    self.m_scriptDir = os.path.dirname(args.script.name)
    if sys.platform.startswith('win'):
        self.m_scriptDir += '\\'
    else:
        self.m_scriptDir += '/'
    self.m_args = args.script_args
    self.m_upload_file = args.upload_file
    # Images load unless explicitly 'no'; plugins load only on 'yes'.
    autoLoadImages = args.load_images != 'no'
    pluginsEnabled = args.load_plugins == 'yes'
    args.script.close()

    palette = self.m_page.palette()
    palette.setBrush(QPalette.Base, Qt.transparent)
    self.m_page.setPalette(palette)

    if not args.proxy:
        QNetworkProxyFactory.setUseSystemConfiguration(True)
    else:
        proxy = QNetworkProxy(QNetworkProxy.HttpProxy, args.proxy[0], int(args.proxy[1]))
        QNetworkProxy.setApplicationProxy(proxy)

    self.m_page.settings().setAttribute(QWebSettings.AutoLoadImages, autoLoadImages)
    self.m_page.settings().setAttribute(QWebSettings.PluginsEnabled, pluginsEnabled)
    self.m_page.settings().setAttribute(QWebSettings.FrameFlatteningEnabled, True)
    self.m_page.settings().setAttribute(QWebSettings.OfflineStorageDatabaseEnabled, True)
    self.m_page.settings().setAttribute(QWebSettings.LocalStorageEnabled, True)
    self.m_page.settings().setLocalStoragePath(QDesktopServices.storageLocation(QDesktopServices.DataLocation))
    self.m_page.settings().setOfflineStoragePath(QDesktopServices.storageLocation(QDesktopServices.DataLocation))

    # Ensure we have a document.body.
    self.m_page.mainFrame().setHtml('<html><body></body></html>')

    self.m_page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
    self.m_page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)

    # The custom network access manager is only installed in verbose mode.
    if self.m_verbose:
        m_netAccessMan = NetworkAccessManager(args.disk_cache, self)
        self.m_page.setNetworkAccessManager(m_netAccessMan)

    # inject our properties and slots into javascript
    self.m_page.mainFrame().javaScriptWindowObjectCleared.connect(self.inject)
    self.m_page.loadFinished.connect(self.finish)
开发者ID:NathanHowell,项目名称:phantomjs-waf,代码行数:55,代码来源:phantom.py
示例12: run
def run(self):
    """Worker loop: fetch queued URLs until ``self.status`` turns false.

    Each URL taken from ``self.spider.task_list`` is checked against
    robots rules, fetched, saved, and its not-yet-visited links are added
    to ``self.spider.extend_list``.
    """
    while self.status:
        try:
            url = self.spider.task_list.get(timeout=1)
        except Empty:
            # Nothing queued within the timeout; re-check self.status.
            continue

        self.spider.increase_running()
        try:
            if not self.spider.check_robots(url):
                log.info('%s - robots forbidden : %s' % (self.name, url))
                continue
            page = WebPage(url)
            if page.fetch():
                self.spider.db.save_data(page.get_data())
                for link in page.get_link():  # retrieve links from html
                    if link not in self.spider.visited_list:  # not visited yet
                        self.spider.extend_list.add(link)
            else:
                print('%s: Page fetch failed: %s' % (self.name, page.url))
        finally:
            # Always balance increase_running(): previously the
            # robots-forbidden ``continue`` (and any exception) skipped the
            # decrement and left the running counter too high.
            self.spider.decrease_running()
开发者ID:YvesChan,项目名称:OpenSP,代码行数:21,代码来源:spider.py
示例13: createWebPage
def createWebPage(self):
    """Create, register and return an additional WebPage.

    The page is appended to ``self.m_pages``, receives the default page
    settings and the shared network access manager, and gets its
    ``libraryPath`` set to the directory of the script file.
    """
    new_page = WebPage(self)
    self.m_pages.append(new_page)
    new_page.applySettings(self.m_defaultPageSettings)
    new_page.setNetworkAccessManager(self.m_netAccessMan)
    script_path = os.path.abspath(self.m_scriptFile)
    new_page.libraryPath = os.path.dirname(script_path)
    return new_page
开发者ID:tryhendri,项目名称:phantomjs,代码行数:7,代码来源:phantom.py
示例14: __init__
# Constructor: creates the main WebPage, installs a custom network access
# manager, applies the default page settings and evaluates the bundled
# bootstrap script in the page's main frame.
# NOTE(review): indentation of this snippet was lost in extraction; the
# statements are left exactly as found.
def __init__(self, args, parent=None):
QObject.__init__(self, parent)
# variable declarations
self.m_defaultPageSettings = {}
self.m_verbose = args.verbose
self.m_page = WebPage(self)
self.m_returnValue = 0
self.m_terminated = False
# setup the values from args
self.m_script = args.script
self.m_scriptFile = args.script_name
self.m_args = args.script_args
# The hook receives a Bunch built from locals(), so the names of this
# function's local variables (here: self, args, parent) are visible to
# whatever handles the hook -- presumably plugins; confirm before renaming
# or reordering anything in this function.
do_action('PhantomInitPre', Bunch(locals()))
# Without a proxy argument the system proxy configuration is used;
# otherwise args.proxy is indexed as [host, port].
if not args.proxy:
QNetworkProxyFactory.setUseSystemConfiguration(True)
else:
proxy = QNetworkProxy(QNetworkProxy.HttpProxy, args.proxy[0], int(args.proxy[1]))
QNetworkProxy.setApplicationProxy(proxy)
# Provide WebPage with a non-standard Network Access Manager
self.m_netAccessMan = NetworkAccessManager(args.disk_cache, args.ignore_ssl_errors, self)
self.m_page.setNetworkAccessManager(self.m_netAccessMan)
# Console messages emitted by the page are forwarded to printConsoleMessage.
self.m_page.javaScriptConsoleMessageSent.connect(self.printConsoleMessage)
self.m_defaultPageSettings['loadImages'] = args.load_images
self.m_defaultPageSettings['loadPlugins'] = args.load_plugins
self.m_defaultPageSettings['userAgent'] = self.m_page.userAgent()
self.m_page.applySettings(self.m_defaultPageSettings)
# inject our properties and slots into javascript
self.m_page.mainFrame().addToJavaScriptWindowObject('phantom', self)
# Load bootstrap.js (the ':/' prefix is presumably a Qt resource path --
# confirm); exit with status 1 when it cannot be opened or is empty.
bootstrap = QFile(':/bootstrap.js')
if not bootstrap.open(QFile.ReadOnly):
qCritical('Can not bootstrap!')
sys.exit(1)
bootstrapper = str(bootstrap.readAll())
bootstrap.close()
if not bootstrapper:
qCritical('Can not bootstrap!')
sys.exit(1)
self.m_page.mainFrame().evaluateJavaScript(bootstrapper)
# Second hook; locals() now also contains bootstrap, bootstrapper and, when
# a proxy was given, proxy.
do_action('PhantomInitPost', Bunch(locals()))
开发者ID:lautis,项目名称:phantomjs,代码行数:48,代码来源:phantom.py
示例15: add_webpage
def add_webpage(self):
    """Interactively create a WebPage and optionally add it to the list.

    Prompts for the page attributes, then keeps asking ``Save? (y/n)``
    until one of y/Y/n/N is entered.  On y/Y the current content is
    retrieved (a ValueError is logged) and the page is appended to
    ``self.__webpages``.
    """
    webpage = WebPage(name='', description='', url='', load_content=False)
    webpage.name = raw_input('Name: ')
    webpage.description = raw_input('Description: ')
    webpage.url = raw_input('URL: ')
    webpage.update_timeout = int(raw_input('Update timeout: '))
    webpage.request_timeout = int(raw_input('Request timeout: '))
    webpage.data_offset = int(raw_input('Data offset: '))

    yes_answers = ('y', 'Y')
    final_answers = ('y', 'Y', 'n', 'N')
    while True:
        answer = raw_input('Save? (y/n)')
        if answer in yes_answers:
            try:
                webpage.current = webpage.retrieve()
            except ValueError as e:
                logger.error('[!] Error: ' + str(e))
            self.__webpages.append(webpage)
        if answer in final_answers:
            break
开发者ID:piero,项目名称:WebDiff,代码行数:21,代码来源:console_interface.py
示例16: __init__
def __init__(self, parent, args):
    """Wire up the main WebPage and default page settings from ``args``.

    The bundled ``:/bootstrap.js`` is evaluated in the main frame before
    the ``PhantomInitPost`` hook runs.
    """
    super(Phantom, self).__init__(parent)

    # variable declarations
    self.m_defaultPageSettings = {}
    self.m_pages = []
    self.m_verbose = args.verbose
    self.m_page = WebPage(self, args)
    self.m_returnValue = 0
    self.m_terminated = False

    # setup the values from args
    self.app_args = args
    self.m_scriptFile = args.script
    self.m_args = args.script_args
    self.m_scriptEncoding = Encode(args.script_encoding, 'utf-8')
    self.m_outputEncoding = Encode(args.output_encoding, sys.stdout.encoding_sys)

    self.m_pages.append(self.m_page)

    do_action('PhantomInitPre')

    if args.proxy is None:
        QNetworkProxyFactory.setUseSystemConfiguration(True)
    else:
        app_proxy = QNetworkProxy(QNetworkProxy.HttpProxy, args.proxy[0], int(args.proxy[1]))
        QNetworkProxy.setApplicationProxy(app_proxy)

    self.m_page.javaScriptConsoleMessageSent.connect(self.printConsoleMessage)

    defaults = self.m_defaultPageSettings
    defaults['loadImages'] = args.load_images
    defaults['loadPlugins'] = args.load_plugins
    defaults['javascriptEnabled'] = True
    defaults['XSSAuditingEnabled'] = False
    defaults['userAgent'] = self.m_page.userAgent()
    defaults['localToRemoteUrlAccessEnabled'] = args.local_to_remote_url_access
    self.m_page.applySettings(defaults)

    script_path = os.path.abspath(self.m_scriptFile)
    self.libraryPath = os.path.dirname(script_path)

    # inject our properties and slots into javascript
    self.m_page.mainFrame().addToJavaScriptWindowObject('phantom', self)

    with QPyFile(':/bootstrap.js') as resource:
        script_text = str(resource.readAll())
    self.m_page.mainFrame().evaluateJavaScript(script_text)

    do_action('PhantomInitPost')
开发者ID:jsnell,项目名称:phantomjs,代码行数:47,代码来源:phantom.py
示例17: start
def start(self):
    """Crawl until the queue returns ``None``.

    Each downloaded page is stored, its ``<a>`` links are filtered with the
    patterns registered for the URL and queued, and the loop pauses via
    ``mysleep(3)`` after every URL.
    """
    while True:
        url = self.queue.pop_url()
        print(url)
        if url is None:
            print("crawling task is done.")
            break
        error_msg, url, redirected_url, html = self.downloader.download(url)
        if html is not None:
            self.webpagedb.html2db(url, html)
            self.webpage = WebPage(url, html)
            self.webpage.parse_links()
            ruptn = self.get_patterns_from_rules(url)
            print(ruptn)
            links = self.webpage.filter_links(tags=['a'], patterns=ruptn)
            self.add_seeds(links)
        self.mysleep(3)
开发者ID:ricocmc,项目名称:pythonTraining,代码行数:19,代码来源:crawler.py
示例18: start
def start(self):
    """Crawl until the queue returns ``None``.

    Each downloaded page is stored and parsed; links that survive the
    pattern filter are queued as new seeds.  The loop pauses via
    ``mysleep(3)`` after every URL.
    """
    while True:
        url = self.queue.popUrl()
        print(url)
        if url is None:
            print("crawling task is done.")
            break
        error_msg, url, redirected_url, html = self.downloader.download(url)
        if html is not None:
            self.webpagedb.storeHtmlToDb(url, html)  # store the page
            self.webpage = WebPage(url, html)  # start parsing the page
            self.webpage.parseLinks()  # collect all hyperlinks
            ruptn = self.get_patterns_from_rules(url)
            print(ruptn)
            # The original author noted that this call can yield None,
            # hence the guard before queueing.
            links = self.webpage.filter_links(tags=['a'], patterns=ruptn)
            if links:
                self.addSeeds(links)
        self.mysleep(3)  # rest a little before crawling on
开发者ID:ReedGuo,项目名称:framework,代码行数:20,代码来源:crawler.py
示例19: WebBrowser
class WebBrowser(QObject):
    """QWebView-based browser object with manual redirect and cookie handling."""

    def __init__(self):
        """Create the Qt application (if needed), network stack, page and view."""
        logging.debug("-->")
        super(WebBrowser, self).__init__()

        # Reuse a running QApplication when there is one.
        self.app = QApplication.instance()
        if self.app is None:
            self.app = QApplication(sys.argv)
        self.app.setQuitOnLastWindowClosed(False)

        self.event_loop = QEventLoop()
        self.cookie_jar = CookieJar()
        # The proxy object is built but not installed on the network manager.
        self.proxy = QNetworkProxy(QNetworkProxy.HttpProxy, "127.0.1.1", 8888)
        self.network_manager = NetworkAccessManager()
        self.network_manager.setCookieJar(self.cookie_jar)

        self.web_page = WebPage()
        self.web_page.setNetworkAccessManager(self.network_manager)
        self.web_view = QWebView()
        self.web_view.setPage(self.web_page)

        view_settings = self.web_view.settings()
        view_settings.setAttribute(QWebSettings.AutoLoadImages, False)
        view_settings.setAttribute(QWebSettings.PluginsEnabled, True)
        view_settings.setAttribute(QWebSettings.JavascriptEnabled, True)
        view_settings.setAttribute(QWebSettings.LocalContentCanAccessRemoteUrls, True)

        manager = self.web_view.page().networkAccessManager()
        self.connect(manager, SIGNAL("finished(QNetworkReply*)"), self.network_reply_finished)

        # Per-load state, all unset initially.
        self.page_loaded_validator = None
        self.page_loaded_handler = None
        self.page_loaded_handler_kwargs = None
        self.timeout_message = None
        self.timer = None
        self.event_loop_exception = None
        logging.debug("<--")

    def network_reply_finished(self, reply):
        """Mark the reply's request as completed and follow a redirect, if any."""
        requested_url = reply.request().url()
        logging.debug("Reply received for: " + requested_url.toString())
        self.network_manager.request_queue[requested_url] = "Completed"
        target = reply.attribute(QNetworkRequest.RedirectionTargetAttribute)
        redirect_url = self.get_redirect_url(target, requested_url)
        if redirect_url is not None:
            self.redirect(redirect_url, reply.request())

    def redirect(self, url, request):
        """Load ``url`` in the frame that originally issued ``request``."""
        frame = self.find_frame_to_redirect(self.web_view.page().mainFrame(), request)
        if frame is None:
            return
        logging.debug("Redirecting to: " + url.toString())
        frame.load(url)

    def find_frame_to_redirect(self, frame, request):
        """Depth-first search for the frame whose requested URL equals the request's.

        Returns the frame, or ``None`` when neither ``frame`` nor any of its
        descendants match.
        """
        if frame.requestedUrl() == request.url():
            return frame
        for child in frame.childFrames():
            found = self.find_frame_to_redirect(child, request)
            if found is not None:
                return found
        return None

    def get_redirect_url(self, possible_redirect_url, orig_requested_url):
        """Return the redirect target as a usable URL, or ``None``.

        A relative target inherits scheme and host from the original
        request; ``None`` is returned when there is no target, when both
        URLs are relative, or when the target equals the original URL.
        """
        if possible_redirect_url is None:
            return None
        if possible_redirect_url.isRelative():
            if orig_requested_url.isRelative():
                return None
            possible_redirect_url.setScheme(orig_requested_url.scheme())
            possible_redirect_url.setHost(orig_requested_url.host())
        if orig_requested_url != possible_redirect_url:
            return possible_redirect_url
        return None

    def get_cookies(self):
        """Return every cookie in the jar in raw form."""
        return [cookie.toRawForm() for cookie in self.cookie_jar.allCookies()]

    def set_cookies(self, raw_cookies):
        """Replace the jar's content with the cookies parsed from ``raw_cookies``."""
        parsed = []
        for raw_cookie in raw_cookies:
            parsed.extend(QNetworkCookie.parseCookies(raw_cookie))
        self.cookie_jar.setAllCookies(parsed)

    def cleanup(self):
        """Disconnect the reply handler, detach the Qt objects and drop the references."""
        logging.debug("-->")
        manager = self.web_view.page().networkAccessManager()
        self.disconnect(manager, SIGNAL("finished(QNetworkReply*)"), self.network_reply_finished)

        owned = ('web_view', 'web_page', 'network_manager', 'event_loop')
        for attr in owned:
            getattr(self, attr).setParent(None)
        self.setParent(None)

        for attr in owned + ('app',):
            delattr(self, attr)
        logging.debug("<--")
开发者ID:mguillech,项目名称:tfb-scraper,代码行数:95,代码来源:webbrowser.py
示例20: Phantom
class Phantom(QObject):
    """Controller object exposed to JavaScript as ``phantom``."""

    def __init__(self, parent, args):
        """Wire up the main WebPage and default page settings from ``args``."""
        super(Phantom, self).__init__(parent)

        # variable declarations
        self.m_defaultPageSettings = {}
        self.m_pages = []
        self.m_verbose = args.verbose
        self.m_page = WebPage(self, args)
        self.m_returnValue = 0
        self.m_terminated = False

        # setup the values from args
        self.app_args = args
        self.m_scriptFile = args.script
        self.m_args = args.script_args
        self.m_scriptEncoding = Encode(args.script_encoding, 'utf-8')
        self.m_outputEncoding = Encode(args.output_encoding, sys.stdout.encoding_sys)

        self.m_pages.append(self.m_page)

        do_action('PhantomInitPre')

        if args.proxy is None:
            QNetworkProxyFactory.setUseSystemConfiguration(True)
        else:
            app_proxy = QNetworkProxy(QNetworkProxy.HttpProxy, args.proxy[0], int(args.proxy[1]))
            QNetworkProxy.setApplicationProxy(app_proxy)

        self.m_page.javaScriptConsoleMessageSent.connect(self.printConsoleMessage)

        defaults = self.m_defaultPageSettings
        defaults['loadImages'] = args.load_images
        defaults['loadPlugins'] = args.load_plugins
        defaults['javascriptEnabled'] = True
        defaults['XSSAuditingEnabled'] = False
        defaults['userAgent'] = self.m_page.userAgent()
        defaults['localToRemoteUrlAccessEnabled'] = args.local_to_remote_url_access
        self.m_page.applySettings(defaults)

        script_path = os.path.abspath(self.m_scriptFile)
        self.libraryPath = os.path.dirname(script_path)

        # inject our properties and slots into javascript
        self.m_page.mainFrame().addToJavaScriptWindowObject('phantom', self)

        with QPyFile(':/bootstrap.js') as resource:
            script_text = str(resource.readAll())
        self.m_page.mainFrame().evaluateJavaScript(script_text)

        do_action('PhantomInitPost')

    def execute(self):
        """Inject the script file into the main frame.

        Returns ``True`` unless ``exit`` has already been called.
        """
        here = os.path.dirname(os.path.abspath(__file__))
        injectJsInFrame(self.m_scriptFile, self.m_scriptEncoding.encoding, here, self.m_page.mainFrame(), True)
        return not self.m_terminated

    def printConsoleMessage(self, message, lineNumber, source):
        """Print a console message, prefixed with ``source:line`` when a source is given."""
        if source:
            message = '%s:%d %s' % (source, lineNumber, message)
        print(message)

    def returnValue(self):
        """Return the exit code recorded by ``exit`` (0 by default)."""
        return self.m_returnValue

    ##
    # Properties and methods exposed to JavaScript
    ##

    @pyqtProperty('QStringList')
    def args(self):
        return self.m_args

    @pyqtSlot(result=FileSystem)
    def createFilesystem(self):
        return FileSystem(self)

    @pyqtSlot(result=WebPage)
    def createWebPage(self):
        """Create, register and return an additional WebPage with the default settings."""
        new_page = WebPage(self, self.app_args)
        self.m_pages.append(new_page)
        new_page.applySettings(self.m_defaultPageSettings)
        script_path = os.path.abspath(self.m_scriptFile)
        new_page.libraryPath = os.path.dirname(script_path)
        return new_page

    @pyqtProperty('QVariantMap')
    def defaultPageSettings(self):
        return self.m_defaultPageSettings

    @pyqtSlot()
    @pyqtSlot(int)
    def exit(self, code=0):
        """Record ``code``, destroy every page and quit the application."""
        self.m_terminated = True
        self.m_returnValue = code

        # stop javascript execution in start script;
        # delete all the pages C++ objects, then clear
        # the page list, and empty the Phantom page
        for open_page in self.m_pages:
            sip.delete(open_page)
        del self.m_pages[:]
        self.m_page = None

        QApplication.instance().exit(code)
#.........这里部分代码省略.........
开发者ID:jsnell,项目名称:phantomjs,代码行数:101,代码来源:phantom.py
注：本文中的webpage.WebPage类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论