本文整理汇总了Golang中github.com/aosen/mlog.LogInst函数的典型用法代码示例。如果您正苦于以下问题:Golang LogInst函数的具体用法?Golang LogInst怎么用?Golang LogInst使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了LogInst函数的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Golang代码示例。
示例1: downloadHtml
// downloadHtml downloads the request via downloadFile, parses the returned
// body with goquery and stores both the serialized HTML and the parsed
// document on the page.
//
// If the download already failed the page is returned untouched. If parsing
// or serializing fails, the error is logged, recorded on the page through
// SetStatus(true, msg) and the page is returned.
func (self *HttpDownloader) downloadHtml(p *robot.Page, req *robot.Request) *robot.Page {
	page, raw := self.downloadFile(p, req)
	if !page.IsSucc() {
		return page
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(raw)))
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		page.SetStatus(true, err.Error())
		return page
	}

	html, err := doc.Html()
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		page.SetStatus(true, err.Error())
		return page
	}

	page.SetBodyStr(html).SetHtmlParser(doc).SetStatus(false, "")
	return page
}
开发者ID:aosen,项目名称:robot,代码行数:28,代码来源:httpdownloader.go
示例2: main
func main() {
start_url := "http://www.jiexieyin.org"
thread_num := uint(16)
redisAddr := "127.0.0.1:6379"
redisMaxConn := 10
redisMaxIdle := 10
proc := &MyProcessor{}
sp := robot.NewSpider(proc, "redis_scheduler_example").
//SetSleepTime("fixed", 6000, 6000).
//SetScheduler(scheduler.NewQueueScheduler(true)).
SetScheduler(robot.NewRedisScheduler(redisAddr, redisMaxConn, redisMaxIdle, true)).
AddPipeline(robot.NewPipelineConsole()).
SetThreadnum(thread_num)
init := false
for _, arg := range os.Args {
if arg == "--init" {
init = true
break
}
}
if init {
sp.AddUrl(start_url, "html")
mlog.LogInst().LogInfo("重新开始爬")
} else {
mlog.LogInst().LogInfo("继续爬")
}
sp.Run()
}
开发者ID:aosen,项目名称:spiders,代码行数:32,代码来源:redis.go
示例3: Poll
// Poll pops the next pending request from the head of the Redis request list
// and decodes it from JSON.
//
// It returns nil when the list length cannot be read, when the list is empty,
// when LPOP fails or yields no payload, or when the payload cannot be
// unmarshalled. Every failure except the length lookup is logged here; the
// length lookup is logged by count itself.
func (self *RedisScheduler) Poll() *robot.Request {
	conn := self.redisPool.Get()
	defer conn.Close()

	length, err := self.count()
	if err != nil {
		return nil
	}
	if length <= 0 {
		mlog.LogInst().LogError("RedisScheduler Poll length 0")
		return nil
	}

	buf, err := conn.Do("LPOP", self.requestList)
	if err != nil {
		mlog.LogInst().LogError("RedisScheduler Poll Error: " + err.Error())
		return nil
	}

	// The list can be drained by another consumer between the length check and
	// LPOP, in which case the reply is not a byte slice. A checked assertion
	// avoids the panic an unchecked buf.([]byte) would raise.
	data, ok := buf.([]byte)
	if !ok {
		mlog.LogInst().LogError("RedisScheduler Poll Error: unexpected LPOP reply")
		return nil
	}

	requ := &robot.Request{}
	if err = json.Unmarshal(data, requ); err != nil {
		mlog.LogInst().LogError("RedisScheduler Poll Error: " + err.Error())
		return nil
	}
	return requ
}
开发者ID:kjfcpua,项目名称:robot,代码行数:28,代码来源:redisscheduler.go
示例4: AddRequest
// AddRequest pushes req onto the spider's scheduler and returns the spider so
// calls can be chained. A nil request or a request with an empty URL is
// logged and dropped.
func (self *Spider) AddRequest(req *Request) *Spider {
	switch {
	case req == nil:
		mlog.LogInst().LogError("request is nil")
	case req.GetUrl() == "":
		mlog.LogInst().LogError("request is empty")
	default:
		self.scheduler.Push(req)
	}
	return self
}
开发者ID:kjfcpua,项目名称:robot,代码行数:12,代码来源:engine.go
示例5: Process
// Process collects the links of a successfully downloaded page and queues
// them as new target requests, then records the page URL in the "test1" and
// "test2" fields.
//
// Pages that failed to download, whose URL cannot be parsed, or whose host
// does not end in "jiexieyin.org" are skipped. Anchors without an href, with
// an empty href, or pointing at javascript:, mailto: or "#" targets are
// ignored. Relative links are resolved against the page URL.
func (self *MyProcessor) Process(p *robot.Page) {
	if !p.IsSucc() {
		mlog.LogInst().LogError(p.Errormsg())
		return
	}
	u, err := url.Parse(p.GetRequest().GetUrl())
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		return
	}
	if !strings.HasSuffix(u.Host, "jiexieyin.org") {
		return
	}

	// Compile the filters once per page instead of once per anchor.
	reJavascript := regexp.MustCompile("^javascript\\:")
	reLocal := regexp.MustCompile("^\\#")
	reMailto := regexp.MustCompile("^mailto\\:")

	var urls []string
	query := p.GetHtmlParser()
	query.Find("a").Each(func(i int, s *goquery.Selection) {
		href, exists := s.Attr("href")
		if !exists || href == "" {
			// Nothing to follow; an empty reference would only re-queue this page.
			return
		}
		if reJavascript.MatchString(href) || reLocal.MatchString(href) || reMailto.MatchString(href) {
			return
		}
		urlHref, err := url.Parse(href)
		if err != nil {
			mlog.LogInst().LogError(err.Error())
			return
		}
		if urlHref.IsAbs() {
			urls = append(urls, href)
			return
		}
		// Handle relative paths: resolve against the page URL rather than
		// concatenating strings, which produced wrong URLs for hrefs such as
		// "/about" or "../x".
		urls = append(urls, u.ResolveReference(urlHref).String())
	})
	p.AddTargetRequests(initrequests(urls))
	p.AddField("test1", p.GetRequest().GetUrl())
	p.AddField("test2", p.GetRequest().GetUrl())
}
开发者ID:aosen,项目名称:robot,代码行数:48,代码来源:mgo.go
示例6: connectByHttp
// connectByHttp sends req with a fresh http.Client, using the request's
// method, URL, post data, headers and cookies.
//
// If the request cannot be built or sent, the error is logged, recorded on p
// through SetStatus(true, msg) and returned with a nil response.
//
// NOTE(review): a *url.Error whose wrapped error text is exactly "normal" is
// deliberately not treated as a failure; in that case the returned error is
// nil and the response is whatever client.Do produced, which may be nil.
func connectByHttp(p *robot.Page, req *robot.Request) (*http.Response, error) {
	client := &http.Client{}
	httpreq, err := http.NewRequest(req.GetMethod(), req.GetUrl(), strings.NewReader(req.GetPostdata()))
	if err != nil {
		// NewRequest returns a nil request on an invalid method or URL; stop
		// here instead of dereferencing it below.
		mlog.LogInst().LogError(err.Error())
		p.SetStatus(true, err.Error())
		return nil, err
	}

	if header := req.GetHeader(); header != nil {
		httpreq.Header = header
	}
	// Ranging over a nil slice is a no-op, so no explicit nil check is needed.
	for _, cookie := range req.GetCookies() {
		httpreq.AddCookie(cookie)
	}

	var resp *http.Response
	if resp, err = client.Do(httpreq); err != nil {
		if e, ok := err.(*url.Error); ok && e.Err != nil && e.Err.Error() == "normal" {
			// normal
		} else {
			mlog.LogInst().LogError(err.Error())
			p.SetStatus(true, err.Error())
			return nil, err
		}
	}
	return resp, nil
}
开发者ID:aosen,项目名称:robot,代码行数:29,代码来源:httpdownloader.go
示例7: downloadJson
// downloadJson downloads the request via downloadFile and parses the body as
// JSON. When the request's response type is "jsonp" the body is first
// converted with goutils.JsonpToJson.
//
// If the download already failed the page is returned untouched. If parsing
// fails, the body and the error are logged, the error is recorded on the page
// through SetStatus(true, msg) and the page is returned. On success the body
// string and the parsed JSON are stored on the page.
func (self *HttpDownloader) downloadJson(p *robot.Page, req *robot.Request) *robot.Page {
	page, raw := self.downloadFile(p, req)
	if !page.IsSucc() {
		return page
	}

	payload := []byte(raw)
	if req.GetResponceType() == "jsonp" {
		payload = []byte(goutils.JsonpToJson(raw))
	}

	parsed, err := simplejson.NewJson(payload)
	if err != nil {
		mlog.LogInst().LogError(string(payload) + "\t" + err.Error())
		page.SetStatus(true, err.Error())
		return page
	}

	// json result
	page.SetBodyStr(string(payload)).SetJson(parsed).SetStatus(false, "")
	return page
}
开发者ID:aosen,项目名称:robot,代码行数:27,代码来源:httpdownloader.go
示例8: main
// main configures a spider with a Redis scheduler, an HTTP downloader, a
// MongoDB pipeline and a resource pool, then runs it. When "--init" is
// present on the command line the start URL is queued first; otherwise the
// spider just runs with whatever the scheduler already holds.
func main() {
	var (
		startURL   = "http://www.jiexieyin.org"
		redisAddr  = "127.0.0.1:6379"
		redisConns = 10
		redisIdle  = 10
		mgoURL     = "localhost:27017"
		mgoDB      = "test"
		mgoColl    = "test"
	)

	schedOpts := scheduler.RedisSchedulerOptions{
		RequestList:           "mgospider_requests",
		UrlList:               "mgospider_urls",
		RedisAddr:             redisAddr,
		MaxConn:               redisConns,
		MaxIdle:               redisIdle,
		ForbiddenDuplicateUrl: false,
	}

	// Initialize the spider.
	sp := robot.NewSpider(robot.SpiderOptions{
		TaskName:      "mgospider",
		PageProcesser: NewMyProcesser(),
		Downloader:    downloader.NewHttpDownloader("text/html; charset=gb2312"),
		Scheduler:     scheduler.NewRedisScheduler(schedOpts),
		Pipelines:     []robot.Pipeline{NewPipelineMongo(mgoURL, mgoDB, mgoColl)},
		// Set up the resource manager with a pool capacity of 10.
		ResourceManage: resource.NewSpidersPool(10, nil),
	})

	// Stop scanning as soon as the flag has been seen.
	reinit := false
	for i := 0; i < len(os.Args) && !reinit; i++ {
		reinit = os.Args[i] == "--init"
	}

	if !reinit {
		mlog.LogInst().LogInfo("继续爬")
	} else {
		sp.AddRequest(initrequest(startURL))
		mlog.LogInst().LogInfo("重新开始爬")
	}
	sp.Run()
}
开发者ID:aosen,项目名称:robot,代码行数:47,代码来源:mgo.go
示例9: changeCharsetEncodingAuto
// changeCharsetEncodingAuto reads the whole body from sor and returns it as a
// string, decoding it through golang.org/x/net/html/charset so the result is
// UTF-8 (automatic transcoding).
//
// When contentTypeStr contains no space (for example it is empty or carries
// no "; charset=..." part) the downloader's DefaultContentType is used
// instead. If no decoding reader can be created, the error is logged and the
// raw stream is read as-is. A read error is logged and whatever was read up
// to that point is returned.
func (self *HttpDownloader) changeCharsetEncodingAuto(contentTypeStr string, sor io.ReadCloser) string {
	if !strings.Contains(contentTypeStr, " ") {
		contentTypeStr = self.DefaultContentType
	}

	var src io.Reader = sor
	if decoded, err := charset.NewReader(sor, contentTypeStr); err != nil {
		mlog.LogInst().LogError(err.Error())
	} else {
		src = decoded
	}

	raw, err := ioutil.ReadAll(src)
	if err != nil {
		mlog.LogInst().LogError(err.Error())
	}
	return string(raw)
}
开发者ID:aosen,项目名称:robot,代码行数:19,代码来源:httpdownloader.go
示例10: Push
// Push serializes requ to JSON and appends it to the Redis request list.
//
// When forbiddenDuplicateUrl is set, the URL is first looked up in the URL
// hash: a URL that is already present is silently skipped, otherwise marking
// the URL and appending the request are sent inside one MULTI/EXEC
// transaction, which is discarded if either command fails. Every Redis or
// encoding error is logged and ends the call.
func (self *RedisScheduler) Push(requ *robot.Request) {
	requJson, err := json.Marshal(requ)
	if err != nil {
		mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
		return
	}

	conn := self.redisPool.Get()
	defer conn.Close()

	if self.forbiddenDuplicateUrl {
		urlExist, err := conn.Do("HGET", self.urlList, requ.GetUrl())
		if err != nil {
			mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
			return
		}
		if urlExist != nil {
			return
		}
		// Do not continue when the transaction could not be opened: the
		// following commands would run outside of it.
		if _, err = conn.Do("MULTI"); err != nil {
			mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
			return
		}
		if _, err = conn.Do("HSET", self.urlList, requ.GetUrl(), 1); err != nil {
			mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
			conn.Do("DISCARD")
			return
		}
	}

	if _, err = conn.Do("RPUSH", self.requestList, requJson); err != nil {
		mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
		if self.forbiddenDuplicateUrl {
			conn.Do("DISCARD")
		}
		return
	}

	if self.forbiddenDuplicateUrl {
		// Inside MULTI the commands above are only queued; EXEC is where they
		// actually run, so its failure must not go unnoticed.
		if _, err = conn.Do("EXEC"); err != nil {
			mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
		}
	}
}
开发者ID:kjfcpua,项目名称:robot,代码行数:44,代码来源:redisscheduler.go
示例11: changeCharsetEncodingAutoGzipSupport
// changeCharsetEncodingAutoGzipSupport wraps sor in a gzip reader and passes
// the decompressed stream to changeCharsetEncodingAuto. If sor cannot be
// opened as gzip, the error is logged and an empty string is returned.
func (self *HttpDownloader) changeCharsetEncodingAutoGzipSupport(contentTypeStr string, sor io.ReadCloser) string {
	gz, gzErr := gzip.NewReader(sor)
	if gzErr == nil {
		defer gz.Close()
		return self.changeCharsetEncodingAuto(contentTypeStr, gz)
	}
	mlog.LogInst().LogError(gzErr.Error())
	return ""
}
开发者ID:aosen,项目名称:robot,代码行数:10,代码来源:httpdownloader.go
示例12: ResetHtmlParser
// ResetHtmlParser re-parses the page body with goquery, stores the resulting
// document on the page as its parser and returns it. If the body cannot be
// parsed, the error is logged and the method panics with the error message.
func (self *Page) ResetHtmlParser() *goquery.Document {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(self.body))
	self.docParser = doc
	if err != nil {
		msg := err.Error()
		mlog.LogInst().LogError(msg)
		panic(msg)
	}
	return doc
}
开发者ID:kjfcpua,项目名称:robot,代码行数:11,代码来源:engine.go
示例13: count
// count returns the number of entries in the Redis request list, obtained
// with LLEN. A Redis error is logged and returned together with 0.
//
// NOTE(review): the reply is asserted to be int64 without a check; any other
// reply type would panic here. Confirm the client always yields int64 for
// LLEN.
func (self *RedisScheduler) count() (int, error) {
	conn := self.redisPool.Get()
	defer conn.Close()

	reply, err := conn.Do("LLEN", self.requestList)
	if err == nil {
		return int(reply.(int64)), nil
	}
	mlog.LogInst().LogError("RedisScheduler Count Error: " + err.Error())
	return 0, err
}
开发者ID:kjfcpua,项目名称:robot,代码行数:10,代码来源:redisscheduler.go
示例14: pageProcess
// pageProcess is the core per-request routine: it downloads req (up to three
// attempts, sleeping before each one), hands the page to the page processor,
// queues every target request the processor produced and, unless the page is
// marked as skipped, feeds the page items to every pipeline.
//
// A panic raised anywhere in this routine is recovered and logged so that one
// bad page does not affect the others.
func (self *Spider) pageProcess(req *Request) {
	var p *Page

	defer func() {
		r := recover()
		if r == nil {
			return
		}
		// Keep the message for error-typed panics too (runtime errors are
		// errors, not strings) instead of collapsing them to a generic text.
		switch v := r.(type) {
		case string:
			mlog.LogInst().LogError(v)
		case error:
			mlog.LogInst().LogError(v.Error())
		default:
			mlog.LogInst().LogError("pageProcess error")
		}
	}()

	// download page; retry up to 3 times on failure
	for i := 0; i < 3; i++ {
		self.sleep()
		p = self.downloader.Download(req)
		if p != nil && p.IsSucc() {
			break
		}
	}

	// A failed (or missing) page does not need processing.
	if p == nil || !p.IsSucc() {
		return
	}

	self.pageProcesser.Process(p)

	// Push every request in targetRequests onto the scheduling queue.
	for _, target := range p.GetTargetRequests() {
		self.AddRequest(target)
	}

	// output
	if !p.GetSkip() {
		for _, pip := range self.pipelines {
			pip.Process(p.GetPageItems(), self)
		}
	}
}
开发者ID:kjfcpua,项目名称:robot,代码行数:42,代码来源:engine.go
示例15: NewRequest
// NewRequest fills in defaults on req and returns the same pointer.
//
// An empty URL is logged but not rejected. Method is kept when it is one of
// GET, POST, HEAD, OPTIONS, PUT or DELETE and otherwise set to "GET".
// RespType is kept when it is one of html, json, jsonp or text and otherwise
// set to "html".
func NewRequest(req *Request) *Request {
	// Mainly sets default values and performs error detection.
	if req.Url == "" {
		mlog.LogInst().LogError("request url is nil")
	}

	// The previous `!= A || != B || ...` chains were always true, so every
	// request was forced to GET/html regardless of what the caller asked for.
	switch req.Method {
	case "GET", "POST", "HEAD", "OPTIONS", "PUT", "DELETE":
		// supported, keep as is
	default:
		req.Method = "GET"
	}

	switch req.RespType {
	case "html", "json", "jsonp", "text":
		// supported, keep as is
	default:
		req.RespType = "html"
	}
	return req
}
开发者ID:kjfcpua,项目名称:robot,代码行数:13,代码来源:engine.go
示例16: Process
// Process dispatches a downloaded page: a failed page is logged and dropped;
// a page whose request carries a callback is handed to that callback; a page
// without a callback is treated as the entry page and goes to mainParse.
func (self *Www79xsComProcessor) Process(p *robot.Page) {
	// Check whether the page was fetched successfully.
	if !p.IsSucc() {
		mlog.LogInst().LogError(p.Errormsg())
		return
	}

	// A request with a callback is handled by that callback directly.
	if cb := p.GetRequest().GetCallBack(); cb != nil {
		cb(p)
		return
	}

	// No callback means this is the entry page.
	self.mainParse(p)
}
开发者ID:kjfcpua,项目名称:robot,代码行数:15,代码来源:process.go
示例17: Download
// Download creates a page for req and delegates to the downloader matching
// the request's response type: "html", "json"/"jsonp" or "text". Any other
// type is logged as an error and the freshly created page is returned as is.
func (self *HttpDownloader) Download(req *robot.Request) *robot.Page {
	page := robot.NewPage(req)

	switch kind := req.GetResponceType(); kind {
	case "html":
		return self.downloadHtml(page, req)
	case "json", "jsonp":
		return self.downloadJson(page, req)
	case "text":
		return self.downloadText(page, req)
	default:
		mlog.LogInst().LogError("error request type:" + kind)
		return page
	}
}
开发者ID:aosen,项目名称:robot,代码行数:18,代码来源:httpdownloader.go
示例18: ReadHeaderFromFile
// ReadHeaderFromFile builds an http.Header from a JSON file. The
// "User-Agent", "Referer" and "Cookie" values are taken from the file's
// top-level keys of the same names; "Cache-Control: max-age=0" and
// "Connection: keep-alive" are always added.
//
// It returns nil, after logging the error, when the file cannot be read or
// does not contain valid JSON.
func ReadHeaderFromFile(headerFile string) http.Header {
	// read file, parse the header and cookies
	b, err := ioutil.ReadFile(headerFile)
	if err != nil {
		// may be: share access error
		mlog.LogInst().LogError(err.Error())
		return nil
	}

	js, err := simplejson.NewJson(b)
	if err != nil {
		// Previously ignored: the lookups below would then run on an unusable
		// JSON value.
		mlog.LogInst().LogError(err.Error())
		return nil
	}

	// construct the header
	h := make(http.Header)
	h.Add("User-Agent", js.Get("User-Agent").MustString())
	h.Add("Referer", js.Get("Referer").MustString())
	h.Add("Cookie", js.Get("Cookie").MustString())
	h.Add("Cache-Control", "max-age=0")
	h.Add("Connection", "keep-alive")
	return h
}
开发者ID:kjfcpua,项目名称:robot,代码行数:19,代码来源:engine.go
示例19: downloadFile
// downloadFile performs the HTTP download for req, through a proxy when the
// request names a proxy host, and returns the page together with the response
// body converted by the charset helpers.
//
// The response header and cookies are stored on the page. An empty URL or a
// missing response is logged, recorded on the page through
// SetStatus(true, msg) and reported with an empty body; a connection error
// is reported with an empty body as well.
func (self *HttpDownloader) downloadFile(p *robot.Page, req *robot.Request) (*robot.Page, string) {
	if len(req.GetUrl()) == 0 {
		mlog.LogInst().LogError("url is empty")
		p.SetStatus(true, "url is empty")
		return p, ""
	}

	var (
		resp *http.Response
		err  error
	)
	if len(req.GetProxyHost()) != 0 {
		// using http proxy
		resp, err = connectByHttpProxy(p, req)
	} else {
		// normal http download
		resp, err = connectByHttp(p, req)
	}
	if err != nil {
		return p, ""
	}
	// connectByHttp can report a nil error together with a nil response;
	// guard against dereferencing it.
	if resp == nil {
		mlog.LogInst().LogError("response is nil")
		p.SetStatus(true, "response is nil")
		return p, ""
	}
	// Register the close right away so the body is released on every path.
	defer resp.Body.Close()

	p.SetHeader(resp.Header)
	p.SetCookies(resp.Cookies())

	// get converter to utf-8
	contentType := resp.Header.Get("Content-Type")
	var bodyStr string
	if resp.Header.Get("Content-Encoding") == "gzip" {
		bodyStr = self.changeCharsetEncodingAutoGzipSupport(contentType, resp.Body)
	} else {
		bodyStr = self.changeCharsetEncodingAuto(contentType, resp.Body)
	}
	return p, bodyStr
}
开发者ID:aosen,项目名称:robot,代码行数:40,代码来源:httpdownloader.go
注:本文中的github.com/aosen/mlog.LogInst函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论