本文整理汇总了Golang中github.com/hu17889/go_spider/core/common/mlog.LogInst函数的典型用法代码示例。如果您正苦于以下问题:Golang LogInst函数的具体用法?Golang LogInst怎么用?Golang LogInst使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了LogInst函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Golang代码示例。
示例1: changeCharsetEncodingAutoGzipSupport
// changeCharsetEncodingAutoGzipSupport decompresses the gzip stream sor and
// returns its content as a string, passing the data through the reader that
// charset.NewReader builds for contentTypeStr.
//
// It returns "" when sor is not a valid gzip stream. When no charset reader
// can be built, the decompressed bytes are returned without conversion.
// Read errors are logged and whatever was read so far is still returned.
func (this *HttpDownloader) changeCharsetEncodingAutoGzipSupport(contentTypeStr string, sor io.ReadCloser) string {
	gzipReader, err := gzip.NewReader(sor)
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		return ""
	}
	defer gzipReader.Close()

	destReader, err := charset.NewReader(gzipReader, contentTypeStr)
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		// Fall back to the decompressed stream. Reading sor directly would
		// yield the remaining compressed bytes, not the page content.
		destReader = gzipReader
	}

	sorbody, err := ioutil.ReadAll(destReader)
	if err != nil {
		// Deliberately best effort: for gb2312 pages an error such as
		// "simplifiedchinese: invalid GBK encoding" may be returned, yet the
		// bytes read so far are still worth handing back.
		mlog.LogInst().LogError(err.Error())
	}
	return string(sorbody)
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:27,代码来源:downloader_http.go
示例2: Poll
// Poll pops the next request from the head of the Redis list
// this.requestList and decodes it from JSON.
//
// It returns nil when the list length cannot be read, when the list is
// empty, when LPOP fails or yields no payload, or when the payload is not
// valid JSON for a request.Request. All but the first case are logged here.
func (this *RedisScheduler) Poll() *request.Request {
	this.locker.Lock()
	defer this.locker.Unlock()

	conn := this.redisPool.Get()
	defer conn.Close()

	length, err := this.count()
	if err != nil {
		return nil
	}
	if length <= 0 {
		mlog.LogInst().LogError("RedisScheduler Poll length 0")
		return nil
	}

	buf, err := conn.Do("LPOP", this.requestList)
	if err != nil {
		mlog.LogInst().LogError("RedisScheduler Poll Error: " + err.Error())
		return nil
	}

	// The list can be drained by another client between the length check and
	// LPOP, in which case the reply is nil; an unchecked assertion would panic.
	data, ok := buf.([]byte)
	if !ok {
		mlog.LogInst().LogError("RedisScheduler Poll Error: unexpected LPOP reply")
		return nil
	}

	requ := &request.Request{}
	if err = json.Unmarshal(data, requ); err != nil {
		mlog.LogInst().LogError("RedisScheduler Poll Error: " + err.Error())
		return nil
	}
	return requ
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:31,代码来源:redis_scheduler.go
示例3: downloadHtml
// downloadHtml issues an HTTP GET for req's URL, parses the response body
// with goquery and returns a page carrying both the rendered HTML string and
// the parsed document.
//
// It returns nil when the URL is empty, or (after logging) when the request,
// the parse or the HTML rendering fails.
func (this *HttpDownloader) downloadHtml(req *request.Request) *page.Page {
	target := req.GetUrl()
	if len(target) == 0 {
		return nil
	}

	resp, err := http.Get(target)
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		return nil
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		return nil
	}

	html, err := doc.Html()
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		return nil
	}

	// Build the resulting page from the request, the HTML text and the parser.
	return page.NewPage(req).
		SetBodyStr(html).
		SetHtmlParser(doc)
}
开发者ID:w3hacker,项目名称:go_spider,代码行数:34,代码来源:downloader_http.go
示例4: downloadJson
// downloadJson issues an HTTP GET for req's URL and parses the response body
// as JSON, returning a page that carries both the raw body string and the
// parsed simplejson value.
//
// It logs and returns nil when the URL is empty, when the request fails,
// when the body cannot be read, or when the body is not valid JSON.
func (this *HttpDownloader) downloadJson(req *request.Request) *page.Page {
	target := req.GetUrl()
	if len(target) == 0 {
		// No error value exists at this point, so log a fixed message;
		// calling Error() on a nil error would panic.
		mlog.LogInst().LogError("url is empty")
		return nil
	}

	resp, err := http.Get(target)
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		return nil
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		return nil
	}

	r, err := simplejson.NewJson(body)
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		return nil
	}

	// Build the resulting page from the request, the raw text and the JSON.
	return page.NewPage(req).
		SetBodyStr(string(body)).
		SetJson(r)
}
开发者ID:w3hacker,项目名称:go_spider,代码行数:35,代码来源:downloader_http.go
示例5: main
func main() {
start_url := "http://www.jiexieyin.org"
thread_num := uint(16)
redisAddr := "127.0.0.1:6379"
redisMaxConn := 10
redisMaxIdle := 10
proc := &MyProcessor{}
sp := spider.NewSpider(proc, "redis_scheduler_example").
//SetSleepTime("fixed", 6000, 6000).
//SetScheduler(scheduler.NewQueueScheduler(true)).
SetScheduler(scheduler.NewRedisScheduler(redisAddr, redisMaxConn, redisMaxIdle, true)).
AddPipeline(pipeline.NewPipelineConsole()).
SetThreadnum(thread_num)
init := false
for _, arg := range os.Args {
if arg == "--init" {
init = true
break
}
}
if init {
sp.AddUrl(start_url, "html")
mlog.LogInst().LogInfo("重新开始爬")
} else {
mlog.LogInst().LogInfo("继续爬")
}
sp.Run()
}
开发者ID:wadee,项目名称:go_proj,代码行数:32,代码来源:main.go
示例6: downloadHtml
// downloadHtml downloads the body for req via downloadFile and parses it with
// goquery. On success the page receives the rendered HTML, the parsed
// document and SetStatus(false, ""). On a parse or render error the error is
// logged and recorded with SetStatus(true, msg). If downloadFile already left
// the page unsuccessful, it is returned untouched.
func (this *HttpDownloader) downloadHtml(p *page.Page, req *request.Request) *page.Page {
	p, raw := this.downloadFile(p, req)
	if !p.IsSucc() {
		return p
	}

	// fail logs err, records it on the page and hands the page back.
	fail := func(err error) *page.Page {
		mlog.LogInst().LogError(err.Error())
		p.SetStatus(true, err.Error())
		return p
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(raw)))
	if err != nil {
		return fail(err)
	}

	html, err := doc.Html()
	if err != nil {
		return fail(err)
	}

	p.SetBodyStr(html).SetHtmlParser(doc).SetStatus(false, "")
	return p
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:28,代码来源:downloader_http.go
示例7: addRequest
// addRequest pushes req onto the spider's scheduler. A nil request or one
// with an empty URL is logged as an error and dropped.
func (this *Spider) addRequest(req *request.Request) {
	switch {
	case req == nil:
		mlog.LogInst().LogError("request is nil")
	case req.GetUrl() == "":
		mlog.LogInst().LogError("request is empty")
	default:
		this.pScheduler.Push(req)
	}
}
开发者ID:w3hacker,项目名称:go_spider,代码行数:11,代码来源:spider.go
示例8: Push
// Push serializes requ to JSON and appends it to the Redis list
// this.requestList.
//
// When this.forbiddenDuplicateUrl is set, a URL already present in the hash
// this.urlList is silently skipped; otherwise the HSET that records the URL
// and the RPUSH are issued between MULTI and EXEC. Every failure is logged
// and aborts the push.
func (this *RedisScheduler) Push(requ *request.Request) {
	this.locker.Lock()
	defer this.locker.Unlock()

	requJson, err := json.Marshal(requ)
	if err != nil {
		mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
		return
	}

	conn := this.redisPool.Get()
	defer conn.Close()

	if this.forbiddenDuplicateUrl {
		urlExist, err := conn.Do("HGET", this.urlList, requ.GetUrl())
		if err != nil {
			mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
			return
		}
		if urlExist != nil {
			// URL was pushed before; nothing to do.
			return
		}

		// Without a successful MULTI the later EXEC would fail and the
		// commands below would run outside any transaction, so stop here.
		if _, err = conn.Do("MULTI"); err != nil {
			mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
			return
		}
		if _, err = conn.Do("HSET", this.urlList, requ.GetUrl(), 1); err != nil {
			mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
			// Best-effort rollback; the original error was already logged.
			conn.Do("DISCARD")
			return
		}
	}

	if _, err = conn.Do("RPUSH", this.requestList, requJson); err != nil {
		mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
		if this.forbiddenDuplicateUrl {
			// Best-effort rollback; the original error was already logged.
			conn.Do("DISCARD")
		}
		return
	}

	if this.forbiddenDuplicateUrl {
		// The queued commands only take effect if EXEC succeeds.
		if _, err = conn.Do("EXEC"); err != nil {
			mlog.LogInst().LogError("RedisScheduler Push Error: " + err.Error())
		}
	}
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:48,代码来源:redis_scheduler.go
示例9: downloadJson
// downloadJson downloads the body for req via downloadFile and parses it as
// JSON. For the "jsonp" response type the body is first passed through
// util.JsonpToJson. On success the page receives the (possibly converted)
// body, the parsed JSON and SetStatus(false, ""). On a parse error the body
// and error are logged and the error is recorded with SetStatus(true, msg).
func (this *HttpDownloader) downloadJson(p *page.Page, req *request.Request) *page.Page {
	p, destbody := this.downloadFile(p, req)
	if !p.IsSucc() {
		return p
	}

	payload := []byte(destbody)
	if req.GetResponceType() == "jsonp" {
		payload = []byte(util.JsonpToJson(destbody))
	}

	parsed, err := simplejson.NewJson(payload)
	if err != nil {
		mlog.LogInst().LogError(string(payload) + "\t" + err.Error())
		p.SetStatus(true, err.Error())
		return p
	}

	// Store the JSON result on the page.
	p.SetBodyStr(string(payload)).SetJson(parsed).SetStatus(false, "")
	return p
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:27,代码来源:downloader_http.go
示例10: connectByHttp
// connectByHttp performs the HTTP request described by req (method, URL,
// post data, headers, cookies and redirect policy) and returns the response.
//
// If the request cannot be built or sent, the error is logged, recorded on p
// with SetStatus(true, msg) and returned with a nil response. A *url.Error
// whose inner error text is exactly "normal" is not treated as a failure:
// the response from client.Do is returned with a nil error.
func connectByHttp(p *page.Page, req *request.Request) (*http.Response, error) {
	client := &http.Client{
		CheckRedirect: req.GetRedirectFunc(),
	}

	httpreq, err := http.NewRequest(req.GetMethod(), req.GetUrl(), strings.NewReader(req.GetPostdata()))
	if err != nil {
		// httpreq is nil here (e.g. malformed URL or method); touching it
		// below would panic.
		mlog.LogInst().LogError(err.Error())
		p.SetStatus(true, err.Error())
		return nil, err
	}

	if header := req.GetHeader(); header != nil {
		httpreq.Header = header
	}
	for _, c := range req.GetCookies() {
		httpreq.AddCookie(c)
	}

	resp, err := client.Do(httpreq)
	if err != nil {
		// NOTE(review): "normal" is presumably the sentinel produced by the
		// request's redirect function — confirm against GetRedirectFunc.
		e, ok := err.(*url.Error)
		if !ok || e.Err == nil || e.Err.Error() != "normal" {
			mlog.LogInst().LogError(err.Error())
			p.SetStatus(true, err.Error())
			return nil, err
		}
	}
	return resp, nil
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:31,代码来源:downloader_http.go
示例11: Process
// Process collects the links of a successfully downloaded page and queues
// them as new "html" requests.
//
// Pages that failed to download, whose URL cannot be parsed, or whose host
// does not end in "jiexieyin.org" are ignored. Links starting with
// "javascript:", "#" or "mailto:" are skipped, as are anchors without an
// href attribute. Relative links are resolved against the page URL.
func (this *MyProcessor) Process(p *page.Page) {
	if !p.IsSucc() {
		mlog.LogInst().LogError(p.Errormsg())
		return
	}

	u, err := url.Parse(p.GetRequest().GetUrl())
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		return
	}
	if !strings.HasSuffix(u.Host, "jiexieyin.org") {
		return
	}

	// Compiled once per page instead of once per anchor.
	reSkip := regexp.MustCompile(`^(?:javascript:|#|mailto:)`)

	var urls []string
	p.GetHtmlParser().Find("a").Each(func(i int, s *goquery.Selection) {
		href, ok := s.Attr("href")
		if !ok || reSkip.MatchString(href) {
			return
		}

		ref, err := url.Parse(href)
		if err != nil {
			mlog.LogInst().LogError(err.Error())
			return
		}

		if ref.IsAbs() {
			urls = append(urls, href)
			return
		}
		// Resolve relative links properly: plain concatenation of the page
		// URL and href yields wrong targets for "/path", "../x", "?q=1", etc.
		urls = append(urls, u.ResolveReference(ref).String())
	})

	p.AddTargetRequests(urls, "html")
}
开发者ID:wadee,项目名称:go_proj,代码行数:47,代码来源:main.go
示例12: ResetHtmlParser
// ResetHtmlParser rebuilds the goquery document from the page's current body
// string, stores it in this.docParser and returns it. A parse failure is
// logged and then raised as a panic carrying the error message.
func (this *Page) ResetHtmlParser() *goquery.Document {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(this.body))
	this.docParser = doc
	if err != nil {
		msg := err.Error()
		mlog.LogInst().LogError(msg)
		panic(msg)
	}
	return this.docParser
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:11,代码来源:page.go
示例13: count
// count returns the LLEN of the Redis list this.requestList. On a Redis
// error it logs the error and returns it with a zero length.
func (this *RedisScheduler) count() (int, error) {
	conn := this.redisPool.Get()
	defer conn.Close()

	reply, err := conn.Do("LLEN", this.requestList)
	if err == nil {
		// NOTE(review): assumes the LLEN reply is always an int64; any other
		// reply type makes this assertion panic.
		return int(reply.(int64)), nil
	}

	mlog.LogInst().LogError("RedisScheduler Count Error: " + err.Error())
	return 0, err
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:10,代码来源:redis_scheduler.go
示例14: downloadFile
// downloadFile performs the HTTP request described by req, stores the
// response headers and cookies on p, and returns p together with the body
// as produced by changeCharsetEncodingAuto for the response Content-Type.
//
// On an empty URL, or when the request cannot be built or sent, the problem
// is logged, recorded on p with SetStatus(true, msg) and an empty body is
// returned. A *url.Error whose inner error text is exactly "normal" is not
// treated as a failure.
func (this *HttpDownloader) downloadFile(p *page.Page, req *request.Request) (*page.Page, string) {
	urlstr := req.GetUrl()
	if len(urlstr) == 0 {
		mlog.LogInst().LogError("url is empty")
		p.SetStatus(true, "url is empty")
		return p, ""
	}

	client := &http.Client{
		CheckRedirect: req.GetRedirectFunc(),
	}

	httpreq, err := http.NewRequest(req.GetMethod(), urlstr, strings.NewReader(req.GetPostdata()))
	if err != nil {
		// httpreq is nil here (e.g. malformed URL or method); touching it
		// below would panic.
		mlog.LogInst().LogError(err.Error())
		p.SetStatus(true, err.Error())
		return p, ""
	}

	if header := req.GetHeader(); header != nil {
		httpreq.Header = header
	}
	for _, c := range req.GetCookies() {
		httpreq.AddCookie(c)
	}

	resp, err := client.Do(httpreq)
	if err != nil {
		// NOTE(review): "normal" is presumably the sentinel produced by the
		// request's redirect function — confirm against GetRedirectFunc.
		e, ok := err.(*url.Error)
		if !ok || e.Err == nil || e.Err.Error() != "normal" {
			mlog.LogInst().LogError(err.Error())
			p.SetStatus(true, err.Error())
			return p, ""
		}
	}
	// Register the close before the body is consumed so it is released even
	// if decoding panics.
	defer resp.Body.Close()

	p.SetHeader(resp.Header)
	p.SetCookies(resp.Cookies())

	// Decode the body via the charset helper.
	bodyStr := this.changeCharsetEncodingAuto(resp.Header.Get("Content-Type"), resp.Body)
	return p, bodyStr
}
开发者ID:xujb,项目名称:go_spider,代码行数:43,代码来源:downloader_http.go
示例15: pageProcess
// pageProcess is the core per-request routine: it downloads req (up to three
// attempts, sleeping before each), hands a successful page to the page
// processor, queues the page's target requests and, unless the page is
// marked to be skipped, feeds its items to every pipeline.
//
// Any panic raised along the way is recovered and logged so that a failure
// on one request does not affect the others.
func (this *Spider) pageProcess(req *request.Request) {
	var p *page.Page

	defer func() {
		r := recover()
		if r == nil {
			return
		}
		// Keep the panic message whenever it carries one.
		switch v := r.(type) {
		case string:
			mlog.LogInst().LogError(v)
		case error:
			mlog.LogInst().LogError(v.Error())
		default:
			mlog.LogInst().LogError("pageProcess error")
		}
	}()

	// Download the page, retrying on failure up to three times in total.
	for i := 0; i < 3; i++ {
		this.sleep()
		p = this.pDownloader.Download(req)
		// A downloader may hand back nil instead of a failed page.
		if p != nil && p.IsSucc() {
			break
		}
	}

	// Nothing to process if every attempt failed.
	if p == nil || !p.IsSucc() {
		return
	}

	this.pPageProcesser.Process(p)
	for _, next := range p.GetTargetRequests() {
		this.AddRequest(next)
	}

	// Output stage.
	if !p.GetSkip() {
		for _, pip := range this.pPiplelines {
			pip.Process(p.GetPageItems(), this)
		}
	}
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:41,代码来源:spider.go
示例16: Download
// Download dispatches on the request's response type: "html" goes to
// downloadHtml and "json" to downloadJson. Any other type is logged as an
// error and yields nil.
func (this *HttpDownloader) Download(req *request.Request) *page.Page {
	mtype := req.GetResponceType()
	if mtype == "html" {
		return this.downloadHtml(req)
	}
	if mtype == "json" {
		return this.downloadJson(req)
	}

	mlog.LogInst().LogError("error request type:" + mtype)
	return nil
}
开发者ID:w3hacker,项目名称:go_spider,代码行数:13,代码来源:downloader_http.go
示例17: Download
// Download creates a page for req and dispatches on the request's response
// type: "html" goes to downloadHtml, "json" and "jsonp" to downloadJson, and
// "text" to downloadText. Any other type is logged as an error and the
// freshly created page is returned as is.
func (this *HttpDownloader) Download(req *request.Request) *page.Page {
	p := page.NewPage(req)

	switch mtype := req.GetResponceType(); mtype {
	case "html":
		return this.downloadHtml(p, req)
	case "json", "jsonp":
		return this.downloadJson(p, req)
	case "text":
		return this.downloadText(p, req)
	default:
		mlog.LogInst().LogError("error request type:" + mtype)
	}
	return p
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:18,代码来源:downloader_http.go
示例18: readHeaderFromFile
// readHeaderFromFile builds an http.Header from the JSON file headerFile.
//
// The "User-Agent", "Referer" and "Cookie" keys of the JSON object are copied
// into the header, and "Cache-Control: max-age=0" and
// "Connection: keep-alive" are always added. It logs and returns nil when
// the file cannot be read or does not contain valid JSON.
func readHeaderFromFile(headerFile string) http.Header {
	// Read the file, then parse the header and cookies out of it.
	b, err := ioutil.ReadFile(headerFile)
	if err != nil {
		// May be a share access error.
		mlog.LogInst().LogError(err.Error())
		return nil
	}

	js, err := simplejson.NewJson(b)
	if err != nil {
		// Do not use js after a failed parse; treat it like an unreadable file.
		mlog.LogInst().LogError(err.Error())
		return nil
	}

	// Assemble the header.
	h := make(http.Header)
	h.Add("User-Agent", js.Get("User-Agent").MustString())
	h.Add("Referer", js.Get("Referer").MustString())
	h.Add("Cookie", js.Get("Cookie").MustString())
	h.Add("Cache-Control", "max-age=0")
	h.Add("Connection", "keep-alive")
	return h
}
开发者ID:Zxnui,项目名称:go_spider,代码行数:19,代码来源:request.go
示例19: downloadFile
// downloadFile fetches req (through connectByHttpProxy when the request has
// a proxy host, otherwise through connectByHttp), stores the response headers
// and cookies on p, and returns p together with the decoded body.
//
// The body goes through changeCharsetEncodingAutoGzipSupport when the
// response's Content-Encoding header is exactly "gzip", and through
// changeCharsetEncodingAuto otherwise. On an empty URL the problem is logged
// and recorded on p with SetStatus(true, msg). When the connection helper
// returns an error, p is returned with an empty body and nothing further is
// logged or recorded here.
func (this *HttpDownloader) downloadFile(p *page.Page, req *request.Request) (*page.Page, string) {
	if urlstr := req.GetUrl(); len(urlstr) == 0 {
		mlog.LogInst().LogError("url is empty")
		p.SetStatus(true, "url is empty")
		return p, ""
	}

	var (
		resp *http.Response
		err  error
	)
	if proxystr := req.GetProxyHost(); len(proxystr) != 0 {
		// Go through the configured HTTP proxy.
		resp, err = connectByHttpProxy(p, req)
	} else {
		// Plain HTTP download.
		resp, err = connectByHttp(p, req)
	}
	if err != nil {
		return p, ""
	}
	// Register the close before the body is consumed so it is released even
	// if decoding panics.
	defer resp.Body.Close()

	p.SetHeader(resp.Header)
	p.SetCookies(resp.Cookies())

	// Decode the body via the charset helpers.
	contentType := resp.Header.Get("Content-Type")
	if resp.Header.Get("Content-Encoding") == "gzip" {
		return p, this.changeCharsetEncodingAutoGzipSupport(contentType, resp.Body)
	}
	return p, this.changeCharsetEncodingAuto(contentType, resp.Body)
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:43,代码来源:downloader_http.go
示例20: NewRequestWithHeaderFile
// NewRequestWithHeaderFile creates a GET request for url with response type
// respType, taking the "User-Agent" and "Cookie" headers from the JSON file
// headerFile and always adding "Cache-Control: max-age=0" and
// "Connection: keep-alive".
//
// When headerFile does not exist, cannot be read, or does not contain valid
// JSON, a request without custom headers is returned instead; read and parse
// failures are logged.
func NewRequestWithHeaderFile(url string, respType string, headerFile string) *Request {
	if _, err := os.Stat(headerFile); err != nil {
		// File does not exist: use the default mode.
		return NewRequest(url, respType, "", "GET", "", nil, nil, nil, nil)
	}

	// Read the file, then parse the header and cookies out of it.
	b, err := ioutil.ReadFile(headerFile)
	if err != nil {
		// May be a share access error. Fall back to the default mode rather
		// than parsing an empty buffer.
		mlog.LogInst().LogError(err.Error())
		return NewRequest(url, respType, "", "GET", "", nil, nil, nil, nil)
	}

	js, err := simplejson.NewJson(b)
	if err != nil {
		// Do not use js after a failed parse; fall back to the default mode.
		mlog.LogInst().LogError(err.Error())
		return NewRequest(url, respType, "", "GET", "", nil, nil, nil, nil)
	}

	// Assemble the header.
	h := make(http.Header)
	h.Add("User-Agent", js.Get("User-Agent").MustString())
	h.Add("Cookie", js.Get("Cookie").MustString())
	h.Add("Cache-Control", "max-age=0")
	h.Add("Connection", "keep-alive")
	return NewRequest(url, respType, "", "GET", "", h, nil, nil, nil)
}
开发者ID:xujb,项目名称:go_spider,代码行数:22,代码来源:request.go
注:本文中的github.com/hu17889/go_spider/core/common/mlog.LogInst函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论