• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Golang charset.NewReader函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Golang中golang.org/x/net/html/charset.NewReader函数的典型用法代码示例。如果您正苦于以下问题:Golang NewReader函数的具体用法?Golang NewReader怎么用?Golang NewReader使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了NewReader函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Golang代码示例。

示例1: changeCharsetEncodingAutoGzipSupport

// changeCharsetEncodingAutoGzipSupport gunzips sor, converts the decompressed
// bytes to UTF-8 using the charset derived from contentTypeStr, and returns
// the result as a string.
//
// It returns "" when sor is not a valid gzip stream. Failures after that point
// are logged and whatever bytes were read are still returned.
func (this *HttpDownloader) changeCharsetEncodingAutoGzipSupport(contentTypeStr string, sor io.ReadCloser) string {
	gzipReader, err := gzip.NewReader(sor)
	if err != nil {
		mlog.LogInst().LogError(err.Error())
		return ""
	}
	defer gzipReader.Close()

	// If charset detection fails, keep reading the *decompressed* stream.
	// Falling back to sor would hand back raw gzip bytes as the page body.
	var destReader io.Reader = gzipReader
	if converted, err := charset.NewReader(gzipReader, contentTypeStr); err != nil {
		mlog.LogInst().LogError(err.Error())
	} else {
		destReader = converted
	}

	sorbody, err := ioutil.ReadAll(destReader)
	if err != nil {
		// Deliberately best effort: for gb2312 an error such as
		// "simplifiedchinese: invalid GBK encoding" can be returned, and the
		// bytes read so far are still handed back to the caller.
		mlog.LogInst().LogError(err.Error())
	}

	return string(sorbody)
}
开发者ID:CrocdileChan,项目名称:go_spider,代码行数:27,代码来源:downloader_http.go


示例2: fetchHTML

// fetchHTML requests url via get and returns the body converted to UTF-8
// together with the response's Request.URL.
//
// A status code outside 200-299 and an empty body are both reported as errors.
func fetchHTML(url string) ([]byte, *url.URL, error) {
	resp, err := get(url)
	if err != nil {
		return nil, nil, err
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, nil, errors.New("besticon: not found")
	}

	raw, err := getBodyBytes(resp)
	if err != nil {
		return nil, nil, err
	}
	if len(raw) == 0 {
		return nil, nil, errors.New("besticon: empty response")
	}

	// Decode from the buffered copy so the charset is taken from the
	// Content-Type header (or sniffed from the bytes themselves).
	decoded, err := charset.NewReader(bytes.NewReader(raw), resp.Header.Get("Content-Type"))
	if err != nil {
		return nil, nil, err
	}
	converted, err := ioutil.ReadAll(decoded)
	if err != nil {
		return nil, nil, err
	}

	return converted, resp.Request.URL, nil
}
开发者ID:undernewmanagement,项目名称:besticon,代码行数:31,代码来源:besticon.go


示例3: getHTMLPage

// getHTMLPage fetches url with an HTTP(S) GET and returns a reader over the
// response body.
//
// ua, when non-empty, is sent as the User-Agent header. timeout is the client
// timeout in seconds. Unless dontDetectCharset is set or the response has no
// Content-Type header, the body is wrapped with charset.NewReader so that it
// is read as UTF-8; otherwise response.Body itself is returned.
//
// On success the response body is left open for the caller to read. On every
// error path the returned reader is nil.
func getHTMLPage(url string, ua string, timeout int, dontDetectCharset bool) (htmlReader io.Reader, err error) {
	jar, err := cookiejar.New(nil)
	if err != nil {
		return nil, err
	}
	client := &http.Client{
		Jar:     jar,
		Timeout: time.Duration(timeout) * time.Second,
	}

	request, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	if ua != "" {
		request.Header.Set("User-Agent", ua)
	}

	response, err := client.Do(request)
	if err != nil {
		return nil, err
	}

	contentType := response.Header.Get("Content-Type")
	if contentType == "" || dontDetectCharset {
		return response.Body, nil
	}

	htmlReader, err = charset.NewReader(response.Body, contentType)
	if err != nil {
		// Nobody else can reach the body once we return an error, so close it
		// here to avoid leaking the connection.
		response.Body.Close()
		return nil, err
	}
	return htmlReader, nil
}
开发者ID:msoap,项目名称:html2data,代码行数:33,代码来源:html2data.go


示例4: parseItemXml

// parseItemXml decodes the item XML document in str into an EntryXml, resolves
// the item's link through fetchFeedUrl, and builds a feeds.Item from the
// decoded fields.
//
// It returns nil when the XML cannot be decoded (the error is printed) or when
// fetchFeedUrl fails.
func parseItemXml(client *http.Client, cookies []*http.Cookie, str string) *feeds.Item {
	var entry EntryXml

	/* print the item xml */
	// fmt.Println(str)

	// Convert the document (e.g. gbk) to UTF-8 while decoding. The decoder
	// passes the encoding label from the XML declaration, not a Content-Type
	// header value, so it has to be resolved with NewReaderLabel;
	// charset.NewReader would not read the label as a charset.
	d := xml.NewDecoder(bytes.NewReader([]byte(str)))
	d.CharsetReader = func(label string, input io.Reader) (io.Reader, error) {
		return charset.NewReaderLabel(label, input)
	}
	if err := d.Decode(&entry); err != nil {
		fmt.Printf("xml entryXml unmarshal failed: %v\n", err)
		return nil
	}

	url, err := fetchFeedUrl(client, cookies, BaseURL+entry.Item.Display.Url)
	if err != nil {
		return nil
	}

	return &feeds.Item{
		Title:       entry.Item.Display.Title,
		Link:        &feeds.Link{Href: url},
		Description: entry.Item.Display.Content,
		Id:          entry.Item.Display.Docid,
		Author:      &feeds.Author{Name: entry.Item.Display.Source},
		//Created:     entry.Item.Display.Date,
		Updated: modifyTime(entry.Item.Display.Update),
	}
}
开发者ID:choueric,项目名称:gorss,代码行数:32,代码来源:weixin_sougou.go


示例5: httpRequest

// httpRequest sends req with the browser's client, stores the (UTF-8
// converted) response body in bow.body, parses it into a goquery document and
// pushes a new history state.
//
// A 503 from a "cloudflare-nginx" server is delegated to bow.solveCF. A 403
// response is replaced by an empty HTML placeholder document.
func (bow *Browser) httpRequest(req *http.Request) error {
	bow.preSend()
	resp, err := bow.buildClient().Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode == 503 && resp.Header.Get("Server") == "cloudflare-nginx" {
		if bow.solveCF(resp, req.URL) {
			return nil
		}
		return fmt.Errorf("Page protected with cloudflare with unknown algorythm")
	}

	contentType := resp.Header.Get("Content-Type")

	switch {
	case resp.StatusCode == 403:
		bow.body = []byte(`<html></html>`)

	case contentType == "text/html; charset=GBK":
		// This exact header value is decoded with mahonia rather than charset.
		gbk := mahonia.NewDecoder("gbk")
		if bow.body, err = ioutil.ReadAll(gbk.NewReader(resp.Body)); err != nil {
			return err
		}
		bow.contentConversion(contentType)

	default:
		converted, convErr := charset.NewReader(resp.Body, contentType)
		if convErr != nil {
			// Charset detection failed: read the body unconverted.
			bow.body, err = ioutil.ReadAll(resp.Body)
		} else {
			bow.body, err = ioutil.ReadAll(converted)
		}
		if err != nil {
			return err
		}
		bow.contentConversion(contentType)
	}

	dom, err := goquery.NewDocumentFromReader(bytes.NewBuffer(bow.body))
	if err != nil {
		return err
	}

	bow.history.Push(bow.state)
	bow.state = jar.NewHistoryState(req, resp, dom)
	bow.postSend()

	return nil
}
开发者ID:jabbahotep,项目名称:surf,代码行数:58,代码来源:browser.go


示例6: forecast

// forecast loads the forecast XML for loc (URL built from fcURLFmt with
// loc.Lat and loc.Lon) and renders it as a plain-text report with one graph
// line each for temperature, humidity, precipitation probability, wind speed
// and wind direction.
//
// It returns errFc when the document cannot be loaded or contains no
// start/end time nodes.
func forecast(loc location) (string, error) {
	doc := xmlx.New()
	url := fmt.Sprintf(fcURLFmt, loc.Lat, loc.Lon)
	// NOTE(review): xmlx is expected to forward this callback to the XML
	// decoder's CharsetReader hook, which supplies an encoding label rather
	// than a Content-Type value, hence NewReaderLabel instead of NewReader.
	err := doc.LoadUri(url, func(str string, rdr io.Reader) (io.Reader, error) {
		return charset.NewReaderLabel(str, rdr)
	})
	if err != nil {
		return "", errFc
	}

	startTimeNodes := doc.SelectNodes("", "start-valid-time")
	endTimeNodes := doc.SelectNodes("", "end-valid-time")
	if len(startTimeNodes) == 0 || len(endTimeNodes) == 0 {
		return "", errFc
	}
	if len(endTimeNodes) > maxHours {
		endTimeNodes = endTimeNodes[:maxHours]
	}
	// Parse errors are ignored on purpose: an unparsable timestamp is shown
	// as the zero time instead of failing the whole report.
	startTime, _ := time.Parse(time.RFC3339, startTimeNodes[0].GetValue())
	endTime, _ := time.Parse(time.RFC3339, endTimeNodes[len(endTimeNodes)-1].GetValue())

	temps := findVals("temperature", "hourly", doc)
	humids := findVals("humidity", "", doc)
	precips := findVals("probability-of-precipitation", "", doc)
	speeds := findVals("wind-speed", "sustained", doc)
	dirs := findVals("direction", "", doc)

	minTemp, maxTemp, tempGraph := makeGraph(temps)
	minHumid, maxHumid, humidGraph := makeGraph(humids)
	minPrecip, maxPrecip, precipGraph := makeGraph(precips)
	minSpeed, maxSpeed, speedGraph := makeGraph(speeds)

	// Convert the arrow glyphs once instead of on every iteration.
	arrowRunes := []rune(arrows)
	dirGraph := ""
	for _, dir := range dirs {
		dirGraph += string(arrowRunes[dirIndex(dir)])
	}

	timeFmt := "2006-01-02 15:04"
	start, end := startTime.Format(timeFmt), endTime.Format(timeFmt)

	tempRange := fmt.Sprintf("%3d %3d", minTemp, maxTemp)
	humidRange := fmt.Sprintf("%3d %3d", minHumid, maxHumid)
	precipRange := fmt.Sprintf("%3d %3d", minPrecip, maxPrecip)
	speedRange := fmt.Sprintf("%3d %3d", minSpeed, maxSpeed)

	out := fmt.Sprintf("Forecast for %s\n", loc.Name)
	out += fmt.Sprintf("         min max %-24s%24s\n", start, end)
	out += fmt.Sprintf("Temp °F  %7s %s\n", tempRange, tempGraph)
	out += fmt.Sprintf("Humid %%  %7s %s\n", humidRange, humidGraph) // esc % 2X for later fmt use
	out += fmt.Sprintf("Precip %% %7s %s\n", precipRange, precipGraph)
	out += fmt.Sprintf("Wind mph %7s %s\n", speedRange, speedGraph)
	out += fmt.Sprintf("Wind dir         %s\n", dirGraph)

	return out, nil
}
开发者ID:voxadam,项目名称:bort,代码行数:56,代码来源:forecast.go


示例7: Parse

// Parse extracts information about a page into info.
//
// s contains the page source. pageURL (optional) is the URL the data was taken
// from; when it is nil no oembed lookup is attempted. contentType (optional)
// is the Content-Type header value and defaults to "text/html".
//
// The source is converted to UTF-8, parsed as HTML, and the first head and
// body elements found on each branch are handed to parseHead and parseBody.
// Errors from charset detection or HTML parsing are returned; the oembed
// fetch is best effort and never fails the call.
func (info *HTMLInfo) Parse(s io.Reader, pageURL *string, contentType *string) error {
	contentTypeStr := "text/html"
	if contentType != nil && len(*contentType) > 0 {
		contentTypeStr = *contentType
	}
	utf8s, err := charset.NewReader(s, contentTypeStr)
	if err != nil {
		return err
	}

	if pageURL != nil {
		// An unparsable page URL leaves info.url nil, as before.
		tu, _ := url.Parse(*pageURL)
		info.url = tu
	}

	doc, err := html.Parse(utf8s)
	if err != nil {
		return err
	}

	var f func(*html.Node)
	f = func(n *html.Node) {
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if c.Type == html.ElementNode {
				if c.Data == "head" {
					info.parseHead(c)
					continue
				} else if c.Data == "body" {
					info.parseBody(c)
					continue
				}
			}
			f(c)
		}
	}
	f(doc)

	if info.AllowOembedFetching && pageURL != nil && len(info.OembedJSONURL) > 0 {
		// OembedJSONURL comes from the page itself and may be malformed;
		// url.Parse then returns a nil URL, so skip the fetch instead of
		// dereferencing it.
		pu, perr := url.Parse(info.OembedJSONURL)
		if perr == nil && pu != nil {
			siteName := info.OGInfo.SiteName
			siteURL := strings.ToLower(pu.Scheme) + "://" + pu.Host

			if len(siteName) == 0 {
				siteName = pu.Host
			}

			oiItem := &oembed.Item{EndpointURL: info.OembedJSONURL, ProviderName: siteName, ProviderURL: siteURL, IsEndpointURLComplete: true}
			// Fetch errors are ignored: oembed data is optional.
			oi, _ := oiItem.FetchOembed(*pageURL, info.Client)

			if oi != nil && oi.Status < 300 {
				info.OembedInfo = oi
			}
		}
	}

	return nil
}
开发者ID:gswirski,项目名称:go-htmlinfo,代码行数:61,代码来源:htmlinfo.go


示例8: Iconv

// Iconv wraps reader with a charset-converting reader when contain reports a
// match of "text" against the page's ContentType; otherwise reader is returned
// unchanged.
func (p *Page) Iconv(reader io.Reader) (io.Reader, error) {
	if ct := p.ContentType; contain(ct, "text") {
		return charset.NewReader(reader, ct)
	}
	return reader, nil
}
开发者ID:sakeven,项目名称:spidergo,代码行数:10,代码来源:page.go


示例9: AutoToUTF8

// AutoToUTF8 replaces resp.Body with a Body that reads through a
// charset-converting reader chosen from the Content-Type header.
//
// When downloading with the surf kernel, automatic transcoding to UTF-8 can be
// attempted; with the phantomjs kernel no transcoding is needed (the content
// is already UTF-8).
//
// If no converting reader can be created, resp is left untouched and the error
// is returned.
func AutoToUTF8(resp *http.Response) error {
	contentType := resp.Header.Get("Content-Type")
	converted, err := charset.NewReader(resp.Body, contentType)
	if err != nil {
		return err
	}
	// Keep the original body as the closer, read through the converter.
	resp.Body = &Body{
		ReadCloser: resp.Body,
		Reader:     converted,
	}
	return nil
}
开发者ID:ZenithDandelion,项目名称:pholcus,代码行数:12,代码来源:util.go


示例10: parseXML

// parseXML decodes the XML document in content into v, converting non-UTF-8
// documents to UTF-8 based on the encoding named in the XML declaration.
func parseXML(content []byte, v interface{}) error {
	d := xml.NewDecoder(bytes.NewReader(content))
	d.CharsetReader = func(s string, r io.Reader) (io.Reader, error) {
		// Converts GBK to UTF-8 (decoded as GB18030).
		if s == "GBK" {
			return transform.NewReader(r, simplifiedchinese.GB18030.NewDecoder()), nil
		}
		// s is an encoding label, not a Content-Type header value, so it must
		// be looked up with NewReaderLabel; charset.NewReader would not read
		// it as a charset.
		return charset.NewReaderLabel(s, r)
	}
	return d.Decode(v)
}
开发者ID:ssskip,项目名称:pull,代码行数:12,代码来源:feed.go


示例11: ParseResponse

// ParseResponse converts the response body to UTF-8 and passes it to
// ParseReader together with toks.
//
// It returns ErrResponseBodyIsNotHTML when IsTextHTML rejects the Content-Type
// and ErrResponseBodyIsEmpty when the response has no body. The caller is
// responsible for closing the body: defer response.Body.Close().
func ParseResponse(response *http.Response, toks ...Tok) error {
	ct := response.Header.Get("Content-Type")
	switch {
	case !IsTextHTML(ct):
		return ErrResponseBodyIsNotHTML
	case response.Body == nil:
		return ErrResponseBodyIsEmpty
	}

	utf8Body, err := charset.NewReader(response.Body, ct)
	if err != nil {
		return err
	}
	return ParseReader(utf8Body, toks...)
}
开发者ID:linkosmos,项目名称:tokeq,代码行数:16,代码来源:html.go


示例12: changeCharsetEncodingAuto

// changeCharsetEncodingAuto reads sor to the end, converting it to UTF-8 with
// golang.org/x/net/html/charset (automatic transcoding), and returns the text.
//
// A contentTypeStr without any space in it is replaced by
// self.DefaultContentType. Errors are logged; on a detection failure sor is
// read unconverted, and on a read failure the bytes read so far are returned.
func (self *HttpDownloader) changeCharsetEncodingAuto(contentTypeStr string, sor io.ReadCloser) string {
	if !strings.Contains(contentTypeStr, " ") {
		contentTypeStr = self.DefaultContentType
	}

	var body io.Reader = sor
	if decoded, err := charset.NewReader(sor, contentTypeStr); err != nil {
		mlog.LogInst().LogError(err.Error())
	} else {
		body = decoded
	}

	raw, err := ioutil.ReadAll(body)
	if err != nil {
		mlog.LogInst().LogError(err.Error())
	}
	return string(raw)
}
开发者ID:aosen,项目名称:robot,代码行数:19,代码来源:httpdownloader.go


示例13: changeCharsetEncodingAuto

// changeCharsetEncodingAuto reads sor to the end and returns it as a string,
// converted to UTF-8 according to contentTypeStr when possible.
//
// Errors are logged, never returned: if no converting reader can be created
// sor is read as-is, and a read error still yields the bytes read so far.
func changeCharsetEncodingAuto(sor io.ReadCloser, contentTypeStr string) string {
	reader, err := charset.NewReader(sor, contentTypeStr)
	if err != nil {
		log.Error(err)
		reader = sor
	}

	raw, err := ioutil.ReadAll(reader)
	if err != nil {
		log.Error(err)
	}
	return string(raw)
}
开发者ID:qgweb,项目名称:new,代码行数:18,代码来源:tao.go


示例14: main

// main fetches link (or the first command-line argument) and prints the page
// title. For responses that are not text/html, or when showHeaders is set, it
// prints response headers instead, unless the Content-Type matches the exclude
// pattern.
func main() {
	if lang != "ru" {
		prefix = "Title"
		unkPrefix = "HTTP Header"
	}
	if len(flag.Args()) > 0 && len(flag.Arg(0)) > 0 {
		link = flag.Arg(0)
	}
	if len(link) <= 4 || !strings.HasPrefix(link, "http") {
		usage(lang)
		return
	}

	res, err := http.Get(link)
	if err != nil {
		// res is nil on error, so this check must precede any use of it.
		log.Fatal(err)
	}
	defer res.Body.Close()

	contType = res.Header.Get("Content-Type")
	if showHeaders || len(contType) >= 9 && !strings.HasPrefix(contType, "text/html") {
		if ok, _ := regexp.MatchString(exclude, contType); ok && exclude != "" {
			return
		}
		fmt.Print(unkPrefix + ":")
		for k, v := range res.Header {
			if showHeaders || (k == "Content-Type" || k == "Content-Length") {
				fmt.Printf("\n%s: %s", k, v)
			}
		}
		return
	}

	var title string
	if text, err := charset.NewReader(res.Body, contType); err == nil {
		// Best effort: a page without a readable title prints nothing.
		title, _ = getTag(text, "title")
	}
	title = strings.Trim(title, "\n ")
	if len(title) > 0 {
		fmt.Print(prefix + ": " + title)
	}
}
开发者ID:shizeeg,项目名称:jagod,代码行数:44,代码来源:gettitle.go


示例15: Fetch

// Fetch downloads rawurl with the configured user agent and timeout, converts
// the body to UTF-8 based on the Content-Type header, and passes it to
// c.Extract.
func (c *Crawler) Fetch(rawurl string) (*Article, error) {
	// Work on a copy: setting Timeout on http.DefaultClient itself would
	// change the shared global client for the whole process and race with
	// concurrent users.
	client := *http.DefaultClient
	client.Timeout = c.config.timeout

	req, err := http.NewRequest("GET", rawurl, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", c.config.browserUserAgent)

	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	reader, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
	if err != nil {
		return nil, err
	}
	return c.Extract(reader, rawurl)
}
开发者ID:suzuken,项目名称:GoOse,代码行数:20,代码来源:crawler.go


示例16: gethtmlpage

// gethtmlpage fetches the page at url and returns its body converted to UTF-8.
//
// Any HTTP, encoding or read failure is written to LogFile and then raised as
// a panic.
func gethtmlpage(url string) []byte {
	// fail logs the error under the given label and panics with that label.
	fail := func(what string, err error) {
		LogFile.Println(what+":", err)
		panic(what)
	}

	resp, err := http.Get(url)
	if err != nil {
		fail("HTTP error", err)
	}
	defer resp.Body.Close()

	// This is where the most interesting part begins: pick a decoder from the
	// Content-Type header and read the body through it.
	decoded, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
	if err != nil {
		fail("Encoding error", err)
	}
	page, err := ioutil.ReadAll(decoded)
	if err != nil {
		fail("IO error", err)
	}
	return page
}
开发者ID:kaefik,项目名称:go-bot-price,代码行数:21,代码来源:tovar.go


示例17: changeCharsetEncodingAuto

// changeCharsetEncodingAuto determines the charset automatically with
// golang.org/x/net/html/charset, reads the response body sor to the end and
// returns it converted to UTF-8.
//
// Errors are only logged. If no converting reader can be created, sor is read
// unconverted.
func changeCharsetEncodingAuto(sor io.ReadCloser, contentTypeStr string) string {
	var reader io.Reader = sor
	decoded, err := charset.NewReader(sor, contentTypeStr)
	if err == nil {
		reader = decoded
	} else {
		logs.Log.Error("%v", err)
	}

	raw, readErr := ioutil.ReadAll(reader)
	if readErr != nil {
		// For gb2312 an error can be returned here, e.g.
		// "simplifiedchinese: invalid GBK encoding". The bytes read so far are
		// still returned rather than "".
		logs.Log.Error("%v", readErr)
	}
	//e,name,certain := charset.DetermineEncoding(sorbody,contentTypeStr)

	return string(raw)
}
开发者ID:no2key,项目名称:pholcus-1,代码行数:22,代码来源:response.go


示例18: initText

// initText reads the response body, converted to UTF-8 where possible, into
// self.text and closes the body.
//
// The charset is determined automatically with golang.org/x/net/html/charset
// from the Content-Type header. If that fails a warning is logged and the body
// is read unconverted. If reading fails the error is logged and self.text is
// left unchanged.
func (self *Context) initText() {
	defer self.Response.Body.Close()

	body := self.Response.Body
	contentType := self.Response.Header.Get("Content-Type")

	reader, err := charset.NewReader(body, contentType)
	if err != nil {
		logs.Log.Warning(err.Error())
		reader = body
	}

	raw, err := ioutil.ReadAll(reader)
	if err != nil {
		// For gb2312 an error can be returned here, e.g.
		// "simplifiedchinese: invalid GBK encoding".
		logs.Log.Error(err.Error())
		return
	}
	//e,name,certain := charset.DetermineEncoding(sorbody,self.Response.Header.Get("Content-Type"))

	self.text = util.Bytes2String(raw)
}
开发者ID:clock145,项目名称:pholcus,代码行数:22,代码来源:context.go


示例19: changeCharsetEncodingAuto

// changeCharsetEncodingAuto determines the charset automatically with
// golang.org/x/net/html/charset, reads the response body sor to the end and
// returns it converted to UTF-8.
//
// Failures are reported through reporter.Log and never returned: without a
// converting reader sor is read as-is, and after a read error the bytes read
// so far are returned.
func (self *HttpDownloader) changeCharsetEncodingAuto(contentTypeStr string, sor io.ReadCloser) string {
	src, err := charset.NewReader(sor, contentTypeStr)
	if err != nil {
		reporter.Log.Println(err.Error())
		src = sor
	}

	content, err := ioutil.ReadAll(src)
	if err != nil {
		// For gb2312 an error can be returned here, e.g.
		// "simplifiedchinese: invalid GBK encoding"; it is logged and the
		// partial content is kept.
		reporter.Log.Println(err.Error())
	}
	//e,name,certain := charset.DetermineEncoding(sorbody,contentTypeStr)

	return string(content)
}
开发者ID:houzhenggang,项目名称:pholcus,代码行数:22,代码来源:downloader_http.go


示例20: Do

// Do executes the prepared request and returns the response with its body
// read into a string.
//
// Only status 200 is accepted; any other status yields an error. A body sent
// with "Content-Encoding: gzip" is decompressed; otherwise the body is
// converted to UTF-8 according to the Content-Type header. On every error the
// returned Response is built with a nil *http.Response and an empty body.
//
// NOTE(review): the gzip path returns the decompressed bytes without charset
// conversion, as before. Confirm whether that is intended.
func (this *Curl) Do() (*Response, error) {
	resp, err := this.client.Do(this.req.Request)
	if err != nil {
		return NewResponse(nil, this.req.Url, ""), err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return NewResponse(nil, this.req.Url, ""), errors.New(fmt.Sprintf("Response StatusCode: %d", resp.StatusCode))
	}

	var reader io.Reader
	if resp.Header.Get("Content-Encoding") == "gzip" {
		// A body that is not valid gzip must be reported; ignoring the error
		// would leave a nil reader that panics on first use.
		zr, err := gzip.NewReader(resp.Body)
		if err != nil {
			return NewResponse(nil, this.req.Url, ""), err
		}
		defer zr.Close()
		reader = zr
	} else {
		reader, err = charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
		if err != nil {
			return NewResponse(nil, this.req.Url, ""), err
		}
	}

	// ReadAll keeps exactly the bytes that were read; the previous manual loop
	// appended the whole 1024-byte buffer on every pass, padding the body with
	// stale or zero bytes.
	bodyByte, err := ioutil.ReadAll(reader)
	if err != nil {
		return NewResponse(nil, this.req.Url, ""), err
	}
	return NewResponse(resp, this.req.Url, string(bodyByte)), nil
}
开发者ID:zhangxiaoyang,项目名称:goDataAccess,代码行数:40,代码来源:curl.go



注:本文中的golang.org/x/net/html/charset.NewReader函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Golang http2.ConfigureServer函数代码示例发布时间:2022-05-28
下一篇:
Golang charset.Lookup函数代码示例发布时间:2022-05-28
热门推荐
热门话题
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap