• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Golang charset.Lookup函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Golang中golang.org/x/net/html/charset.Lookup函数的典型用法代码示例。如果您正苦于以下问题:Golang Lookup函数的具体用法?Golang Lookup怎么用?Golang Lookup使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了Lookup函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Golang代码示例。

示例1: transEnc

//「Golangで文字コード判定」qiita.com/nobuhito/items/ff782f64e32f7ed95e43
func transEnc(text string, encode string) (string, error) {
	body := []byte(text)
	var f []byte

	encodings := []string{"sjis", "utf-8"}
	if encode != "" {
		encodings = append([]string{encode}, encodings...)
	}
	for _, enc := range encodings {
		if enc != "" {
			ee, _ := charset.Lookup(enc)
			if ee == nil {
				continue
			}
			var buf bytes.Buffer
			ic := transform.NewWriter(&buf, ee.NewDecoder())
			_, err := ic.Write(body)
			if err != nil {
				continue
			}
			err = ic.Close()
			if err != nil {
				continue
			}
			f = buf.Bytes()
			break
		}
	}
	return string(f), nil
}
开发者ID:ikawaha,项目名称:dajarep,代码行数:31,代码来源:main.go


示例2: defaultCharsetReader

func defaultCharsetReader(cs string, input io.Reader) (io.Reader, error) {
	e, _ := charset.Lookup(cs)
	if e == nil {
		return nil, fmt.Errorf("cannot decode charset %v", cs)
	}
	return transform.NewReader(input, e.NewDecoder()), nil
}
开发者ID:kissthink,项目名称:goread,代码行数:7,代码来源:utils.go


示例3: Encoding

// Encoding returns an Encoding for the response body.
func (res *Response) Encoding() (encoding.Encoding, error) {
	enc, _ := charset.Lookup(res.Charset)
	if enc == nil {
		return nil, fmt.Errorf("no encoding found for %s", res.Charset)
	}
	return enc, nil
}
开发者ID:moomerman,项目名称:whois,代码行数:8,代码来源:response.go


示例4: ToUtf8WithErr

func ToUtf8WithErr(content []byte) (error, string) {
	charsetLabel, err := DetectEncoding(content)
	if err != nil {
		return err, ""
	}

	if charsetLabel == "utf8" {
		return nil, string(content)
	}

	encoding, _ := charset.Lookup(charsetLabel)

	if encoding == nil {
		return fmt.Errorf("unknow char decoder %s", charsetLabel), string(content)
	}

	result, n, err := transform.String(encoding.NewDecoder(), string(content))

	// If there is an error, we concatenate the nicely decoded part and the
	// original left over. This way we won't loose data.
	if err != nil {
		result = result + string(content[n:])
	}

	return err, result
}
开发者ID:wxiangbo,项目名称:gogs,代码行数:26,代码来源:template.go


示例5: to_utf8

// Shift-JIS -> UTF-8
func to_utf8(str string) (string, error) {
	body, err := ioutil.ReadAll(transform.NewReader(strings.NewReader(str), japanese.ShiftJIS.NewEncoder()))
	if err != nil {
		return "", err
	}

	var f []byte
	encodings := []string{"sjis", "utf-8"}
	for _, enc := range encodings {
		if enc != "" {
			ee, _ := charset.Lookup(enc)
			if ee == nil {
				continue
			}
			var buf bytes.Buffer
			ic := transform.NewWriter(&buf, ee.NewDecoder())
			_, err := ic.Write(body)
			if err != nil {
				continue
			}
			err = ic.Close()
			if err != nil {
				continue
			}
			f = buf.Bytes()
			break
		}
	}
	return string(f), nil
}
开发者ID:sharkattack51,项目名称:go_Shift-JIS_to_UTF-8,代码行数:31,代码来源:main.go


示例6: NewUTF8Reader

func NewUTF8Reader(label string, r io.Reader) (io.Reader, error) {
	e, _ := charset.Lookup(label)
	if e == nil {
		return nil, fmt.Errorf("unsupported charset: %q", label)
	}
	return transform.NewReader(r, unicode.BOMOverride(e.NewDecoder())), nil
}
开发者ID:fanyang01,项目名称:crawler,代码行数:7,代码来源:util.go


示例7: encodeString

func encodeString(input_str, encode string) (output string, err_out error) {
	enc, _ := charset.Lookup(encode)
	r := transform.NewReader(strings.NewReader(input_str), enc.NewEncoder())
	b, err := ioutil.ReadAll(r)
	if err != nil {
		return
	}
	return string(b), nil
}
开发者ID:wolfmetr,项目名称:go-dbf,代码行数:9,代码来源:dbfreader.go


示例8: parseHTML

func parseHTML(content []byte, cs string) (*html.Node, error) {
	var r io.Reader = bytes.NewReader(content)

	if cs != "utf-8" {
		e, _ := charset.Lookup(cs)
		r = transform.NewReader(r, e.NewDecoder())
	}

	return html.Parse(r)
}
开发者ID:vishnuvaradaraj,项目名称:redwood,代码行数:10,代码来源:prune.go


示例9: decodeCharset

// DecodeCharset detects charset of str decodes it.
func decodeCharset(str, label string) (nstr string, err error) {
	enc, _ := charset.Lookup(label)
	if enc == nil {
		enc, _, _ = charset.DetermineEncoding([]byte(str), "text/plain")
	}

	nstr, _, err = transform.String(enc.NewDecoder(), str)
	if err != nil {
		return nstr, err
	}

	return stripNonUTF8(nstr), nil
}
开发者ID:kaey,项目名称:mail,代码行数:14,代码来源:mail.go


示例10: EncodeReader

func EncodeReader(s io.Reader, enc string) ([]byte, error) {
	e, _ := charset.Lookup(enc)
	if e == nil {
		return nil, errors.New(fmt.Sprintf("unsupported charset: %q", enc))
	}
	var buf bytes.Buffer
	writer := transform.NewWriter(&buf, e.NewEncoder())
	_, err := io.Copy(writer, s)
	if err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
开发者ID:yuin,项目名称:charsetutil,代码行数:13,代码来源:charsetutil.go


示例11: scanContent

// scanContent scans the content of a document for phrases,
// and updates tally.
func (conf *config) scanContent(content []byte, contentType, cs string, tally map[rule]int) {
	if strings.Contains(contentType, "javascript") {
		conf.scanJSContent(content, tally)
		return
	}

	transformers := make([]transform.Transformer, 0, 3)
	if cs != "utf-8" {
		e, _ := charset.Lookup(cs)
		transformers = append(transformers, e.NewDecoder())
	}

	if strings.Contains(contentType, "html") {
		transformers = append(transformers, entityDecoder{})
	}
	transformers = append(transformers, new(wordTransformer))

	ps := newPhraseScanner(conf.ContentPhraseList, func(s string) {
		tally[rule{t: contentPhrase, content: s}]++
	})
	ps.scanByte(' ')

	var t transform.Transformer
	if len(transformers) == 1 {
		t = transformers[0]
	} else {
		t = transform.Chain(transformers...)
	}

	r := transform.NewReader(bytes.NewReader(content), t)

	buf := make([]byte, 4096)
	for {
		n, err := r.Read(buf)
		for _, c := range buf[:n] {
			ps.scanByte(c)
		}
		if err != nil {
			if err != io.EOF {
				log.Println("Error decoding page content:", err)
			}
			break
		}
	}

	ps.scanByte(' ')
}
开发者ID:ranivishnu,项目名称:redwood,代码行数:49,代码来源:phrase_scan.go


示例12: convertToUtf8

func convertToUtf8(s string) string {
	b := []byte(s)
	d := chardet.NewTextDetector()
	r, err := d.DetectBest(b)
	if err != nil {
		return fmt.Sprintf("<Can't detect string charset: %s>", err.Error())
	}
	encoding, _ := charset.Lookup(r.Charset)
	if encoding == nil {
		return fmt.Sprintf("<Can't find encoding: %s>", r.Charset)
	}
	str, _, err := transform.String(encoding.NewDecoder(), s)
	if err != nil {
		return fmt.Sprintf("<Can't convert string from encoding %s to UTF8: %s>", r.Charset, err.Error())
	}
	return str
}
开发者ID:dimitriss,项目名称:torrent2http,代码行数:17,代码来源:torrent2http.go


示例13: convToUTF8

func (r *Response) convToUTF8(preview []byte, query func(*url.URL) string) {
	// Convert to UTF-8
	if media.IsHTML(r.ContentType) {
		e, name, certain := charset.DetermineEncoding(
			preview, r.ContentType,
		)
		// according to charset package source, default unknown charset is windows-1252.
		if !certain && name == "windows-1252" {
			if e, name = charset.Lookup(query(r.URL)); e != nil {
				certain = true
			}
		}
		r.Charset, r.CertainCharset, r.Encoding = name, certain, e
		if name != "" && e != nil {
			r.Body, _ = util.NewUTF8Reader(name, r.Body)
		}
	}
}
开发者ID:fanyang01,项目名称:crawler,代码行数:18,代码来源:fetch.go


示例14: decode

func decode(data []byte, charsetName string) ([]byte, error) {
	encoding, _ := charset.Lookup(charsetName)
	if encoding == nil {
		return nil, fmt.Errorf("Unsupported charset: %v", charsetName)
	}

	reader := bytes.NewReader(data)
	var b bytes.Buffer
	writer := bufio.NewWriter(&b)

	decodeReader := transform.NewReader(reader, encoding.NewDecoder())
	if _, err := io.Copy(writer, decodeReader); err != nil {
		return nil, err
	}
	if err := writer.Flush(); err != nil {
		return nil, err
	}

	if isUTF8Charset(charsetName) {
		return stripBOM(b.Bytes()), nil
	}
	return b.Bytes(), nil
}
开发者ID:shiwano,项目名称:master,代码行数:23,代码来源:encoding.go


示例15: main

func main() {
	flag.Parse()

	var in io.Reader
	in = os.Stdin

	if *cs != "utf-8" {
		e, _ := charset.Lookup(*cs)
		in = transform.NewReader(in, e.NewDecoder())
	}

	s := bufio.NewScanner(in)
	for s.Scan() {
		line := s.Text()
		if strings.Contains(line, ">,<") {
			continue
		}

		endPhrase := strings.Index(line, "><")
		if endPhrase != -1 {
			phrase := line[:endPhrase+1]
			rest := line[endPhrase+2:]
			endScore := strings.Index(rest, ">")
			if endScore != -1 {
				score := rest[:endScore]
				rest = strings.TrimSpace(rest[endScore+1:])
				fmt.Println(phrase, score, rest)
				continue
			}
		}

		fmt.Println(line)
	}
	if err := s.Err(); err != nil {
		log.Println(err)
	}
}
开发者ID:ranivishnu,项目名称:redwood,代码行数:37,代码来源:convert.go


示例16: UTF8encode

// UTF8encode converts a string from the source character set to UTF-8, skipping invalid byte sequences
// @see http://stackoverflow.com/questions/32512500/ignore-illegal-bytes-when-decoding-text-with-go
func UTF8encode(raw string, sourceCharset string) string {
	enc, name := charset.Lookup(sourceCharset)
	if nil == enc {
		fmt.Println("Cannot convert from", sourceCharset, ":", name)
		return raw
	}

	dst := make([]byte, len(raw))
	d := enc.NewDecoder()

	var (
		in  int
		out int
	)
	for in < len(raw) {
		// Do the transformation
		ndst, nsrc, err := d.Transform(dst[out:], []byte(raw[in:]), true)
		in += nsrc
		out += ndst
		if err == nil {
			// Completed transformation
			break
		}
		if err == transform.ErrShortDst {
			// Our output buffer is too small, so we need to grow it
			t := make([]byte, (cap(dst)+1)*2)
			copy(t, dst)
			dst = t
			continue
		}
		// We're here because of at least one illegal character. Skip over the current rune
		// and try again.
		_, width := utf8.DecodeRuneInString(raw[in:])
		in += width
	}
	return string(dst)
}
开发者ID:quipo,项目名称:GoOse,代码行数:39,代码来源:charset.go


示例17: ParsePatch


//.........这里部分代码省略.........
			ranges := strings.Split(ss[1][1:], " ")
			leftLine, _ = com.StrTo(strings.Split(ranges[0], ",")[0][1:]).Int()
			if len(ranges) > 1 {
				rightLine, _ = com.StrTo(strings.Split(ranges[1], ",")[0]).Int()
			} else {
				log.Warn("Parse line number failed: %v", line)
				rightLine = leftLine
			}
			continue
		case line[0] == '+':
			curFile.Addition++
			diff.TotalAddition++
			diffLine := &DiffLine{Type: DIFF_LINE_ADD, Content: line, RightIdx: rightLine}
			rightLine++
			curSection.Lines = append(curSection.Lines, diffLine)
			continue
		case line[0] == '-':
			curFile.Deletion++
			diff.TotalDeletion++
			diffLine := &DiffLine{Type: DIFF_LINE_DEL, Content: line, LeftIdx: leftLine}
			if leftLine > 0 {
				leftLine++
			}
			curSection.Lines = append(curSection.Lines, diffLine)
		case strings.HasPrefix(line, "Binary"):
			curFile.IsBin = true
			continue
		}

		// Get new file.
		if strings.HasPrefix(line, DIFF_HEAD) {
			if isTooLong {
				break
			}

			beg := len(DIFF_HEAD)
			a := line[beg : (len(line)-beg)/2+beg]

			// In case file name is surrounded by double quotes(it happens only in git-shell).
			if a[0] == '"' {
				a = a[1 : len(a)-1]
				a = strings.Replace(a, `\"`, `"`, -1)
			}

			curFile = &DiffFile{
				Name:     a[strings.Index(a, "/")+1:],
				Index:    len(diff.Files) + 1,
				Type:     DIFF_FILE_CHANGE,
				Sections: make([]*DiffSection, 0, 10),
			}
			diff.Files = append(diff.Files, curFile)

			// Check file diff type.
			for scanner.Scan() {
				switch {
				case strings.HasPrefix(scanner.Text(), "new file"):
					curFile.Type = DIFF_FILE_ADD
					curFile.IsDeleted = false
					curFile.IsCreated = true
				case strings.HasPrefix(scanner.Text(), "deleted"):
					curFile.Type = DIFF_FILE_DEL
					curFile.IsCreated = false
					curFile.IsDeleted = true
				case strings.HasPrefix(scanner.Text(), "index"):
					curFile.Type = DIFF_FILE_CHANGE
					curFile.IsCreated = false
					curFile.IsDeleted = false
				}
				if curFile.Type > 0 {
					break
				}
			}
		}
	}

	for _, f := range diff.Files {
		buf.Reset()
		for _, sec := range f.Sections {
			for _, l := range sec.Lines {
				buf.WriteString(l.Content)
				buf.WriteString("\n")
			}
		}
		charsetLabel, err := base.DetectEncoding(buf.Bytes())
		if charsetLabel != "UTF-8" && err == nil {
			encoding, _ := charset.Lookup(charsetLabel)
			if encoding != nil {
				d := encoding.NewDecoder()
				for _, sec := range f.Sections {
					for _, l := range sec.Lines {
						if c, _, err := transform.String(d, l.Content); err == nil {
							l.Content = c
						}
					}
				}
			}
		}
	}
	return diff, nil
}
开发者ID:pecastro,项目名称:gogs,代码行数:101,代码来源:git_diff.go


示例18: ParsePatch


//.........这里部分代码省略.........
		}

		// Get new file.
		if strings.HasPrefix(line, DIFF_HEAD) {
			middle := -1

			// Note: In case file name is surrounded by double quotes (it happens only in git-shell).
			// e.g. diff --git "a/xxx" "b/xxx"
			hasQuote := line[len(DIFF_HEAD)] == '"'
			if hasQuote {
				middle = strings.Index(line, ` "b/`)
			} else {
				middle = strings.Index(line, " b/")
			}

			beg := len(DIFF_HEAD)
			a := line[beg+2 : middle]
			b := line[middle+3:]
			if hasQuote {
				a = string(git.UnescapeChars([]byte(a[1 : len(a)-1])))
				b = string(git.UnescapeChars([]byte(b[1 : len(b)-1])))
			}

			curFile = &DiffFile{
				Name:     a,
				Index:    len(diff.Files) + 1,
				Type:     DIFF_FILE_CHANGE,
				Sections: make([]*DiffSection, 0, 10),
			}
			diff.Files = append(diff.Files, curFile)
			if len(diff.Files) >= maxFiles {
				diff.IsIncomplete = true
				io.Copy(ioutil.Discard, reader)
				break
			}
			curFileLinesCount = 0

			// Check file diff type and is submodule.
			for {
				line, err := input.ReadString('\n')
				if err != nil {
					if err == io.EOF {
						isEOF = true
					} else {
						return nil, fmt.Errorf("ReadString: %v", err)
					}
				}

				switch {
				case strings.HasPrefix(line, "new file"):
					curFile.Type = DIFF_FILE_ADD
					curFile.IsCreated = true
				case strings.HasPrefix(line, "deleted"):
					curFile.Type = DIFF_FILE_DEL
					curFile.IsDeleted = true
				case strings.HasPrefix(line, "index"):
					curFile.Type = DIFF_FILE_CHANGE
				case strings.HasPrefix(line, "similarity index 100%"):
					curFile.Type = DIFF_FILE_RENAME
					curFile.IsRenamed = true
					curFile.OldName = curFile.Name
					curFile.Name = b
				}
				if curFile.Type > 0 {
					if strings.HasSuffix(line, " 160000\n") {
						curFile.IsSubmodule = true
					}
					break
				}
			}
		}
	}

	// FIXME: detect encoding while parsing.
	var buf bytes.Buffer
	for _, f := range diff.Files {
		buf.Reset()
		for _, sec := range f.Sections {
			for _, l := range sec.Lines {
				buf.WriteString(l.Content)
				buf.WriteString("\n")
			}
		}
		charsetLabel, err := base.DetectEncoding(buf.Bytes())
		if charsetLabel != "UTF-8" && err == nil {
			encoding, _ := charset.Lookup(charsetLabel)
			if encoding != nil {
				d := encoding.NewDecoder()
				for _, sec := range f.Sections {
					for _, l := range sec.Lines {
						if c, _, err := transform.String(d, l.Content); err == nil {
							l.Content = c
						}
					}
				}
			}
		}
	}
	return diff, nil
}
开发者ID:yweber,项目名称:gogs,代码行数:101,代码来源:git_diff.go


示例19: main


//.........这里部分代码省略.........
			case name == "separator":
				separator = argv[n+1]
				n++
			case name == "null":
				zeroFile = true
			case name == "null-data":
				zeroData = true
			case name == "help":
				usage(false)
			default:
				usage(true)
			}
		} else {
			args = append(args, argv[n])
		}
	}

	if len(args) == 0 {
		usage(true)
	}

	var err error
	var pattern interface{}
	if encs != "" {
		encodings = strings.Split(encs, ",")
	} else {
		enc_env := os.Getenv("JVGREP_ENCODINGS")
		if enc_env != "" {
			encodings = strings.Split(enc_env, ",")
		}
	}
	out_enc := os.Getenv("JVGREP_OUTPUT_ENCODING")
	if out_enc != "" {
		ee, _ := charset.Lookup(out_enc)
		if ee == nil {
			errorline(fmt.Sprintf("unknown encoding: %s", out_enc))
			os.Exit(1)
		}
		oc = transform.NewWriter(os.Stdout, ee.NewEncoder())
	}

	instr := ""
	argindex := 0
	if len(infile) > 0 {
		b, err := ioutil.ReadFile(infile)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
		instr = strings.TrimSpace(string(b))
	} else {
		instr = args[0]
		argindex = 1
	}
	if fixed {
		pattern = instr
	} else if perl {
		re, err := syntax.Parse(instr, syntax.Perl)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
		rec, err := syntax.Compile(re)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
开发者ID:koron,项目名称:jvgrep,代码行数:67,代码来源:jvgrep.go


示例20: doGrep

func doGrep(path string, f []byte, arg *GrepArg) {
	encs := encodings

	if ignorebinary {
		if bytes.IndexFunc(f, func(r rune) bool { return 0 < r && r < 0x9 }) != -1 {
			return
		}
	}

	if len(f) > 2 {
		if f[0] == 0xfe && f[1] == 0xff {
			arg.bom = f[0:2]
			f = f[2:]
		} else if f[0] == 0xff && f[1] == 0xfe {
			arg.bom = f[0:2]
			f = f[2:]
		}
	}
	if len(arg.bom) > 0 {
		if arg.bom[0] == 0xfe && arg.bom[1] == 0xff {
			encs = []string{"utf-16be"}
		} else if arg.bom[0] == 0xff && arg.bom[1] == 0xfe {
			encs = []string{"utf-16le"}
		}
	}

	for _, enc := range encs {
		if verbose {
			println("trying("+enc+"):", path)
		}
		if len(arg.bom) > 0 && enc != "utf-16be" && enc != "utf-16le" {
			continue
		}

		did := false
		var t []byte
		var n, l, size, next, prev int

		if enc != "" {
			if len(arg.bom) > 0 || bytes.IndexFunc(f, func(r rune) bool { return 0 < r && r < 0x9 }) == -1 {
				ee, _ := charset.Lookup(enc)
				if ee == nil {
					continue
				}
				var buf bytes.Buffer
				ic := transform.NewWriter(&buf, ee.NewDecoder())
				_, err := ic.Write(f)
				if err != nil {
					next = -1
					continue
				}
				lf := false
				if len(arg.bom) > 0 && len(f)%2 != 0 {
					ic.Write([]byte{0})
					lf = true
				}
				err = ic.Close()
				if err != nil {
					if verbose {
						println(err.Error())
					}
					next = -1
					continue
				}
				f = buf.Bytes()
				if lf {
					f = f[:len(f)-1]
				}
			}
		}
		size = len(f)
		if size == 0 {
			continue
		}

		for next != -1 {
			for {
				if next >= size {
					next = -1
					break
				}
				if f[next] == '\n' {
					break
				}
				next++
			}
			n++
			if next == -1 {
				t = f[prev:]
			} else {
				t = f[prev:next]
				prev = next + 1
				next++
			}

			l = len(t)
			if l > 0 && t[l-1] == '\r' {
				t = t[:l-1]
				l--
			}
//.........这里部分代码省略.........
开发者ID:koron,项目名称:jvgrep,代码行数:101,代码来源:jvgrep.go



注:本文中的golang.org/x/net/html/charset.Lookup函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Golang charset.NewReader函数代码示例发布时间:2022-05-28
下一篇:
Golang html.Tokenizer类代码示例发布时间:2022-05-28
热门推荐
热门话题
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap