本文整理汇总了Golang中golang.org/x/net/html.Tokenizer类的典型用法代码示例。如果您正苦于以下问题：Golang Tokenizer类的具体用法？Golang Tokenizer怎么用？Golang Tokenizer使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Tokenizer类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Golang代码示例。
示例1: setEndTagRaw
// setEndTagRaw stores the tokenizer's current raw token text in
// parent.endTagRaw and returns "" when parent is non-nil and its tagName
// equals tagName. In every other case nothing is stored and tagName is
// returned unchanged.
func setEndTagRaw(tokenizer *html.Tokenizer, parent *tagElement, tagName string) string {
	if parent == nil || parent.tagName != tagName {
		return tagName
	}
	parent.endTagRaw = string(tokenizer.Raw())
	return ""
}
开发者ID:blevesearch,项目名称:hugoidx,代码行数:8,代码来源:parser.go
示例2: flushTagToken
// flushTagToken serializes the tag token the tokenizer is currently positioned
// on, appends it to *htmlBuf and returns the tag name.
//
// Every attribute is written as key="value". When tagAttrToProxy marks the
// attribute for this tag, its value is first rewritten through GetProxiedUrl
// (with url as the second argument). If the tag is "head", the markup returned
// by getJsHookTag is appended directly after the tag.
//
// TagAttr hands back unescaped attribute values, so each value is escaped
// again before it is placed between double quotes; otherwise a value holding
// '"', '<' or '&' would yield broken (or injectable) markup.
func flushTagToken(htmlBuf *[]byte, tz *html.Tokenizer, url string) string {
	out := append(*htmlBuf, '<')

	rawName, hasAttr := tz.TagName()
	name := string(rawName)
	out = append(out, name...)

	for hasAttr {
		attrKey, attrValue, hasMore := tz.TagAttr()

		value := string(attrValue)
		if tagAttrToProxy[name][string(attrKey)] {
			value = string(GetProxiedUrl(value, url))
		}

		out = append(out, ' ')
		out = append(out, attrKey...)
		out = append(out, '=', '"')
		out = append(out, html.EscapeString(value)...)
		out = append(out, '"')

		hasAttr = hasMore
	}

	out = append(out, '>')
	if name == "head" {
		out = append(out, []byte(getJsHookTag())...)
	}

	*htmlBuf = out
	return name
}
开发者ID:gongshw,项目名称:lighthouse,代码行数:28,代码来源:html.go
示例3: getInclude
// getInclude builds the placeholder markers for an include element from its
// attributes.
//
// The "src" attribute is mandatory; its value is trimmed of surrounding
// whitespace and of one leading '#'. The optional "required" attribute is
// parsed with strconv.ParseBool and defaults to false.
//
// For a required include only startMarker is set ("§[> src]§") and endMarker
// is empty. For an optional include startMarker is "§[#> src]§" and endMarker
// is "§[/src]§".
//
// An error is returned when "src" is missing or "required" is not a valid
// boolean; z is only used to quote the raw token in those error messages.
func getInclude(z *html.Tokenizer, attrs []html.Attribute) (startMarker, endMarker string, err error) {
	src, hasSrc := getAttr(attrs, "src")
	if !hasSrc {
		return "", "", fmt.Errorf("include definition without src %s", z.Raw())
	}
	srcString := strings.TrimPrefix(strings.TrimSpace(src.Val), "#")

	required := false
	if r, hasRequired := getAttr(attrs, "required"); hasRequired {
		required, err = strconv.ParseBool(r.Val)
		if err != nil {
			return "", "", fmt.Errorf("error parsing bool in %s: %s", z.Raw(), err.Error())
		}
	}

	if required {
		return fmt.Sprintf("§[> %s]§", srcString), "", nil
	}
	return fmt.Sprintf("§[#> %s]§", srcString), fmt.Sprintf("§[/%s]§", srcString), nil
}
开发者ID:tarent,项目名称:lib-compose,代码行数:26,代码来源:html_content_parser.go
示例4: skipSubtreeIfUicRemove
// skipSubtreeIfUicRemove reports whether the current element carries the
// UicRemove attribute and, if so, consumes the element's subtree from z.
//
// It returns false without touching z when the attribute is absent. When the
// element is self-closing (per isSelfClosingTag) there is nothing to consume
// and it returns true immediately. Otherwise tokens are read until the end
// tag that balances the current element is consumed, or until the tokenizer
// reports an error token.
func skipSubtreeIfUicRemove(z *html.Tokenizer, tt html.TokenType, tagName string, attrs []html.Attribute) bool {
	if _, marked := getAttr(attrs, UicRemove); !marked {
		return false
	}
	if isSelfClosingTag(tagName, tt) {
		return true
	}

	nesting := 0
	for {
		next := z.Next()
		name, _ := z.TagName()

		switch next {
		case html.ErrorToken:
			return true
		case html.StartTagToken:
			// Self-closing start tags have no matching end tag to wait for.
			if !isSelfClosingTag(string(name), next) {
				nesting++
			}
		case html.EndTagToken:
			if nesting--; nesting < 0 {
				return true
			}
		}
	}
}
开发者ID:tarent,项目名称:lib-compose,代码行数:28,代码来源:html_content_parser.go
示例5: advanceToTextToken
// advanceToTextToken reads tokens from z until it meets a text token and
// returns a pointer to a copy of it. It returns nil when the tokenizer
// reports an error token first.
func advanceToTextToken(z *html.Tokenizer) *html.Token {
	for kind := z.Next(); kind != html.ErrorToken; kind = z.Next() {
		if kind == html.TextToken {
			tok := z.Token()
			return &tok
		}
	}
	return nil
}
开发者ID:ericdaugherty,项目名称:gotsport-scraper,代码行数:13,代码来源:scraper.go
示例6: readAttributes
// readAttributes drains the remaining attributes of the tokenizer's current
// tag into buff and returns the filled slice. buff is truncated to length
// zero first, so its backing array is reused across calls. Attributes whose
// key comes back nil from TagAttr are skipped.
func readAttributes(z *html.Tokenizer, buff []html.Attribute) []html.Attribute {
	out := buff[:0]
	more := true
	for more {
		var k, v []byte
		k, v, more = z.TagAttr()
		if k == nil {
			continue
		}
		out = append(out, html.Attribute{Key: string(k), Val: string(v)})
	}
	return out
}
开发者ID:tarent,项目名称:lib-compose,代码行数:13,代码来源:html_content_parser.go
示例7: readNameAndLink
// readNameAndLink consumes the next two tokens from t.
//
// If the first one is a start tag with attributes and its first attribute key
// equals "href" (case-insensitively), that attribute's value is stored in
// item.siteURL. If the second one is a text token, its text is stored in
// item.name.
func (item *AnimeConventionItem) readNameAndLink(t *html.Tokenizer) {
	if t.Next() == html.StartTagToken {
		if _, hasAttr := t.TagName(); hasAttr {
			key, val, _ := t.TagAttr()
			if strings.EqualFold(string(key), "href") {
				item.siteURL = string(val)
			}
		}
	}

	if t.Next() == html.TextToken {
		item.name = string(t.Text())
	}
}
开发者ID:John-zhy,项目名称:First-Go-Project---A-Crawler,代码行数:13,代码来源:AnimeConventionCrawler.go
示例8: parse2
// parse2 reads the token stream of z and collects games into a Schedule.
//
// A <font class="PageHeading"> start tag sets the current date to the data of
// the token that follows it. A <tr> start tag whose bgcolor is "#ffffff" or
// "#f5f5f5" is handed to parseGame together with the current date, and the
// resulting game is appended to the schedule. The schedule is returned once
// the tokenizer reports an error token; an error from parseGame aborts
// parsing and is returned with a nil schedule.
func parse2(z *html.Tokenizer) (*Schedule, error) {
	result := &Schedule{}
	var date string

	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			return result, nil
		}
		if kind != html.StartTagToken {
			continue
		}

		tok := z.Token()
		switch {
		case isTokenTagWithAttr("font", "class", "PageHeading", &tok, z):
			z.Next()
			date = z.Token().Data
		case isTokenTagWithAttr("tr", "bgcolor", "#ffffff", &tok, z) || isTokenTagWithAttr("tr", "bgcolor", "#f5f5f5", &tok, z):
			game, err := parseGame(date, z)
			if err != nil {
				return nil, err
			}
			result.Games = append(result.Games, game)
		}
	}
}
开发者ID:ericdaugherty,项目名称:gotsport-scraper,代码行数:26,代码来源:scraper.go
示例9: AttrMap
// AttrMap collects the attributes of the tokenizer's current tag into a
// key/value map. hasAttr is the flag returned by TagName; when it is false an
// empty map is returned and the tokenizer is not touched.
// Calling it is only meaningful while a start tag or self-closing tag token
// is being processed.
func AttrMap(hasAttr bool, z *html.Tokenizer) map[string]string {
	result := map[string]string{}
	for more := hasAttr; more; {
		var key, val []byte
		key, val, more = z.TagAttr()
		result[string(key)] = string(val)
	}
	return result
}
开发者ID:tborg,项目名称:metascraper,代码行数:16,代码来源:page.go
示例10: Parse
// Parse walks the token stream of t and dispatches to the item's read*
// helpers, which fill in the individual fields.
//
//   - Text tokens equal to "Advance Rates:" or "At-Door Rates:" trigger the
//     matching rate reader.
//   - A tag named "big" (case-insensitive) triggers readResgiterNowurl.
//   - For any other tag with attributes, only the first attribute is
//     inspected: if its key is "itemprop" (case-insensitive), the reader
//     selected by its value is invoked.
//
// Parsing stops at the first error token (which includes the regular end of
// input). Nothing is reported from here; callers that need the cause can
// inspect t.Err() afterwards.
func (item *AnimeConventionItem) Parse(t *html.Tokenizer) {
	for {
		switch t.Next() {
		case html.ErrorToken:
			// The previous code built an error value with fmt.Errorf and
			// discarded it, which had no effect; the dead call is gone.
			return

		case html.TextToken:
			switch string(t.Text()) {
			case "Advance Rates:":
				item.readadvanceRate(t)
			case "At-Door Rates:":
				item.readatDoorRate(t)
			}

		case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
			tag, hasAttr := t.TagName()
			if strings.EqualFold(string(tag), "big") {
				item.readResgiterNowurl(t)
				continue
			}
			if !hasAttr {
				continue
			}

			key, val, hasMore := t.TagAttr()
			if !strings.EqualFold(string(key), "itemprop") {
				continue
			}

			switch string(val) {
			case "description":
				item.readDescription(t)
			case "latitude":
				item.readLatitude(t)
			case "longitude":
				item.readLongitude(t)
			case "startDate":
				item.readStartDate(t)
			case "endDate":
				item.readEndDate(t)
			case "location":
				item.readLocation(t)
			case "addressLocality":
				item.readCity(t)
			case "addressRegion":
				item.readState(t)
			case "addressCountry":
				// hasMore tells the reader whether further attributes
				// follow the itemprop attribute on this tag.
				item.readCountry(t, hasMore)
			case "name":
				item.readNameAndLink(t)
			}
		}
	}
}
开发者ID:John-zhy,项目名称:First-Go-Project---A-Crawler,代码行数:50,代码来源:AnimeConventionCrawler.go
示例11: getMatchInfoTitle
// getMatchInfoTitle scans z for the first <title> start tag and returns the
// text that immediately follows it.
//
// If the input ends before a <title> tag is found, or the token after the
// tag is not a text token (for example an empty <title></title>), the failure
// is logged through Error and "" is returned. Previously the token following
// <title> was returned unchecked, so an empty title yielded the end tag's
// name ("title") as the result.
func getMatchInfoTitle(z *html.Tokenizer) string {
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			break
		}
		if tt != html.StartTagToken || z.Token().Data != "title" {
			continue
		}

		// Only a text token directly after <title> carries the title.
		if z.Next() == html.TextToken {
			return z.Token().Data
		}
		break
	}

	// If we reached here something went wrong.
	Error.Printf("Could not get title...")
	return ""
}
开发者ID:Newbrict,项目名称:EzSkins,代码行数:28,代码来源:scrape.go
示例12: advanceToStartTag
// advanceToStartTag reads tokens from z until it meets a start tag whose data
// equals tagName and returns a pointer to a copy of that token. It returns
// nil when the tokenizer reports an error token first.
func advanceToStartTag(tagName string, z *html.Tokenizer) *html.Token {
	for {
		kind := z.Next()
		if kind == html.ErrorToken {
			return nil
		}
		if kind != html.StartTagToken {
			continue
		}
		if tok := z.Token(); tok.Data == tagName {
			return &tok
		}
	}
}
开发者ID:ericdaugherty,项目名称:gotsport-scraper,代码行数:15,代码来源:scraper.go
示例13: readLocation
// readLocation advances t to the first start tag whose first attribute value
// equals "name" (case-insensitively) and, if the token right after it is a
// text token, stores that text in item.location.
//
// If the tokenizer reports an error token (including end of input) before
// such a tag is found, the method returns without modifying the item. The
// search loop used to ignore error tokens and therefore never terminated on
// input that lacked the expected tag.
func (item *AnimeConventionItem) readLocation(t *html.Tokenizer) {
search:
	for {
		switch t.Next() {
		case html.ErrorToken:
			return
		case html.StartTagToken:
			if _, hasAttr := t.TagName(); !hasAttr {
				continue
			}
			if _, val, _ := t.TagAttr(); strings.EqualFold(string(val), "name") {
				break search
			}
		}
	}

	if t.Next() == html.TextToken {
		item.location = string(t.Text())
	}
}
开发者ID:John-zhy,项目名称:First-Go-Project---A-Crawler,代码行数:15,代码来源:AnimeConventionCrawler.go
示例14: ParseToken
// ParseToken reads every token from z and passes each start tag token,
// together with tag, to checkElement. It returns once the tokenizer reports
// an error token, which is also how the end of the document is signalled.
func ParseToken(z *html.Tokenizer, tag string) {
	for kind := z.Next(); kind != html.ErrorToken; kind = z.Next() {
		if kind == html.StartTagToken {
			checkElement(z.Token(), tag)
		}
	}
}
开发者ID:hiromaily,项目名称:golibs,代码行数:17,代码来源:html.go
示例15: parseFragment
// parseFragment copies the raw markup read from z into a StringFragment until
// an end tag named UicFragment or UicTail, or the end of input, is reached.
//
// Two kinds of elements are not copied verbatim:
//   - a UicInclude start/self-closing tag is replaced by the markers returned
//     from getInclude;
//   - an element for which skipSubtreeIfUicRemove returns true is dropped
//     together with its subtree.
//
// The returned dependencies slice is always empty but non-nil. A tokenizer
// error other than io.EOF, or an error from getInclude, aborts parsing.
func parseFragment(z *html.Tokenizer) (f Fragment, dependencies []*FetchDefinition, err error) {
	attrs := make([]html.Attribute, 0, 10)
	dependencies = make([]*FetchDefinition, 0, 0)
	buff := bytes.NewBuffer(nil)

forloop:
	for {
		tt := z.Next()
		tag, _ := z.TagName()
		// Copy the raw bytes before reading attributes: readAttributes
		// modifies z.Raw if an attribute contains an '&'.
		raw := byteCopy(z.Raw())
		attrs = readAttributes(z, attrs)

		switch {
		case tt == html.ErrorToken:
			if z.Err() != io.EOF {
				return nil, nil, z.Err()
			}
			break forloop

		case tt == html.StartTagToken || tt == html.SelfClosingTagToken:
			if string(tag) == UicInclude {
				replaceTextStart, replaceTextEnd, includeErr := getInclude(z, attrs)
				if includeErr != nil {
					return nil, nil, includeErr
				}
				// The markers contain text taken from the document, so they
				// are written literally; using them as a Fprintf format
				// string would mangle any '%' they contain.
				buff.WriteString(replaceTextStart)
				// Enhancement: WriteOut sub tree, to allow alternative content
				// for optional includes.
				buff.WriteString(replaceTextEnd)
				continue
			}
			if skipSubtreeIfUicRemove(z, tt, string(tag), attrs) {
				continue
			}

		case tt == html.EndTagToken:
			if string(tag) == UicFragment || string(tag) == UicTail {
				break forloop
			}
		}

		buff.Write(raw)
	}

	return StringFragment(buff.String()), dependencies, nil
}
开发者ID:tarent,项目名称:lib-compose,代码行数:45,代码来源:html_content_parser.go
示例16: parseDivX86
// parseDivX86 inspects the attributes of the tokenizer's current tag and, for
// each "class" attribute, stores the text obtained from getText/getTextR in
// the field of in that corresponds to the class value.
//
// A class value containing "intrinsic" finishes the current intrinsic via
// FinishX86 and returns a fresh one from NewIntrinsic; in every other case
// in itself is returned.
//
// "param_type" opens a pending parameter in in.cParam and "param_name"
// completes it, appending it to in.Params unless a parameter of that name is
// already present. Other attribute keys and unknown class values are ignored.
func parseDivX86(z *html.Tokenizer, in *Intrinsic) *Intrinsic {
	for more := true; more; {
		var k, v []byte
		k, v, more = z.TagAttr()
		if string(k) != "class" {
			continue
		}

		val := string(v)
		if strings.Contains(val, "intrinsic") {
			in.FinishX86()
			return NewIntrinsic()
		}

		switch val {
		case "cpuid":
			in.CpuID = getText(z)
		case "instruction":
			in.Instruction = strings.ToUpper(getText(z))
		case "rettype":
			in.RetType = fixTypeX86(getText(z))
		case "param_type":
			in.cParam = &Param{Type: fixTypeX86(getText(z))}
		case "param_name":
			name := getText(z)
			// A name without a preceding "param_type" used to dereference a
			// nil pointer; keep the name with an empty type instead.
			if in.cParam == nil {
				in.cParam = &Param{}
			}
			in.cParam.Name = name
			if !in.Params.HasParam(in.cParam.Name) {
				in.Params = append(in.Params, *in.cParam)
			}
			in.cParam = nil
		case "description":
			in.Description = strings.TrimSpace(getTextR(z))
		case "name":
			in.OrgName = getText(z)
			in.Name = fixFuncNameX86(in.OrgName)
		case "operation":
			in.Operation = strings.TrimSpace(getText(z))
		}
	}
	return in
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:43,代码来源:parseintrin.go
示例17: parseHead
// parseHead collects the raw markup read from z up to (not including) the
// closing head tag, or up to the end of input, and stores it in c.head.
//
// Elements for which skipSubtreeIfUicRemove returns true are left out. A
// <script> whose "type" attribute has the value ScriptTypeMeta is not copied
// either; it is passed to parseMetaJson instead. The collected markup is
// trimmed of leading and trailing spaces and newlines, and c.head is only set
// when something remains. A tokenizer error other than io.EOF, or an error
// from parseMetaJson, is returned.
func (parser *HtmlContentParser) parseHead(z *html.Tokenizer, c *MemoryContent) error {
	var collected bytes.Buffer
	attrList := make([]html.Attribute, 0, 10)

scan:
	for {
		tokenType := z.Next()
		name, _ := z.TagName()
		// Take the copy before reading attributes: readAttributes modifies
		// z.Raw if an attribute contains an '&'.
		rawCopy := byteCopy(z.Raw())
		attrList = readAttributes(z, attrList)

		switch tokenType {
		case html.ErrorToken:
			if err := z.Err(); err != io.EOF {
				return err
			}
			break scan

		case html.StartTagToken, html.SelfClosingTagToken:
			if skipSubtreeIfUicRemove(z, tokenType, string(name), attrList) {
				continue
			}
			if string(name) == "script" && attrHasValue(attrList, "type", ScriptTypeMeta) {
				if err := parseMetaJson(z, c); err != nil {
					return err
				}
				continue
			}

		case html.EndTagToken:
			if string(name) == "head" {
				break scan
			}
		}

		collected.Write(rawCopy)
	}

	if trimmed := strings.Trim(collected.String(), " \n"); trimmed != "" {
		c.head = StringFragment(trimmed)
	}
	return nil
}
开发者ID:tarent,项目名称:lib-compose,代码行数:42,代码来源:html_content_parser.go
示例18: parseMetaJson
// parseMetaJson expects the next token of z to be a text token holding JSON,
// unmarshals that text into c.meta and then requires the following token to
// be the closing </script> tag. An error is returned when either expectation
// fails or the JSON cannot be decoded.
func parseMetaJson(z *html.Tokenizer, c *MemoryContent) error {
	if kind := z.Next(); kind != html.TextToken {
		return fmt.Errorf("expected text node for meta json, but found %v, (%s)", kind.String(), z.Raw())
	}

	if err := json.Unmarshal(z.Text(), &c.meta); err != nil {
		return fmt.Errorf("error while parsing json from meta json element: %v", err.Error())
	}

	closing := z.Next()
	name, _ := z.TagName()
	if closing == html.EndTagToken && string(name) == "script" {
		return nil
	}
	return fmt.Errorf("Tag not properly ended. Expected </script>, but found %s", z.Raw())
}
开发者ID:tarent,项目名称:lib-compose,代码行数:20,代码来源:html_content_parser.go
示例19: getQ
// getQ advances the tokenizer by four tokens and sends the text of the fourth
// one on ch. The token types are not checked. tknzer is received by value, so
// Next is invoked on this function's own copy of the tokenizer struct.
func getQ(tknzer html.Tokenizer, ch chan string) {
	const skip = 4
	for i := 0; i < skip; i++ {
		tknzer.Next()
	}
	text := string(tknzer.Text())
	ch <- text
}
开发者ID:carol-hsu,项目名称:go-study,代码行数:7,代码来源:web-crawler.go
示例20: parseTableX86
// parseTableX86 reads a performance table from z into in.Performance, which
// is reset to an empty map first, and returns in.
//
// Each <tr> is expected to hold the architecture name, latency and throughput
// in its first three <td> cells; the row is stored under its architecture
// name once the third cell has been read. Parsing ends at the closing
// </table> tag or when the tokenizer reports an error token, whichever comes
// first. A stray </tr> or a nested <table> start tag is treated as a broken
// invariant and panics, as before.
func parseTableX86(z *html.Tokenizer, in *Intrinsic) *Intrinsic {
	in.Performance = make(map[string]Timing)
	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			return in
		case html.StartTagToken, html.EndTagToken:
			tn, _ := z.TagName()
			switch strings.ToLower(string(tn)) {
			case "tr":
				if tt != html.StartTagToken {
					panic("tr ended")
				}
				if !readTimingRowX86(z, in) {
					// Input ended inside the row; stop like the outer loop
					// does on an error token.
					return in
				}
			case "table":
				if tt != html.EndTagToken {
					panic("table started")
				}
				return in
			}
		}
	}
}

// readTimingRowX86 consumes the cells of one table row up to its closing
// </tr> and records the row in in.Performance. It reports false when the
// tokenizer hits an error token before the row is closed; the row loop used
// to lack this check and never terminated on truncated input.
func readTimingRowX86(z *html.Tokenizer, in *Intrinsic) bool {
	var p Timing
	col := 0
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			return false
		}
		tn, _ := z.TagName()
		name := strings.ToLower(string(tn))

		switch {
		case tt == html.EndTagToken && name == "tr":
			return true
		case tt == html.StartTagToken && name == "td":
			switch col {
			case 0:
				p.Arch = getText(z)
			case 1:
				// Parse errors are deliberately ignored: the value returned
				// by ParseFloat is stored as is.
				p.Latency, _ = strconv.ParseFloat(getText(z), 64)
			case 2:
				p.Throughput, _ = strconv.ParseFloat(getText(z), 64)
				in.Performance[p.Arch] = p
			}
			col++
		}
	}
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:50,代码来源:parseintrin.go
注：本文中的golang.org/x/net/html.Tokenizer类示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论