• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Golang goquery.Document类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Golang中github.com/advancedlogic/goquery.Document的典型用法代码示例。如果您正苦于以下问题：Golang Document类的具体用法？Golang Document怎么用？Golang Document使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了Document类的16个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Golang代码示例。

示例1: cleanCites

// cleanCites passes every <cite> element found in doc to the parser's
// removeNode helper and returns the same document.
func (this *cleaner) cleanCites(doc *goquery.Document) *goquery.Document {
	doc.Find("cite").Each(func(_ int, cite *goquery.Selection) {
		this.config.parser.removeNode(cite)
	})
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:7,代码来源:cleaner.go


示例2: cleanDivs

// cleanDivs groups the <div> elements for which Children() is empty by their
// normalised text and passes every member of a group with more than one
// entry to the parser's removeNode helper.
// NOTE(review): presumably this targets repeated boilerplate blocks — confirm.
//
// The text is normalised by stripping leading and trailing spaces and tabs
// and lower-casing the result. The same doc is returned.
func (c *Cleaner) cleanDivs(doc *goquery.Document) *goquery.Document {
	framesNodes := make(map[string]*list.List)
	doc.Find("div").Each(func(i int, s *goquery.Selection) {
		if s.Children().Size() != 0 {
			return
		}
		// Strip spaces and tabs with a single cutset. Trimming them one after
		// the other leaves whitespace behind when the two are interleaved
		// (e.g. "\t foo" would keep its leading space).
		text := strings.ToLower(strings.Trim(s.Text(), " \t"))
		if framesNodes[text] == nil {
			framesNodes[text] = list.New()
		}
		framesNodes[text].PushBack(s)
	})
	for _, selections := range framesNodes {
		// The list length is the number of divs that share this text.
		if selections.Len() <= 1 {
			continue
		}
		for e := selections.Front(); e != nil; e = e.Next() {
			c.config.parser.removeNode(e.Value.(*goquery.Selection))
		}
	}
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:29,代码来源:cleaner.go


示例3: cleanBadTags

// cleanBadTags visits each direct child of <body> once per attribute name
// (id, class, name). Inside each child, every descendant carrying that
// attribute whose value satisfies matchNodeRegEx against REMOVENODES_RE is
// passed to the parser's removeNode helper. With debug enabled, each removal
// and each non-zero per-child total is logged. The same doc is returned.
func (this *cleaner) cleanBadTags(doc *goquery.Document) *goquery.Document {
	topLevel := doc.Find("body").Children()
	for _, attrName := range []string{"id", "class", "name"} {
		topLevel.Each(func(_ int, child *goquery.Selection) {
			removed := 0
			candidates := child.Find("*[" + attrName + "]")
			candidates.Each(func(_ int, candidate *goquery.Selection) {
				value, _ := candidate.Attr(attrName)
				if !this.matchNodeRegEx(value, REMOVENODES_RE) {
					return
				}
				if this.config.debug {
					log.Printf("Cleaning: Removing node with %s: %s\n", attrName, this.config.parser.name(attrName, candidate))
				}
				this.config.parser.removeNode(candidate)
				removed++
			})
			if this.config.debug && removed > 0 {
				log.Printf("%d naughty %s elements found", removed, attrName)
			}
		})
	}
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:26,代码来源:cleaner.go


示例4: cleanAside

// cleanAside passes every <aside> element found in doc to the parser's
// removeNode helper and returns the same document.
func (this *cleaner) cleanAside(doc *goquery.Document) *goquery.Document {
	doc.Find("aside").Each(func(_ int, asideNode *goquery.Selection) {
		this.config.parser.removeNode(asideNode)
	})
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:7,代码来源:cleaner.go


示例5: cleanFooter

// cleanFooter passes every <footer> element found in doc to the parser's
// removeNode helper and returns the same document.
func (this *cleaner) cleanFooter(doc *goquery.Document) *goquery.Document {
	doc.Find("footer").Each(func(_ int, footerNode *goquery.Selection) {
		this.config.parser.removeNode(footerNode)
	})
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:7,代码来源:cleaner.go


示例6: removeTags

// removeTags looks up each selector in *tags within doc and passes every
// matching element to the parser's removeNode helper. tags is dereferenced
// unconditionally, so it must not be nil. The same doc is returned.
func (c *Cleaner) removeTags(doc *goquery.Document, tags *[]string) *goquery.Document {
	selectors := *tags
	for idx := range selectors {
		matches := doc.Find(selectors[idx])
		matches.Each(func(_ int, match *goquery.Selection) {
			c.config.parser.removeNode(match)
		})
	}
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:9,代码来源:cleaner.go


示例7: cleanArticleTags

// cleanArticleTags calls the parser's delAttr helper for the id, name and
// class attributes (in that order) on every <article> element in doc, then
// returns the same document.
func (c *Cleaner) cleanArticleTags(doc *goquery.Document) *goquery.Document {
	attrs := [...]string{"id", "name", "class"}
	doc.Find("article").Each(func(_ int, article *goquery.Selection) {
		for idx := 0; idx < len(attrs); idx++ {
			c.config.parser.delAttr(article, attrs[idx])
		}
	})
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:10,代码来源:cleaner.go


示例8: cleanParaSpans

// cleanParaSpans rewrites, in place, every <span> whose parent's first node
// has DataAtom atom.P: the span's node gets the span's text as its Data and
// its Type set to html.TextNode. Spans with any other parent are left alone.
// The same doc is returned.
func (c *Cleaner) cleanParaSpans(doc *goquery.Document) *goquery.Document {
	doc.Find("span").Each(func(_ int, span *goquery.Selection) {
		parent := span.Parent()
		if parent == nil || parent.Length() <= 0 {
			return
		}
		if parent.Get(0).DataAtom != atom.P {
			return
		}
		target := span.Get(0)
		// The text is read while the node is still an element, before its
		// type is switched below.
		target.Data = span.Text()
		target.Type = html.TextNode
	})
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:12,代码来源:cleaner.go


示例9: cleanEMTags

// cleanEMTags passes every <em> element that contains no <img> descendant to
// the parser's dropTag helper. With debug enabled it logs the total number of
// <em> elements that were matched (not only those dropped). The same doc is
// returned.
func (c *Cleaner) cleanEMTags(doc *goquery.Document) *goquery.Document {
	ems := doc.Find("em")
	ems.Each(func(_ int, em *goquery.Selection) {
		if em.Find("img").Length() != 0 {
			return
		}
		c.config.parser.dropTag(em)
	})
	if !c.config.debug {
		return doc
	}
	log.Printf("Cleaning %d EM tags\n", ems.Size())
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:13,代码来源:cleaner.go


示例10: nodesToCheck

// nodesToCheck gathers the candidate nodes to search: all <p> matches, then
// all <pre> matches, then all <td> matches found beneath the document's
// children. The result is an empty, non-nil slice when nothing matches.
func (this *contentExtractor) nodesToCheck(doc *goquery.Document) []*goquery.Selection {
	output := []*goquery.Selection{}
	for _, tag := range [...]string{"p", "pre", "td"} {
		found := doc.Children().Find(tag)
		if found == nil {
			continue
		}
		found.Each(func(_ int, node *goquery.Selection) {
			output = append(output, node)
		})
	}
	return output
}
开发者ID:minond,项目名称:GoOse,代码行数:14,代码来源:extractor.go


示例11: removeScriptsStyle

// removeScriptsStyle passes every <script>, <noscript> and <style> element in
// doc to the parser's removeNode helper and returns the same document.
//
// With debug enabled it logs a start message and, when at least one element
// was matched, the number of matched elements.
func (c *Cleaner) removeScriptsStyle(doc *goquery.Document) *goquery.Document {
	if c.config.debug {
		log.Println("Starting to remove script tags")
	}
	scripts := doc.Find("script,noscript,style")
	scripts.Each(func(i int, s *goquery.Selection) {
		c.config.parser.removeNode(s)
	})
	// Each visits every matched element exactly once, so the selection size
	// is the removal count; no separate counter is needed.
	if removed := scripts.Size(); c.config.debug && removed > 0 {
		log.Printf("Removed %d script and style tags\n", removed)
	}
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:15,代码来源:cleaner.go


示例12: dropCaps

// dropCaps passes to the parser's dropTag helper every <span> whose class
// attribute contains "dropcap" or "drop_cap". With debug enabled, a non-zero
// number of dropped spans is logged. The same doc is returned.
func (c *Cleaner) dropCaps(doc *goquery.Document) *goquery.Document {
	dropped := 0
	doc.Find("span").Each(func(_ int, span *goquery.Selection) {
		class, ok := span.Attr("class")
		if !ok {
			return
		}
		if !strings.Contains(class, "dropcap") && !strings.Contains(class, "drop_cap") {
			return
		}
		c.config.parser.dropTag(span)
		dropped++
	})
	if c.config.debug && dropped > 0 {
		log.Printf("Cleaned %d dropcap tags\n", dropped)
	}
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:15,代码来源:cleaner.go


示例13: removeScriptsStyle

// removeScriptsStyle passes every <script>, <noscript> and <style> element in
// doc to the parser's removeNode helper and returns the same document. With
// debug enabled it logs a start message and the number of matched elements.
func (this *cleaner) removeScriptsStyle(doc *goquery.Document) *goquery.Document {
	if this.config.debug {
		log.Println("Starting to remove script tags")
	}

	targets := doc.Find("script,noscript,style")
	targets.Each(func(_ int, target *goquery.Selection) {
		this.config.parser.removeNode(target)
	})

	if this.config.debug {
		log.Printf("Removed %d script and style tags\n", targets.Size())
	}

	// TODO: HTML comments are not removed here; how to do so is still open.
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:15,代码来源:cleaner.go


示例14: removeNodesRegEx

// removeNodesRegEx checks, for each of the id, class and name attributes,
// every element in doc that carries the attribute; elements whose attribute
// value satisfies matchNodeRegEx against pattern are passed to the parser's
// removeNode helper. With debug enabled, the per-attribute match count is
// logged (including zero). The same doc is returned.
func (this *cleaner) removeNodesRegEx(doc *goquery.Document, pattern *regexp.Regexp) *goquery.Document {
	for _, attrName := range [...]string{"id", "class", "name"} {
		matched := 0
		candidates := doc.Find("*[" + attrName + "]")
		candidates.Each(func(_ int, node *goquery.Selection) {
			value, _ := node.Attr(attrName)
			if !this.matchNodeRegEx(value, pattern) {
				return
			}
			matched++
			this.config.parser.removeNode(node)
		})

		if this.config.debug {
			log.Printf("regExRemoveNodes %d %s elements found against pattern %s\n", matched, attrName, pattern.String())
		}
	}
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:19,代码来源:cleaner.go


示例15: cleanBadTags

// cleanBadTags visits each direct child of <html> and, for every attribute
// name in *selectors, passes to the parser's removeNode helper each
// descendant carrying that attribute whose value matches pattern. With debug
// enabled, each removal and each non-zero per-child, per-attribute total is
// logged. The same doc is returned.
func (c *Cleaner) cleanBadTags(doc *goquery.Document, pattern *regexp.Regexp, selectors *[]string) *goquery.Document {
	topLevel := doc.Find("html").Children()
	topLevel.Each(func(_ int, child *goquery.Selection) {
		for _, attrName := range *selectors {
			removed := 0
			candidates := child.Find("*[" + attrName + "]")
			candidates.Each(func(_ int, candidate *goquery.Selection) {
				value, _ := candidate.Attr(attrName)
				if !pattern.MatchString(value) {
					return
				}
				if c.config.debug {
					log.Printf("Cleaning: Removing node with %s: %s\n", attrName, c.config.parser.name(attrName, candidate))
				}
				c.config.parser.removeNode(candidate)
				removed++
			})
			if c.config.debug && removed > 0 {
				log.Printf("%d naughty %s elements found", removed, attrName)
			}
		}
	})
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:24,代码来源:cleaner.go


示例16: convertDivsToParagraphs

// convertDivsToParagraphs processes every element in doc matched by the
// selector domType and returns the same document.
//
// For each match, if its inner HTML matches divToPElementsPattern the element
// is handed to replaceWithPara. Otherwise its child nodes are scanned: text
// children longer than one byte (after newline/tab stripping) are collected
// into a replacement string, a new node carrying that string is added to the
// element, and the collected text nodes are detached from their parents.
// With debug enabled, progress and totals are logged.
func (c *Cleaner) convertDivsToParagraphs(doc *goquery.Document, domType string) *goquery.Document {
	if c.config.debug {
		log.Println("Starting to replace bad divs...")
	}
	// Counters used only for the summary log line at the end.
	badDivs := 0
	convertedTextNodes := 0
	divs := doc.Find(domType)

	divs.Each(func(i int, div *goquery.Selection) {
		// The error from Html() is discarded; divHTML is used as returned.
		divHTML, _ := div.Html()
		if divToPElementsPattern.Match([]byte(divHTML)) {
			c.replaceWithPara(div)
			badDivs++
		} else {
			var replacementText []string
			// Text nodes are only recorded here and removed after the scan,
			// so the child list is not modified while it is being iterated.
			nodesToRemove := list.New()
			children := div.Contents()
			if c.config.debug {
				log.Printf("Found %d children of div\n", children.Size())
			}
			// The callback always returns true, so every child is visited.
			children.EachWithBreak(func(i int, kid *goquery.Selection) bool {
				text := kid.Text()
				kidNode := kid.Get(0)
				tag := kidNode.Data
				// A child whose node Data equals its text is treated as a
				// text node.
				// NOTE(review): an element whose text equals its tag name
				// would also satisfy this test — confirm that is acceptable.
				if tag == text {
					tag = "#text"
				}
				if tag == "#text" {
					text = strings.Replace(text, "\n", "", -1)
					text = tabsRegEx.ReplaceAllString(text, "")
					if text == "" {
						return true
					}
					// Texts of a single byte are skipped.
					if len(text) > 1 {
						prev := kidNode.PrevSibling
						if c.config.debug {
							log.Printf("PARENT CLASS: %s NODENAME: %s\n", c.config.parser.name("class", div), tag)
							log.Printf("TEXTREPLACE: %s\n", strings.Replace(text, "\n", "", -1))
						}
						// When the text directly follows an <a> element, HTML
						// obtained via HasNodes(prev) is put in front of it.
						if prev != nil && prev.DataAtom == atom.A {
							nodeSelection := kid.HasNodes(prev)
							// NOTE: this local shadows the html package for
							// the remainder of this if body; the Html() error
							// is discarded.
							html, _ := nodeSelection.Html()
							replacementText = append(replacementText, html)
							if c.config.debug {
								log.Printf("SIBLING NODENAME ADDITION: %s TEXT: %s\n", prev.Data, html)
							}
						}
						replacementText = append(replacementText, text)
						nodesToRemove.PushBack(kidNode)
						convertedTextNodes++
					}

				}
				return true
			})

			// A node is built and added for every element that reaches this
			// branch, even when no text was collected.
			// NOTE(review): the joined text is stored in Data of an
			// ElementNode whose DataAtom is atom.P — confirm that downstream
			// rendering expects this rather than a <p> with a text child.
			newNode := new(html.Node)
			newNode.Type = html.ElementNode
			newNode.Data = strings.Join(replacementText, "")
			newNode.DataAtom = atom.P
			div.First().AddNodes(newNode)

			// Detach the recorded text nodes; nodes that no longer have a
			// parent are left untouched.
			for s := nodesToRemove.Front(); s != nil; s = s.Next() {
				node := s.Value.(*html.Node)
				if node != nil && node.Parent != nil {
					node.Parent.RemoveChild(node)
				}
			}
		}
	})
	if c.config.debug {
		log.Printf("Found %d total divs with %d bad divs replaced and %d textnodes converted inside divs", divs.Size(), badDivs, convertedTextNodes)
	}
	return doc

}
开发者ID:ejamesc,项目名称:GoOse,代码行数:76,代码来源:cleaner.go



注:本文中的github.com/advancedlogic/goquery.Document类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Golang goquery.Selection类代码示例发布时间:2022-05-24
下一篇:
Golang protocol.MessageReadWriteCloser类代码示例发布时间:2022-05-24
热门推荐
热门话题
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap