本文整理汇总了Golang中github.com/advancedlogic/goquery.Document类型的典型用法代码示例。如果您正苦于以下问题：Golang Document类型的具体用法？Golang Document怎么用？Golang Document使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。
在下文中一共展示了Document类型的16个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Golang代码示例。
示例1: cleanCites
// cleanCites passes every <cite> element found in doc to the configured
// parser's removeNode and returns the same document.
func (this *cleaner) cleanCites(doc *goquery.Document) *goquery.Document {
	doc.Find("cite").Each(func(_ int, cite *goquery.Selection) {
		this.config.parser.removeNode(cite)
	})
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:7,代码来源:cleaner.go
示例2: cleanDivs
// cleanDivs hands leaf <div> elements with duplicated text to the parser's
// removeNode and returns the same document.
//
// A div is a leaf when it has no child elements. Its text, stripped of
// surrounding spaces and tabs and lower-cased, is the grouping key. Every
// group that holds more than one div is passed to removeNode node by node;
// divs whose key is unique are left alone.
func (c *Cleaner) cleanDivs(doc *goquery.Document) *goquery.Document {
	// Leaf divs grouped by normalised text, kept in document order per group.
	groups := make(map[string]*list.List)
	doc.Find("div").Each(func(_ int, div *goquery.Selection) {
		if div.Children().Size() != 0 {
			return
		}
		// Trim spaces and tabs with one cutset so mixed runs such as "\t text"
		// are fully stripped. Trimming " " and then "\t" in two passes leaves a
		// space behind whenever a tab sits outside it, which made otherwise
		// identical divs land in different groups.
		key := strings.ToLower(strings.Trim(div.Text(), " \t"))
		group, ok := groups[key]
		if !ok {
			group = list.New()
			groups[key] = group
		}
		group.PushBack(div)
	})

	// Map iteration order is random, but each group is handled independently.
	for _, group := range groups {
		if group.Len() < 2 {
			continue
		}
		for e := group.Front(); e != nil; e = e.Next() {
			c.config.parser.removeNode(e.Value.(*goquery.Selection))
		}
	}
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:29,代码来源:cleaner.go
示例3: cleanBadTags
// cleanBadTags visits the direct children of <body> once per attribute name
// ("id", "class", "name"). Beneath each child, every descendant carrying that
// attribute whose value satisfies matchNodeRegEx against REMOVENODES_RE is
// passed to the parser's removeNode. When config.debug is set, each removal
// and a per-child tally are logged. The same doc is returned.
func (this *cleaner) cleanBadTags(doc *goquery.Document) *goquery.Document {
	topLevel := doc.Find("body").Children()
	for _, selector := range []string{"id", "class", "name"} {
		query := "*[" + selector + "]"
		topLevel.Each(func(_ int, child *goquery.Selection) {
			removed := 0
			child.Find(query).Each(func(_ int, node *goquery.Selection) {
				// A missing attribute yields "", which is still run through the matcher.
				value, _ := node.Attr(selector)
				if !this.matchNodeRegEx(value, REMOVENODES_RE) {
					return
				}
				if this.config.debug {
					log.Printf("Cleaning: Removing node with %s: %s\n", selector, this.config.parser.name(selector, node))
				}
				this.config.parser.removeNode(node)
				removed++
			})
			if removed > 0 && this.config.debug {
				log.Printf("%d naughty %s elements found", removed, selector)
			}
		})
	}
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:26,代码来源:cleaner.go
示例4: cleanAside
// cleanAside passes every <aside> element found in doc to the configured
// parser's removeNode and returns the same document.
func (this *cleaner) cleanAside(doc *goquery.Document) *goquery.Document {
	doc.Find("aside").Each(func(_ int, aside *goquery.Selection) {
		this.config.parser.removeNode(aside)
	})
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:7,代码来源:cleaner.go
示例5: cleanFooter
// cleanFooter passes every <footer> element found in doc to the configured
// parser's removeNode and returns the same document.
func (this *cleaner) cleanFooter(doc *goquery.Document) *goquery.Document {
	doc.Find("footer").Each(func(_ int, footer *goquery.Selection) {
		this.config.parser.removeNode(footer)
	})
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:7,代码来源:cleaner.go
示例6: removeTags
// removeTags runs each selector in *tags against doc and passes every match
// to the parser's removeNode. The same doc is returned.
//
// A nil tags pointer is treated as "nothing to remove" and doc is returned
// untouched, instead of panicking on the dereference.
func (c *Cleaner) removeTags(doc *goquery.Document, tags *[]string) *goquery.Document {
	if tags == nil {
		return doc
	}
	for _, tag := range *tags {
		doc.Find(tag).Each(func(_ int, match *goquery.Selection) {
			c.config.parser.removeNode(match)
		})
	}
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:9,代码来源:cleaner.go
示例7: cleanArticleTags
// cleanArticleTags calls the parser's delAttr for the "id", "name" and "class"
// attributes, in that order, on every <article> element in doc, then returns
// the same document.
func (c *Cleaner) cleanArticleTags(doc *goquery.Document) *goquery.Document {
	attrs := [...]string{"id", "name", "class"}
	doc.Find("article").Each(func(_ int, article *goquery.Selection) {
		for idx := range attrs {
			c.config.parser.delAttr(article, attrs[idx])
		}
	})
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:10,代码来源:cleaner.go
示例8: cleanParaSpans
// cleanParaSpans rewrites, in place, every <span> whose parent element is a
// <p>: the span's underlying node gets its Data set to the span's text and its
// Type set to html.TextNode. No other field of the node is touched here. The
// same doc is returned.
func (c *Cleaner) cleanParaSpans(doc *goquery.Document) *goquery.Document {
	doc.Find("span").Each(func(_ int, span *goquery.Selection) {
		parent := span.Parent()
		if parent == nil || parent.Length() < 1 {
			return
		}
		if parent.Get(0).DataAtom != atom.P {
			return
		}
		target := span.Get(0)
		// Read the text while the node is still an element, then retag it.
		target.Data = span.Text()
		target.Type = html.TextNode
	})
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:12,代码来源:cleaner.go
示例9: cleanEMTags
// cleanEMTags passes every <em> element that contains no <img> descendant to
// the parser's dropTag. With config.debug set it logs the number of <em>
// elements found (not the number dropped). The same doc is returned.
func (c *Cleaner) cleanEMTags(doc *goquery.Document) *goquery.Document {
	emphasis := doc.Find("em")
	emphasis.Each(func(_ int, em *goquery.Selection) {
		if em.Find("img").Length() != 0 {
			// Keep <em> wrappers that hold images.
			return
		}
		c.config.parser.dropTag(em)
	})
	if c.config.debug {
		log.Printf("Cleaning %d EM tags\n", emphasis.Size())
	}
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:13,代码来源:cleaner.go
示例10: nodesToCheck
// nodesToCheck returns the nodes we want to search on: every <p>, <pre> and
// <td> found beneath the document's children, grouped by tag in that order.
// The result is an empty, non-nil slice when nothing matches.
func (this *contentExtractor) nodesToCheck(doc *goquery.Document) []*goquery.Selection {
	output := []*goquery.Selection{}
	for _, tag := range [...]string{"p", "pre", "td"} {
		matches := doc.Children().Find(tag)
		if matches == nil {
			continue
		}
		matches.Each(func(_ int, node *goquery.Selection) {
			output = append(output, node)
		})
	}
	return output
}
开发者ID:minond,项目名称:GoOse,代码行数:14,代码来源:extractor.go
示例11: removeScriptsStyle
// removeScriptsStyle passes every <script>, <noscript> and <style> element in
// doc to the parser's removeNode. With config.debug set it logs a start
// message and, when at least one node was processed, the size of the matched
// selection. The same doc is returned.
func (c *Cleaner) removeScriptsStyle(doc *goquery.Document) *goquery.Document {
	if c.config.debug {
		log.Println("Starting to remove script tags")
	}

	targets := doc.Find("script,noscript,style")
	removed := 0
	targets.Each(func(_ int, node *goquery.Selection) {
		c.config.parser.removeNode(node)
		removed++
	})

	if removed > 0 && c.config.debug {
		log.Printf("Removed %d script and style tags\n", targets.Size())
	}
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:15,代码来源:cleaner.go
示例12: dropCaps
// dropCaps passes to the parser's dropTag every <span> whose class attribute
// contains "dropcap" or "drop_cap". With config.debug set and at least one
// span handled, the count is logged. The same doc is returned.
func (c *Cleaner) dropCaps(doc *goquery.Document) *goquery.Document {
	dropped := 0
	doc.Find("span").Each(func(_ int, span *goquery.Selection) {
		class, ok := span.Attr("class")
		if !ok {
			return
		}
		if !strings.Contains(class, "dropcap") && !strings.Contains(class, "drop_cap") {
			return
		}
		c.config.parser.dropTag(span)
		dropped++
	})
	if dropped > 0 && c.config.debug {
		log.Printf("Cleaned %d dropcap tags\n", dropped)
	}
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:15,代码来源:cleaner.go
示例13: removeScriptsStyle
// removeScriptsStyle passes every <script>, <noscript> and <style> element in
// doc to the parser's removeNode. With config.debug set it logs a start
// message and the size of the matched selection (even when that is zero).
// The same doc is returned.
func (this *cleaner) removeScriptsStyle(doc *goquery.Document) *goquery.Document {
	if this.config.debug {
		log.Println("Starting to remove script tags")
	}

	targets := doc.Find("script,noscript,style")
	targets.Each(func(_ int, node *goquery.Selection) {
		this.config.parser.removeNode(node)
	})

	if this.config.debug {
		log.Printf("Removed %d script and style tags\n", targets.Size())
	}

	// TODO: HTML comments are not removed here; how to do so is still an open question.
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:15,代码来源:cleaner.go
示例14: removeNodesRegEx
// removeNodesRegEx scans doc once per attribute name ("id", "class", "name").
// Every element carrying that attribute whose value satisfies matchNodeRegEx
// against pattern is passed to the parser's removeNode. With config.debug set,
// one summary line per attribute name is logged, including zero counts. The
// same doc is returned.
func (this *cleaner) removeNodesRegEx(doc *goquery.Document, pattern *regexp.Regexp) *goquery.Document {
	for _, selector := range [...]string{"id", "class", "name"} {
		matched := 0
		doc.Find("*[" + selector + "]").Each(func(_ int, node *goquery.Selection) {
			value, _ := node.Attr(selector)
			if !this.matchNodeRegEx(value, pattern) {
				return
			}
			matched++
			this.config.parser.removeNode(node)
		})
		if this.config.debug {
			log.Printf("regExRemoveNodes %d %s elements found against pattern %s\n", matched, selector, pattern.String())
		}
	}
	return doc
}
开发者ID:minond,项目名称:GoOse,代码行数:19,代码来源:cleaner.go
示例15: cleanBadTags
// cleanBadTags visits each direct child of <html> and, for every attribute
// name in *selectors, passes to the parser's removeNode each descendant whose
// value for that attribute matches pattern. With config.debug set, each
// removal and a per-child, per-attribute tally are logged. The same doc is
// returned.
//
// A nil pattern or nil selectors pointer means there is nothing to match, so
// doc is returned untouched instead of panicking inside the traversal.
func (c *Cleaner) cleanBadTags(doc *goquery.Document, pattern *regexp.Regexp, selectors *[]string) *goquery.Document {
	if pattern == nil || selectors == nil {
		return doc
	}

	// The traversal starts at <html>, so <head> and <body> are both scanned.
	root := doc.Find("html")
	root.Children().Each(func(_ int, child *goquery.Selection) {
		for _, selector := range *selectors {
			count := 0
			child.Find("*[" + selector + "]").Each(func(_ int, node *goquery.Selection) {
				// The selector guarantees the attribute exists; the flag is not needed.
				attribute, _ := node.Attr(selector)
				if !pattern.MatchString(attribute) {
					return
				}
				if c.config.debug {
					log.Printf("Cleaning: Removing node with %s: %s\n", selector, c.config.parser.name(selector, node))
				}
				c.config.parser.removeNode(node)
				count++
			})
			if c.config.debug && count > 0 {
				log.Printf("%d naughty %s elements found", count, selector)
			}
		}
	})
	return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:24,代码来源:cleaner.go
示例16: convertDivsToParagraphs
// convertDivsToParagraphs processes every element in doc matching the selector
// domType and returns the same doc.
//
// For each match there are two paths:
//   - if its inner HTML matches divToPElementsPattern, the element is handed to
//     c.replaceWithPara and counted as a "bad div";
//   - otherwise its child nodes are scanned, qualifying text is gathered into
//     replacementText, a new node tagged atom.P is attached through
//     div.First().AddNodes, and the gathered text nodes are detached from
//     their parents.
//
// With config.debug set, progress and a final summary are logged.
func (c *Cleaner) convertDivsToParagraphs(doc *goquery.Document, domType string) *goquery.Document {
if c.config.debug {
log.Println("Starting to replace bad divs...")
}
badDivs := 0
convertedTextNodes := 0
divs := doc.Find(domType)
divs.Each(func(i int, div *goquery.Selection) {
// The error from Html() is discarded; on failure divHTML is whatever Html() returned.
divHTML, _ := div.Html()
if divToPElementsPattern.Match([]byte(divHTML)) {
c.replaceWithPara(div)
badDivs++
} else {
// Text fragments gathered from this div, in the order they are visited.
var replacementText []string
// Nodes to detach once the scan is finished, so the tree is not mutated mid-iteration.
nodesToRemove := list.New()
children := div.Contents()
if c.config.debug {
log.Printf("Found %d children of div\n", children.Size())
}
// The callback returns true on every path, so iteration never stops early.
children.EachWithBreak(func(i int, kid *goquery.Selection) bool {
text := kid.Text()
kidNode := kid.Get(0)
tag := kidNode.Data
// A node whose Data equals its own text is treated as a text node.
// NOTE(review): presumably relies on text nodes storing their content in Data — confirm.
if tag == text {
tag = "#text"
}
if tag == "#text" {
// Strip newlines and whatever tabsRegEx matches before judging the text.
text = strings.Replace(text, "\n", "", -1)
text = tabsRegEx.ReplaceAllString(text, "")
if text == "" {
return true
}
// len counts bytes, so only text longer than one byte is converted.
if len(text) > 1 {
prev := kidNode.PrevSibling
if c.config.debug {
log.Printf("PARENT CLASS: %s NODENAME: %s\n", c.config.parser.name("class", div), tag)
log.Printf("TEXTREPLACE: %s\n", strings.Replace(text, "\n", "", -1))
}
// When the previous sibling is an <a>, the HTML of kid.HasNodes(prev)
// is added ahead of this text.
if prev != nil && prev.DataAtom == atom.A {
nodeSelection := kid.HasNodes(prev)
// This local shadows the html package for the rest of this if-block only.
html, _ := nodeSelection.Html()
replacementText = append(replacementText, html)
if c.config.debug {
log.Printf("SIBLING NODENAME ADDITION: %s TEXT: %s\n", prev.Data, html)
}
}
replacementText = append(replacementText, text)
nodesToRemove.PushBack(kidNode)
convertedTextNodes++
}
}
return true
})
// Build the replacement node: an element node tagged atom.P whose Data is the joined text.
// NOTE(review): it is created and attached even when no text was gathered, and Data
// carries the text rather than a tag name — confirm this is intended.
newNode := new(html.Node)
newNode.Type = html.ElementNode
newNode.Data = strings.Join(replacementText, "")
newNode.DataAtom = atom.P
div.First().AddNodes(newNode)
// Detach the text nodes whose content was folded into newNode.
for s := nodesToRemove.Front(); s != nil; s = s.Next() {
node := s.Value.(*html.Node)
if node != nil && node.Parent != nil {
node.Parent.RemoveChild(node)
}
}
}
})
if c.config.debug {
log.Printf("Found %d total divs with %d bad divs replaced and %d textnodes converted inside divs", divs.Size(), badDivs, convertedTextNodes)
}
return doc
}
开发者ID:ejamesc,项目名称:GoOse,代码行数:76,代码来源:cleaner.go
注：本文中的github.com/advancedlogic/goquery.Document类型示例整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论