本文整理汇总了Golang中golang.org/x/net/html.Node类的典型用法代码示例。如果您正苦于以下问题：Golang Node类的具体用法？Golang Node怎么用？Golang Node使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Node类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Golang代码示例。
示例1: removeNegativeAttributeMatches
// removeNegativeAttributeMatches prunes the subtree rooted at n and returns n.
//
// A direct child is removed when it is not a text node, its tag name matches
// containerregrex, and one of its "id" or "class" attributes contains a token
// (split on nonwordregex, compared in lower case) that matches negativeregex.
// Container children that are kept are descended into so nested matches are
// pruned as well. Children that are text nodes or whose tag does not match
// containerregrex are left untouched.
//
// Removal goes through Node.RemoveChild so that Parent, PrevSibling,
// NextSibling and LastChild stay consistent; the previous hand-rolled
// unlinking only patched NextSibling/FirstChild and, for a first child without
// any negative match, dropped the child by mistake.
func removeNegativeAttributeMatches(n *html.Node) *html.Node {
	var next *html.Node
	for c := n.FirstChild; c != nil; c = next {
		// Capture the successor first: RemoveChild clears c.NextSibling.
		next = c.NextSibling
		if c.Type == html.TextNode || !containerregrex.MatchString(c.Data) {
			continue
		}
		if hasNegativeIDOrClass(c) {
			n.RemoveChild(c)
			continue
		}
		removeNegativeAttributeMatches(c)
	}
	return n
}

// hasNegativeIDOrClass reports whether any token of c's "id" or "class"
// attribute values matches negativeregex.
func hasNegativeIDOrClass(c *html.Node) bool {
	for _, attr := range c.Attr {
		key := strings.ToLower(attr.Key)
		if key != "id" && key != "class" {
			continue
		}
		for _, token := range nonwordregex.Split(strings.ToLower(attr.Val), -1) {
			if negativeregex.MatchString(token) {
				return true
			}
		}
	}
	return false
}
开发者ID:hygerth,项目名称:brooklet,代码行数:34,代码来源:siteparser.go
示例2: wrapText
// wrapText returns nodes with every run of non-block content wrapped in a <p>
// element.
//
// Block elements (per isBlockElement) are passed through unchanged and end
// the paragraph currently being built. Whitespace-only text nodes seen while
// no paragraph is open are passed through as well. Everything else is
// appended to the open paragraph, which is created on demand.
func wrapText(nodes []*html.Node) []*html.Node {
	var (
		out     = make([]*html.Node, 0, len(nodes))
		pending *html.Node // paragraph currently being filled, nil if none
	)

	// flush emits the open paragraph, if any. It is rendered and re-parsed
	// so that p-inline-p expands.
	flush := func() {
		if pending == nil {
			return
		}
		out = append(out, ParseDepth(Render(pending), 0)...)
		pending = nil
	}

	for _, node := range nodes {
		switch {
		case node.Type == html.ElementNode && isBlockElement[node.DataAtom]:
			flush()
			out = append(out, node)
		case pending == nil && node.Type == html.TextNode && strings.TrimSpace(node.Data) == "":
			out = append(out, node)
		default:
			if pending == nil {
				pending = &html.Node{
					Type:     html.ElementNode,
					Data:     "p",
					DataAtom: atom.P,
				}
			}
			pending.AppendChild(node)
		}
	}

	flush()
	return out
}
开发者ID:BenLubar,项目名称:htmlcleaner,代码行数:33,代码来源:cleaner.go
示例3: getSiblingsContent
// getSiblingsContent collects paragraph selections from currentSibling.
//
// If the first node of currentSibling is itself a <p> with non-empty text,
// the selection is returned as the only result. Otherwise every descendant
// <p> with non-empty text is scored by its stop-word count; a paragraph is
// kept when 0.30*baselinescoreSiblingsPara is below that count and the
// paragraph is not flagged by isHighLinkDensity. Each kept paragraph is
// returned as a new single-node selection holding a text node with the
// paragraph's text.
func (ce *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	const siblingBaselineScore = 0.30

	result := []*goquery.Selection{}
	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return append(result, currentSibling)
	}

	currentSibling.Find("p").Each(func(_ int, para *goquery.Selection) {
		text := para.Text()
		if len(text) == 0 {
			return
		}
		stats := ce.config.stopWords.stopWordsCount(ce.config.targetLanguage, text)
		linkHeavy := ce.isHighLinkDensity(para)
		threshold := siblingBaselineScore * baselinescoreSiblingsPara
		if threshold < float64(stats.stopWordCount) && !linkHeavy {
			textNode := &html.Node{
				Type:     html.TextNode,
				Data:     text,
				DataAtom: atom.P,
			}
			result = append(result, &goquery.Selection{Nodes: []*html.Node{textNode}})
		}
	})
	return result
}
开发者ID:hotei,项目名称:GoOse,代码行数:32,代码来源:extractor.go
示例4: cleanseDom
// cleanseDom performs brute reduction and simplification of the subtree
// rooted at n. lvl is the depth of n and is only passed on to the recursion.
//
// For each node: unwanted attributes are stripped, the children are cleansed
// first, then unwanted content is either removed or converted (depending on
// directlyRemoveUnwanted), exotic nodes are converted, and finally the inner
// whitespace of text nodes is normalized once.
func cleanseDom(n *html.Node, lvl int) {
	n.Attr = removeAttr(n.Attr, unwantedAttrs)

	// Children are handled before the node itself.
	child := n.FirstChild
	for child != nil {
		cleanseDom(child, lvl+1)
		child = child.NextSibling
	}

	if !directlyRemoveUnwanted {
		convertUnwanted(n)
	} else {
		removeUnwanted(n)
	}

	convertExotic(n)

	// One-time text normalization.
	if n.Type != html.TextNode {
		return
	}
	n.Data = stringspb.NormalizeInnerWhitespace(n.Data)
}
开发者ID:aarzilli,项目名称:tools,代码行数:27,代码来源:01_cleanse.go
示例5: FindTitleAndBody_Ria
// FindTitleAndBody_Ria searches the subtree rooted at node for an article's
// title and body as marked up on ria.ru (reported to work as of 15.12.2015).
//
// An element with itemprop="articleBody" is the body and an element with
// itemprop="name" is the title. Matching nodes are renamed in place: their
// Data becomes "body" or "title". Results found in descendants override the
// ones found on node itself, and the child scan stops as soon as both are
// known. Either return value may be nil.
func FindTitleAndBody_Ria(node *html.Node) (*html.Node, *html.Node) {
	var title, fulltext *html.Node

	if node.Type == html.ElementNode {
	attrs:
		for _, attr := range node.Attr {
			if attr.Key != "itemprop" {
				continue
			}
			switch attr.Val {
			case "articleBody":
				node.Data = "body"
				fulltext = node
				break attrs
			case "name":
				node.Data = "title"
				title = node
				break attrs
			}
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		childTitle, childBody := FindTitleAndBody_Ria(child)
		if childTitle != nil {
			title = childTitle
		}
		if childBody != nil {
			fulltext = childBody
		}
		if title != nil && fulltext != nil {
			break
		}
	}
	return title, fulltext
}
开发者ID:Vetcher,项目名称:pagedownloader,代码行数:37,代码来源:cleaner.go
示例6: FindTitleAndBody_MK
// FindTitleAndBody_MK searches the subtree rooted at node for an article's
// title and body.
//
// An element whose class attribute is exactly "content" is taken as the body:
// its title is looked up with FindTitleMK and the element's Data is then
// renamed to "body". Results from descendants override the ones found on
// node itself; a title coming from a descendant has its Data set to "title".
// The child scan stops once both values are known. Either return value may
// be nil.
func FindTitleAndBody_MK(node *html.Node) (*html.Node, *html.Node) {
	var title, fulltext *html.Node

	if node.Type == html.ElementNode {
		for _, attr := range node.Attr {
			if attr.Key == "class" && attr.Val == "content" {
				// Look up the title before the node is renamed.
				title = FindTitleMK(node)
				node.Data = "body"
				fulltext = node
				break
			}
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		childTitle, childBody := FindTitleAndBody_MK(child)
		if childTitle != nil {
			title = childTitle
			title.Data = "title"
		}
		if childBody != nil {
			fulltext = childBody
		}
		if title != nil && fulltext != nil {
			break
		}
	}
	return title, fulltext
}
开发者ID:Vetcher,项目名称:pagedownloader,代码行数:32,代码来源:cleaner.go
示例7: copyNode
// copyNode copies the payload of from into to: Type, DataAtom, Data,
// Namespace and Attr. The tree links of to (parent, children, siblings) are
// left untouched. Attr is assigned, not cloned, so both nodes share the same
// attribute slice afterwards.
func copyNode(to, from *html.Node) {
	to.Type, to.DataAtom, to.Data = from.Type, from.DataAtom, from.Data
	to.Namespace, to.Attr = from.Namespace, from.Attr
}
开发者ID:documize,项目名称:html-diff,代码行数:7,代码来源:nodes.go
示例8: img2Link
// img2Link turns an <img> node into an <a> node in place; nodes whose Data is
// not "img" are left alone.
//
// Every "src" attribute key is renamed to "href", a "cfrom"="img" attribute
// is set, and a text child of the form "[img] <title> <url> | " is appended.
// When closureTextNodeExists reports true for the node, the placeholder
// "[ctdr]" (content title double removed) is used instead of the title.
func img2Link(img *html.Node) {
	if img.Data != "img" {
		return
	}
	img.Data = "a"

	for i := range img.Attr {
		if img.Attr[i].Key == "src" {
			img.Attr[i].Key = "href"
		}
	}

	double := closureTextNodeExists(img)
	title := attrX(img.Attr, "title")

	var imgContent string
	if !double {
		imgContent = fmt.Sprintf("[img] %v %v | ",
			title,
			urlBeautify(attrX(img.Attr, "href")))
	} else {
		imgContent = fmt.Sprintf("[img] %v %v | ",
			"[ctdr]", // content title double removed
			urlBeautify(attrX(img.Attr, "href")))
	}

	img.Attr = attrSet(img.Attr, "cfrom", "img")
	img.AppendChild(dom.Nd("text", imgContent))
}
开发者ID:aarzilli,项目名称:tools,代码行数:32,代码来源:06_img2link.go
示例9: CompactNode
// CompactNode simplifies the subtree rooted at n in place.
//
// Pass 1 walks the direct children (after compacting each of them):
//   - a child listed in _mergeTextElements is removed and its own children
//     are collected, to be re-attached at the end of n;
//   - an element child without children that is not in _voidElements is
//     removed.
//
// Pass 2 applies when n is left with exactly one child:
//   - if that child has the same tag as n, or is a <br> inside a <p>/<div>,
//     the child is removed and its children are moved up into n;
//   - if n is an <a> wrapping only an <img>, n takes over the image's
//     identity (type, tag, namespace, attributes) and children.
//
// The <a>/<img> case used to be `*n = *n.FirstChild`, which also copied the
// child's link fields: n.Parent then pointed at n itself and n's sibling
// pointers were wiped, cutting the rest of the parent's child list off. Only
// the payload is copied now, so n keeps its place in the tree.
func CompactNode(n *html.Node) {
	var appendNodes []*html.Node
	for c := n.FirstChild; c != nil; {
		CompactNode(c)
		if _mergeTextElements[c.Data] {
			appendNodes = append(appendNodes, GetChildNodes(c)...)
			log.Info("delete", c.Data)
			// NOTE(review): RemoveNode presumably returns the node to
			// continue with (the following sibling) - confirm in its definition.
			c = RemoveNode(c)
		} else if c.Type == html.ElementNode && c.FirstChild == nil && !_voidElements[c.Data] {
			log.Info("delete", c.Data)
			c = RemoveNode(c)
		} else {
			c = c.NextSibling
		}
	}
	DetachNodes(appendNodes)
	AppendChildNodes(n, appendNodes)

	only := n.FirstChild
	if only == nil || only.NextSibling != nil {
		return
	}
	switch {
	case only.Data == n.Data || (only.Data == "br" && (n.Data == "p" || n.Data == "div")):
		childNodes := GetChildNodes(only)
		log.Info("delete", only.Data)
		n.RemoveChild(only)
		DetachNodes(childNodes)
		AppendChildNodes(n, childNodes)
	case only.Data == "img" && n.Data == "a":
		// Replace the link by its image without touching n's own
		// Parent/PrevSibling/NextSibling links.
		n.RemoveChild(only)
		n.Type, n.DataAtom, n.Data = only.Type, only.DataAtom, only.Data
		n.Namespace, n.Attr = only.Namespace, only.Attr
		for gc := only.FirstChild; gc != nil; gc = only.FirstChild {
			only.RemoveChild(gc)
			n.AppendChild(gc)
		}
	}
}
开发者ID:justintan,项目名称:gox,代码行数:30,代码来源:node.go
示例10: toDiv
// toDiv turns node into a bare <div>: its attributes are dropped and its tag
// is replaced, then its children are processed with parseChildren, whose
// result is returned as is.
func (m *minificationHTML) toDiv(node *html.Node) (*html.Node, error) {
	node.Attr = nil
	node.Data, node.DataAtom = "div", atom.Div
	return m.parseChildren(node)
}
开发者ID:ReanGD,项目名称:go-web-search,代码行数:7,代码来源:minification_html.go
示例11: convert
// convert turns the nodes in s into a chain of /x/net/html.Node siblings
// whose Parent is parent, and returns the first and last node of that chain
// (both nil when nothing was converted).
//
// Nil entries are skipped. A document node is not converted itself; its
// children are converted and spliced into the chain as siblings.
//
// Two defects of the previous version are fixed in the document branch:
// the first spliced node now gets its PrevSibling linked to the preceding
// node, and a document without any convertible children no longer resets the
// chain (which made the next node overwrite first and lose all earlier
// siblings).
func (s Siblings) convert(parent *html.Node) (first, last *html.Node) {
	var prev *html.Node
	for _, n := range s {
		if n == nil {
			continue
		}
		if n.Type == html.DocumentNode {
			start, end := n.Children.convert(parent)
			if start == nil {
				// Nothing to splice in; keep the current tail.
				continue
			}
			start.PrevSibling = prev
			if prev != nil {
				prev.NextSibling = start
			} else {
				first = start
			}
			prev = end
			continue
		}
		h := n.convert()
		h.Parent = parent
		h.PrevSibling = prev
		if prev != nil {
			prev.NextSibling = h
		} else {
			first = h
		}
		prev = h
	}
	return first, prev
}
开发者ID:arnehormann,项目名称:hck,代码行数:31,代码来源:nodes.go
示例12: reIndent
// reIndent rewrites the whitespace of the subtree rooted at n so that it
// serializes with one element per line, indented by depth. lvl is the depth
// of n.
//
// Nodes deeper than cScaffoldLvls that have no parent are reported via the
// log and skipped. Before the children are visited, a newline plus lvl-2
// tabs is inserted before elements nested in elements, a newline is inserted
// before comments, and text nodes are trimmed and re-padded. After the
// children are visited, a matching newline plus indent is inserted after the
// last child of elements nested in elements, except for <a>, which gets an
// empty text node so link texts stay on one line.
func reIndent(n *html.Node, lvl int) {
	belowScaffold := lvl > cScaffoldLvls

	if belowScaffold && n.Parent == nil {
		_ = dom.PrintSubtree(n) // result is only of use for ad-hoc debugging
		hint := ""
		if ml3[n] > 0 {
			hint = " from ml3"
		}
		log.Print("reIndent: no parent ", hint)
		return
	}

	// insideElement is only true below the scaffold levels, where the guard
	// above has established that n.Parent is non-nil.
	insideElement := func() bool {
		return belowScaffold && n.Parent.Type == html.ElementNode
	}

	// Before children processing.
	switch n.Type {
	case html.ElementNode:
		if insideElement() {
			lead := "\n" + strings.Repeat("\t", lvl-2)
			dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: lead})
		}
	case html.CommentNode:
		dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n"})
	case html.TextNode:
		text := strings.TrimSpace(n.Data) + " "
		if !(strings.HasPrefix(text, ",") || strings.HasPrefix(text, ".")) {
			text = " " + text
		}
		// Link texts carry no surrounding space.
		if n.Parent != nil && n.Parent.Data == "a" {
			text = strings.TrimSpace(text)
		}
		n.Data = text
	}

	// Children.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		reIndent(child, lvl+1)
	}

	// After children processing; the original author notes that this has to
	// happen after the children, for reasons not known to them.
	if n.Type != html.ElementNode || !insideElement() {
		return
	}
	closing := "\n" + strings.Repeat("\t", lvl-2)
	if n.Data == "a" {
		// Link texts get no new line.
		closing = ""
	}
	if n.LastChild != nil {
		dom.InsertAfter(n.LastChild, &html.Node{Type: html.TextNode, Data: closing})
	}
}
开发者ID:aarzilli,项目名称:tools,代码行数:58,代码来源:09_reformat_indent.go
示例13: topDownV1
/*
div div
div p
p TO img
img p
p
Operates from the *middle* div.
Saves all children in inverted slice.
Removes each child and reattaches it one level higher.
Finally the intermediary, now childless div is removed.
\ /
\ /\ /
\_____/ \_____/
\ /
\_____/\_____/
\__________/ => Breaks are gone
\p1___p2___/ => Wrapping preserves breaks
*/
// topDownV1 flattens one level of nesting: when n is a couple[1] element
// with at least one child, sitting directly inside a couple[0] element, all
// children of n are moved up into n's parent at n's former position (order
// preserved) and n itself is removed. The parent's tag is then set to
// parentType.
//
// Text nodes and nodes whose Data is "a" are wrapped in a new
// <p cfrm="div"> on the way up, so the line break the removed element
// provided is preserved. couple must hold at least two entries.
//
// The wrapper is now filled with AppendChild; previously only its FirstChild
// was set, leaving LastChild nil and the wrapper node inconsistent.
func topDownV1(n *html.Node, couple []string, parentType string) {
	if noParent(n) {
		return
	}
	p := n.Parent

	parDiv := p.Type == html.ElementNode && p.Data == couple[0] // parent is the outer element
	iAmDiv := n.Type == html.ElementNode && n.Data == couple[1] // n is the inner element
	if !parDiv || !iAmDiv || n.FirstChild == nil {
		return
	}

	// Snapshot the children first: RemoveChild clears their sibling links.
	var children []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		children = append(children, c)
	}

	// Walk backwards, always inserting in front of the previously moved
	// node; this keeps the original order right after n.
	insertionPoint := n.NextSibling
	for i := len(children) - 1; i >= 0; i-- {
		c1 := children[i]
		n.RemoveChild(c1)

		moved := c1
		if c1.Type == html.TextNode || c1.Data == "a" {
			wrap := &html.Node{
				Type: html.ElementNode,
				Data: "p",
				Attr: []html.Attribute{{Key: "cfrm", Val: "div"}},
			}
			wrap.AppendChild(c1)
			moved = wrap
		}
		p.InsertBefore(moved, insertionPoint)
		insertionPoint = moved
	}

	p.RemoveChild(n)
	p.Data = parentType
}
开发者ID:aarzilli,项目名称:tools,代码行数:89,代码来源:03_top_down_v1.go
示例14: runMergeNodes
// runMergeNodes is a test helper: it attaches prev and next (each only when
// non-nil, in that order) as children of parent and returns the result of
// parserUtils.mergeNodes for them.
func runMergeNodes(parent, prev, next *html.Node, addSeparator bool) *html.Node {
	for _, child := range []*html.Node{prev, next} {
		if child != nil {
			parent.AppendChild(child)
		}
	}
	u := parserUtils{}
	return u.mergeNodes(parent, prev, next, addSeparator)
}
开发者ID:ReanGD,项目名称:go-web-search,代码行数:10,代码来源:parser_utils_test.go
示例15: setNodeText
// setNodeText replaces all children of node with a single text node whose
// content is s.
func setNodeText(node *html.Node, s string) {
	// Drop every existing child, front to back.
	for child := node.FirstChild; child != nil; child = node.FirstChild {
		node.RemoveChild(child)
	}

	text := &html.Node{Type: html.TextNode, Data: s}
	node.AppendChild(text)
}
开发者ID:albertjin,项目名称:goquery,代码行数:12,代码来源:mutate.go
示例16: openTag
// openTag unwraps node: its children take node's place among the children of
// node's parent (order preserved) and node is detached from the tree.
// A node without a parent is left unchanged.
//
// The previous implementation overwrote the parent's FirstChild/LastChild
// with node's children, which is only correct when node is the parent's sole
// child; any siblings of node were dropped from the parent while still
// pointing at it, and a nil parent caused a panic. Splicing through the
// html.Node API gives the same result in the sole-child case and keeps every
// link consistent otherwise.
func (m *minificationText) openTag(node *html.Node) {
	parent := node.Parent
	if parent == nil {
		return
	}
	for child := node.FirstChild; child != nil; child = node.FirstChild {
		node.RemoveChild(child)
		parent.InsertBefore(child, node)
	}
	parent.RemoveChild(node)
}
开发者ID:ReanGD,项目名称:go-web-search,代码行数:11,代码来源:minification_text.go
示例17: removeUnwanted
// removeUnwanted removes the unwanted direct children of n: <script>
// elements and comment nodes.
//
// The children are collected first and removed in a second pass, because
// RemoveChild clears the removed node's NextSibling and would cut a direct
// iteration short.
//
// The filter used to test the parent n instead of the child c, so comment
// children were never removed and <script> children were only removed from
// the inside out (a script element was emptied, not removed).
func removeUnwanted(n *html.Node) {
	var children []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		children = append(children, c)
	}
	for _, c := range children {
		if (c.Type == html.ElementNode && c.Data == "script") || c.Type == html.CommentNode {
			n.RemoveChild(c)
		}
	}
}
开发者ID:aarzilli,项目名称:tools,代码行数:11,代码来源:6_dir_digest_3.go
示例18: removeUnwanted
// removeUnwanted removes every direct child of n whose Data is flagged in
// unwanteds.
//
// RemoveChild sets the removed node's NextSibling to nil, so the successor
// is read before a child is possibly removed.
func removeUnwanted(n *html.Node) {
	for child := n.FirstChild; child != nil; {
		following := child.NextSibling
		if unwanteds[child.Data] {
			n.RemoveChild(child)
		}
		child = following
	}
}
开发者ID:aarzilli,项目名称:tools,代码行数:17,代码来源:01_cleanse.go
示例19: TestParseATagNoHref
// TestParseATagNoHref checks that parsing an <a> node without an href
// attribute does not add any link to the page.
func TestParseATagNoHref(t *testing.T) {
	const want = 0

	anchor := &html.Node{Data: "a"}
	page := newWebPage(startUrl)
	page.parseATag(anchor)

	if got := page.links.Len(); got != want {
		t.Error("Expected:", want, " Got:", got)
	}
}
开发者ID:zlisinski,项目名称:go_crawl,代码行数:13,代码来源:go_crawl_test.go
示例20: replaceNodeWithChildren
// replaceNodeWithChildren unwraps n: each child of n is moved, in order, to
// sit directly before n under n's parent, and n is then removed from that
// parent. n must have a parent.
func replaceNodeWithChildren(n *html.Node) {
	parent := n.Parent
	// RemoveChild detaches the current first child, so re-reading
	// n.FirstChild always yields the next child to move.
	for child := n.FirstChild; child != nil; child = n.FirstChild {
		n.RemoveChild(child)
		parent.InsertBefore(child, n)
	}
	parent.RemoveChild(n)
}
开发者ID:jpoehls,项目名称:feedmailer,代码行数:13,代码来源:readability.go
注：本文中的golang.org/x/net/html.Node类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论