Golang html.Node类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏邀请

本文整理汇总了Golang中golang.org/x/net/html.Node类的典型用法代码示例。如果您正苦于以下问题：Golang Node类的具体用法？Golang Node怎么用？Golang Node使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了Node类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Golang代码示例。

示例1: removeNegativeAttributeMatches

// removeNegativeAttributeMatches prunes, in place, the container elements
// below n whose "id" or "class" attribute contains a token matched by
// negativeregex, and returns n.
//
// Only non-text children whose tag name matches containerregrex are
// considered. A child that is kept is searched recursively, whether or not it
// carries an id/class attribute itself, so negatively named descendants of an
// unnamed container are pruned too.
//
// Nodes are unlinked with (*html.Node).RemoveChild so that Parent,
// PrevSibling, NextSibling, FirstChild and LastChild all stay consistent.
func removeNegativeAttributeMatches(n *html.Node) *html.Node {
	var next *html.Node
	for c := n.FirstChild; c != nil; c = next {
		// RemoveChild clears c.NextSibling, so remember the successor first.
		next = c.NextSibling

		if c.Type == html.TextNode || !containerregrex.MatchString(c.Data) {
			continue
		}
		if hasNegativeAttribute(c) {
			n.RemoveChild(c)
			continue
		}
		removeNegativeAttributeMatches(c)
	}
	return n
}

// hasNegativeAttribute reports whether any token of n's "id" or "class"
// attribute (compared case-insensitively, split on nonwordregex) is matched
// by negativeregex.
func hasNegativeAttribute(n *html.Node) bool {
	for _, attr := range n.Attr {
		key := strings.ToLower(attr.Key)
		if key != "id" && key != "class" {
			continue
		}
		for _, value := range nonwordregex.Split(strings.ToLower(attr.Val), -1) {
			if negativeregex.MatchString(value) {
				return true
			}
		}
	}
	return false
}

开发者ID:hygerth，项目名称:brooklet，代码行数:34，代码来源:siteparser.go

示例2: wrapText

// wrapText groups runs of inline content from nodes into <p> elements.
//
// Block elements (per isBlockElement) are passed through unchanged and end the
// current run. Whitespace-only text nodes that appear while no run is open are
// passed through as well. Every other node is appended to a pending <p>
// wrapper; when the run ends the wrapper is rendered and re-parsed with
// ParseDepth so that a block nested inside inline content splits the
// paragraph the way an HTML parser would.
func wrapText(nodes []*html.Node) []*html.Node {
	var (
		result  = make([]*html.Node, 0, len(nodes))
		pending *html.Node
	)

	// flush emits the pending paragraph, if any, and starts a fresh run.
	flush := func() {
		if pending == nil {
			return
		}
		result = append(result, ParseDepth(Render(pending), 0)...)
		pending = nil
	}

	for _, node := range nodes {
		switch {
		case node.Type == html.ElementNode && isBlockElement[node.DataAtom]:
			flush()
			result = append(result, node)

		case pending == nil && node.Type == html.TextNode && strings.TrimSpace(node.Data) == "":
			result = append(result, node)

		default:
			if pending == nil {
				pending = &html.Node{Type: html.ElementNode, Data: "p", DataAtom: atom.P}
			}
			pending.AppendChild(node)
		}
	}

	flush()
	return result
}

开发者ID:BenLubar，项目名称:htmlcleaner，代码行数:33，代码来源:cleaner.go

示例3: getSiblingsContent

// getSiblingsContent collects paragraph selections from currentSibling.
//
// When the first node of currentSibling is itself a non-empty <p>, that
// selection is returned as the only element. Otherwise every non-empty
// descendant <p> is scored by its stop-word count; a paragraph is kept when
// that count exceeds 0.30 * baselinescoreSiblingsPara and isHighLinkDensity
// reports false for it. Each kept paragraph is returned as a new selection
// holding one synthetic text node that carries the paragraph's text.
func (this *contentExtractor) getSiblingsContent(currentSibling *goquery.Selection, baselinescoreSiblingsPara float64) []*goquery.Selection {
	if currentSibling.Get(0).DataAtom.String() == "p" && len(currentSibling.Text()) > 0 {
		return []*goquery.Selection{currentSibling}
	}

	const siblingBaselineScore = 0.30

	kept := make([]*goquery.Selection, 0)
	currentSibling.Find("p").Each(func(_ int, para *goquery.Selection) {
		text := para.Text()
		if len(text) == 0 {
			return
		}

		stats := this.config.stopWords.stopWordsCount(this.config.targetLanguage, text)
		linkHeavy := this.isHighLinkDensity(para)
		threshold := siblingBaselineScore * baselinescoreSiblingsPara
		if threshold >= float64(stats.stopWordCount) || linkHeavy {
			return
		}

		textNode := &html.Node{
			Type:     html.TextNode,
			Data:     text,
			DataAtom: atom.P,
		}
		sel := new(goquery.Selection)
		sel.Nodes = []*html.Node{textNode}
		kept = append(kept, sel)
	})
	return kept
}

开发者ID:hotei，项目名称:GoOse，代码行数:32，代码来源:extractor.go

示例4: cleanseDom

// cleanseDom performs a brute-force reduction and simplification of the
// subtree rooted at n, working bottom-up: the children are cleansed before n
// itself is processed. lvl is the depth of n and is only passed on to the
// recursive calls.
func cleanseDom(n *html.Node, lvl int) {
	n.Attr = removeAttr(n.Attr, unwantedAttrs)

	// Descend first, so the helpers below see already-cleansed children.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		cleanseDom(child, lvl+1)
	}

	// Unwanted nodes are either dropped or converted, depending on the
	// directlyRemoveUnwanted setting.
	if !directlyRemoveUnwanted {
		convertUnwanted(n)
	} else {
		removeUnwanted(n)
	}

	convertExotic(n)

	// One-time whitespace normalization, text nodes only.
	if n.Type != html.TextNode {
		return
	}
	n.Data = stringspb.NormalizeInnerWhitespace(n.Data)
}

开发者ID:aarzilli，项目名称:tools，代码行数:27，代码来源:01_cleanse.go

示例5: FindTitleAndBody_Ria

// FindTitleAndBody_Ria searches the subtree rooted at node for the article
// title and body as marked up on ria.ru (verified against the site layout of
// 15.12.2015).
//
// An element with itemprop="articleBody" is the body and is renamed to
// "body"; an element with itemprop="name" is the title and is renamed to
// "title". Matches found in descendants override a match on node itself, and
// the walk over the children stops once both are known. Either result may be
// nil.
func FindTitleAndBody_Ria(node *html.Node) (*html.Node, *html.Node) {
	var title, fulltext *html.Node

	if node.Type == html.ElementNode {
	attrs:
		for _, attr := range node.Attr {
			if attr.Key != "itemprop" {
				continue
			}
			switch attr.Val {
			case "articleBody":
				node.Data = "body"
				fulltext = node
				break attrs
			case "name":
				node.Data = "title"
				title = node
				break attrs
			}
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		if t, b := FindTitleAndBody_Ria(child); t != nil || b != nil {
			if t != nil {
				title = t
			}
			if b != nil {
				fulltext = b
			}
		}
		if title != nil && fulltext != nil {
			break
		}
	}

	return title, fulltext
}

开发者ID:Vetcher，项目名称:pagedownloader，代码行数:37，代码来源:cleaner.go

示例6: FindTitleAndBody_MK

// FindTitleAndBody_MK searches the subtree rooted at node for the article
// title and body.
//
// An element whose class attribute is exactly "content" is the body: its
// title is looked up with FindTitleMK and the element is renamed to "body".
// Matches found in descendants override a match on node itself; a title
// coming from a descendant is renamed to "title". The walk over the children
// stops once both are known. Either result may be nil.
func FindTitleAndBody_MK(node *html.Node) (*html.Node, *html.Node) {
	var title, fulltext *html.Node

	if node.Type == html.ElementNode {
		for i := range node.Attr {
			if node.Attr[i].Key != "class" || node.Attr[i].Val != "content" {
				continue
			}
			title = FindTitleMK(node)
			node.Data = "body"
			fulltext = node
			break
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		t, b := FindTitleAndBody_MK(child)
		if t != nil {
			title = t
			title.Data = "title"
		}
		if b != nil {
			fulltext = b
		}
		if title != nil && fulltext != nil {
			break
		}
	}

	return title, fulltext
}

开发者ID:Vetcher，项目名称:pagedownloader，代码行数:32，代码来源:cleaner.go

示例7: copyNode

// copyNode overwrites the content fields of to (Type, DataAtom, Data,
// Namespace, Attr) with those of from. The tree links of to (parent,
// siblings, children) are left untouched. Attr is assigned, not cloned, so
// both nodes share the same attribute slice afterwards.
func copyNode(to, from *html.Node) {
	to.Type, to.DataAtom, to.Data = from.Type, from.DataAtom, from.Data
	to.Namespace = from.Namespace
	to.Attr = from.Attr
}

开发者ID:documize，项目名称:html-diff，代码行数:7，代码来源:nodes.go

示例8: img2Link

// img2Link turns an <img> node into an <a> node in place; nodes with any
// other tag name are left alone.
//
// Every "src" attribute is renamed to "href", a "cfrom"="img" attribute is
// set, and a text child of the form "[img] <label> <url> | " is appended.
// The label is the image's title attribute, or the marker "[ctdr]" when
// closureTextNodeExists reports true for the node.
func img2Link(img *html.Node) {
	if img.Data != "img" {
		return
	}

	img.Data = "a"
	for i := range img.Attr {
		if img.Attr[i].Key == "src" {
			img.Attr[i].Key = "href"
		}
	}

	label := "[ctdr]" // content title double removed
	duplicated := closureTextNodeExists(img)
	title := attrX(img.Attr, "title")
	if !duplicated {
		label = title
	}
	content := fmt.Sprintf("[img] %v %v | ", label, urlBeautify(attrX(img.Attr, "href")))

	img.Attr = attrSet(img.Attr, "cfrom", "img")
	img.AppendChild(dom.Nd("text", content))
}

开发者ID:aarzilli，项目名称:tools，代码行数:32，代码来源:06_img2link.go

示例9: CompactNode

// CompactNode simplifies the subtree rooted at n in place, bottom-up.
//
// For every child, after compacting it recursively:
//   - a child whose tag is listed in _mergeTextElements is removed and its
//     own children are collected to be appended to n afterwards;
//   - an element child with no children that is not a void element is
//     removed.
//
// If n then has exactly one child:
//   - a child with the same tag as n, or a lone <br> inside <p>/<div>, is
//     replaced by its own children;
//   - an <img> that is the only child of an <a> replaces the <a>.
//
// NOTE(review): RemoveNode presumably unlinks the node and returns the node
// to continue iterating from; confirm against its definition.
func CompactNode(n *html.Node) {
	var hoisted []*html.Node
	for c := n.FirstChild; c != nil; {
		CompactNode(c)
		switch {
		case _mergeTextElements[c.Data]:
			hoisted = append(hoisted, GetChildNodes(c)...)
			log.Info("delete", c.Data)
			c = RemoveNode(c)
		case c.Type == html.ElementNode && c.FirstChild == nil && !_voidElements[c.Data]:
			log.Info("delete", c.Data)
			c = RemoveNode(c)
		default:
			c = c.NextSibling
		}
	}

	DetachNodes(hoisted)
	AppendChildNodes(n, hoisted)

	only := n.FirstChild
	if only == nil || only.NextSibling != nil {
		return
	}

	switch {
	case only.Data == n.Data || (only.Data == "br" && (n.Data == "p" || n.Data == "div")):
		grandchildren := GetChildNodes(only)
		log.Info("delete", only.Data)
		n.RemoveChild(only)
		DetachNodes(grandchildren)
		AppendChildNodes(n, grandchildren)

	case only.Data == "img" && n.Data == "a":
		// Let n take over the identity of the <img>. Only the content fields
		// are copied: a whole-struct copy (*n = *only) would also overwrite
		// n's Parent/PrevSibling/NextSibling with the child's, leaving n as
		// its own parent and cutting off every sibling that follows it.
		n.RemoveChild(only)
		n.Type, n.DataAtom, n.Data = only.Type, only.DataAtom, only.Data
		n.Namespace, n.Attr = only.Namespace, only.Attr
		// Re-home any children of the image so their Parent links are valid.
		for gc := only.FirstChild; gc != nil; gc = only.FirstChild {
			only.RemoveChild(gc)
			n.AppendChild(gc)
		}
	}
}

开发者ID:justintan，项目名称:gox，代码行数:30，代码来源:node.go

示例10: toDiv

// toDiv rewrites node in place into an attribute-less <div> and then
// continues with its children via parseChildren, whose result is returned
// as-is.
func (m *minificationHTML) toDiv(node *html.Node) (*html.Node, error) {
	node.Attr = nil
	node.Data, node.DataAtom = "div", atom.Div
	return m.parseChildren(node)
}

开发者ID:ReanGD，项目名称:go-web-search，代码行数:7，代码来源:minification_html.go

示例11: convert

// convert turns the nodes in s into a chain of x/net/html.Node siblings whose
// Parent is parent, and returns the first and last node of that chain (both
// nil when nothing was converted).
//
// Nil entries are skipped. A document node is not converted itself; its
// children are converted and spliced into the chain in its place. A document
// node without children contributes nothing and leaves the chain built so
// far intact.
func (s Siblings) convert(parent *html.Node) (first, last *html.Node) {
	var prev *html.Node
	for _, n := range s {
		if n == nil {
			continue
		}

		// start..end is the run of siblings contributed by n.
		var start, end *html.Node
		if n.Type == html.DocumentNode {
			start, end = n.Children.convert(parent)
			if start == nil {
				// Empty document: keep prev, otherwise the next node would
				// be mistaken for the head of the chain.
				continue
			}
		} else {
			h := n.convert()
			h.Parent = parent
			start, end = h, h
		}

		// Link the run behind what has been built so far, in both directions.
		start.PrevSibling = prev
		if prev != nil {
			prev.NextSibling = start
		} else {
			first = start
		}
		prev = end
	}
	return first, prev
}

开发者ID:arnehormann，项目名称:hck，代码行数:31，代码来源:nodes.go

示例12: reIndent

// reIndent walks the subtree rooted at n depth-first and adjusts whitespace:
// it adds newline/tab text nodes around elements and comments and normalizes
// the padding of text nodes. lvl is the depth of n; indentation is applied
// only to nodes deeper than cScaffoldLvls and uses lvl-2 tab characters.
//
// NOTE(review): dom.InsertBefore / dom.InsertAfter presumably insert the new
// node as the previous / next sibling of their first argument; confirm
// against the dom package.
func reIndent(n *html.Node, lvl int) {

	// A node below the scaffold levels must have a parent; the element cases
	// below dereference n.Parent. Log the anomaly and stop.
	if lvl > cScaffoldLvls && n.Parent == nil {
		// The subtree dump is computed but only used by the disabled log
		// line below.
		bb := dom.PrintSubtree(n)
		_ = bb
		// log.Printf("%s", bb.Bytes())
		hint := ""
		if ml3[n] > 0 {
			hint = "   from ml3"
		}
		log.Print("reIndent: no parent ", hint)
		return
	}

	// Before children processing
	switch n.Type {
	case html.ElementNode:
		// Line break plus indentation in front of nested elements.
		if lvl > cScaffoldLvls && n.Parent.Type == html.ElementNode {
			ind := strings.Repeat("\t", lvl-2)
			dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n" + ind})
		}
	case html.CommentNode:
		// Comments get a plain line break in front, without indentation.
		dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n"})
	case html.TextNode:
		// Collapse the outer whitespace to a single trailing space and add a
		// leading space, unless the text starts with "," or ".".
		n.Data = strings.TrimSpace(n.Data) + " "
		if !strings.HasPrefix(n.Data, ",") && !strings.HasPrefix(n.Data, ".") {
			n.Data = " " + n.Data
		}
		// Link texts get no surrounding space at all.
		if n.Parent != nil && n.Parent.Data == "a" {
			n.Data = strings.TrimSpace(n.Data)
		}
	}

	// Children
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		reIndent(c, lvl+1)
	}

	// After children processing
	switch n.Type {
	case html.ElementNode:
		// The original author notes that this step has to happen AFTER the
		// children were processed, without knowing the reason.
		if lvl > cScaffoldLvls && n.Parent.Type == html.ElementNode {
			ind := strings.Repeat("\t", lvl-2)
			ind = "\n" + ind
			// Link texts get no new line.
			if n.Data == "a" {
				ind = ""
			}
			// Only elements that have children get the trailing text node,
			// inserted after the current last child.
			if n.LastChild != nil {
				dom.InsertAfter(n.LastChild, &html.Node{Type: html.TextNode, Data: ind})
			}
		}
	}

}

开发者ID:aarzilli，项目名称:tools，代码行数:58，代码来源:09_reformat_indent.go

示例13: topDownV1

/*
   div                     div
       div                     p
           p         TO        img
           img                 p
           p


	Operates from the *middle* div (n).
	Each child of n is removed and reattached one level higher, directly in
	front of n, so the original order of the children is kept.
	Finally the intermediary, now childless div is removed.




   \                  /
    \       /\       /
     \_____/  \_____/

     \              /
      \_____/\_____/

       \__________/     => Breaks are gone


       \p1___p2___/     => Wrapping preserves breaks




*/

// topDownV1 flattens one level of nesting: when n has tag couple[1], its
// parent is an element with tag couple[0], and n has at least one child, the
// children of n are moved up to take n's place, n is removed, and the
// parent's tag is set to parentType.
//
// Text nodes and <a> elements are wrapped in a <p cfrm="div"> on the way up
// so the visual break the removed container provided is preserved. Nodes
// without a parent (per noParent) and nodes without children are left
// untouched. couple must hold at least two tag names.
func topDownV1(n *html.Node, couple []string, parentType string) {
	if noParent(n) {
		return
	}
	p := n.Parent

	parentMatches := p.Type == html.ElementNode && p.Data == couple[0]
	nodeMatches := n.Type == html.ElementNode && n.Data == couple[1]
	if !parentMatches || !nodeMatches || n.FirstChild == nil {
		return
	}

	var next *html.Node
	for c := n.FirstChild; c != nil; c = next {
		// RemoveChild clears c.NextSibling, so remember the successor first.
		next = c.NextSibling
		n.RemoveChild(c)

		if c.Type == html.TextNode || c.Data == "a" {
			wrap := &html.Node{
				Type: html.ElementNode,
				Data: "p",
				Attr: []html.Attribute{{Key: "cfrm", Val: "div"}},
			}
			// AppendChild sets FirstChild, LastChild and c.Parent together.
			wrap.AppendChild(c)
			p.InsertBefore(wrap, n)
			continue
		}
		p.InsertBefore(c, n)
	}

	p.RemoveChild(n)
	p.Data = parentType
}

开发者ID:aarzilli，项目名称:tools，代码行数:89，代码来源:03_top_down_v1.go

示例14: runMergeNodes

// runMergeNodes is a test helper: it appends prev and next (each only when
// non-nil, in that order) to parent and then returns the result of
// parserUtils.mergeNodes for the same arguments.
func runMergeNodes(parent, prev, next *html.Node, addSeparator bool) *html.Node {
	for _, child := range []*html.Node{prev, next} {
		if child == nil {
			continue
		}
		parent.AppendChild(child)
	}

	var u parserUtils
	return u.mergeNodes(parent, prev, next, addSeparator)
}

开发者ID:ReanGD，项目名称:go-web-search，代码行数:10，代码来源:parser_utils_test.go

示例15: setNodeText

// setNodeText replaces all children of node with a single text node whose
// content is s.
func setNodeText(node *html.Node, s string) {
	// Detach the existing children one by one, always taking the current head.
	for child := node.FirstChild; child != nil; child = node.FirstChild {
		node.RemoveChild(child)
	}

	text := &html.Node{Type: html.TextNode, Data: s}
	node.AppendChild(text)
}

开发者ID:albertjin，项目名称:goquery，代码行数:12，代码来源:mutate.go

示例16: openTag

// openTag unwraps node: its children are moved into node's parent at the
// position node occupied, in their original order, and node itself is
// detached from the tree. Siblings of node are kept in place.
//
// After the call node has no parent, no siblings and no children. A node
// without a parent is left unchanged.
func (m *minificationText) openTag(node *html.Node) {
	parent := node.Parent
	if parent == nil {
		return
	}

	var next *html.Node
	for c := node.FirstChild; c != nil; c = next {
		// RemoveChild clears c.NextSibling, so remember the successor first.
		next = c.NextSibling
		node.RemoveChild(c)
		parent.InsertBefore(c, node)
	}
	parent.RemoveChild(node)
}

开发者ID:ReanGD，项目名称:go-web-search，代码行数:11，代码来源:minification_text.go

示例17: removeUnwanted

// removeUnwanted removes the direct children of n that are <script> elements
// or comment nodes.
//
// The children are collected first because RemoveChild clears the removed
// node's NextSibling, which would end a direct sibling walk early.
func removeUnwanted(n *html.Node) {
	var children []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		children = append(children, c)
	}
	for _, c := range children {
		// Decide per child. Testing n here would only ever empty <script>
		// elements, and the comment test could never apply because comment
		// nodes have no children.
		if (c.Type == html.ElementNode && c.Data == "script") || c.Type == html.CommentNode {
			n.RemoveChild(c)
		}
	}
}

开发者ID:aarzilli，项目名称:tools，代码行数:11，代码来源:6_dir_digest_3.go

示例18: removeUnwanted

// removeUnwanted removes every direct child of n whose Data is flagged in
// unwanteds.
//
// RemoveChild clears the NextSibling of the node it removes, so the
// successor is remembered before a child is inspected instead of being read
// from the child afterwards.
func removeUnwanted(n *html.Node) {
	var next *html.Node
	for child := n.FirstChild; child != nil; child = next {
		next = child.NextSibling
		if !unwanteds[child.Data] {
			continue
		}
		n.RemoveChild(child)
	}
}

开发者ID:aarzilli，项目名称:tools，代码行数:17，代码来源:01_cleanse.go

示例19: TestParseATagNoHref

// TestParseATagNoHref checks that parsing an <a> node without any attributes
// adds no link to the page.
func TestParseATagNoHref(t *testing.T) {
	anchor := &html.Node{Data: "a"}

	page := newWebPage(startUrl)
	page.parseATag(anchor)

	const want = 0
	if got := page.links.Len(); got != want {
		t.Error("Expected:", want, " Got:", got)
	}
}

开发者ID:zlisinski，项目名称:go_crawl，代码行数:13，代码来源:go_crawl_test.go

示例20: replaceNodeWithChildren

// replaceNodeWithChildren unwraps n: each child of n is moved into n's
// parent directly in front of n, preserving order, and n is then removed
// from the parent.
func replaceNodeWithChildren(n *html.Node) {
	parent := n.Parent

	// Always take the current first child; once it has been removed the next
	// one becomes n.FirstChild.
	for child := n.FirstChild; child != nil; child = n.FirstChild {
		n.RemoveChild(child)
		parent.InsertBefore(child, n)
	}

	parent.RemoveChild(n)
}

开发者ID:jpoehls，项目名称:feedmailer，代码行数:13，代码来源:readability.go

注：本文中的golang.org/x/net/html.Node类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。