This article collects typical usage examples of the Find function from the Golang package github.com/yhat/scrape. If you are wondering exactly what the Golang Find function does, how to call it, or what real-world uses look like, the curated examples below should help.
Twenty code examples of the Find function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Golang code examples.
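Before the examples, here is a minimal, self-contained sketch of the call shape they all share. It reflects the API as used in the snippets below: Find takes a root *html.Node and a matcher function and returns the first matching node plus an ok flag. The fetched URL is only a placeholder:

package main

import (
	"fmt"
	"net/http"

	"github.com/yhat/scrape"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)

func main() {
	resp, err := http.Get("https://example.com/")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	// Find walks the tree and returns the first node the matcher accepts,
	// plus an ok flag reporting whether anything matched at all.
	if title, ok := scrape.Find(root, scrape.ByTag(atom.Title)); ok {
		fmt.Println(scrape.Text(title))
	}
}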
Example 1: parseBroadcastFromHtmlNode
func (bc *broadcast) parseBroadcastFromHtmlNode(root *html.Node) (ret []*r.Broadcast, err error) {
	{
		// Author
		meta, _ := scrape.Find(root, func(n *html.Node) bool {
			return atom.Meta == n.DataAtom && "Author" == scrape.Attr(n, "name")
		})
		if nil != meta {
			content := scrape.Attr(meta, "content")
			bc.Author = &content
		}
	}
	for idx, epg := range scrape.FindAll(root, func(n *html.Node) bool {
		return atom.Div == n.DataAtom && "epg-content-right" == scrape.Attr(n, "class")
	}) {
		if idx != 0 {
			err = errors.New("There was more than 1 <div class='epg-content-right'/>")
			return
		}
		{
			// TitleEpisode
			txt, _ := scrape.Find(epg, func(n *html.Node) bool {
				return html.TextNode == n.Type && atom.H3 == n.Parent.DataAtom && atom.Br == n.NextSibling.DataAtom
			})
			if nil != txt {
				t := strings.TrimSpace(r.NormaliseWhiteSpace(txt.Data))
				bc.TitleEpisode = &t
				txt.Parent.RemoveChild(txt.NextSibling)
				txt.Parent.RemoveChild(txt)
			}
		}
		{
			// Subject
			a, _ := scrape.Find(epg, func(n *html.Node) bool {
				return atom.Div == n.Parent.DataAtom && "sendungsLink" == scrape.Attr(n.Parent, "class") && atom.A == n.DataAtom
			})
			if nil != a {
				u, _ := url.Parse(scrape.Attr(a, "href"))
				bc.Subject = bc.Source.ResolveReference(u)
			}
		}
		// purge some cruft
		for _, nn := range scrape.FindAll(epg, func(n *html.Node) bool {
			clz := scrape.Attr(n, "class")
			return atom.H2 == n.DataAtom ||
				"mod modSharing" == clz ||
				"modGalery" == clz ||
				"sendungsLink" == clz ||
				"tabs-container" == clz
		}) {
			nn.Parent.RemoveChild(nn)
		}
		{
			description := r.TextWithBrFromNodeSet(scrape.FindAll(epg, func(n *html.Node) bool { return epg == n.Parent }))
			bc.Description = &description
		}
	}
	bc_ := r.Broadcast(*bc)
	ret = append(ret, &bc_)
	return
}
Developer: mro, Project: internet-radio-recorder, Lines: 60, Source: wdr.go
Example 2: fetchExtraScheduleInfo
// fetchExtraScheduleInfo gets more information about each component.
//
// The rootNode argument should be the parsed schedule list view.
func fetchExtraScheduleInfo(client *http.Client, courses []Course, rootNode *html.Node) error {
	psForm, ok := scrape.Find(rootNode, scrape.ByClass("PSForm"))
	if !ok {
		return errors.New("could not find PSForm")
	}
	icsid, ok := scrape.Find(psForm, scrape.ById("ICSID"))
	if !ok {
		return errors.New("could not find ICSID")
	}
	formAction := getNodeAttribute(psForm, "action")
	sid := getNodeAttribute(icsid, "value")
	// TODO: figure out if there's a way to make this more robust or to load it lazily.
	sectionIndex := 0
	for courseIndex := range courses {
		course := &courses[courseIndex]
		for componentIndex := range course.Components {
			component := &course.Components[componentIndex]
			postData := generateClassDetailForm(sid, sectionIndex)
			res, reqErr := client.PostForm(formAction, postData)
			if res != nil {
				// NB: deferred closes run only when this function returns, so
				// response bodies accumulate until all sections are fetched.
				defer res.Body.Close()
			}
			if reqErr != nil {
				return reqErr
			}
			courseOpen, parseErr := parseExtraComponentInfo(res.Body, component)
			if parseErr != nil {
				return parseErr
			}
			course.Open = &courseOpen
			postData = generateClassDetailBackForm(sid, sectionIndex)
			res, reqErr = client.PostForm(formAction, postData)
			if res != nil {
				defer res.Body.Close()
			}
			if reqErr != nil {
				return reqErr
			}
			sectionIndex++
		}
	}
	return nil
}
Developer: unixpickle, Project: better-student-center, Lines: 53, Source: schedule.go
Example 3: NewListing
func NewListing(ctx appengine.Context, url string) (*Listing, error) {
	client := urlfetch.Client(ctx)
	resp, err := client.Get("http://167.88.16.61:2138/" + url)
	if err != nil {
		ctx.Errorf("%s", err)
		return nil, errors.New("Get listing failed")
	}
	ctx.Debugf("Craigslist request came back with status: %s", resp.Status)
	root, err := html.Parse(resp.Body)
	if err != nil {
		ctx.Errorf("%s", "Parsing Error")
		return nil, errors.New("Parse body failed")
	}
	title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
	if !ok {
		ctx.Errorf("%s", "Error getting title")
		return nil, errors.New("Get title failed")
	}
	price, ok := scrape.Find(root, scrape.ByClass("price"))
	if !ok {
		ctx.Errorf("%s", "Error getting price")
		return nil, errors.New("Get price failed")
	}
	// strip the leading "$" before parsing the price
	intPrice, err := strconv.Atoi(scrape.Text(price)[1:])
	if err != nil {
		ctx.Errorf("Error casting price: %s", scrape.Text(price))
		return nil, err
	}
	images := scrape.FindAll(root, scrape.ByTag(atom.Img))
	imageUrl := ""
	for _, image := range images {
		if scrape.Attr(image, "title") == "image 1" {
			imageUrl = scrape.Attr(image, "src")
		}
	}
	ctx.Debugf("Craigslist returned listing.Price: %d, listing.Title: %s", intPrice, scrape.Text(title))
	return &Listing{
		Url:      url,
		Title:    scrape.Text(title),
		Price:    intPrice,
		ImageUrl: imageUrl,
	}, nil
}
Developer: matthewdu, Project: powerplug, Lines: 49, Source: craigslist.go
Example 4: Auth
// Auth attempts to access a given URL, then enters the given
// credentials when the URL redirects to a login page.
func (s *Session) Auth(serviceURL, email, password string) error {
	resp, err := s.Get(serviceURL)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	parsed, err := html.ParseFragment(resp.Body, nil)
	if err != nil {
		return err
	} else if len(parsed) == 0 {
		return errors.New("login page has no root element")
	}
	root := parsed[0]
	form, ok := scrape.Find(root, scrape.ById("gaia_loginform"))
	if !ok {
		return errors.New("failed to process login page")
	}
	submission := url.Values{}
	for _, input := range scrape.FindAll(form, scrape.ByTag(atom.Input)) {
		submission.Add(getAttribute(input, "name"), getAttribute(input, "value"))
	}
	submission["Email"] = []string{email}
	submission["Passwd"] = []string{password}
	postResp, err := s.PostForm(resp.Request.URL.String(), submission)
	if err != nil {
		return err
	}
	postResp.Body.Close()
	// A successful login redirects away (the final request is a GET);
	// still sitting on the POST means the credentials were rejected.
	if postResp.Request.Method == "POST" {
		return errors.New("login incorrect")
	}
	return nil
}
Developer: unixpickle, Project: gscrape, Lines: 36, Source: auth.go
Example 5: TorrentList
func TorrentList(url string) ([]Torrent, error) {
	// request and parse the front page
	resp, err := http.Get(url)
	if err != nil {
		return make([]Torrent, 0), err
	}
	defer resp.Body.Close()
	root, err := html.Parse(resp.Body)
	if err != nil {
		return make([]Torrent, 0), err
	}
	var torrents []Torrent
	if content, ok := scrape.Find(root, scrape.ById("searchResult")); ok {
		// define a matcher: table rows directly inside the results tbody
		matcher := func(n *html.Node) bool {
			return n.DataAtom == atom.Tr && n.Parent.DataAtom == atom.Tbody
		}
		// grab all result rows and parse them into Torrent records
		trs := scrape.FindAll(content, matcher)
		for _, tr := range trs {
			torrents = append(torrents, ParseRecord(tr))
		}
	}
	return torrents, nil
}
Developer: anykao, Project: p, Lines: 29, Source: main.go
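A hypothetical call site for TorrentList might look like the following; the URL is a placeholder, and the Torrent type and ParseRecord come from the same project:

torrents, err := TorrentList("https://thepiratebay.se/recent")
if err != nil {
	log.Fatal(err)
}
for _, t := range torrents {
	fmt.Printf("%+v\n", t)
}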
Example 6: indexPage
func indexPage(page string) (ind map[string]int, branches []string, err error) {
	resp, err := http.Get(page)
	if err != nil {
		return
	}
	root, err := html.Parse(resp.Body)
	resp.Body.Close()
	if err != nil {
		return
	}
	content, ok := scrape.Find(root, scrape.ById("bodyContent"))
	if !ok {
		return nil, nil, errors.New("no bodyContent element")
	}
	paragraphs := scrape.FindAll(content, scrape.ByTag(atom.P))
	pageText := ""
	for _, p := range paragraphs {
		pageText += elementInnerText(p) + " "
	}
	// count word frequencies across the page text
	words := strings.Fields(strings.ToLower(pageText))
	ind = map[string]int{}
	for _, word := range words {
		ind[word]++
	}
	links := findWikiLinks(content)
	branches = make([]string, len(links))
	for i, link := range links {
		branches[i] = "https://en.wikipedia.org" + link
	}
	return
}
Developer: unixpickle, Project: weakai, Lines: 35, Source: index.go
Example 7: fillJobStruct
func fillJobStruct(n *html.Node) *Job {
	job := new(Job)
	job.Title = scrape.Text(n)
	job.RetriveOn = time.Now().Format(time.RFC822Z)
	job.url = jobUrl(n)
	fmt.Println(job.url) // debug output
	job.ID = jobID(job.url)
	job.EmailFormLink = jobEmailFromUrl + job.ID
	jp := fetchByID(job.ID)
	job.jobPage = jp
	// NB: the ok results are discarded, so this assumes the description and
	// requirements nodes are always present in the job page.
	desc, _ := scrape.Find(job.jobPage, descriptionMatcher)
	job.Description = scrape.Text(desc)
	req, _ := scrape.Find(job.jobPage, requiermentMatcher)
	job.Requierments = scrape.Text(req)
	return job
}
Developer: gozes, Project: co, Lines: 17, Source: co.go
Example 8: parseGenericLoginForm
// parseGenericLoginForm takes a login page and parses the first form it finds, treating it as the
// login form.
func parseGenericLoginForm(res *http.Response) (result *loginFormInfo, err error) {
	parsed, err := html.ParseFragment(res.Body, nil)
	if err != nil {
		return
	} else if len(parsed) != 1 {
		return nil, errors.New("wrong number of root elements")
	}
	root := parsed[0]
	var form loginFormInfo
	htmlForm, ok := scrape.Find(root, scrape.ByTag(atom.Form))
	if !ok {
		return nil, errors.New("no form element found")
	}
	if actionStr := getNodeAttribute(htmlForm, "action"); actionStr == "" {
		form.action = res.Request.URL.String()
	} else {
		actionURL, err := url.Parse(actionStr)
		if err != nil {
			return nil, err
		}
		// fill in any URL components a relative action leaves out
		if actionURL.Host == "" {
			actionURL.Host = res.Request.URL.Host
		}
		if actionURL.Scheme == "" {
			actionURL.Scheme = res.Request.URL.Scheme
		}
		if !path.IsAbs(actionURL.Path) {
			actionURL.Path = path.Join(res.Request.URL.Path, actionURL.Path)
		}
		form.action = actionURL.String()
	}
	inputs := scrape.FindAll(root, scrape.ByTag(atom.Input))
	form.otherFields = url.Values{}
	for _, input := range inputs {
		inputName := getNodeAttribute(input, "name")
		switch getNodeAttribute(input, "type") {
		case "text":
			form.usernameField = inputName
		case "password":
			form.passwordField = inputName
		default:
			form.otherFields.Add(inputName, getNodeAttribute(input, "value"))
		}
	}
	if form.usernameField == "" {
		return nil, errors.New("no username field found")
	} else if form.passwordField == "" {
		return nil, errors.New("no password field found")
	}
	return &form, nil
}
Developer: unixpickle, Project: better-student-center, Lines: 60, Source: html.go
Example 9: parsepost
func parsepost(n *html.Node) Post {
	post := Post{}
	// get the title. uses a scrape inbuilt matcher
	// (the ok result is discarded, so this assumes a .title node exists)
	title_scrape, _ := scrape.Find(n, scrape.ByClass("title"))
	title := scrape.Text(title_scrape.FirstChild)
	// get the subreddit. This requires a custom matcher.
	matcher := func(n *html.Node) bool {
		if n.DataAtom == atom.A && n.Parent != nil {
			return scrape.Attr(n, "class") == "subreddit hover may-blank"
		}
		return false
	}
	sub, _ := scrape.Find(n, matcher)
	subreddit := scrape.Text(sub)
	// get the url to the comments. requires custom matcher.
	matcher = func(n *html.Node) bool {
		if n.DataAtom == atom.Ul && n.FirstChild != nil {
			return scrape.Attr(n, "class") == "flat-list buttons" && scrape.Attr(n.FirstChild, "class") == "first"
		}
		return false
	}
	ul, _ := scrape.Find(n, matcher)          // ul is a list of two buttons: one that links to a post's comments page, one a "share" function
	li := ul.FirstChild                       // the first list item of ul -- this will always be the comments page link.
	url := scrape.Attr(li.FirstChild, "href") // finally, the url found in the list item.
	// get the author. Uses custom matcher and magic.
	matcher = func(n *html.Node) bool {
		if n.DataAtom == atom.A && n.Parent.DataAtom == atom.P {
			return strings.Contains(scrape.Attr(n, "href"), "/user/")
		}
		return false
	}
	author_scrape, _ := scrape.Find(n, matcher)
	author := scrape.Text(author_scrape)
	post.title = title
	post.subreddit = subreddit
	post.url = url
	post.author = author
	return post
}
Developer: jalavosus, Project: redditscraper, Lines: 45, Source: reddit_scraper.go
Example 10: findHTMLTitle
func findHTMLTitle(doc *html.Node) string {
	el, found := scrape.Find(doc, scrape.ByTag(atom.Title))
	if !found {
		return ""
	}
	return scrape.Text(el)
}
Developer: mcmillan, Project: socialite, Lines: 9, Source: title.go
Example 11: ParseName
func ParseName(n *html.Node) (string, string, string) {
	matcher := func(n *html.Node) bool {
		// must check for nil values
		if n.DataAtom == atom.A && n.Parent.DataAtom == atom.Td {
			return true
		}
		return false
	}
	var name, magnet, desc string
	if detName, ok := scrape.Find(n, scrape.ByClass("detName")); ok {
		name = scrape.Text(detName)
	}
	if anchor, ok := scrape.Find(n, matcher); ok {
		magnet = scrape.Attr(anchor, "href")
	}
	if detDesc, ok := scrape.Find(n, scrape.ByClass("detDesc")); ok {
		desc = scrape.Text(detDesc)
	}
	return name, magnet, desc
}
Developer: anykao, Project: p, Lines: 22, Source: main.go
Example 12: TweetsToUser
func TweetsToUser(u user.User) []tweet.Tweet {
	reqURL := SearchURL
	_url.SetQueryParams(&reqURL, map[string]string{
		"q": "to:" + u.ScreenName,
		"f": "tweets",
	})
	res, err := http.Get(reqURL.String())
	PanicIf(err)
	root, err := html.Parse(res.Body)
	PanicIf(err)
	tweetsMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.Div && strings.HasPrefix(scrape.Attr(n, "class"), "tweet original-tweet")
	}
	tweetScreenNameMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.Span && strings.HasPrefix(scrape.Attr(n, "class"), "username")
	}
	tweetTextMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.P && strings.HasSuffix(scrape.Attr(n, "class"), "tweet-text")
	}
	tweetNodes := scrape.FindAll(root, tweetsMatcher)
	tweets := make([]tweet.Tweet, len(tweetNodes))
	for i, n := range tweetNodes {
		t := tweet.Tweet{
			ID: scrape.Attr(n, "data-user-id"),
		}
		if child, ok := scrape.Find(n, tweetScreenNameMatcher); ok {
			t.Author = *user.NewUser(scrape.Text(child))
		}
		if child, ok := scrape.Find(n, tweetTextMatcher); ok {
			t.Text = scrape.Text(child)
		}
		tweets[i] = t
	}
	return tweets
}
Developer: mrap, Project: twitterget, Lines: 39, Source: search.go
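A hypothetical call site, assuming the user.NewUser constructor already used inside TweetsToUser above; the screen name is a placeholder:

for _, t := range TweetsToUser(*user.NewUser("golang")) {
	fmt.Printf("@%s: %s\n", t.Author.ScreenName, t.Text)
}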
Example 13: resolveUrl
func resolveUrl(website string) string {
	site := getURL(website)
	contents, err := html.Parse(site.Body)
	if err != nil {
		fmt.Printf("%s", err)
		os.Exit(1)
	}
	// the ok result is discarded, so this assumes the page has a <title>
	title, _ := scrape.Find(contents, scrape.ByTag(atom.Title))
	titulo := scrape.Text(title)
	return titulo
}
Developer: ChrisFernandez, Project: GoBot, Lines: 14, Source: gobot.go
Example 14: findOpenGraphTitle
func findOpenGraphTitle(doc *html.Node) string {
	el, found := scrape.Find(doc, func(n *html.Node) bool {
		if n.DataAtom == atom.Meta {
			return scrape.Attr(n, "property") == "og:title" && scrape.Attr(n, "content") != ""
		}
		return false
	})
	if !found {
		return ""
	}
	return scrape.Attr(el, "content")
}
Developer: mcmillan, Project: socialite, Lines: 15, Source: title.go
Example 15: findTwitterTitle
func findTwitterTitle(doc *html.Node) string {
	el, found := scrape.Find(doc, func(n *html.Node) bool {
		if n.DataAtom == atom.Meta {
			return scrape.Attr(n, "name") == "twitter:title" && scrape.Attr(n, "content") != ""
		}
		return false
	})
	if !found {
		return ""
	}
	return scrape.Attr(el, "content")
}
Developer: mcmillan, Project: socialite, Lines: 15, Source: title.go
Example 16: queryWikipedia
func queryWikipedia(word string) string {
	word = strings.TrimSpace(word)
	website := "http://en.wikipedia.com/wiki/" + word
	site := getURL(website)
	contents, err := html.Parse(site.Body)
	if err != nil {
		fmt.Printf("%s", err)
		panic(err)
	}
	// use the article's first paragraph as the reply
	intro, _ := scrape.Find(contents, scrape.ByTag(atom.P))
	resp := scrape.Text(intro)
	return resp
}
Developer: ChrisFernandez, Project: GoBot, Lines: 15, Source: gobot.go
Example 17: History
// History asynchronously fetches the user's
// video viewing history.
// You may provide a cancel channel which you
// can close to cancel the fetch mid-way.
func (y *Youtube) History(cancel <-chan struct{}) (<-chan *YoutubeVideoInfo, <-chan error) {
	videoChan := make(chan *YoutubeVideoInfo)
	errChan := make(chan error, 1)
	go func() {
		defer close(videoChan)
		defer close(errChan)
		historyReq, _ := http.NewRequest("GET", "https://www.youtube.com/feed/history", nil)
		historyReq.Header.Set("User-Agent", spoofedUserAgent)
		resp, err := y.s.Do(historyReq)
		if err != nil {
			errChan <- err
			return
		}
		rootNode, err := html.Parse(resp.Body)
		resp.Body.Close()
		if err != nil {
			errChan <- err
			return
		}
		loadMoreHTML := rootNode
		contentHTML := rootNode
		for {
			items := parseHistoryItems(contentHTML)
			for _, item := range items {
				select {
				case videoChan <- item:
				case <-cancel:
					return
				}
			}
			if loadMoreHTML == nil {
				break
			}
			loadButton, ok := scrape.Find(loadMoreHTML, scrape.ByClass("yt-uix-load-more"))
			if !ok {
				// no "load more" button left: the history has been exhausted
				break
			}
			morePath := scrape.Attr(loadButton, "data-uix-load-more-href")
			loadMoreHTML, contentHTML, err = y.fetchMoreHistory(morePath)
			if err != nil {
				errChan <- err
				return
			}
		}
	}()
	return videoChan, errChan
}
Developer: unixpickle, Project: gscrape, Lines: 52, Source: youtube.go
Example 18: main
func main() {
	resp, err := http.Get("https://www.reddit.com")
	if err != nil {
		panic(err)
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	matcher := func(n *html.Node) bool {
		if n.DataAtom == atom.Div && n.Parent != nil {
			return scrape.Attr(n, "id") == "siteTable"
		}
		return false
	}
	table, ok := scrape.Find(root, matcher)
	if !ok {
		panic("could not find siteTable")
	}
	matcher = func(n *html.Node) bool {
		if n.DataAtom == atom.Div && n.Parent != nil {
			return scrape.Attr(n, "data-type") == "link"
		}
		return false
	}
	articles := scrape.FindAll(table, matcher)
	// each goroutine writes to its own index, so the slice needs no locking
	posts := make([]Post, len(articles))
	for i := 0; i < len(articles); i++ {
		wg.Add(1)
		go func(i int, n *html.Node) {
			posts[i] = parsepost(n)
			wg.Done()
		}(i, articles[i])
	}
	wg.Wait()
	for i := 0; i < len(posts); i++ {
		printpost(posts[i])
	}
}
Developer: jalavosus, Project: redditscraper, Lines: 47, Source: reddit_scraper.go
Example 19: parseSchedule
// parseSchedule parses the courses from the schedule list view page.
func parseSchedule(rootNode *html.Node) ([]Course, error) {
	courseTables := scrape.FindAll(rootNode, scrape.ByClass("PSGROUPBOXWBO"))
	result := make([]Course, 0, len(courseTables))
	for _, classTable := range courseTables {
		titleElement, ok := scrape.Find(classTable, scrape.ByClass("PAGROUPDIVIDER"))
		if !ok {
			// This will occur at least once, since the filter options are a PSGROUPBOXWBO.
			continue
		}
		infoTables := scrape.FindAll(classTable, scrape.ByClass("PSLEVEL3GRIDNBO"))
		if len(infoTables) != 2 {
			return nil, errors.New("expected exactly 2 info tables but found " +
				strconv.Itoa(len(infoTables)))
		}
		courseInfoTable := infoTables[0]
		course, err := parseCourseInfoTable(courseInfoTable)
		if err != nil {
			return nil, err
		}
		// NOTE: there isn't really a standard way to parse the department/number.
		course.Name = nodeInnerText(titleElement)
		componentsInfoTable := infoTables[1]
		componentMaps, err := tableEntriesAsMaps(componentsInfoTable)
		if err != nil {
			return nil, err
		}
		course.Components = make([]Component, len(componentMaps))
		for i, componentMap := range componentMaps {
			course.Components[i], err = parseComponentInfoMap(componentMap)
			if err != nil {
				return nil, err
			}
		}
		result = append(result, course)
	}
	return result, nil
}
Developer: unixpickle, Project: better-student-center, Lines: 48, Source: schedule.go
Example 20: getTitle
func getTitle(url string) string {
	resp, err := http.Get(url)
	if err != nil {
		fmt.Println("error:", err)
		return "error"
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		fmt.Println("error:", err)
		return "error"
	}
	title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
	if ok {
		return scrape.Text(title)
	}
	return "unknown"
}
Developer: jasonthomas, Project: teddy-go, Lines: 22, Source: main.go
Note: the github.com/yhat/scrape.Find examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are drawn from open-source projects contributed by various developers; copyright remains with the original authors, and distribution or reuse is subject to each project's license. Do not reproduce without permission.