
Golang scrape.FindAll Function Code Examples


This article collects typical usage examples of the FindAll function from the Go package github.com/yhat/scrape. If you have been wondering what FindAll does, how to call it, or what it looks like in practice, the hand-picked code examples below should help.



The sections below present 20 code examples of the FindAll function, sorted by popularity by default.
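Before diving in, here is a minimal, self-contained sketch of the pattern every example below shares: parse a page with golang.org/x/net/html, then pass the root node and a matcher to scrape.FindAll, which walks the tree and returns all matching nodes. The URL and the <a>-tag matcher here are illustrative placeholders, not taken from any project below.

package main

import (
	"fmt"
	"net/http"

	"github.com/yhat/scrape"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)

func main() {
	// Fetch and parse an example page (placeholder URL).
	resp, err := http.Get("https://example.com/")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	// FindAll walks the parsed tree and returns every node the matcher
	// accepts; scrape.ByTag builds a matcher for one element type.
	for _, link := range scrape.FindAll(root, scrape.ByTag(atom.A)) {
		fmt.Printf("%s (%s)\n", scrape.Text(link), scrape.Attr(link, "href"))
	}
}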

Example 1: parseBroadcastFromHtmlNode

func (bc *broadcast) parseBroadcastFromHtmlNode(root *html.Node) (ret []*r.Broadcast, err error) {
	{
		// Author
		meta, _ := scrape.Find(root, func(n *html.Node) bool {
			return atom.Meta == n.DataAtom && "Author" == scrape.Attr(n, "name")
		})
		if nil != meta {
			content := scrape.Attr(meta, "content")
			bc.Author = &content
		}
	}
	for idx, epg := range scrape.FindAll(root, func(n *html.Node) bool {
		return atom.Div == n.DataAtom && "epg-content-right" == scrape.Attr(n, "class")
	}) {
		if idx != 0 {
			err = errors.New("There was more than 1 <div class='epg-content-right'/>")
			return
		}
		{
			// TitleEpisode
			txt, _ := scrape.Find(epg, func(n *html.Node) bool {
				return html.TextNode == n.Type && atom.H3 == n.Parent.DataAtom && atom.Br == n.NextSibling.DataAtom
			})
			if nil != txt {
				t := strings.TrimSpace(r.NormaliseWhiteSpace(txt.Data))
				bc.TitleEpisode = &t
				txt.Parent.RemoveChild(txt.NextSibling)
				txt.Parent.RemoveChild(txt)
			}
		}
		{
			// Subject
			a, _ := scrape.Find(epg, func(n *html.Node) bool {
				return atom.Div == n.Parent.DataAtom && "sendungsLink" == scrape.Attr(n.Parent, "class") && atom.A == n.DataAtom
			})
			if nil != a {
				u, _ := url.Parse(scrape.Attr(a, "href"))
				bc.Subject = bc.Source.ResolveReference(u)
			}
		}
		// purge some cruft
		for _, nn := range scrape.FindAll(epg, func(n *html.Node) bool {
			clz := scrape.Attr(n, "class")
			return atom.H2 == n.DataAtom ||
				"mod modSharing" == clz ||
				"modGalery" == clz ||
				"sendungsLink" == clz ||
				"tabs-container" == clz
		}) {
			nn.Parent.RemoveChild(nn)
		}
		{
			description := r.TextWithBrFromNodeSet(scrape.FindAll(epg, func(n *html.Node) bool { return epg == n.Parent }))
			bc.Description = &description
		}
	}
	bc_ := r.Broadcast(*bc)
	ret = append(ret, &bc_)
	return
}
Developer: mro | Project: internet-radio-recorder | Lines: 60 | Source: wdr.go


Example 2: TextWithBrFromNodeSet

func TextWithBrFromNodeSet(nodes []*html.Node) string {
	parts := make([]string, len(nodes))
	for i, node := range nodes {
		for _, tag := range []atom.Atom{atom.Br, atom.Tr} {
			for _, n := range scrape.FindAll(node, func(n *html.Node) bool { return tag == n.DataAtom }) {
				lfn := html.Node{Type: html.TextNode, Data: lineFeedMarker}
				n.Parent.InsertBefore(&lfn, n.NextSibling)
			}
		}
		for _, tag := range []atom.Atom{atom.P, atom.Div} {
			for _, n := range scrape.FindAll(node, func(n *html.Node) bool { return tag == n.DataAtom }) {
				lfn := html.Node{Type: html.TextNode, Data: lineFeedMarker + lineFeedMarker}
				n.Parent.InsertBefore(&lfn, n.NextSibling)
			}
		}
		tmp := []string{}
		for _, n := range scrape.FindAll(node, func(n *html.Node) bool { return html.TextNode == n.Type }) {
			tmp = append(tmp, n.Data)
		}
		parts[i] = strings.Join(tmp, "")
	}
	ret := strings.Join(parts, lineFeedMarker+lineFeedMarker)
	ret = NormaliseWhiteSpace(ret)
	ret = strings.Replace(ret, lineFeedMarker, "\n", -1)
	re := regexp.MustCompile("[ ]*(\\s)[ ]*") // collapse whitespace, keep \n
	ret = re.ReplaceAllString(ret, "$1")      // collapse whitespace (not the \n\n however)
	{
		re := regexp.MustCompile("\\s*\\n\\s*\\n\\s*") // collapse linefeeds
		ret = re.ReplaceAllString(ret, "\n\n")
	}
	return strings.TrimSpace(ret)
}
Developer: mro | Project: internet-radio-recorder | Lines: 32 | Source: html.go


Example 3: parseBroadcastURLsNode

func (day *timeURL) parseBroadcastURLsNode(root *html.Node) (ret []*broadcastURL, err error) {
	const closeDownHour int = 5
	for _, h4 := range scrape.FindAll(root, func(n *html.Node) bool { return atom.H4 == n.DataAtom }) {
		year, month, day_, err := timeForH4(scrape.Text(h4), &day.Time)
		if nil != err {
			panic(err)
		}
		// fmt.Printf("%d-%d-%d %s\n", year, month, day, err)
		for _, a := range scrape.FindAll(h4.Parent, func(n *html.Node) bool { return atom.A == n.DataAtom && atom.Dt == n.Parent.DataAtom }) {
			m := hourMinuteTitleRegExp.FindStringSubmatch(scrape.Text(a))
			if nil == m {
				panic(errors.New("Couldn't parse <a>"))
			}
			ur, _ := url.Parse(scrape.Attr(a, "href"))
			hour := r.MustParseInt(m[1])
			dayOffset := 0
			if hour < closeDownHour {
				dayOffset = 1
			}
			// fmt.Printf("%s %s\n", b.r.TimeURL.String(), b.Title)
			bcu := broadcastURL(r.BroadcastURL{
				TimeURL: r.TimeURL{
					Time:    time.Date(year, month, day_+dayOffset, hour, r.MustParseInt(m[2]), 0, 0, localLoc),
					Source:  *day.Source.ResolveReference(ur),
					Station: day.Station,
				},
				Title: strings.TrimSpace(m[3]),
			})
			ret = append(ret, &bcu)
		}
	}
	return
}
Developer: mro | Project: internet-radio-recorder | Lines: 33 | Source: br.go


Example 4: parseBroadcastsFromNode

func (day *timeURL) parseBroadcastsFromNode(root *html.Node) (ret []*r.Broadcast, err error) {
	nodes := scrape.FindAll(root, func(n *html.Node) bool { return atom.Div == n.DataAtom && "time" == scrape.Attr(n, "class") })
	ret = make([]*r.Broadcast, len(nodes))
	for index, tim := range nodes {
		// prepare response
		bc := r.Broadcast{
			BroadcastURL: r.BroadcastURL{
				TimeURL: r.TimeURL(*day),
			},
		}
		// some defaults
		bc.Language = &lang_de
		bc.Publisher = &publisher
		// set start time
		{
			div_t := strings.TrimSpace(scrape.Text(tim))
			if 5 != len(div_t) {
				continue
			}
			hour := r.MustParseInt(div_t[0:2])
			minute := r.MustParseInt(div_t[3:5])
			bc.Time = time.Date(day.Year(), day.Month(), day.Day(), hour, minute, 0, 0, day.TimeZone)
			if index > 0 {
				ret[index-1].DtEnd = &bc.Time
			}
		}
		for _, tit := range scrape.FindAll(tim.Parent, func(n *html.Node) bool {
			return atom.A == n.DataAtom && atom.Div == n.Parent.DataAtom && "descr" == scrape.Attr(n.Parent, "class")
		}) {
			// Title
			bc.Title = strings.TrimSpace(scrape.Text(tit))
			href := scrape.Attr(tit, "href")
			if "" != href {
				u, _ := url.Parse(href)
				bc.Subject = day.Source.ResolveReference(u)
			}

			desc_node := tit.Parent
			desc_node.RemoveChild(tit)
			description := r.TextWithBrFromNodeSet([]*html.Node{desc_node})
			bc.Description = &description
			// fmt.Fprintf(os.Stderr, "\n")
		}
		ret[index] = &bc
	}
	// fmt.Fprintf(os.Stderr, "len(ret) = %d '%s'\n", len(ret), day.Source.String())
	if len(nodes) > 0 {
		midnight := time.Date(day.Year(), day.Month(), day.Day(), 24, 0, 0, 0, day.TimeZone)
		ret[len(nodes)-1].DtEnd = &midnight
	}
	return
}
Developer: mro | Project: internet-radio-recorder | Lines: 52 | Source: m945.go


Example 5: Scrape

// Scrape scrapes a site for a keyword
func (q *query) Scrape() []*match {

	// Request the URL
	resp, err := http.Get(q.SiteURL)
	if err != nil {
		log.Fatal("Couldn't GET ", q.SiteURL, ": ", err)
	}
	defer resp.Body.Close()

	// Parse the contents of the URL
	root, err := html.Parse(resp.Body)
	if err != nil {
		log.Fatal("Unable to parse response: ", err)
	}

	// Grab all the posts and print them
	posts := scrape.FindAll(root, scrape.ByClass("description"))
	matches := make([]*match, len(posts))
	for i, post := range posts {
		matches[i] = &match{
			Title:       scrape.Text(post.FirstChild.NextSibling),
			Description: scrape.Text(post),
			Link:        "http://kijiji.ca" + scrape.Attr(post.FirstChild.NextSibling, "href"),
			Price:       scrape.Text(post.NextSibling.NextSibling),
			Matched:     false,
		}
	}

	return matches
}
Developer: bentranter | Project: kijiji-scrape | Lines: 32 | Source: main.go


Example 6: main

func main() {
	// request and parse the front page
	resp, err := http.Get("https://torguard.net/downloads.php")
	if err != nil {
		panic(err)
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}

	// define a matcher: keep every <tr> element
	matcher := func(n *html.Node) bool {
		return n.DataAtom == atom.Tr
	}
	// grab all table rows and print the one for the Debian x64 build
	articles := scrape.FindAll(root, matcher)
	for _, article := range articles {
		if strings.Contains(scrape.Text(article), "DEBIAN x64Bit") {
			fmt.Printf("%s\n", scrape.Text(article))
		}
		//fmt.Printf("%2d %s (%s)\n", i, scrape.Text(article), scrape.Attr(article, "href"))
	}
}
Developer: jmonmane | Project: scrape | Lines: 29 | Source: main.go


Example 7: Search

func Search(s JobSearch) []*Job {
	jobSlice := []*Job{}
	fmt.Println("before loop in search")

	for i := 0; i < 1000; i++ {
		go getPage(urlCh, respCh)
	}

	for s.root = fetchByKeyword(s.Keyword); checkNextPage(s); s.root = fetchNextPage(s.Keyword) {
		fmt.Println("in loop in search")
		jobs := scrape.FindAll(s.root, allJobMatcher)
		fmt.Println(len(jobs))

		for i, job := range jobs {
			fmt.Println(i)
			fmt.Println(job)
			j := fillJobStruct(job)
			jobSlice = append(jobSlice, j)
			fmt.Println(pager)
		}
		fmt.Println("befor if")
		if len(jobs) < 50 {
			break
		}

	}

	return jobSlice

}
Developer: gozes | Project: co | Lines: 30 | Source: co.go


Example 8: TorrentList

func TorrentList(url string) ([]Torrent, error) {
	// request and parse the front page
	resp, err := http.Get(url)
	if err != nil {
		return make([]Torrent, 0), err
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		return make([]Torrent, 0), err
	}
	var torrents []Torrent
	if content, ok := scrape.Find(root, scrape.ById("searchResult")); ok {
		// define a matcher: table rows directly under the <tbody>
		matcher := func(n *html.Node) bool {
			return n.DataAtom == atom.Tr && n.Parent.DataAtom == atom.Tbody
		}
		// grab all result rows and parse them into Torrent records
		trs := scrape.FindAll(content, matcher)
		for _, tr := range trs {
			torrents = append(torrents, ParseRecord(tr))
		}
	}
	resp.Body.Close()
	return torrents, nil
}
Developer: anykao | Project: p | Lines: 29 | Source: main.go


Example 9: indexPage

func indexPage(page string) (ind map[string]int, branches []string, err error) {
	resp, err := http.Get(page)
	if err != nil {
		return
	}
	root, err := html.Parse(resp.Body)
	resp.Body.Close()
	if err != nil {
		return
	}

	content, ok := scrape.Find(root, scrape.ById("bodyContent"))
	if !ok {
		return nil, nil, errors.New("no bodyContent element")
	}

	paragraphs := scrape.FindAll(content, scrape.ByTag(atom.P))
	pageText := ""
	for _, p := range paragraphs {
		pageText += elementInnerText(p) + " "
	}
	words := strings.Fields(strings.ToLower(pageText))

	ind = map[string]int{}
	for _, word := range words {
		ind[word] = ind[word] + 1
	}

	links := findWikiLinks(content)
	branches = make([]string, len(links))
	for i, link := range links {
		branches[i] = "https://en.wikipedia.org" + link
	}
	return
}
Developer: unixpickle | Project: weakai | Lines: 35 | Source: index.go


Example 10: main

func main() {
	// request and parse the front page
	resp, err := http.Get("https://news.ycombinator.com/")
	if err != nil {
		panic(err)
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}

	// define a matcher
	matcher := func(n *html.Node) bool {
		// must check for nil values
		if n.DataAtom == atom.A && n.Parent != nil && n.Parent.Parent != nil {
			return scrape.Attr(n.Parent.Parent, "class") == "athing"
		}
		return false
	}
	// grab all articles and print them
	articles := scrape.FindAll(root, matcher)
	for i, article := range articles {
		fmt.Printf("%2d %s (%s)\n", i, scrape.Text(article), scrape.Attr(article, "href"))
	}
}
Developer: abejenaru | Project: vagrant-boxes | Lines: 25 | Source: first.go


Example 11: Auth

// Auth attempts to access a given URL, then enters the given
// credentials when the URL redirects to a login page.
func (s *Session) Auth(serviceURL, email, password string) error {
	resp, err := s.Get(serviceURL)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	parsed, err := html.ParseFragment(resp.Body, nil)
	if err != nil || len(parsed) == 0 {
		return err
	}
	root := parsed[0]
	form, ok := scrape.Find(root, scrape.ById("gaia_loginform"))
	if !ok {
		return errors.New("failed to process login page")
	}
	submission := url.Values{}
	for _, input := range scrape.FindAll(form, scrape.ByTag(atom.Input)) {
		submission.Add(getAttribute(input, "name"), getAttribute(input, "value"))
	}
	submission["Email"] = []string{email}
	submission["Passwd"] = []string{password}

	postResp, err := s.PostForm(resp.Request.URL.String(), submission)
	if err != nil {
		return err
	}
	postResp.Body.Close()

	if postResp.Request.Method == "POST" {
		return errors.New("login incorrect")
	}

	return nil
}
Developer: unixpickle | Project: gscrape | Lines: 36 | Source: auth.go


Example 12: parseGenericLoginForm

// parseGenericLoginForm takes a login page and parses the first form it finds, treating it as the
// login form.
func parseGenericLoginForm(res *http.Response) (result *loginFormInfo, err error) {
	parsed, err := html.ParseFragment(res.Body, nil)
	if err != nil {
		return
	} else if len(parsed) != 1 {
		return nil, errors.New("wrong number of root elements")
	}

	root := parsed[0]

	var form loginFormInfo

	htmlForm, ok := scrape.Find(root, scrape.ByTag(atom.Form))
	if !ok {
		return nil, errors.New("no form element found")
	}

	if actionStr := getNodeAttribute(htmlForm, "action"); actionStr == "" {
		form.action = res.Request.URL.String()
	} else {
		actionURL, err := url.Parse(actionStr)
		if err != nil {
			return nil, err
		}
		if actionURL.Host == "" {
			actionURL.Host = res.Request.URL.Host
		}
		if actionURL.Scheme == "" {
			actionURL.Scheme = res.Request.URL.Scheme
		}
		if !path.IsAbs(actionURL.Path) {
			actionURL.Path = path.Join(res.Request.URL.Path, actionURL.Path)
		}
		form.action = actionURL.String()
	}

	inputs := scrape.FindAll(root, scrape.ByTag(atom.Input))
	form.otherFields = url.Values{}
	for _, input := range inputs {
		inputName := getNodeAttribute(input, "name")
		switch getNodeAttribute(input, "type") {
		case "text":
			form.usernameField = inputName
		case "password":
			form.passwordField = inputName
		default:
			form.otherFields.Add(inputName, getNodeAttribute(input, "value"))
		}
	}

	if form.usernameField == "" {
		return nil, errors.New("no username field found")
	} else if form.passwordField == "" {
		return nil, errors.New("no password field found")
	}

	return &form, nil
}
Developer: unixpickle | Project: better-student-center | Lines: 60 | Source: html.go


Example 13: parseSchedule

// parseSchedule parses the courses from the schedule list view page.
//
// If fetchMoreInfo is true, this will perform a request for each component to find out information
// about it.
func parseSchedule(rootNode *html.Node) ([]Course, error) {
	courseTables := scrape.FindAll(rootNode, scrape.ByClass("PSGROUPBOXWBO"))
	result := make([]Course, 0, len(courseTables))
	for _, classTable := range courseTables {
		println("found course")

		titleElement, ok := scrape.Find(classTable, scrape.ByClass("PAGROUPDIVIDER"))
		if !ok {
			// This will occur at least once, since the filter options are a PSGROUPBOXWBO.
			continue
		}

		infoTables := scrape.FindAll(classTable, scrape.ByClass("PSLEVEL3GRIDNBO"))
		if len(infoTables) != 2 {
			return nil, errors.New("expected exactly 2 info tables but found " +
				strconv.Itoa(len(infoTables)))
		}

		courseInfoTable := infoTables[0]
		course, err := parseCourseInfoTable(courseInfoTable)
		if err != nil {
			return nil, err
		}

		// NOTE: there isn't really a standard way to parse the department/number.
		course.Name = nodeInnerText(titleElement)

		componentsInfoTable := infoTables[1]
		componentMaps, err := tableEntriesAsMaps(componentsInfoTable)
		if err != nil {
			return nil, err
		}
		course.Components = make([]Component, len(componentMaps))
		for i, componentMap := range componentMaps {
			course.Components[i], err = parseComponentInfoMap(componentMap)
			if err != nil {
				return nil, err
			}
		}

		result = append(result, course)
	}
	return result, nil
}
Developer: unixpickle | Project: better-student-center | Lines: 48 | Source: schedule.go


Example 14: getLink

func getLink(r *html.Node) (s string) {
	buttons := scrape.FindAll(r, scrape.ByClass("downloadbtn"))
	for _, button := range buttons {
		windowLocation := scrape.Attr(button, "onclick")
		link := strings.Split(windowLocation, "=")[1]
		return strings.Trim(link, "'")
	}
	return
}
Developer: jmonmane | Project: scrape | Lines: 10 | Source: main.go


Example 15: parseHistoryItems

func parseHistoryItems(rootNode *html.Node) []*YoutubeVideoInfo {
	videoElements := scrape.FindAll(rootNode, scrape.ByClass("yt-lockup-video"))

	res := make([]*YoutubeVideoInfo, len(videoElements))
	for i, element := range videoElements {
		res[i] = parseVideoInfo(element)
	}

	return res
}
Developer: unixpickle | Project: gscrape | Lines: 10 | Source: youtube.go


Example 16: parseBroadcastSeedNode

// Get Time, Source and Image from a JSON-embedded HTML snippet
func (item *calendarItem) parseBroadcastSeedNode(root *html.Node) (bc *broadcastURL, err error) {
	bc = &broadcastURL{}
	bc.Station = *item.Station
	bc.Time = time.Time(item.DateTime)
	for _, a := range scrape.FindAll(root, func(n *html.Node) bool {
		if atom.A != n.DataAtom {
			return false
		}
		href := scrape.Attr(n, "href")
		return strings.HasPrefix(href, "/programm/radio/ausstrahlung-") && strings.HasSuffix(href, ".html")
	}) {
		ru, _ := url.Parse(scrape.Attr(a, "href"))
		bc.Source = *item.Station.ProgramURL.ResolveReference(ru)
	}
	for _, img := range scrape.FindAll(root, func(n *html.Node) bool { return atom.Img == n.DataAtom }) {
		ru, _ := url.Parse(scrape.Attr(img, "src"))
		bc.Image = item.Station.ProgramURL.ResolveReference(ru)
	}
	return
}
Developer: mro | Project: internet-radio-recorder | Lines: 21 | Source: b4.go


Example 17: Get

// Get returns the set of arrival times for the buses at the given
// stop. Callers must check that no error is returned.
func Get(parada int) (TiemposParada, error) {
	resp, err := http.Get("http://www.auvasa.es/paradamb.asp?codigo=" +
		strconv.Itoa(parada))
	if err != nil {
		return TiemposParada{}, errors.New("Error al conectar con el servidor de AUVASA.")
	}

	rInUTF8 := transform.NewReader(resp.Body, charmap.Windows1252.NewDecoder())
	root, err := html.Parse(rInUTF8)
	if err != nil {
		return TiemposParada{}, errors.New("Error en la respuesta de AUVASA.")
	}

	headers := scrape.FindAll(root, scrape.ByTag(atom.H1))
	if len(headers) < 2 {
		return TiemposParada{}, errors.New("La parada indicada parece errónea.")
	}

	lineasTiempos := scrape.FindAll(root, scrape.ByClass("style36"))
	resultados := make([]ProximoBus, len(lineasTiempos))
	for i, item := range lineasTiempos {
		valores := scrape.FindAll(item, scrape.ByClass("style38"))
		resultados[i] = ProximoBus{
			Linea:   scrape.Text(valores[0]),
			Destino: scrape.Text(valores[2]),
			Minutos: scrape.Text(valores[3]),
		}
	}

	if len(resultados) == 0 {
		return TiemposParada{}, errors.New("No hay tiempos para la parada especificada. Puede que sea errónea o que ya no haya buses.")
	}

	return TiemposParada{
		Nombre:  scrape.Text(headers[1]),
		Tiempos: resultados,
		Momento: time.Now(),
		Codigo:  parada,
	}, nil

}
Developer: adrm | Project: auvasa | Lines: 43 | Source: auvasa.go


Example 18: TestTextWithBrFromNodeSet_001

func TestTextWithBrFromNodeSet_001(t *testing.T) {
	f, err := os.Open("testdata/TextWithBrFromNodeSet_001.html")
	assert.NotNil(t, f, "ouch")
	assert.Nil(t, err, "ouch")
	root, err := html.Parse(f)
	assert.NotNil(t, root, "ouch")
	nodes := scrape.FindAll(root, func(n *html.Node) bool { return atom.Div == n.DataAtom })

	txt := TextWithBrFromNodeSet(nodes)
	assert.Equal(t, "foo\n\nbar\nfoo\n\nbar", txt, "ouch")

}
Developer: mro | Project: internet-radio-recorder | Lines: 12 | Source: html_test.go


Example 19: extractEventDetails

func extractEventDetails(root *html.Node) []*html.Node {
	eventNames := scrape.FindAll(root, eventNameMatcher)
	eventDescriptions := scrape.FindAll(root, eventDescriptionMatcher)
	eventDates := scrape.FindAll(root, eventDateMatcher)
	eventTimes := scrape.FindAll(root, eventTimeMatcher)
	eventLocations := scrape.FindAll(root, eventLocationMatcher)
	eventContacts := scrape.FindAll(root, eventContactPersonMatcher)

	// return nil if mandatory attributes are not found
	if len(eventNames) == 0 ||
		len(eventDates) == 0 ||
		len(eventContacts) == 0 {
		return nil
	}

	ensureAtMostOneElement(eventNames, eventNameToMatch)
	ensureAtMostOneElement(eventDescriptions, eventDescriptionToMatch)
	ensureAtMostOneElement(eventDates, eventDateToMatch)
	ensureAtMostOneElement(eventTimes, eventTimeToMatch)
	ensureAtMostOneElement(eventLocations, eventLocationToMatch)
	ensureAtMostOneElement(eventContacts, eventContactPersonToMatch)

	return []*html.Node{
		eventNames[0],
		eventDescriptions[0],
		eventDates[0],
		eventTimes[0],
		eventLocations[0],
		eventContacts[0],
	}
}
Developer: jamesma | Project: html-scraper | Lines: 31 | Source: chamberorganizer.go


Example 20: NewListing

func NewListing(ctx appengine.Context, url string) (*Listing, error) {
	client := urlfetch.Client(ctx)
	resp, err := client.Get("http://167.88.16.61:2138/" + url)
	if err != nil {
		ctx.Errorf("%s", err)
		return nil, errors.New("Get listing failed")
	}
	defer resp.Body.Close()
	ctx.Debugf("Craigslist request came back with status: %s", resp.Status)
	root, err := html.Parse(resp.Body)
	if err != nil {
		ctx.Errorf("%s", "Parsing Error")
		return nil, errors.New("Parse body failed")
	}

	title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
	if !ok {
		ctx.Errorf("%s", "Error getting title")
		return nil, errors.New("Get title failed")
	}
	price, ok := scrape.Find(root, scrape.ByClass("price"))
	if !ok {
		ctx.Errorf("%s", "Error getting price")
		return nil, errors.New("Get price failed")
	}
	intPrice, err := strconv.Atoi(scrape.Text(price)[1:])
	if err != nil {
		ctx.Errorf("Error casting price: %s", scrape.Text(price))
		return nil, err
	}
	images := scrape.FindAll(root, scrape.ByTag(atom.Img))
	imageUrl := ""
	for _, image := range images {
		if scrape.Attr(image, "title") == "image 1" {
			imageUrl = scrape.Attr(image, "src")
		}
	}

	ctx.Debugf("Craigslist returned listing.Price: %d, listing.Title: %s", intPrice, scrape.Text(title))

	return &Listing{
		Url:      url,
		Title:    scrape.Text(title),
		Price:    intPrice,
		ImageUrl: imageUrl,
	}, nil
}
Developer: matthewdu | Project: powerplug | Lines: 49 | Source: craigslist.go



Note: The github.com/yhat/scrape.FindAll examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution and use must follow each project's license. Please do not reproduce without permission.

