This article collects typical usage examples of the Find function from the Golang package github.com/yhat/scrape. If you are wondering exactly what the Golang Find function does, how to call it, or what real-world uses look like, the curated examples below should help.
Twenty code examples of the Find function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Golang code examples.
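Before the examples, here is a minimal, self-contained sketch of the call shape they all share. It reflects the API as used in the snippets below: Find takes a root *html.Node and a matcher function and returns the first matching node plus an ok flag. The fetched URL is only a placeholder:

package main

import (
	"fmt"
	"net/http"

	"github.com/yhat/scrape"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)

func main() {
	resp, err := http.Get("https://example.com/")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	// Find walks the tree and returns the first node the matcher accepts,
	// plus an ok flag reporting whether anything matched at all.
	if title, ok := scrape.Find(root, scrape.ByTag(atom.Title)); ok {
		fmt.Println(scrape.Text(title))
	}
}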
Example 1: parseBroadcastFromHtmlNode
func (bc *broadcast) parseBroadcastFromHtmlNode(root *html.Node) (ret []*r.Broadcast, err error) {
	{
		// Author
		meta, _ := scrape.Find(root, func(n *html.Node) bool {
			return atom.Meta == n.DataAtom && "Author" == scrape.Attr(n, "name")
		})
		if nil != meta {
			content := scrape.Attr(meta, "content")
			bc.Author = &content
		}
	}
	for idx, epg := range scrape.FindAll(root, func(n *html.Node) bool {
		return atom.Div == n.DataAtom && "epg-content-right" == scrape.Attr(n, "class")
	}) {
		if idx != 0 {
			err = errors.New("There was more than 1 <div class='epg-content-right'/>")
			return
		}
		{
			// TitleEpisode
			txt, _ := scrape.Find(epg, func(n *html.Node) bool {
				return html.TextNode == n.Type && atom.H3 == n.Parent.DataAtom && atom.Br == n.NextSibling.DataAtom
			})
			if nil != txt {
				t := strings.TrimSpace(r.NormaliseWhiteSpace(txt.Data))
				bc.TitleEpisode = &t
				txt.Parent.RemoveChild(txt.NextSibling)
				txt.Parent.RemoveChild(txt)
			}
		}
		{
			// Subject
			a, _ := scrape.Find(epg, func(n *html.Node) bool {
				return atom.Div == n.Parent.DataAtom && "sendungsLink" == scrape.Attr(n.Parent, "class") && atom.A == n.DataAtom
			})
			if nil != a {
				u, _ := url.Parse(scrape.Attr(a, "href"))
				bc.Subject = bc.Source.ResolveReference(u)
			}
		}
		// purge some cruft
		for _, nn := range scrape.FindAll(epg, func(n *html.Node) bool {
			clz := scrape.Attr(n, "class")
			return atom.H2 == n.DataAtom ||
				"mod modSharing" == clz ||
				"modGalery" == clz ||
				"sendungsLink" == clz ||
				"tabs-container" == clz
		}) {
			nn.Parent.RemoveChild(nn)
		}
		{
			description := r.TextWithBrFromNodeSet(scrape.FindAll(epg, func(n *html.Node) bool { return epg == n.Parent }))
			bc.Description = &description
		}
	}
	bc_ := r.Broadcast(*bc)
	ret = append(ret, &bc_)
	return
}
Developer: mro, Project: internet-radio-recorder, Lines: 60, Source: wdr.go
Example 2: fetchExtraScheduleInfo
// fetchExtraScheduleInfo gets more information about each component.
//
// The rootNode argument should be the parsed schedule list view.
func fetchExtraScheduleInfo(client *http.Client, courses []Course, rootNode *html.Node) error {
	psForm, ok := scrape.Find(rootNode, scrape.ByClass("PSForm"))
	if !ok {
		return errors.New("could not find PSForm")
	}
	icsid, ok := scrape.Find(psForm, scrape.ById("ICSID"))
	if !ok {
		return errors.New("could not find ICSID")
	}
	formAction := getNodeAttribute(psForm, "action")
	sid := getNodeAttribute(icsid, "value")
	// TODO: figure out if there's a way to make this more robust or to load it lazily.
	sectionIndex := 0
	for courseIndex := range courses {
		course := &courses[courseIndex]
		for componentIndex := range course.Components {
			component := &course.Components[componentIndex]
			postData := generateClassDetailForm(sid, sectionIndex)
			res, reqErr := client.PostForm(formAction, postData)
			if res != nil {
				// NB: deferred closes run only when this function returns, so
				// response bodies accumulate until all sections are fetched.
				defer res.Body.Close()
			}
			if reqErr != nil {
				return reqErr
			}
			courseOpen, parseErr := parseExtraComponentInfo(res.Body, component)
			if parseErr != nil {
				return parseErr
			}
			course.Open = &courseOpen
			postData = generateClassDetailBackForm(sid, sectionIndex)
			res, reqErr = client.PostForm(formAction, postData)
			if res != nil {
				defer res.Body.Close()
			}
			if reqErr != nil {
				return reqErr
			}
			sectionIndex++
		}
	}
	return nil
}
Developer: unixpickle, Project: better-student-center, Lines: 53, Source: schedule.go
Example 3: NewListing
func NewListing(ctx appengine.Context, url string) (*Listing, error) {
	client := urlfetch.Client(ctx)
	resp, err := client.Get("http://167.88.16.61:2138/" + url)
	if err != nil {
		ctx.Errorf("%s", err)
		return nil, errors.New("Get listing failed")
	}
	ctx.Debugf("Craigslist request came back with status: %s", resp.Status)
	root, err := html.Parse(resp.Body)
	if err != nil {
		ctx.Errorf("%s", "Parsing Error")
		return nil, errors.New("Parse body failed")
	}
	title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
	if !ok {
		ctx.Errorf("%s", "Error getting title")
		return nil, errors.New("Get title failed")
	}
	price, ok := scrape.Find(root, scrape.ByClass("price"))
	if !ok {
		ctx.Errorf("%s", "Error getting price")
		return nil, errors.New("Get price failed")
	}
	// strip the leading "$" before parsing the price
	intPrice, err := strconv.Atoi(scrape.Text(price)[1:])
	if err != nil {
		ctx.Errorf("Error casting price: %s", scrape.Text(price))
		return nil, err
	}
	images := scrape.FindAll(root, scrape.ByTag(atom.Img))
	imageUrl := ""
	for _, image := range images {
		if scrape.Attr(image, "title") == "image 1" {
			imageUrl = scrape.Attr(image, "src")
		}
	}
	ctx.Debugf("Craigslist returned listing.Price: %d, listing.Title: %s", intPrice, scrape.Text(title))
	return &Listing{
		Url:      url,
		Title:    scrape.Text(title),
		Price:    intPrice,
		ImageUrl: imageUrl,
	}, nil
}
Developer: matthewdu, Project: powerplug, Lines: 49, Source: craigslist.go
Example 4: Auth
// Auth attempts to access a given URL, then enters the given
// credentials when the URL redirects to a login page.
func (s *Session) Auth(serviceURL, email, password string) error {
	resp, err := s.Get(serviceURL)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	parsed, err := html.ParseFragment(resp.Body, nil)
	if err != nil {
		return err
	} else if len(parsed) == 0 {
		return errors.New("login page has no root element")
	}
	root := parsed[0]
	form, ok := scrape.Find(root, scrape.ById("gaia_loginform"))
	if !ok {
		return errors.New("failed to process login page")
	}
	submission := url.Values{}
	for _, input := range scrape.FindAll(form, scrape.ByTag(atom.Input)) {
		submission.Add(getAttribute(input, "name"), getAttribute(input, "value"))
	}
	submission["Email"] = []string{email}
	submission["Passwd"] = []string{password}
	postResp, err := s.PostForm(resp.Request.URL.String(), submission)
	if err != nil {
		return err
	}
	postResp.Body.Close()
	// A successful login redirects away (the final request is a GET);
	// still sitting on the POST means the credentials were rejected.
	if postResp.Request.Method == "POST" {
		return errors.New("login incorrect")
	}
	return nil
}
Developer: unixpickle, Project: gscrape, Lines: 36, Source: auth.go
Example 5: TorrentList
func TorrentList(url string) ([]Torrent, error) {
	// request and parse the front page
	resp, err := http.Get(url)
	if err != nil {
		return make([]Torrent, 0), err
	}
	defer resp.Body.Close()
	root, err := html.Parse(resp.Body)
	if err != nil {
		return make([]Torrent, 0), err
	}
	var torrents []Torrent
	if content, ok := scrape.Find(root, scrape.ById("searchResult")); ok {
		// define a matcher: table rows directly inside the results tbody
		matcher := func(n *html.Node) bool {
			return n.DataAtom == atom.Tr && n.Parent.DataAtom == atom.Tbody
		}
		// grab all result rows and parse them into Torrent records
		trs := scrape.FindAll(content, matcher)
		for _, tr := range trs {
			torrents = append(torrents, ParseRecord(tr))
		}
	}
	return torrents, nil
}
Developer: anykao, Project: p, Lines: 29, Source: main.go
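A hypothetical call site for TorrentList might look like the following; the URL is a placeholder, and the Torrent type and ParseRecord come from the same project:

torrents, err := TorrentList("https://thepiratebay.se/recent")
if err != nil {
	log.Fatal(err)
}
for _, t := range torrents {
	fmt.Printf("%+v\n", t)
}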
Example 6: indexPage
func indexPage(page string) (ind map[string]int, branches []string, err error) {
	resp, err := http.Get(page)
	if err != nil {
		return
	}
	root, err := html.Parse(resp.Body)
	resp.Body.Close()
	if err != nil {
		return
	}
	content, ok := scrape.Find(root, scrape.ById("bodyContent"))
	if !ok {
		return nil, nil, errors.New("no bodyContent element")
	}
	paragraphs := scrape.FindAll(content, scrape.ByTag(atom.P))
	pageText := ""
	for _, p := range paragraphs {
		pageText += elementInnerText(p) + " "
	}
	// count word frequencies across the page text
	words := strings.Fields(strings.ToLower(pageText))
	ind = map[string]int{}
	for _, word := range words {
		ind[word]++
	}
	links := findWikiLinks(content)
	branches = make([]string, len(links))
	for i, link := range links {
		branches[i] = "https://en.wikipedia.org" + link
	}
	return
}
Developer: unixpickle, Project: weakai, Lines: 35, Source: index.go
Example 7: fillJobStruct
func fillJobStruct(n *html.Node) *Job {
	job := new(Job)
	job.Title = scrape.Text(n)
	job.RetriveOn = time.Now().Format(time.RFC822Z)
	job.url = jobUrl(n)
	fmt.Println(job.url) // debug output
	job.ID = jobID(job.url)
	job.EmailFormLink = jobEmailFromUrl + job.ID
	jp := fetchByID(job.ID)
	job.jobPage = jp
	// NB: the ok results are discarded, so this assumes the description and
	// requirements nodes are always present in the job page.
	desc, _ := scrape.Find(job.jobPage, descriptionMatcher)
	job.Description = scrape.Text(desc)
	req, _ := scrape.Find(job.jobPage, requiermentMatcher)
	job.Requierments = scrape.Text(req)
	return job
}
Developer: gozes, Project: co, Lines: 17, Source: co.go
Example 8: parseGenericLoginForm
// parseGenericLoginForm takes a login page and parses the first form it finds, treating it as the
// login form.
func parseGenericLoginForm(res *http.Response) (result *loginFormInfo, err error) {
	parsed, err := html.ParseFragment(res.Body, nil)
	if err != nil {
		return
	} else if len(parsed) != 1 {
		return nil, errors.New("wrong number of root elements")
	}
	root := parsed[0]
	var form loginFormInfo
	htmlForm, ok := scrape.Find(root, scrape.ByTag(atom.Form))
	if !ok {
		return nil, errors.New("no form element found")
	}
	if actionStr := getNodeAttribute(htmlForm, "action"); actionStr == "" {
		form.action = res.Request.URL.String()
	} else {
		actionURL, err := url.Parse(actionStr)
		if err != nil {
			return nil, err
		}
		// fill in any URL components a relative action leaves out
		if actionURL.Host == "" {
			actionURL.Host = res.Request.URL.Host
		}
		if actionURL.Scheme == "" {
			actionURL.Scheme = res.Request.URL.Scheme
		}
		if !path.IsAbs(actionURL.Path) {
			actionURL.Path = path.Join(res.Request.URL.Path, actionURL.Path)
		}
		form.action = actionURL.String()
	}
	inputs := scrape.FindAll(root, scrape.ByTag(atom.Input))
	form.otherFields = url.Values{}
	for _, input := range inputs {
		inputName := getNodeAttribute(input, "name")
		switch getNodeAttribute(input, "type") {
		case "text":
			form.usernameField = inputName
		case "password":
			form.passwordField = inputName
		default:
			form.otherFields.Add(inputName, getNodeAttribute(input, "value"))
		}
	}
	if form.usernameField == "" {
		return nil, errors.New("no username field found")
	} else if form.passwordField == "" {
		return nil, errors.New("no password field found")
	}
	return &form, nil
}
Developer: unixpickle, Project: better-student-center, Lines: 60, Source: html.go
Example 9: parsepost
func parsepost(n *html.Node) Post {
	post := Post{}
	// get the title. uses a scrape inbuilt matcher
	// (the ok result is discarded, so this assumes a .title node exists)
	title_scrape, _ := scrape.Find(n, scrape.ByClass("title"))
	title := scrape.Text(title_scrape.FirstChild)
	// get the subreddit. This requires a custom matcher.
	matcher := func(n *html.Node) bool {
		if n.DataAtom == atom.A && n.Parent != nil {
			return scrape.Attr(n, "class") == "subreddit hover may-blank"
		}
		return false
	}
	sub, _ := scrape.Find(n, matcher)
	subreddit := scrape.Text(sub)
	// get the url to the comments. requires custom matcher.
	matcher = func(n *html.Node) bool {
		if n.DataAtom == atom.Ul && n.FirstChild != nil {
			return scrape.Attr(n, "class") == "flat-list buttons" && scrape.Attr(n.FirstChild, "class") == "first"
		}
		return false
	}
	ul, _ := scrape.Find(n, matcher)          // ul is a list of two buttons: one that links to a post's comments page, one a "share" function
	li := ul.FirstChild                       // the first list item of ul -- this will always be the comments page link.
	url := scrape.Attr(li.FirstChild, "href") // finally, the url found in the list item.
	// get the author. Uses custom matcher and magic.
	matcher = func(n *html.Node) bool {
		if n.DataAtom == atom.A && n.Parent.DataAtom == atom.P {
			return strings.Contains(scrape.Attr(n, "href"), "/user/")
		}
		return false
	}
	author_scrape, _ := scrape.Find(n, matcher)
	author := scrape.Text(author_scrape)
	post.title = title
	post.subreddit = subreddit
	post.url = url
	post.author = author
	return post
}
Developer: jalavosus, Project: redditscraper, Lines: 45, Source: reddit_scraper.go
Example 10: findHTMLTitle
func findHTMLTitle(doc *html.Node) string {
	el, found := scrape.Find(doc, scrape.ByTag(atom.Title))
	if !found {
		return ""
	}
	return scrape.Text(el)
}
Developer: mcmillan, Project: socialite, Lines: 9, Source: title.go
Example 11: ParseName
func ParseName(n *html.Node) (string, string, string) {
	matcher := func(n *html.Node) bool {
		// must check for nil values
		if n.DataAtom == atom.A && n.Parent.DataAtom == atom.Td {
			return true
		}
		return false
	}
	var name, magnet, desc string
	if detName, ok := scrape.Find(n, scrape.ByClass("detName")); ok {
		name = scrape.Text(detName)
	}
	if anchor, ok := scrape.Find(n, matcher); ok {
		magnet = scrape.Attr(anchor, "href")
	}
	if detDesc, ok := scrape.Find(n, scrape.ByClass("detDesc")); ok {
		desc = scrape.Text(detDesc)
	}
	return name, magnet, desc
}
Developer: anykao, Project: p, Lines: 22, Source: main.go
Example 12: TweetsToUser
func TweetsToUser(u user.User) []tweet.Tweet {
	reqURL := SearchURL
	_url.SetQueryParams(&reqURL, map[string]string{
		"q": "to:" + u.ScreenName,
		"f": "tweets",
	})
	res, err := http.Get(reqURL.String())
	PanicIf(err)
	root, err := html.Parse(res.Body)
	PanicIf(err)
	tweetsMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.Div && strings.HasPrefix(scrape.Attr(n, "class"), "tweet original-tweet")
	}
	tweetScreenNameMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.Span && strings.HasPrefix(scrape.Attr(n, "class"), "username")
	}
	tweetTextMatcher := func(n *html.Node) bool {
		return n.DataAtom == atom.P && strings.HasSuffix(scrape.Attr(n, "class"), "tweet-text")
	}
	tweetNodes := scrape.FindAll(root, tweetsMatcher)
	tweets := make([]tweet.Tweet, len(tweetNodes))
	for i, n := range tweetNodes {
		t := tweet.Tweet{
			ID: scrape.Attr(n, "data-user-id"),
		}
		if child, ok := scrape.Find(n, tweetScreenNameMatcher); ok {
			t.Author = *user.NewUser(scrape.Text(child))
		}
		if child, ok := scrape.Find(n, tweetTextMatcher); ok {
			t.Text = scrape.Text(child)
		}
		tweets[i] = t
	}
	return tweets
}
Developer: mrap, Project: twitterget, Lines: 39, Source: search.go
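A hypothetical call site, assuming the user.NewUser constructor already used inside TweetsToUser above; the screen name is a placeholder:

for _, t := range TweetsToUser(*user.NewUser("golang")) {
	fmt.Printf("@%s: %s\n", t.Author.ScreenName, t.Text)
}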
Example 13: resolveUrl
func resolveUrl(website string) string {
	site := getURL(website)
	contents, err := html.Parse(site.Body)
	if err != nil {
		fmt.Printf("%s", err)
		os.Exit(1)
	}
	// the ok result is discarded, so this assumes the page has a <title>
	title, _ := scrape.Find(contents, scrape.ByTag(atom.Title))
	titulo := scrape.Text(title)
	return titulo
}
Developer: ChrisFernandez, Project: GoBot, Lines: 14, Source: gobot.go
Example 14: findOpenGraphTitle
func findOpenGraphTitle(doc *html.Node) string {
	el, found := scrape.Find(doc, func(n *html.Node) bool {
		if n.DataAtom == atom.Meta {
			return scrape.Attr(n, "property") == "og:title" && scrape.Attr(n, "content") != ""
		}
		return false
	})
	if !found {
		return ""
	}
	return scrape.Attr(el, "content")
}
Developer: mcmillan, Project: socialite, Lines: 15, Source: title.go
Example 15: findTwitterTitle
func findTwitterTitle(doc *html.Node) string {
	el, found := scrape.Find(doc, func(n *html.Node) bool {
		if n.DataAtom == atom.Meta {
			return scrape.Attr(n, "name") == "twitter:title" && scrape.Attr(n, "content") != ""
		}
		return false
	})
	if !found {
		return ""
	}
	return scrape.Attr(el, "content")
}
Developer: mcmillan, Project: socialite, Lines: 15, Source: title.go
Example 16: queryWikipedia
func queryWikipedia(word string) string {
	word = strings.TrimSpace(word)
	website := "http://en.wikipedia.com/wiki/" + word
	site := getURL(website)
	contents, err := html.Parse(site.Body)
	if err != nil {
		fmt.Printf("%s", err)
		panic(err)
	}
	// use the article's first paragraph as the reply
	intro, _ := scrape.Find(contents, scrape.ByTag(atom.P))
	resp := scrape.Text(intro)
	return resp
}
Developer: ChrisFernandez, Project: GoBot, Lines: 15, Source: gobot.go
Example 17: History
// History asynchronously fetches the user's
// video viewing history.
// You may provide a cancel channel which you
// can close to cancel the fetch mid-way.
func (y *Youtube) History(cancel <-chan struct{}) (<-chan *YoutubeVideoInfo, <-chan error) {
	videoChan := make(chan *YoutubeVideoInfo)
	errChan := make(chan error, 1)
	go func() {
		defer close(videoChan)
		defer close(errChan)
		historyReq, _ := http.NewRequest("GET", "https://www.youtube.com/feed/history", nil)
		historyReq.Header.Set("User-Agent", spoofedUserAgent)
		resp, err := y.s.Do(historyReq)
		if err != nil {
			errChan <- err
			return
		}
		rootNode, err := html.Parse(resp.Body)
		resp.Body.Close()
		if err != nil {
			errChan <- err
			return
		}
		loadMoreHTML := rootNode
		contentHTML := rootNode
		for {
			items := parseHistoryItems(contentHTML)
			for _, item := range items {
				select {
				case videoChan <- item:
				case <-cancel:
					return
				}
			}
			if loadMoreHTML == nil {
				break
			}
			loadButton, ok := scrape.Find(loadMoreHTML, scrape.ByClass("yt-uix-load-more"))
			if !ok {
				// no "load more" button left: the history has been exhausted
				break
			}
			morePath := scrape.Attr(loadButton, "data-uix-load-more-href")
			loadMoreHTML, contentHTML, err = y.fetchMoreHistory(morePath)
			if err != nil {
				errChan <- err
				return
			}
		}
	}()
	return videoChan, errChan
}
Developer: unixpickle, Project: gscrape, Lines: 52, Source: youtube.go
Example 18: main
func main() {
	resp, err := http.Get("https://www.reddit.com")
	if err != nil {
		panic(err)
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	matcher := func(n *html.Node) bool {
		if n.DataAtom == atom.Div && n.Parent != nil {
			return scrape.Attr(n, "id") == "siteTable"
		}
		return false
	}
	table, ok := scrape.Find(root, matcher)
	if !ok {
		panic("could not find siteTable")
	}
	matcher = func(n *html.Node) bool {
		if n.DataAtom == atom.Div && n.Parent != nil {
			return scrape.Attr(n, "data-type") == "link"
		}
		return false
	}
	articles := scrape.FindAll(table, matcher)
	// each goroutine writes to its own index, so the slice needs no locking
	posts := make([]Post, len(articles))
	for i := 0; i < len(articles); i++ {
		wg.Add(1)
		go func(i int, n *html.Node) {
			posts[i] = parsepost(n)
			wg.Done()
		}(i, articles[i])
	}
	wg.Wait()
	for i := 0; i < len(posts); i++ {
		printpost(posts[i])
	}
}
Developer: jalavosus, Project: redditscraper, Lines: 47, Source: reddit_scraper.go
Example 19: parseSchedule
// parseSchedule parses the courses from the schedule list view page.
func parseSchedule(rootNode *html.Node) ([]Course, error) {
	courseTables := scrape.FindAll(rootNode, scrape.ByClass("PSGROUPBOXWBO"))
	result := make([]Course, 0, len(courseTables))
	for _, classTable := range courseTables {
		titleElement, ok := scrape.Find(classTable, scrape.ByClass("PAGROUPDIVIDER"))
		if !ok {
			// This will occur at least once, since the filter options are a PSGROUPBOXWBO.
			continue
		}
		infoTables := scrape.FindAll(classTable, scrape.ByClass("PSLEVEL3GRIDNBO"))
		if len(infoTables) != 2 {
			return nil, errors.New("expected exactly 2 info tables but found " +
				strconv.Itoa(len(infoTables)))
		}
		courseInfoTable := infoTables[0]
		course, err := parseCourseInfoTable(courseInfoTable)
		if err != nil {
			return nil, err
		}
		// NOTE: there isn't really a standard way to parse the department/number.
		course.Name = nodeInnerText(titleElement)
		componentsInfoTable := infoTables[1]
		componentMaps, err := tableEntriesAsMaps(componentsInfoTable)
		if err != nil {
			return nil, err
		}
		course.Components = make([]Component, len(componentMaps))
		for i, componentMap := range componentMaps {
			course.Components[i], err = parseComponentInfoMap(componentMap)
			if err != nil {
				return nil, err
			}
		}
		result = append(result, course)
	}
	return result, nil
}
Developer: unixpickle, Project: better-student-center, Lines: 48, Source: schedule.go
Example 20: getTitle
func getTitle(url string) string {
	resp, err := http.Get(url)
	if err != nil {
		fmt.Println("error:", err)
		return "error"
	}
	root, err := html.Parse(resp.Body)
	if err != nil {
		fmt.Println("error:", err)
		return "error"
	}
	title, ok := scrape.Find(root, scrape.ByTag(atom.Title))
	if ok {
		return scrape.Text(title)
	}
	return "unknown"
}
Developer: jasonthomas, Project: teddy-go, Lines: 22, Source: main.go
Note: the github.com/yhat/scrape.Find examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are drawn from open-source projects contributed by various developers; copyright remains with the original authors, and distribution or reuse is subject to each project's license. Do not reproduce without permission.