本文整理汇总了Golang中github.com/PuerkitoBio/goquery.Document类的典型用法代码示例。如果您正苦于以下问题：Golang Document类的具体用法？Golang Document怎么用？Golang Document使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Document类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Golang代码示例。
示例1: doWork
// doWork is a worker loop: it receives page URLs from links, downloads each
// page with goquery and sends one line per page to results. It returns when
// links is closed.
//
// The line is built from the text of the element that follows every
// "td.desc" cell (trimmed of spaces and newlines, with its first comma turned
// into a dot); the cells are joined with *sep. Pages without any "td.desc"
// cell are reported on stderr and produce no result.
//
// Each download is attempted up to maxRetries times, sleeping
// i * rand.Intn(5) seconds after the i-th failure (which may be zero). A page
// that still cannot be downloaded is reported on stderr and skipped, so that
// one bad URL does not stop the worker from draining the remaining links.
func doWork(links <-chan string, results chan<- string) {
nextLink:
	for link := range links {
		var doc *goquery.Document
		for i := 1; ; i++ {
			var err error
			doc, err = goquery.NewDocument(link)
			if err == nil {
				break
			}
			fmt.Fprintf(os.Stderr, "[Tentativa %d] Erro tentando processar página de servidor: %s. Erro: %q\n", i, link, err)
			// ">=" rather than "==" so a non-positive maxRetries cannot
			// turn this into an endless retry loop.
			if i >= maxRetries {
				fmt.Fprintf(os.Stderr, "Página não processada: %s\n", link)
				continue nextLink
			}
			time.Sleep(time.Duration(i) * time.Duration(rand.Intn(5)) * time.Second)
		}

		var row []string
		doc.Find("td.desc").Each(func(_ int, s *goquery.Selection) {
			// The value lives in the sibling right after the description cell.
			cell := strings.Replace(
				strings.Trim(s.Next().Text(), " \n"),
				",",
				".",
				1)
			row = append(row, cell)
		})
		if len(row) > 0 {
			results <- strings.Join(row, *sep)
		} else {
			fmt.Fprintf(os.Stderr, "Não achou td.desc: %s\n", link)
		}
	}
}
开发者ID:danielfireman,项目名称:phd,代码行数:32,代码来源:main.go
示例2: parseTrendingRepos
// parseTrendingRepos builds one GithubRepo for every "li.repo-list-item"
// element in doc, in document order.
//
// Title is the text of the "h3.repo-list-name a" link with surrounding
// whitespace trimmed and all spaces and newlines removed. Url is that link's
// href prefixed with "https://github.com". Description is the trimmed text of
// "p.repo-list-description". Stars is the first run of digits found in the
// "p.repo-list-meta" text once commas are removed, or 0 when there is none.
// Forks is always 0 and Date is the current UTC Unix time.
func parseTrendingRepos(doc *goquery.Document) []GithubRepo {
	const cutset = "\n\t "
	digits := regexp.MustCompile("[0-9]+")
	squeeze := strings.NewReplacer(" ", "", "\n", "")

	var found []GithubRepo
	doc.Find("li.repo-list-item").Each(func(_ int, item *goquery.Selection) {
		nameLink := item.Find("h3.repo-list-name a")
		href, _ := nameLink.Attr("href")

		count := digits.FindString(
			strings.Replace(item.Find("p.repo-list-meta").Text(), ",", "", -1))
		if count == "" {
			count = "0"
		}
		// A conversion failure leaves whatever Atoi returned, as before.
		stars, _ := strconv.Atoi(count)

		found = append(found, GithubRepo{
			Title:       squeeze.Replace(strings.Trim(nameLink.Text(), cutset)),
			Description: strings.Trim(item.Find("p.repo-list-description").Text(), cutset),
			Url:         "https://github.com" + href,
			Stars:       stars,
			Forks:       0,
			Date:        time.Now().UTC().Unix(),
		})
	})
	return found
}
开发者ID:hypebeast,项目名称:gostats,代码行数:33,代码来源:github.go
示例3: cleanCites
// cleanCites hands every <cite> element found in doc to the configured
// parser's removeNode and returns the same document it was given.
func (this *cleaner) cleanCites(doc *goquery.Document) *goquery.Document {
	doc.Find("cite").Each(func(_ int, cite *goquery.Selection) {
		this.config.parser.removeNode(cite)
	})
	return doc
}
开发者ID:ngs,项目名称:GoOse,代码行数:7,代码来源:cleaner.go
示例4: defaultHTML
// defaultHTML fills i from generic metadata found in doc.
//
// The caption is first taken from the <title> element and then overridden by
// an og:title meta tag when one is present. og:image, og:url and
// og:description populate ImageURL, URL and Description respectively. og:image
// and og:url values that fail govalidator.IsRequestURL are logged together
// with sourceURL and ignored.
func defaultHTML(i *data.Item, sourceURL string, doc *goquery.Document) {
	fmt.Println("Running OG extract.")

	selection := doc.Find("title")
	// An empty <title></title> has no child node; dereferencing FirstChild
	// unconditionally would panic on such pages.
	if len(selection.Nodes) != 0 && selection.Nodes[0].FirstChild != nil {
		i.Caption = selection.Nodes[0].FirstChild.Data
	}

	selection = doc.Find("meta[property*='og']")
	for _, e := range selection.Nodes {
		m := htmlAttributeToMap(e.Attr)

		switch m["property"] {
		case "og:title":
			i.Caption = m["content"]

		case "og:image":
			if !govalidator.IsRequestURL(m["content"]) {
				log.Println("Invalid url in og:image. " + sourceURL)
				continue
			}
			i.ImageURL = m["content"]

		case "og:url":
			if !govalidator.IsRequestURL(m["content"]) {
				log.Println("Invalid url in og:url. " + sourceURL)
				continue
			}
			i.URL = m["content"]

		case "og:description":
			i.Description = m["content"]
		}
	}
}
开发者ID:koffeinsource,项目名称:notreddit,代码行数:36,代码来源:defaultHTML.go
示例5: parseOrderListPage
// parseOrderListPage extracts the orders linked from an order list page.
//
// The text of the first <div> inside the first ".container" element must be
// one of the two expected page titles; otherwise an error quoting the text
// that was found is returned.
//
// Every link whose href starts with "../otc/C003.html?" and carries a
// parseable, non-empty query string yields one Order built from its
// "order_id" and "order_method" parameters. Links that do not match, or whose
// URL or query string cannot be parsed, are skipped.
//
// The boolean result is true when the container holds exactly one link with
// accesskey="#" (presumably the "next page" link — TODO confirm).
func parseOrderListPage(s *goquery.Document) ([]Order, bool, error) {
	c := s.Find(".container").First()
	t := c.Find("div").First().Text()
	if t != ">注文情報(一覧)<" && t != ">注文情報(検索)<" {
		return nil, false, fmt.Errorf("cannot open \"注文情報(一覧)\", but %#v", t)
	}

	// Remove the title rows: everything that precedes the element following
	// the first <hr>.
	c.Find("hr").First().Next().PrevAll().Remove()

	results := []Order{}
	c.Find("a").Each(func(_ int, a *goquery.Selection) {
		href, ok := a.Attr("href")
		if !ok || !strings.HasPrefix(href, "../otc/C003.html?") {
			return
		}
		u, err := url.Parse(href)
		if err != nil || u.RawQuery == "" {
			return
		}
		v, err := url.ParseQuery(u.RawQuery)
		if err != nil {
			// A malformed query would otherwise produce an Order with
			// missing or partial fields.
			return
		}
		results = append(results, Order{
			OrderId:     v.Get("order_id"),
			OrderMethod: v.Get("order_method"),
		})
	})
	return results, c.Find("a[accesskey=\"#\"]").Length() == 1, nil
}
开发者ID:imos,项目名称:fxtools,代码行数:29,代码来源:order.go
示例6: GetShopName
// GetShopName returns the shop name found on the page, with surrounding
// whitespace removed. It reads the text of ".tb-shop-name" and falls back to
// ".slogo-shopname" when the former is empty; the result is "" when both are.
func GetShopName(p *goquery.Document) string {
	for _, selector := range []string{".tb-shop-name", ".slogo-shopname"} {
		if text := p.Find(selector).Text(); text != "" {
			return strings.TrimSpace(text)
		}
	}
	return ""
}
开发者ID:qgweb,项目名称:new,代码行数:8,代码来源:tao.go
示例7: feedsFromDoc
// feedsFromDoc returns the feed URLs advertised by a page.
//
// When doc contains <link> elements of type application/rss+xml or
// application/atom+xml, their href values are returned in document order (a
// link without an href contributes an empty string). Otherwise text is
// scanned for href attributes whose value contains "rss", "atom", "feed" or
// "xml", and those values are returned. The result is an empty, non-nil slice
// when nothing is found.
func feedsFromDoc(doc *goquery.Document, text string) []string {
	const selector = "link[type='application/rss+xml']" +
		", link[type='application/atom+xml']"

	links := doc.Find(selector)
	if n := links.Length(); n > 0 {
		hrefs := make([]string, 0, n)
		links.Each(func(_ int, link *goquery.Selection) {
			href, _ := link.Attr("href")
			hrefs = append(hrefs, href)
		})
		return hrefs
	}

	rx := regexp.MustCompile(`href=['"]([^'"]*(rss|atom|feed|xml)[^'"]*)['"]`)
	hits := rx.FindAllStringSubmatch(text, -1)
	if len(hits) == 0 {
		return make([]string, 0)
	}
	hrefs := make([]string, 0, len(hits))
	for _, hit := range hits {
		// hit[1] is the attribute value without the surrounding quotes.
		hrefs = append(hrefs, hit[1])
	}
	return hrefs
}
开发者ID:golibri,项目名称:website,代码行数:26,代码来源:website.go
示例8: perseHTML
// perseHTML collects dates from the "a.bt-open" links in htmldata.
//
// Each link's id attribute is decoded in place by htmlStringDecode and then
// parsed as a JSON object; the string stored under "field19" (a timestamp
// such as "2016-02-27 03:30:00") is appended to the result when
// isTimeApplicable accepts it. Links without an id, with an id that is not
// valid JSON, or without a string "field19" are skipped.
func perseHTML(htmldata *goquery.Document) []string {
	var dates []string

	htmldata.Find("a.bt-open").Each(func(_ int, s *goquery.Selection) {
		jsonData, ok := s.Attr("id")
		if !ok {
			return
		}

		htmlStringDecode(&jsonData)

		var jsonObject map[string]interface{}
		if err := json.Unmarshal([]byte(jsonData), &jsonObject); err != nil {
			return
		}

		// Checked assertion: a missing or non-string field must not panic
		// and abort the whole scrape.
		strDate, ok := jsonObject["field19"].(string)
		if !ok {
			return
		}
		if isTimeApplicable(strDate) {
			dates = append(dates, strDate)
		}
	})

	return dates
}
开发者ID:hiromaily,项目名称:go-book-teacher,代码行数:26,代码来源:analyzehtml.go
示例9: garfield
// garfield is the plugin for www.gocomics.com/garfield pages; it does nothing
// for any other sourceURL.
//
// It looks up the ".strip" elements in doc and uses the first one. When that
// element's src attribute passes govalidator.IsRequestURL, i.Description is
// replaced by an <img> tag pointing at it; otherwise the problem is printed.
// Whenever at least one ".strip" element exists, i.ImageURL is cleared.
func garfield(i *data.Item, sourceURL string, doc *goquery.Document) {
	if !strings.Contains(sourceURL, "www.gocomics.com/garfield") {
		return
	}
	fmt.Println("Running Garfield plugin.")

	// Replace the description with the comic strip image.
	selection := doc.Find(".strip")
	if len(selection.Nodes) == 0 {
		fmt.Println("Garfield plugin found no .strip. " + sourceURL)
		return
	}
	if len(selection.Nodes) > 1 {
		fmt.Println("Garfield plugin found >1 .strip. " + sourceURL)
	}

	m := htmlAttributeToMap(selection.Nodes[0].Attr)
	if govalidator.IsRequestURL(m["src"]) {
		i.Description = "<img src =\"" + m["src"] + "\" />"
	} else {
		// The message used to say "Amazon plugin", a copy-paste leftover.
		fmt.Println("Garfield plugin invalid url. " + m["src"])
	}
	i.ImageURL = ""
}
开发者ID:koffeinsource,项目名称:notreddit,代码行数:29,代码来源:garfield.go
示例10: getItems
// getItems reads the "td.title a" links of doc and returns them as items,
// together with the width in bytes of the longest line needed to show them.
//
// Iteration stops once the link index reaches maxItems. Links whose text is
// "More" are skipped. For each remaining link the points text comes from the
// <span> elements inside the element following the link's grandparent.
// maxWidth is the largest of len("<title> (<points>)") and len(href) over all
// returned items.
func getItems(doc *goquery.Document) (items []item, maxWidth int) {
	links := doc.Find("td.title a")
	links.EachWithBreak(func(idx int, link *goquery.Selection) bool {
		if idx == maxItems {
			return false
		}

		title := link.Text()
		if title == "More" {
			return true
		}

		href, _ := link.Attr("href")
		points := link.Parent().Parent().Next().Find("span").Text()

		heading := fmt.Sprintf("%s (%s)", title, points)
		maxWidth = max(len(heading), len(href), maxWidth)

		items = append(items, item{title: title, url: href, points: points})
		return true
	})
	return items, maxWidth
}
开发者ID:coolhacks,项目名称:gohn,代码行数:26,代码来源:main.go
示例11: getTeamsId
// getTeamsId returns the ids of the two teams of a match page.
//
// Index 0 is resolved from the link in "div.container.left h3 a" and index 1
// from the one in "div.container.right h3 a": each href is appended to BASE
// and passed to parseTeam. If a link has no href, or parseTeam fails, a
// zeroed array is returned together with the error.
func getTeamsId(d *goquery.Document) ([2]int, error) {
	sides := [2]struct{ selector, missing string }{
		{"div.container.left h3 a", "could not find team a"},
		{"div.container.right h3 a", "could not find team b"},
	}

	var resolved [2]int
	for n, side := range sides {
		href, found := d.Find(side.selector).Attr("href")
		if !found {
			return [2]int{}, errors.New(side.missing)
		}
		id, err := parseTeam(BASE + href)
		if err != nil {
			return [2]int{}, err
		}
		resolved[n] = id
	}
	return resolved, nil
}
开发者ID:trtstm,项目名称:zeejongparser,代码行数:30,代码来源:matchPage.go
示例12: GetFFInfo
// GetFFInfo extracts the info of a friend's friend from a profile page and
// stores it via w.putItems.
//
// The profile text is read from the third "div.c" element. The user name is
// taken from the portion GetBetweenStr cuts out in front of "性别" and
// "认证", and the sex from the portion between "性别" and "地区"; in both
// cases the value is the part after the first ":". The uid and friendid
// fields come from the package-level crawlUrl (FatherId and Id).
//
// If either value cannot be located the page is logged and skipped instead
// of panicking on an out-of-range index.
func (w *SocialWorker) GetFFInfo(query *goquery.Document) {
	uStr := query.Find("div.c").Eq(2).Text()

	nameStr := GetBetweenStr(uStr, ":", "性别")
	nameStr = GetBetweenStr(nameStr, ":", "认证")
	nameParts := strings.Split(nameStr, ":")

	sexParts := strings.Split(GetBetweenStr(uStr, "性别", "地区"), ":")

	if len(nameParts) < 2 || len(sexParts) < 2 {
		glog.Infoln("GetFFInfo: unexpected profile text, skipping:", uStr)
		return
	}

	// NOTE(review): "famale" looks like a typo for "female". It is kept
	// because the value is stored by putItems and existing data or consumers
	// may rely on it — confirm before changing.
	usex := "famale"
	if sexParts[1] == "男" {
		usex = "male"
	}

	var user User
	user.uid = crawlUrl.FatherId
	user.friendid = crawlUrl.Id
	user.uname = nameParts[1]
	user.usex = usex

	glog.Infoln(user)
	w.putItems(user)
}
开发者ID:luzh0422,项目名称:spider-docker,代码行数:37,代码来源:spider.go
示例13: GetFriendsUrl
// GetFriendsUrl collects the info-page URLs of the friends listed in query.
//
// It walks the last-child links inside the table rows of the "div.c"
// elements and handles every second one (odd indexes). The uid query
// parameter of the link's href — or "1" when the href carries none — is
// turned into "http://weibo.cn/<uid>/info" and appended to the package-level
// Urls slice; a matching entry is appended to UrlsLevel.
//
// p is not used by the current implementation.
func (w *SocialWorker) GetFriendsUrl(query *goquery.Document, p *page.Page) {
	// Compiled once per call rather than once per link.
	uidPattern := regexp.MustCompile(`(^|&|\?)uid=([^&]*)(&|$)`)

	query.Find("div.c").Find("table").Find("tbody").Find("tr").Find("a:last-child").Each(func(j int, s *goquery.Selection) {
		if j%2 == 0 {
			return
		}

		href, _ := s.Attr("href")
		uid := "1"
		if m := uidPattern.FindStringSubmatch(href); m != nil {
			uid = m[2]
		}

		Urls = append(Urls, "http://weibo.cn/"+uid+"/info")
		// NOTE(review): i is not declared in this function, so it must be a
		// package-level index of the URL currently being crawled — confirm.
		UrlsLevel = append(UrlsLevel, UrlsLevel[i]+1)
	})
}
开发者ID:luzh0422,项目名称:spider-docker,代码行数:29,代码来源:spider.go
示例14: Parse
// Parse fetches the page behind redditUrl and processes every resource found
// under "#siteTable .link".
//
// An empty redditUrl defaults to this.domain + this.golang. A URL given with
// the "http://" scheme is switched to "https://", and a URL that does not
// start with "https" gets "https://" prepended.
//
// Resources are handled from the last one on the page to the first. A failure
// on one resource is logged and does not stop the loop; the returned error is
// the fetch error, or otherwise the result of the resource processed last
// (the first one on the page).
func (this *RedditLogic) Parse(redditUrl string) error {
	redditUrl = strings.TrimSpace(redditUrl)
	switch {
	case redditUrl == "":
		redditUrl = this.domain + this.golang
	case strings.HasPrefix(redditUrl, "http://"):
		// Previously this became "https://http://...", which cannot be fetched.
		redditUrl = "https://" + strings.TrimPrefix(redditUrl, "http://")
	case !strings.HasPrefix(redditUrl, "https"):
		redditUrl = "https://" + redditUrl
	}

	var (
		doc *goquery.Document
		err error
	)

	if doc, err = this.newDocumentFromResp(redditUrl); err != nil {
		logger.Errorln("goquery reddit newdocument error:", err)
		return err
	}

	// Handle the entries at the bottom of the page first so that they are
	// stored first.
	resourcesSelection := doc.Find("#siteTable .link")

	for i := resourcesSelection.Length() - 1; i >= 0; i-- {
		err = this.dealRedditOneResource(goquery.NewDocumentFromNode(resourcesSelection.Get(i)).Selection)
		if err != nil {
			logger.Errorln(err)
		}
	}

	return err
}
开发者ID:studygolang,项目名称:studygolang,代码行数:33,代码来源:reddit.go
示例15: ARSOPotresi
// ARSOPotresi returns the earthquakes listed on the ARSO notifications page.
//
// A value cached under "potresi" in cacheArso is returned as is. Otherwise
// the page is downloaded and every table row whose 4th cell parses as a
// positive magnitude becomes a Potres: cell 1 is the date, cells 2 and 3 the
// latitude and longitude, and cell 6 the location. The result is cached with
// the default expiration before it is returned.
//
// If the download fails a nil slice is returned and nothing is cached.
func ARSOPotresi() []Potres {
	var potresi []Potres
	var doc *goquery.Document
	var e error

	if res, found := cacheArso.Get("potresi"); found {
		return res.([]Potres)
	}

	if doc, e = goquery.NewDocument("http://www.arso.gov.si/potresi/obvestila%20o%20potresih/aip/"); e != nil {
		return potresi
	}

	doc.Find("#glavna td.vsebina table tr").Each(func(i int, s *goquery.Selection) {
		// The second argument of ParseFloat is the bit size (32 or 64), not
		// a number of decimals; the fields are float64, so 64 is correct.
		magnituda, err := strconv.ParseFloat(s.Find("td:nth-child(4)").Text(), 64)
		if magnituda > 0 && err == nil {
			potres := Potres{}
			potres.Magnituda = magnituda
			// Unparseable coordinates are left at 0.
			potres.Lat, _ = strconv.ParseFloat(s.Find("td:nth-child(2)").Text(), 64)
			potres.Lon, _ = strconv.ParseFloat(s.Find("td:nth-child(3)").Text(), 64)
			potres.Lokacija = s.Find("td:nth-child(6)").Text()
			potres.Datum = s.Find("td:nth-child(1)").Text()
			potresi = append(potresi, potres)
		}
	})
	cacheArso.Set("potresi", potresi, cache.DefaultExpiration)
	return potresi
}
开发者ID:ubuntu-si,项目名称:arso-api,代码行数:29,代码来源:scrapers.go
示例16: scrapeSearch
// scrapeSearch walks all result pages of a user search and writes every user
// it finds with dumpToCSV.
//
// The number of pages is read from the element preceding the "a.next_page"
// link of document. Each page is fetched with downloadURL from url plus
// "&p=<page>". For every ".user-list-item" the e-mail comes from "a.email",
// the profile path from the second link's href, and the display name from
// the ".user-list-info" text after its meta list and links are removed.
// Entries without a profile link are skipped.
func scrapeSearch(document *goquery.Document, url string) {
	pagesStr := document.Find("a.next_page").Prev().Text()
	// NOTE(review): when the page count cannot be read, pages is 0 and
	// nothing is scraped at all, even if document itself lists users —
	// confirm this is intended for single-page results.
	pages, _ := strconv.Atoi(pagesStr)

	for page := 1; page <= pages; page++ {
		pageURL := url + "&p=" + strconv.Itoa(page)
		fmt.Println("Analyzing page: " + pageURL)
		doc := downloadURL(pageURL)

		doc.Find(".user-list-item").Each(func(_ int, s *goquery.Selection) {
			email := s.Find("a.email").Text()

			profileURL, _ := s.Find("a").Eq(1).Attr("href")
			if profileURL == "" {
				// Slicing an empty path below would panic.
				return
			}
			// Drop the leading character of the path (the "/").
			username := profileURL[1:]
			profileURL = "http://github.com" + profileURL

			// Strip the nested meta list and links so only the name remains.
			info := s.Find(".user-list-info")
			_ = info.Find("ul.user-list-meta").Remove()
			_ = info.Find("a").Remove()
			name := strings.TrimSpace(info.Text())

			fmt.Println("Parsed user: " + username)
			u := user{name: name, email: email, url: profileURL, username: username}
			dumpToCSV(u)
		})
	}
}
开发者ID:timchunght,项目名称:gophers,代码行数:25,代码来源:scrape.go
示例17: GetAttrbuites
// GetAttrbuites returns the Taobao item attributes of the page as a single
// string in which the entries are separated by "##".
//
// Entries come from the "#J_AttrUL li" elements, each rendered as
// "<name>:<value>". When that list yields nothing, the raw text of the
// "#attributes .attributes-list li" elements is used instead.
func GetAttrbuites(p *goquery.Document) string {
	attrs := make([]string, 0, 20)

	p.Find("#J_AttrUL li").Each(func(_ int, li *goquery.Selection) {
		text := li.Text()
		parts := strings.Split(text, ":")
		if len(parts) < 2 {
			// NOTE(review): as written this retries with the very same
			// separator; presumably a different colon variant was intended.
			parts = strings.Split(text, ":")
		}

		// NOTE(review): the value is kept only when it is NOT valid UTF-8;
		// valid values end up empty. This looks like the remains of a
		// removed charset conversion — verify the intent.
		value := ""
		if len(parts) >= 2 && !utf8.ValidString(parts[1]) {
			value = parts[1]
		}
		attrs = append(attrs, parts[0]+":"+value)
	})

	if len(attrs) == 0 {
		p.Find("#attributes .attributes-list li").Each(func(_ int, li *goquery.Selection) {
			attrs = append(attrs, li.Text())
		})
	}
	return strings.Join(attrs, "##")
}
开发者ID:qgweb,项目名称:new,代码行数:27,代码来源:tao.go
示例18: getTerms
// getTerms splits the text of every <p> element in doc into terms.
//
// HTML entities are unescaped first. The characters - , . ; and " act as
// additional word separators, after which the text is split on whitespace.
// Finally every character outside A-Z, a-z and 0-9 is removed from each term,
// which can leave empty strings in the result. The returned slice is never
// nil.
func getTerms(doc *goquery.Document) ([]string, error) {
	separators := strings.NewReplacer(
		"-", " ",
		",", " ",
		".", " ",
		";", " ",
		"\"", " ",
	)

	words := make([]string, 0)
	doc.Find("p").Each(func(_ int, para *goquery.Selection) {
		// Decode entities up front so they are split like ordinary text.
		text := html.UnescapeString(para.Text())
		words = append(words, strings.Fields(separators.Replace(text))...)
	})

	nonAlnum, err := regexp.Compile("[^A-Za-z0-9]+")
	if err != nil {
		log.Println("Unexpected regex compilation error: " + err.Error())
		return []string{}, err
	}

	for idx, word := range words {
		words[idx] = nonAlnum.ReplaceAllString(word, "")
	}
	return words, nil
}
开发者ID:anyweez,项目名称:newsflash,代码行数:26,代码来源:extractor.go
示例19: GetShopUrl
// GetShopUrl returns the shop address found on the page.
//
// The href is read from ".tb-seller-name", falling back to ".slogo-shopname"
// when the former has none. Surrounding whitespace is removed before the
// scheme is added, so it cannot end up in the middle of the URL. An href that
// already starts with "http://" or "https://" is returned unchanged; any
// other value gets "https:" prepended. When no href is found the result is ""
// rather than a bare "https:".
func GetShopUrl(p *goquery.Document) string {
	href, _ := p.Find(".tb-seller-name").Attr("href")
	if href == "" {
		href, _ = p.Find(".slogo-shopname").Attr("href")
	}

	href = strings.TrimSpace(href)
	switch {
	case href == "":
		return ""
	case strings.HasPrefix(href, "http://"), strings.HasPrefix(href, "https://"):
		return href
	default:
		return "https:" + href
	}
}
开发者ID:qgweb,项目名称:new,代码行数:8,代码来源:tao.go
示例20: getSValue
// getSValue downloads ROOT and returns the value attribute of the <input>
// element named "s". When several such inputs carry a value, the last one
// wins. The result is "" when the page cannot be downloaded or no matching
// input exists.
func (t *TownClient) getSValue() (sValue string) {
	log.WithField("tag", TAG).Info("getting sValue for town login")

	log.WithField("tag", TAG).Infof("GET %v", ROOT)
	doc, err := goquery.NewDocument(ROOT)
	if err != nil {
		log.WithField("tag", TAG).Errorf("%s", err.Error())
		return ""
	}

	doc.Find("input").Each(func(_ int, input *goquery.Selection) {
		if name, ok := input.Attr("name"); !ok || name != "s" {
			return
		}
		if value, ok := input.Attr("value"); ok {
			sValue = value
		}
	})

	log.WithField("tag", TAG).Infof("sValue: %v", sValue)
	return sValue
}
开发者ID:Kemonozume,项目名称:nzbcrawler,代码行数:26,代码来源:client.go
注：本文中的github.com/PuerkitoBio/goquery.Document类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论