• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

比较爬虫用的语言Python与Go

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

Python是我比较喜欢的语言,莫名地喜欢,对Python的学习可能是从敲错了网址开始的,哈哈哈~

 

工作任务是登录一个网站后台、爬取数据,并写入服务器的Redis中,同事认为我会用PHP来写,哼!让你猜到那该多没意思,于是乎有了如下Python的代码,你看50多行搞定了。

 1 #!/usr/bin/python3
 2 import requests
 3 import re
 4 import redis
 5 from pyquery import PyQuery as pq
 6 
 7 loginUrl = \'https://manage.xxx.com.cn/home/login\'
 8 userName = \'xxx\'
 9 passWord = \'xxx\'
10 
11 redisServer = \'192.168.0.2\'
12 redisPort = 6379
13 redisPass = \'\'
14 
15 productList = {\'椰油\':\'CL_Spot\',\'咖啡\':\'COFFEE\',\'工业铜\':\'COPPER\'}
16 volumeList = {\'CL_Spot\':[0, 0], \'COFFEE\':[0, 0], \'COPPER\':[0, 0]}
17 
18 def main():
19     jsessionid = getCookie()
20     doLogin(jsessionid)
21     dataUrl = \'https://manage.xxx.cn/?pageNo=1&pageSize=100\'
22     cookies = {\'JSESSIONID\': jsessionid}
23     r = requests.get(dataUrl, cookies = cookies)
24     dom = pq(r.text)
25     lines = dom(\'table\').eq(1).find(\'tr\').items()
26     for line in lines:
27         line = re.sub(r\'<!--.*-->\', \'\', str(line))
28         pattern = re.compile(r\'<td>(.*?)</td>\')
29         group = pattern.findall(line)
30         if not group:
31             continue
32         productCode = productList[group[3]]
33         if group[6] == \'\':
34             volumeList[productCode][0]+= int(group[7]) * int(group[8])
35         if group[6] == \'\':
36             volumeList[productCode][1]+= int(group[7]) * int(group[8])
37 
38     redisClient = redis.Redis(host=redisServer, port=redisPort, password=redisPass)
39     for x in volumeList:
40         keyUp = \'redis_order_count_u_%s\' % x
41         keyDown = \'redis_order_count_d_%s\' % x
42         redisClient.set(keyUp, int(volumeList[x][0]))
43         redisClient.set(keyDown, int(volumeList[x][1]))
44 
45 def getCookie():
46     ua = {\'user-agent\': \'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36\'}
47     r = requests.get(loginUrl, headers = ua)
48     return r.cookies[\'JSESSIONID\']
49 
50 def doLogin(jsessionid):
51     param = {\'userName\': userName, \'password\': passWord}
52     cookies = {\'JSESSIONID\': jsessionid}
53     requests.post(loginUrl, data = param, cookies = cookies)
54     
55 
56 if __name__ == \'__main__\':
57     main()

另一个服务也有这个需求,于是用最近在看的Golang又实现了一次,瞧,写了100多行。

  1 package main
  2 
  3 import (
  4     "fmt"
  5     "net/http"
  6     "net/url"
  7     "os"
  8     "strings"
  9     "strconv"
 10     "gopkg.in/redis.v4"
 11     "github.com/PuerkitoBio/goquery"
 12 )
 13 
 14 var loginUrl string = "https://manage.xxx.com.cn/home/login"
 15 var dataUrl string = "https://manage.xxx.com.cn/?pageNo=1&pageSize=100"
 16 var userName string = "xxx"
 17 var passWord string = "xxx"
 18 var redisServer string = "192.168.1.2"
 19 var redisPort string = "6379"
 20 var redisPass string = ""
 21 var redisDB   int = 0
 22 
 23 func main() {
 24     productList := make(map[string] string)
 25     productList["椰油"] = "CL_Spot"
 26     productList["咖啡"] = "COFFEE"
 27     productList["工业铜"] = "COPPER"
 28     volumeList := make(map[string] int)
 29     volumeList["u_CL_Spot"] = 0
 30     volumeList["d_CL_Spot"] = 0
 31     volumeList["u_COFFEE"] = 0
 32     volumeList["d_COFFEE"] = 0
 33     volumeList["u_COPPER"] = 0
 34     volumeList["d_COPPER"] = 0
 35     jsessionid := getCookie()
 36     doLogin(jsessionid)
 37 
 38     request, err := http.NewRequest("GET", dataUrl, nil)
 39     request.AddCookie(&http.Cookie{Name: "JSESSIONID", Value: jsessionid})
 40     client := &http.Client{}
 41     response, err := client.Do(request)
 42     if err != nil {
 43         fmt.Println(err.Error())
 44         os.Exit(0)
 45     }
 46     defer response.Body.Close()
 47     doc, err := goquery.NewDocumentFromReader(response.Body)
 48     doc.Find("table").Eq(1).Find("tr").Each(func(i int, tr *goquery.Selection) {
 49         td := tr.Find("td")
 50         name := td.Eq(3).Text()
 51         dir := td.Eq(6).Text()
 52         if val, ok := productList[name]; ok {
 53             buyNum, _ := strconv.Atoi(td.Eq(7).Text())
 54             buyUnit, _ := strconv.Atoi(td.Eq(8).Text())
 55             num :=  buyNum * buyUnit
 56             cacheKey := ""
 57             if dir == "" {
 58                 cacheKey = fmt.Sprintf("u_%s", val)
 59             } else if dir == "" {
 60                 cacheKey = fmt.Sprintf("d_%s", val)
 61             }
 62             volumeList[cacheKey] += num
 63         }
 64     })
 65     redisClient := redis.NewClient(&redis.Options{
 66         Addr:     fmt.Sprintf("%s:%s", redisServer, redisPort),
 67         Password: redisPass,
 68         DB:       redisDB,
 69     })
 70     for k, v := range volumeList {
 71         strKey := fmt.Sprintf("redis_order_count_%s", k)
 72         redisClient.Set(strKey, int(v), 0)
 73     }
 74     fmt.Println("puti volume get success")
 75 }
 76 
 77 func getCookie() string {
 78     jsessionid := ""
 79     response, err := http.Get(loginUrl)
 80     if err != nil {
 81         fmt.Println(err.Error())
 82         os.Exit(0)
 83     }
 84     defer response.Body.Close()
 85     for _, val := range response.Cookies() {
 86         if val.Name == "JSESSIONID" {
 87             jsessionid = val.Value
 88         }
 89     }
 90     return jsessionid
 91 }
 92 
 93 func doLogin(jsessionid string) bool {
 94     data := url.Values{}
 95     data.Set("userName", userName)
 96     data.Add("password", passWord)
 97     request, _ := http.NewRequest("POST", loginUrl, strings.NewReader(data.Encode()))
 98     request.Header.Add("Content-Type", "application/x-www-form-urlencoded")
 99     request.Header.Add("Content-Length", strconv.Itoa(len(data.Encode())))
100     request.AddCookie(&http.Cookie{Name: "JSESSIONID", Value: jsessionid})
101     client := &http.Client{}
102     response, err := client.Do(request)
103     if err != nil {
104         fmt.Println(err.Error())
105         os.Exit(0)
106     }
107     defer response.Body.Close()
108     return true
109 }

Python从实现到上线半天的功夫就搞定了,Go足足搞了一整天,蹩脚的写法与不熟悉的语法让我学到了很多知识点,最后在Mac上编译、再放到Linux上执行也给我上了一课。

觉得入门学习这两门语言挺好,一个是脚本语言另一个是编译语言,用处都很广泛。轩轩你准备好了吗?


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
python 协程与go协程的区别发布时间:2022-07-10
下一篇:
《深度剖析CPython解释器》32. Python 和 Go 联合编程发布时间:2022-07-10
热门推荐
热门话题
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap