parent
5dbd2c792b
commit
e05b64c18d
@ -0,0 +1,16 @@ |
||||
import requests |
||||
from bs4 import BeautifulSoup |
||||
# HTTP request headers passed to requests.get() for the job-page download.
HEADERS = {
    # Marks the request as an XMLHttpRequest-style (AJAX) call.
    "X-Requested-With": "XMLHttpRequest",
    # Desktop Chrome 56 / Windows 10 user-agent string. The two literals are
    # joined by implicit concatenation, so the first one must end with a
    # space; without it the value reads "AppleWebKit/537.36(KHTML, ..." and
    # no longer matches the standard browser format.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
}
||||
# Download one 51job posting page and keep the body as raw bytes in `html`.
#
# `timeout` bounds how long the call may wait on the server; requests applies
# no timeout by default, so without it a stalled connection would hang the
# script indefinitely. raise_for_status() turns an HTTP 4xx/5xx answer into
# a requests.HTTPError here, instead of letting an error page reach the parser.
response = requests.get(
    'https://jobs.51job.com/guangzhou/119982727.html?s=01&t=0',
    headers=HEADERS,
    timeout=10,
)
response.raise_for_status()
html = response.content
||||
def _first_match(node, selector):
    """Return the first element under *node* that matches CSS *selector*.

    Raises:
        IndexError: if nothing matches. This is the same exception type a
            bare ``select(...)[0]`` raises, but the message names the
            selector so a page-layout change is easy to diagnose.
    """
    matches = node.select(selector)
    if not matches:
        raise IndexError('no element matches selector {!r}'.format(selector))
    return matches[0]


# Header container of the job page; every field below is looked up inside it.
bs_tmp = _first_match(
    BeautifulSoup(html, 'lxml'),
    'body > div.tCompanyPage > div.tCompany_center.clearfix > div.tHeader.tHjob > div > div.cn',
)

bs_tmp1 = _first_match(bs_tmp, 'h1')                # element whose text is the job title
bs_tmp2 = _first_match(bs_tmp, 'strong')            # element whose text is the salary
bs_tmp3 = _first_match(bs_tmp, 'p.cname > a.catn')  # company link; its `title` attribute is used

# Summary line: drop non-breaking spaces, then split on '|' into fields.
bs_tmp4 = _first_match(bs_tmp, ' p.msg.ltype').text.replace(u'\xa0', '').split('|')

# The record reads five positional fields. Pad a shorter line with empty
# strings so a posting with fewer fields yields a partial record instead of
# aborting with IndexError. Extra trailing fields are ignored, as before.
# NOTE(review): the mapping is purely positional; if the site omits a middle
# field, the later labels shift. Verify against real pages.
bs_tmp4 += [''] * (5 - len(bs_tmp4))

# Final record: job title, company, salary, location, work experience,
# education, number of openings, publish date.
tmp = {
    "岗位": bs_tmp1.text,
    "公司": bs_tmp3['title'],
    "薪资": bs_tmp2.text,
    '位置': bs_tmp4[0],
    '工作经验': bs_tmp4[1],
    '学历': bs_tmp4[2],
    '招聘人数': bs_tmp4[3],
    '发布时间': bs_tmp4[4],
}
Loading…
Reference in new issue