# Fetch one 51job posting and collect the fields of its header block into
# the module-level dict ``tmp``.
import requests
from bs4 import BeautifulSoup

# Request headers sent with the page fetch.
# NOTE: the two adjacent User-Agent literals are concatenated by Python with
# no space between "AppleWebKit/537.36" and "(KHTML"; kept exactly as-is.
HEADERS = {
    "X-Requested-With": "XMLHttpRequest",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36"
                  "(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
}

# Without a timeout, requests waits forever on a stalled connection.
response = requests.get(
    'https://jobs.51job.com/guangzhou/119982727.html?s=01&t=0',
    headers=HEADERS,
    timeout=10,
)
# Fail here with a clear HTTP error instead of parsing an error page and
# hitting an opaque IndexError further down.
response.raise_for_status()
# Raw bytes are handed to BeautifulSoup so it performs the decoding itself.
html = response.content

# The "div.cn" container of the job header; every field below is read from it.
bs_tmp = BeautifulSoup(html, 'lxml').select(
    'body > div.tCompanyPage > div.tCompany_center.clearfix > div.tHeader.tHjob > div > div.cn')[0]
bs_tmp1 = bs_tmp.select('h1')[0]               # job title element
bs_tmp2 = bs_tmp.select('strong')[0]           # salary element
bs_tmp3 = bs_tmp.select('p.cname > a.catn')[0]  # company link; name is in its "title" attribute
# Summary line: drop non-breaking spaces, then split the "|"-separated fields.
bs_tmp4 = bs_tmp.select(' p.msg.ltype')[0].text.replace(u'\xa0', '').split('|')

# The summary line is read positionally as five fields. Pad with empty
# strings so a posting with fewer fields yields '' instead of an IndexError.
# NOTE(review): when a field is omitted in the middle of the line, the
# remaining values shift left -- confirm against real pages.
_fields = bs_tmp4 + [''] * (5 - len(bs_tmp4))

tmp = {
    "岗位": bs_tmp1.text,       # position
    "公司": bs_tmp3['title'],   # company
    "薪资": bs_tmp2.text,       # salary
    '位置': _fields[0],         # location
    '工作经验': _fields[1],     # work experience
    '学历': _fields[2],         # education
    '招聘人数': _fields[3],     # number of openings
    '发布时间': _fields[4],     # publish date
}