You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
99 lines
3.7 KiB
99 lines
3.7 KiB
5 years ago
|
import pandas as pd
|
||
|
import re
|
||
|
import pyecharts.options as opts
|
||
|
from pyecharts.charts import Line, Bar
|
||
|
|
||
|
# Job postings preprocessing: read one record per line, reduce the location
# to its second '-'-separated part, convert the salary text to a number, and
# collect the usable records in job_list / job_list_DF.

# Runs of CJK characters in a salary string, e.g. '1-1.5万/月' -> ['万', '月'].
_SALARY_UNIT_RE = re.compile(r'[\u4e00-\u9fa5]+')
# Individual (optionally decimal) numbers in a salary string.
_SALARY_NUMBER_RE = re.compile(r'\d+(?:\.\d+)?')


def _salary_to_wan_per_month(text):
    """Return the midpoint of a salary string in 万 (10,000 yuan) per month.

    Supported forms are ranges or single values expressed in 万/月, 万/年,
    千/月 and 元/小时.  A single value is used as both ends of the range.

    Args:
        text: Raw salary string such as '1-1.5万/月'.

    Returns:
        The midpoint salary, rounded to two decimals.

    Raises:
        ValueError: If the string has no number or an unsupported unit.
        TypeError: If ``text`` is not a string.
    """
    units = _SALARY_UNIT_RE.findall(text)
    amounts = [float(number) for number in _SALARY_NUMBER_RE.findall(text)]
    if not amounts or len(units) < 2:
        raise ValueError('unrecognised salary: %r' % (text,))

    low = amounts[0]
    high = amounts[1] if len(amounts) > 1 else low
    midpoint = (low + high) / 2

    # Only the first character of the amount unit matters, so suffixed forms
    # such as '万以上' are treated like '万'.
    scale, period = units[0][0], units[1]
    if scale == '万' and period == '月':
        value = midpoint
    elif scale == '万' and period == '年':
        value = midpoint / 12
    elif scale == '千' and period == '月':
        value = midpoint / 10
    elif scale == '元' and period == '小时':
        # 8 hours a day, 22 days a month, then yuan -> 万.
        value = midpoint * 8 * 22 / 10000
    else:
        raise ValueError('unsupported salary unit: %r' % (text,))
    return round(value, 2)


job_list = []
with open('data/岗位信息.txt', encoding='utf-8') as file:
    for raw_line in file:
        if not raw_line.strip():
            continue
        # NOTE(review): eval() executes whatever the data file contains.
        # If every line is a plain dict literal, ast.literal_eval would be a
        # safe replacement - confirm the file format before switching.
        record = eval(raw_line)
        try:
            record['位置'] = record['位置'].split('-')[1]
            record['薪资'] = _salary_to_wan_per_month(record['薪资'])
        except (KeyError, IndexError, TypeError, ValueError, AttributeError):
            # Best effort: records that cannot be normalised are dropped so
            # that every kept record has a numeric salary.
            continue
        job_list.append(record)
job_list_DF = pd.DataFrame(job_list)
|
||
|
|
||
|
# Housing listings preprocessing: read one record per line and replace the
# '面积' and '价格' text fields with the first integer found in each.

# First run of digits in a text field.
_HOUSE_INT_RE = re.compile(r'\d+')

house_list = []
with open('data/房源信息.txt', encoding='utf-8') as file:
    for raw_line in file:
        if not raw_line.strip():
            continue
        # NOTE(review): eval() executes whatever the data file contains.
        # If every line is a plain dict literal, ast.literal_eval would be a
        # safe replacement - confirm the file format before switching.
        listing = eval(raw_line)
        # A field without any digit raises IndexError, as before.
        listing['面积'] = int(_HOUSE_INT_RE.findall(listing['面积'])[0])
        listing['价格'] = int(_HOUSE_INT_RE.findall(listing['价格'])[0])
        house_list.append(listing)
house_list_DF = pd.DataFrame(house_list)
|
||
|
|
||
|
# Distinct, non-empty district names from the housing data, in order of first
# appearance so the chart axes come out the same on every run (iterating a
# set of strings does not guarantee that).
_district_column = house_list_DF.get('行政区')
xingzhengqu = (
    []
    if _district_column is None
    else [
        district
        for district in _district_column.dropna().unique()
        if district
    ]
)
|
||
|
|
||
|
# Average listing price per unit of area for one district
def houserGetAvgPrice(xingzhengqu, houses=None):
    """Return total price divided by total area for one district.

    Args:
        xingzhengqu: District name, compared with each record's '行政区'.
        houses: Iterable of listing dicts with '行政区', '面积' and '价格'
            keys.  Defaults to the module-level ``house_list``.

    Returns:
        Sum of '价格' over sum of '面积' for the matching listings, or 1 when
        the matching listings have no positive total area.
    """
    if houses is None:
        houses = house_list
    matching = [house for house in houses if house['行政区'] == xingzhengqu]
    total_area = sum(house['面积'] for house in matching)
    total_price = sum(house['价格'] for house in matching)
    # NOTE(review): the fallback is 1 here, while jobGetAvgPrice falls back
    # to 0 - kept as is, confirm which value is intended.
    return total_price / total_area if total_area > 0 else 1
|
||
|
|
||
|
# Per-district rent per unit of area, divided by 30 and rounded to 2 decimals
# (presumably monthly rent converted to a daily figure - confirm the unit of
# '价格' in the data).
house_totalAvgPriceList = [
    round(houserGetAvgPrice(district) / 30, 2)
    for district in xingzhengqu
]
attr, house_value = xingzhengqu, house_totalAvgPriceList
|
||
|
|
||
|
# Average salary for one district
def jobGetAvgPrice(xingzhengqu, jobs=None):
    """Return the mean '薪资' of the job records located in one district.

    Records whose '薪资' is not a number (for example a raw salary string
    that was never converted) are ignored instead of breaking the sum.

    Args:
        xingzhengqu: District name, compared with each record's '位置'.
        jobs: Iterable of job dicts with '位置' and '薪资' keys.  Defaults to
            the module-level ``job_list``.

    Returns:
        The arithmetic mean of the matching numeric salaries, or 0 when
        there are none.
    """
    if jobs is None:
        jobs = job_list
    salary_sum = 0
    matched = 0
    for job in jobs:
        if job['位置'] != xingzhengqu:
            continue
        salary = job['薪资']
        if not isinstance(salary, (int, float)):
            continue
        salary_sum += salary
        matched += 1
    return salary_sum / matched if matched > 0 else 0
|
||
|
|
||
|
# Per-district average salary, scaled by 10000 and divided by 30 and by 24.
# Job locations carry a '区' suffix that the housing district names lack, so
# it is appended before the lookup.
# NOTE(review): dividing by 30 and 24 looks like a per-hour figure (assuming
# the stored salary is 万 per month), but the chart legend labels this series
# '元/日' - confirm which unit is intended.
job_totalAvgPriceList = [
    round(jobGetAvgPrice(district + '区') * 10000 / 30 / 24, 2)
    for district in xingzhengqu
]
attr, job_value = xingzhengqu, job_totalAvgPriceList
|
||
|
|
||
|
# Guangzhou rent vs. salary line chart: one x-axis of districts with the rent
# series and the salary series plotted against it, written out as HTML.
line = (
    Line(init_opts=opts.InitOpts(width='800px', height='800px'))
    .add_xaxis(xaxis_data=attr)
    .add_yaxis('房租:元/日(1平方米)', house_value)
    .add_yaxis("薪资:元/日", job_value)
)
line.render('images/both/广州房租-薪资.html')
|
||
|
|
||
|
# Per-district gap between the salary series and the rent series.
difference = [
    round(salary - rent, 2)
    for salary, rent in zip(job_value, house_value)
]

# Guangzhou rent vs. salary gap bar chart, written out as HTML.
bar = Bar(init_opts=opts.InitOpts(width='1200px', height='1400px'))
bar.add_xaxis(attr)
bar.add_yaxis("广州房租-薪资差距图:元", difference)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title='广州房租-薪资差距图:元'),
    # Rotate the district labels so long names do not overlap.
    xaxis_opts=opts.AxisOpts(axislabel_opts={"rotate": "270"}),
)
bar.render('images/both/广州房租-薪资差距.html')
|
||
|
|