|
|
|
import pandas as pd
|
|
|
|
import re
|
|
|
|
import os
|
|
|
|
|
|
|
|
# 数据预处理
|
|
|
|
import util
|
|
|
|
|
|
|
|
def _average_monthly_salary(raw_salary):
    """Return the midpoint of a salary range as a monthly amount in 万 (10,000 yuan).

    The text is searched for a numeric ``low-high`` range and for runs of
    Chinese characters; the first run is taken as the amount unit and the
    second as the pay period.

    Args:
        raw_salary: the raw '薪资' text of one posting.

    Returns:
        The average of the two bounds converted to 万 per month and rounded
        to two decimals, or ``None`` when the unit/period pair is not one of
        万/月, 万/年, 千/月 or 元/小时.

    Raises:
        IndexError: no numeric range with two bounds, or fewer than two
            unit words, could be extracted.
        ValueError: a bound is not a valid float.
        TypeError: *raw_salary* is not a string.
    """
    units = re.findall('[\u4e00-\u9fa5]+', raw_salary)
    bounds = re.findall(r'\d+.*\d', raw_salary)[0].split('-')
    # NOTE(review): a salary without '-' yields a single bound, so bounds[1]
    # raises IndexError and the caller skips the record (unchanged behaviour).
    if not bounds[1]:
        bounds[1] = bounds[0]
    low, high = float(bounds[0]), float(bounds[1])
    amount_unit, period = units[0], units[1]

    # Only the first character is checked for 万, so longer unit words that
    # start with 万 are accepted as well.
    if amount_unit[0] == '万' and period == '月':
        return round((low + high) / 2, 2)
    if amount_unit[0] == '万' and period == '年':
        return round((low + high) / 2 / 12, 2)
    if amount_unit == '千' and period == '月':
        return round((low + high) / 2 / 10, 2)
    if amount_unit == '元' and period == '小时':
        # 8 hours a day, 22 days a month; yuan -> 万 is a division by 10000
        # (the previous /100 disagreed with the other branches' unit, and the
        # branch itself was unreachable because a list was compared to a str).
        return round((low + high) * 8 * 22 / 2 / 10000, 2)
    return None


# Load the raw postings (one Python dict literal per line), keep the part of
# '位置' after the first '-', and replace '薪资' with a number in 万 per month.
with open('data/岗位信息.txt', 'rb') as file:
    job_list = []
    for raw_line in file:
        if not raw_line.strip():
            # A blank line (e.g. a trailing newline) carries no record.
            continue
        # SECURITY NOTE(review): eval() executes whatever the data file
        # contains. If every line is a plain literal, ast.literal_eval would
        # be a safer drop-in - confirm against the data before switching.
        line = eval(raw_line.decode('utf-8'))
        try:
            line['位置'] = re.split('-', line['位置'])[1]
            salary = _average_monthly_salary(line['薪资'])
        except (KeyError, IndexError, TypeError, ValueError):
            # Best effort: records that cannot be parsed are dropped.
            continue
        if salary is None:
            # Unknown unit: keeping the raw string would break the numeric
            # averaging done later in this file.
            continue
        line['薪资'] = salary
        job_list.append(line)

job_list_DF = pd.DataFrame(job_list)
# Distinct non-empty '位置' values; set iteration order is arbitrary.
xingzhengqu = [item for item in set(job_list_DF.get(key='位置')) if item]
|
|
|
|
|
|
|
|
# 广州各区岗位分布
|
|
|
|
from pyecharts import options as opts
|
|
|
|
from pyecharts.charts import Pie
|
|
|
|
|
|
|
|
|
|
|
|
def getAreaWeight():
    """Count postings per '位置' value and build percentage-annotated labels.

    Reads the module-level ``job_list_DF`` and groups it by the '位置' column.

    Returns:
        A ``(labels, counts)`` tuple of two parallel lists. Each label is the
        group key followed by that group's share of all rows, as a percentage
        rounded to two decimals with a trailing '%'. Each count is the
        group's row count as a plain ``int``.
    """
    grouped = job_list_DF.groupby('位置').size().reset_index(name='count')
    counts = [int(n) for n in grouped['count'].values]
    # The grand total is the same for every label, so compute it once.
    grand_total = sum(counts)
    labels = [
        district + str(round(count / grand_total * 100, 2)) + '%'
        for district, count in zip(grouped['位置'].values, counts)
    ]
    return (labels, counts)
|
|
|
|
|
|
|
|
|
|
|
|
# Pie chart of the per-district posting counts returned by getAreaWeight().
pie = Pie(init_opts=opts.InitOpts(width='800px', height='800px'))
data = getAreaWeight()
# Each slice is a two-element [label, count] list.
pie.add("", [[label, count] for label, count in zip(*data)])
pie.set_global_opts(title_opts=opts.TitleOpts(title='广州各区岗位分布'))

# Prepare the output directory: create it when missing, otherwise pass it to
# util.clearDir (presumably removes earlier output - confirm in util).
image_dir="images/job"
if not os.path.exists(image_dir):
    os.makedirs(image_dir)
else:
    util.clearDir(image_dir)
pie.render('images/job/广州各区岗位分布.html')
|
|
|
|
|
|
|
|
# 求广州单月薪资
|
|
|
|
from pyecharts.charts import TreeMap
|
|
|
|
|
|
|
|
|
|
|
|
def getAvgPrice(xingzhengqu):
    """Return the mean '薪资' of the postings in ``job_list`` for one '位置' value.

    Args:
        xingzhengqu: value compared with ``==`` against each posting's '位置'.

    Returns:
        The sum of the matching '薪资' values divided by how many matched,
        or ``0`` when no posting matches.
    """
    salaries = [job['薪资'] for job in job_list if job['位置'] == xingzhengqu]
    if not salaries:
        return 0
    return sum(salaries) / len(salaries)
|
|
|
|
|
|
|
|
|
|
|
|
# 获取每个区 单月薪资
|
|
|
|
def getTotalAvgPrice():
    """Build one chart entry per item of the module-level ``xingzhengqu`` list.

    Returns:
        A list of dicts, in ``xingzhengqu`` order. 'value' is the item's
        ``getAvgPrice`` result rounded to two decimals; 'name' is the item
        followed by " ¥", that rounded value and ' 万'.
    """
    def as_entry(district):
        # Round once and reuse the figure for both the value and the label.
        rounded = round(getAvgPrice(district), 2)
        return {'value': rounded, 'name': district + " ¥" + str(rounded) + ' 万'}

    return [as_entry(district) for district in xingzhengqu]
|
|
|
|
|
|
|
|
|
|
|
|
# Tree map of the per-district entries produced by getTotalAvgPrice().
treemap = TreeMap(init_opts=opts.InitOpts(width='1200px', height='1400px'))
data = getTotalAvgPrice()
treemap.add(
    '广州各区每月薪资:万/月',
    data,
    label_opts=opts.LabelOpts(is_show=True, position='inside', font_size=13),
)
treemap.render('images/job/广州各区每月薪资.html')
|
|
|
|
|
|
|
|
# 获取每个区 单日薪资
|
|
|
|
from pyecharts.charts import Bar
|
|
|
|
|
|
|
|
# Per-district daily figure: the getAvgPrice() result times 10000, divided by
# 30, rounded to two decimals.
totalAvgPriceList = [
    round(getAvgPrice(district) * 10000 / 30, 2) for district in xingzhengqu
]
attr, value = xingzhengqu, totalAvgPriceList

# Bar chart with one bar per district; x-axis labels are rotated by 270.
bar = Bar(init_opts=opts.InitOpts(width='1200px', height='1400px'))
bar.add_xaxis(attr)
bar.add_yaxis("广州", value)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title='广州各区单日薪资:元/日'),
    xaxis_opts=opts.AxisOpts(axislabel_opts={"rotate": "270"}),
)
bar.render('images/job/广州各区单日薪资.html')
|
|
|
|
|
|
|
|
# 获取岗位数据
|
|
|
|
from pyecharts.charts import WordCloud
|
|
|
|
|
|
|
|
|
|
|
|
def getRooms():
    """Count the rows of the module-level ``job_list_DF`` per '岗位' value.

    Returns:
        A ``(names, counts)`` tuple of two parallel lists: the distinct '岗位'
        values in group order, and each group's row count as a plain ``int``.
    """
    per_job = job_list_DF.groupby('岗位').size().reset_index(name='count')
    names = list(per_job['岗位'].values)
    counts = [int(n) for n in per_job['count'].values]
    return (names, counts)
|
|
|
|
|
|
|
|
|
|
|
|
# Word cloud of the '岗位' values, each weighted by its getRooms() count.
wordcloud = WordCloud(init_opts=opts.InitOpts(width='900px', height='400px'))
attr, value = getRooms()
wordcloud.add(
    '',
    zip(attr, value),  # (word, weight) pairs
    word_size_range=[2, 100],
)
wordcloud.render('images/job/广州岗位数据.html')
|
|
|
|
|
|
|
|
# 获取各个区的岗位数量比重
|
|
|
|
from pyecharts.charts import Pie
|
|
|
|
|
|
|
|
|
|
|
|
def getAreaWeight():
    """Return per-'位置' labels and row counts computed from ``job_list_DF``.

    Returns:
        A ``(labels, counts)`` tuple of two parallel lists. ``counts`` holds
        the number of rows in each '位置' group as plain ``int`` values.
        ``labels`` holds the group key followed by the group's percentage of
        all rows (two decimals) and a '%' sign.
    """
    per_area = job_list_DF.groupby('位置').size().reset_index(name='count')
    area_names = list(per_area['位置'].values)
    area_counts = list(map(int, per_area['count'].values))
    # Same denominator for every area, so sum the counts a single time.
    total_jobs = sum(area_counts)

    labelled = []
    for name, count in zip(area_names, area_counts):
        share = round(count / total_jobs * 100, 2)
        labelled.append(name + str(share) + '%')
    return (labelled, area_counts)
|
|
|
|
|
|
|
|
|
|
|
|
# Larger pie chart of the share of postings per district.
data = getAreaWeight()
pie = Pie(init_opts=opts.InitOpts(width='1200px', height='1200px'))
# Turn the two parallel lists into [label, count] pairs, one per slice.
pie.add("", list(map(list, zip(*data))))
pie.set_global_opts(title_opts=opts.TitleOpts(title='广州各区岗位数量分布'))
pie.render('images/job/广州各区岗位数量分布.html')
|