"""Analyse job postings and render pyecharts HTML reports.

The script reads one record per line from ``data/岗位信息.txt``, normalises
each record's district (``位置``) and salary (``薪资``), and then writes five
charts into ``images/job``: two pies of postings per district, a treemap of
average monthly salary, a bar chart of average daily salary and a word cloud
of job titles.
"""
import os
import re

import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Bar, Pie, TreeMap, WordCloud

import util


# ---------------------------------------------------------------------------
# Data preprocessing
# ---------------------------------------------------------------------------
def _monthly_salary_wan(text):
    """Return the average monthly salary in 万 (10,000 yuan) for *text*.

    *text* is expected to look like ``"<low>-<high><amount unit>/<period>"``
    (for example ``"1-1.5万/月"``); a single figure without an upper bound is
    treated as both bounds.  NOTE(review): the format is inferred from the
    parsing rules below -- confirm against the real data file.

    Returns:
        The rounded monthly figure, or ``None`` when the unit combination is
        not one of 万/月, 万/年, 千/月 or 元/小时.

    Raises:
        IndexError, ValueError, TypeError: if *text* has no numeric part, has
            fewer than two unit words, or is not a string.
    """
    units = re.findall('[\u4e00-\u9fa5]+', text)
    bounds = re.findall(r'\d+.*\d', text)[0].split('-')
    low = float(bounds[0])
    # A posting may quote a single figure; use it for both ends of the range.
    high = float(bounds[1]) if len(bounds) > 1 and bounds[1] else low
    average = (low + high) / 2

    amount_unit, period = units[0], units[1]
    # Only the first character is checked for 万, so a suffixed unit word
    # still counts as 万.
    if amount_unit[0] == '万' and period == '月':
        return round(average, 2)
    if amount_unit[0] == '万' and period == '年':
        return round(average / 12, 2)
    if amount_unit == '千' and period == '月':
        return round(average / 10, 2)
    if amount_unit == '元' and period == '小时':
        # 8 hours a day, 22 working days a month, then yuan -> 万.
        return round(average * 8 * 22 / 10000, 2)
    return None


job_list = []
with open('data/岗位信息.txt', encoding='utf-8') as file:
    for raw_line in file:
        # NOTE(review): eval() executes whatever expression the data file
        # contains.  If every line is a plain dict literal,
        # ast.literal_eval is a safer replacement -- confirm the file format
        # before switching.
        line = eval(raw_line)
        try:
            # Keep only the part after the first '-' of the location string.
            line['位置'] = re.split('-', line['位置'])[1]
            salary = _monthly_salary_wan(line['薪资'])
        except (KeyError, IndexError, TypeError, ValueError):
            # Malformed record: skip it rather than abort the whole report.
            continue
        if salary is None:
            # Unknown salary unit: a non-numeric salary would break the
            # averaging below, so the record is dropped.
            continue
        line['薪资'] = salary
        job_list.append(line)

job_list_DF = pd.DataFrame(job_list)
# Distinct, non-empty districts in first-seen order so that chart ordering is
# reproducible between runs.
xingzhengqu = [item for item in dict.fromkeys(job_list_DF['位置']) if item]


# ---------------------------------------------------------------------------
# Job distribution per district
# ---------------------------------------------------------------------------
def getAreaWeight():
    """Return ``(labels, counts)`` describing postings per district.

    ``labels`` are the district names suffixed with their percentage share
    (rounded to two decimals); ``counts`` are the matching posting counts.
    """
    result = job_list_DF.groupby('位置').size().reset_index(name='count')
    areaName = list(result['位置'].values)
    areaWeight = list(map(int, result['count'].values))
    total = sum(areaWeight)
    areaName_tmp = [
        f"{name}{round(weight / total * 100, 2)}%"
        for name, weight in zip(areaName, areaWeight)
    ]
    return (areaName_tmp, areaWeight)


pie = Pie(init_opts=opts.InitOpts(width='800px', height='800px'))
data = getAreaWeight()
pie.add("", [list(z) for z in zip(data[0], data[1])])
pie.set_global_opts(title_opts=opts.TitleOpts(title='广州各区岗位分布'))

# Start from an empty output directory.
image_dir = "images/job"
if os.path.exists(image_dir):
    util.clearDir(image_dir)
else:
    os.makedirs(image_dir)
pie.render('images/job/广州各区岗位分布.html')


# ---------------------------------------------------------------------------
# Average monthly salary per district
# ---------------------------------------------------------------------------
def getAvgPrice(xingzhengqu):
    """Return the mean salary of the jobs located in district *xingzhengqu*.

    Returns 0 when no job is located in that district.
    """
    salaries = [item['薪资'] for item in job_list if item['位置'] == xingzhengqu]
    return sum(salaries) / len(salaries) if salaries else 0


def getTotalAvgPrice():
    """Return one TreeMap entry (``value`` and ``name``) per district."""
    totalAvgPriceDirList = []
    for item in xingzhengqu:
        avg_price = round(getAvgPrice(item), 2)
        totalAvgPriceDirList.append(
            {'value': avg_price, 'name': item + " ¥" + str(avg_price) + ' 万'})
    return totalAvgPriceDirList


data = getTotalAvgPrice()
treemap = TreeMap(init_opts=opts.InitOpts(width='1200px', height='1400px'))
treemap.add('广州各区每月薪资:万/月', data,
            label_opts=opts.LabelOpts(is_show=True, position='inside', font_size=13))
treemap.render('images/job/广州各区每月薪资.html')


# ---------------------------------------------------------------------------
# Average daily salary per district
# ---------------------------------------------------------------------------
# 万 per month -> yuan per day, using a 30-day month.
totalAvgPriceList = [
    round(getAvgPrice(item) * 10000 / 30, 2) for item in xingzhengqu
]
attr, value = (xingzhengqu, totalAvgPriceList)
bar = Bar(init_opts=opts.InitOpts(width='1200px', height='1400px'))
bar.add_xaxis(attr)
bar.add_yaxis("广州", value)
bar.set_global_opts(title_opts=opts.TitleOpts(title='广州各区单日薪资:元/日'),
                    xaxis_opts=opts.AxisOpts(axislabel_opts={"rotate": "270"}))
bar.render('images/job/广州各区单日薪资.html')


# ---------------------------------------------------------------------------
# Job titles
# ---------------------------------------------------------------------------
def getRooms():
    """Return ``(titles, counts)``: each job title and its posting count."""
    results = job_list_DF.groupby('岗位').size().reset_index(name='count')
    room_list = list(results['岗位'].values)
    weight_list = list(map(int, results['count'].values))
    return (room_list, weight_list)


attr, value = getRooms()
wordcloud = WordCloud(init_opts=opts.InitOpts(width='900px', height='400px'))
wordcloud.add('', list(zip(attr, value)), word_size_range=[2, 100])
wordcloud.render('images/job/广州岗位数据.html')


# ---------------------------------------------------------------------------
# Share of postings per district (larger rendering)
# ---------------------------------------------------------------------------
pie = Pie(init_opts=opts.InitOpts(width='1200px', height='1200px'))
data = getAreaWeight()
pie.add("", [list(z) for z in zip(data[0], data[1])])
pie.set_global_opts(title_opts=opts.TitleOpts(title='广州各区岗位数量分布'))
pie.render('images/job/广州各区岗位数量分布.html')