import re

import pandas as pd
import pyecharts.options as opts
from pyecharts.charts import Line, Bar

# Runs of CJK characters: the unit words of a salary string ("万", "月", "小时", ...).
_CJK_WORD = re.compile(r'[\u4e00-\u9fa5]+')
# One numeric token of a salary string ("1", "1.5", ...).
_SALARY_NUMBER = re.compile(r'\d+(?:\.\d+)?')

# Used to turn an hourly wage into a monthly one: 8 hours a day, 22 days a month.
_HOURS_PER_DAY = 8
_WORK_DAYS_PER_MONTH = 22


def _read_records(path):
    """Return the records stored in *path*, one Python literal per line.

    Blank lines are skipped; any other line that is not a valid expression
    still raises, exactly as before.
    """
    with open(path, encoding='utf-8') as file:
        # NOTE(review): eval() executes whatever the data file contains. If the
        # lines are plain dict literals, ast.literal_eval is a safer drop-in;
        # confirm against the real data before switching.
        return [eval(text) for text in map(str.strip, file) if text]


def _monthly_salary_wan(text):
    """Convert a salary string to its midpoint in units of 10,000 yuan per month.

    *text* is expected to look like "<low>-<high><amount unit>/<period>",
    e.g. "1-1.5万/月"; when only one number is present it is used as both
    bounds.

    Returns the rounded midpoint, or None when the unit combination is not
    one of 万/月, 万/年, 千/月 or 元/小时.

    Raises IndexError when *text* holds no number or fewer than two unit
    words, and TypeError when *text* is not a string.
    """
    units = _CJK_WORD.findall(text)
    numbers = _SALARY_NUMBER.findall(text)
    low = float(numbers[0])
    high = float(numbers[1]) if len(numbers) > 1 else low
    midpoint = (low + high) / 2
    amount, period = units[0], units[1]

    # Only the first character is checked for "万" so that amount words with
    # a suffix after "万" are accepted as well.
    if amount[0] == '万' and period == '月':
        return round(midpoint, 2)
    if amount[0] == '万' and period == '年':
        return round(midpoint / 12, 2)
    if amount == '千' and period == '月':
        return round(midpoint / 10, 2)
    if amount == '元' and period == '小时':
        monthly_yuan = midpoint * _HOURS_PER_DAY * _WORK_DAYS_PER_MONTH
        return round(monthly_yuan / 10000, 2)
    return None


# Job posting pre-processing: keep only the district part of the location and
# replace the salary string with a number (10,000 yuan per month).
job_list = []
for record in _read_records('data/岗位信息.txt'):
    try:
        record['位置'] = record['位置'].split('-')[1]
        salary = _monthly_salary_wan(record['薪资'])
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        # Record without a district or with an unparseable salary: skip it.
        continue
    if salary is None:
        # Unknown salary unit: a raw string here would break the averaging below.
        continue
    record['薪资'] = salary
    job_list.append(record)
job_list_DF = pd.DataFrame(job_list)

# Housing listing pre-processing: area and price become the first integer
# found in their respective strings.
house_list = []
for record in _read_records('data/房源信息.txt'):
    record['面积'] = int(re.findall(r'\d+', record['面积'])[0])
    record['价格'] = int(re.findall(r'\d+', record['价格'])[0])
    house_list.append(record)
house_list_DF = pd.DataFrame(house_list)

# Distinct non-empty districts, sorted so the chart axis order is the same on
# every run (plain set iteration order is not).
xingzhengqu = sorted({item for item in house_list_DF.get(key='行政区') if item})


def houserGetAvgPrice(xingzhengqu):
    """Return total price divided by total area over the listings of one district.

    *xingzhengqu* is compared against each listing's '行政区' value.
    Returns 1 when the district has no listings (or zero total area).
    """
    totalPrice = 0
    totalArea = 0
    for item in house_list:
        if item['行政区'] == xingzhengqu:
            totalArea += item['面积']
            totalPrice += item['价格']
    return totalPrice / totalArea if totalArea > 0 else 1


# Rent per square metre per day: the per-district average divided by 30.
house_totalAvgPriceList = [
    round(houserGetAvgPrice(item) / 30, 2) for item in xingzhengqu
]
attr, house_value = (xingzhengqu, house_totalAvgPriceList)


def jobGetAvgPrice(xingzhengqu):
    """Return the mean normalised salary of the job postings in one district.

    *xingzhengqu* is compared against each posting's '位置' value.
    Returns 0 when the district has no postings.
    """
    totalPrice = 0
    total = 0
    for item in job_list:
        if item['位置'] == xingzhengqu:
            total += 1
            totalPrice += item['薪资']
    return totalPrice / total if total > 0 else 0


# Salary per district, converted from 10,000 yuan/month to yuan and divided by
# 30 and 24. Job locations carry a trailing '区' that the housing districts lack.
# NOTE(review): dividing by 30 and then 24 gives an hourly figure, while the
# series label below says "元/日" - confirm which one is intended.
job_totalAvgPriceList = [
    round(jobGetAvgPrice(item + '区') * 10000 / 30 / 24, 2) for item in xingzhengqu
]
attr, job_value = (xingzhengqu, job_totalAvgPriceList)

# Guangzhou rent vs. salary line chart
line = Line(init_opts=opts.InitOpts(width='800px', height='800px'))
line.add_xaxis(xaxis_data=attr)
line.add_yaxis('房租:元/日(1平方米)', house_value)
line.add_yaxis("薪资:元/日", job_value)
line.render('images/both/广州房租-薪资.html')

# Per-district gap between the salary and rent series.
difference = [
    round(salary - rent, 2) for salary, rent in zip(job_value, house_value)
]

# Guangzhou rent vs. salary gap bar chart
bar = Bar(init_opts=opts.InitOpts(width='1200px', height='1400px'))
bar.add_xaxis(attr)
bar.add_yaxis("广州房租-薪资差距图:元", difference)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title='广州房租-薪资差距图:元'),
    xaxis_opts=opts.AxisOpts(axislabel_opts={"rotate": "270"}),
)
bar.render('images/both/广州房租-薪资差距.html')