"""Render pyecharts charts summarising scraped Guangzhou rental listings.

Reads one dict literal per line from ``data/房源信息.txt`` and writes five
HTML charts under ``images/house/``: an area-bucket pie, a per-district
unit-price treemap (per month), a per-district unit-price bar chart (per
day), a room-layout word cloud, and a per-district listing-share pie.
"""
import re

import pandas as pd
import pyecharts.options as opts
from pyecharts.charts import Bar, Pie, TreeMap, WordCloud

# First run of digits in a scraped text field (raw string: '\d' is not a
# valid escape in a normal string literal).
_DIGITS = re.compile(r'\d+')

# Parse the listings file; each non-blank line is expected to be a dict literal.
house_list = []
with open('data/房源信息.txt', 'rb') as file:
    for raw_line in file:
        # Skip blank lines (e.g. a trailing newline) instead of crashing in eval.
        if not raw_line.strip():
            continue
        # NOTE(security): eval() executes arbitrary code found in the data
        # file. Only run this on a file you produced yourself; if every line
        # is a plain literal, ast.literal_eval is the safe replacement.
        line = eval(raw_line.decode('utf-8'))
        # Keep only the leading integer of the area and price fields.
        line['面积'] = int(_DIGITS.findall(line['面积'])[0])
        line['价格'] = int(_DIGITS.findall(line['价格'])[0])
        house_list.append(line)

house_list_DF = pd.DataFrame(house_list)

# Distinct non-empty district names, in order of first appearance so the
# charts come out in the same order on every run (a set has no stable order).
xingzhengqu = list(dict.fromkeys(
    name for name in house_list_DF['行政区'] if name
))

# Rental area statistics: count listings per area bucket.
bins = [-1, 30, 60, 90, 120, 200, 300, 400, 10000]
attr = ['0-30平方米', '30-60平方米', '60-90平方米', '90-120平方米',
        '120-200平方米', '200-300平方米', '300-400平方米', '400+平方米']
# observed=False keeps empty buckets (count 0) so `value` stays aligned with
# `attr` regardless of the pandas default for categorical groupers.
tmpDF = (
    house_list_DF
    .groupby(pd.cut(house_list_DF['面积'], bins=bins, labels=attr),
             observed=False)
    .size()
    .reset_index(name='count')
)
value = list(map(int, tmpDF['count'].values))
pie = Pie(init_opts=opts.InitOpts(width='800px', height='800px'))
# Pass a real list of pairs: a zip object can only be iterated once.
pie.add('', [list(pair) for pair in zip(attr, value)]).set_global_opts(
    title_opts=opts.TitleOpts(title='租房面积统计'))
pie.render('images/house/广州租房面积统计.html')


# Rental price per square metre for each district.
def getAvgPrice(xingzhengqu):
    """Return total price divided by total area for one district.

    Args:
        xingzhengqu: District name compared against each listing's '行政区'.

    Returns:
        Sum of '价格' over sum of '面积' for the matching listings in
        ``house_list``, or 1 when the matching total area is not positive.
    """
    matching = [item for item in house_list if item['行政区'] == xingzhengqu]
    totalArea = sum(item['面积'] for item in matching)
    totalPrice = sum(item['价格'] for item in matching)
    return totalPrice / totalArea if totalArea > 0 else 1


# Monthly price per square metre for every district.
def getTotalAvgPrice():
    """Build the treemap data: one ``{'value', 'name'}`` dict per district.

    Returns:
        A list of dicts whose 'value' is the district's average price rounded
        to 3 decimals and whose 'name' is the district name followed by
        " ¥" and that rounded value.
    """
    totalAvgPriceDirList = []
    for item in xingzhengqu:
        avg_price = round(getAvgPrice(item), 3)
        totalAvgPriceDirList.append(
            {'value': avg_price, 'name': item + " ¥" + str(avg_price)})
    return totalAvgPriceDirList


# Monthly price per square metre, drawn as a treemap.
data = getTotalAvgPrice()
treemap = TreeMap(init_opts=opts.InitOpts(width='900px', height='800px'))
treemap.add('广州各区房租单价:平方米/月', data,
            label_opts=opts.LabelOpts(is_show=True, position='inside',
                                      font_size=13))
treemap.render('images/house/广州各区房租单价.html')

# Daily price per square metre for every district (monthly figure / 30).
totalAvgPriceList = [round(getAvgPrice(item) / 30, 3) for item in xingzhengqu]
attr, value = (xingzhengqu, totalAvgPriceList)
bar = Bar(init_opts=opts.InitOpts(width='900px', height='800px'))
bar.add_xaxis(attr)
bar.add_yaxis("广州", value)
bar.set_global_opts(title_opts=opts.TitleOpts(title='广州各区房租单价:平方米/日'))
bar.render('images/house/广州每日每平方米的价格.html')


# Room-layout data.
def getRooms():
    """Count listings per distinct '房间' value.

    Returns:
        A ``(room_list, weight_list)`` tuple: the distinct '房间' values and
        the matching listing counts as ints, in groupby order.
    """
    results = house_list_DF.groupby('房间').size().reset_index(name='count')
    room_list = list(results['房间'].values)
    weight_list = list(map(int, results['count'].values))
    return (room_list, weight_list)


attr, value = getRooms()
wordcloud = WordCloud(init_opts=opts.InitOpts(width='900px', height='400px'))
wordcloud.add('', [list(pair) for pair in zip(attr, value)],
              word_size_range=[2, 100])
wordcloud.render('images/house/广州户型数据.html')


# Share of listings in each district.
def getAreaWeight():
    """Pair each district's label with its listing count.

    Returns:
        A list of ``(label, count)`` tuples, where label is the district name
        followed by its percentage of all listings (2 decimals) and '%'.
    """
    result = house_list_DF.groupby('行政区').size().reset_index(name='count')
    areaName = list(result['行政区'].values)
    areaWeight = list(map(int, result['count'].values))
    # Hoisted: the grand total does not change between iterations.
    total = sum(areaWeight)
    areaName_tmp = [
        name + str(round(weight / total * 100, 2)) + '%'
        for name, weight in zip(areaName, areaWeight)
    ]
    # Return a list, not a zip, so the result can be iterated more than once.
    return list(zip(areaName_tmp, areaWeight))


pie = Pie(init_opts=opts.InitOpts(width='600px', height='400px'))
pie.add('', getAreaWeight()).set_global_opts(
    title_opts=opts.TitleOpts(title='广州房源分布'))
pie.render('images/house/广州房源分布.html')