问题:某段时间内,北京各个城区发帖数量的 top3 类目
所需数据结构:
原始 :series = [{'name': 'name','data': [100]},{'name': 'name','data': [100]}]
实际上:{name:类目,data:发帖量}
目标:{'_id': ['北京二手家电'], 'counts': 175}
柱形图所需数据格式如下:
# Template of the input handed to charts.plot for a column chart: one dict per
# column, each carrying a label, a single-value data list and the chart type.
series = [
    {'name': label, 'data': [value], 'type': 'column'}
    for label, value in (('name', 100), ('name2', 102))
]

# Chart-level settings: zoom mode, title, subtitle and y-axis caption.
options = dict(
    chart={'zoomType': 'xy'},
    title={'text': '发帖数量最大的类目'},
    subtitle={'text': '数据图表'},
    yAxis={'title': {'text': '数量'}},
)

charts.plot(series, show='inline', options=options)
代码:
import pymongo
import charts
# Open a client against localhost:27017 and bind the collection that every
# query further down in this file reads from.
client = pymongo.MongoClient(host='localhost', port=27017)
ganji1 = client['ganji1_db']
item_info = ganji1['test2_item_info']

# Print the first three documents to get a look at their shape.
preview = item_info.find().limit(3)
for doc in preview:
    print(doc)
pipeline 讲解
#pipeline = [
#{'$match':{'price': '1260'}},
#{'$match':{'$and': [{'cates':'手机'},{'url':'2100'}] }}, # 多重条件筛选的方法,$and是逻辑运算符
#{'$group':{'_id':'$price','a':{'$sum':1 }}}, # group接收2个参数,_id表示你以什么作为分组,a为命名,后面跟的函数表示你要做什么 主要用于数据的组团计算的,$price区别其他的$,它是表示调用原来的price
#{'$sort':{'counts':1} }, # 1表示从小到大正序排列,-1反之
#{'$limit':3} # 筛选出出现频率最高三组数
#]
#for i in item_info.aggregate(pipeline): # 它与find函数很像,当然也可以筛选多个条件
# print(i)
# Count posts per `cates` value for documents whose `area` contains '朝阳' and
# whose `pub_date` lies between '2015.12.20' and '2015.12.25' (inclusive), then
# print the three categories with the most posts.
# NOTE(review): the date bounds are compared as strings, so this presumably
# relies on pub_date being stored as 'YYYY.MM.DD' text - confirm against the data.
pipeline1 = [
    {'$match': {'$and': [
        {'pub_date': {'$gte': '2015.12.20', '$lte': '2015.12.25'}},
        {'area': {'$all': ['朝阳']}},
    ]}},
    {'$group': {'_id': '$cates', 'counts': {'$sum': 1}}},
    # Order by count before truncating; without this stage $limit keeps
    # whichever three groups $group happens to emit first, not the largest.
    {'$sort': {'counts': -1}},
    {'$limit': 3},
]
for i in item_info.aggregate(pipeline1):
    print(i)
def data_gen(date1, date2, area, limit):
    """Yield column-chart series entries for the most-posted categories.

    Runs an aggregation on the module-level ``item_info`` collection: keeps
    documents whose ``pub_date`` is between ``date1`` and ``date2``
    (inclusive) and whose ``area`` contains every value in ``area``, counts
    them per ``cates`` value, and yields the ``limit`` groups with the
    highest counts, largest first.

    Args:
        date1: Lower bound for ``pub_date`` (passed to ``$gte``).
        date2: Upper bound for ``pub_date`` (passed to ``$lte``).
        area: List of values that ``area`` must all contain (``$all``).
        limit: Maximum number of groups to yield.

    Yields:
        dict: ``{'name': <group _id>, 'data': [<count>], 'type': 'column'}``.
    """
    pipeline1 = [
        {'$match': {'$and': [
            {'pub_date': {'$gte': date1, '$lte': date2}},
            {'area': {'$all': area}},
        ]}},
        {'$group': {'_id': '$cates', 'counts': {'$sum': 1}}},
        # $sort must precede $limit: limiting first would keep an arbitrary
        # subset of groups and merely order that subset afterwards.
        {'$sort': {'counts': -1}},
        {'$limit': limit},
    ]
    for i in item_info.aggregate(pipeline1):
        yield {
            'name': i['_id'],
            'data': [i['counts']],
            'type': 'column',
        }
# Print the series entries (at most three) produced for the '朝阳' area.
for entry in data_gen('2015.12.20', '2015.12.25', ['朝阳'], 3):
    print(entry)
图表化
# Materialise the generator into the list that charts.plot receives.
series = list(data_gen('2015.12.20', '2015.12.25', ['朝阳'], 5))

# Chart-level settings: zoom mode, title, subtitle and y-axis caption.
options = dict(
    chart={'zoomType': 'xy'},
    title={'text': '发帖数量最大的类目'},
    subtitle={'text': '数据图表'},
    yAxis={'title': {'text': '数量'}},
)

charts.plot(series, show='inline', options=options)

图示
问题:某段时间内,北京二手手机不同新旧程度(成色)的平均价格
所需数据结构:
原始 :data = [1,2,3,4,5,6,7]
实际上:data 中的每个数对应一种成色的平均价格
目标:{'_id': 成色, 'avg_price': 平均价格}
图表套路:
# Template for a plain series: seven placeholder values, one per x-axis
# category listed in the options below.
data = list(range(1, 8))

options = dict(
    title={'text': '新旧-价格'},
    xAxis={'categories': [
        '报废机/尸体',
        '7成新及以下',
        '8成新',
        '9成新',
        '95成新',
        '99成新',
        '全新',
    ]},
    yAxis={'title': {'text': '价格'}},
)

charts.plot(data, options=options, show='inline')
# Print only the `look` field (with `_id` suppressed) of the first 100 documents.
only_look = {'_id': 0, 'look': 1}
for doc in item_info.find({}, only_look).limit(100):
    print(doc)
# Average `price` per `look` value, highest average first, restricted to
# documents published between '2015.12.25' and '2015.12.27' whose `cates`
# contains '北京二手手机' and whose `look` is not '-'.
date_window = {'pub_date': {'$gte': '2015.12.25', '$lte': '2015.12.27'}}
in_category = {'cates': {'$all': ['北京二手手机']}}
has_look = {'look': {'$nin': ['-']}}

pipeline2 = [
    {'$match': {'$and': [date_window, in_category, has_look]}},
    {'$group': {'_id': '$look', 'avg_price': {'$avg': '$price'}}},
    {'$sort': {'avg_price': -1}},
]

for row in item_info.aggregate(pipeline2):
    print(row)
def data_gen2(date1, date2, cates):
    """Yield the average price of each ``look`` group, lowest average first.

    Aggregates the module-level ``item_info`` collection over documents whose
    ``pub_date`` is between ``date1`` and ``date2`` (inclusive), whose
    ``cates`` contains every value in ``cates``, and whose ``look`` is not
    ``'-'``. Documents are grouped by ``look`` and the mean of ``price`` is
    computed per group.

    Args:
        date1: Lower bound for ``pub_date`` (passed to ``$gte``).
        date2: Upper bound for ``pub_date`` (passed to ``$lte``).
        cates: List of values that ``cates`` must all contain (``$all``).

    Yields:
        The ``avg_price`` of each group, in ascending order.
    """
    # NOTE(review): only the average is yielded; the group's `_id` is dropped,
    # so a caller that labels these values relies purely on their order.
    filters = [
        {'pub_date': {'$gte': date1, '$lte': date2}},
        {'cates': {'$all': cates}},
        {'look': {'$nin': ['-']}},
    ]
    stages = [
        {'$match': {'$and': filters}},
        {'$group': {'_id': '$look', 'avg_price': {'$avg': '$price'}}},
        {'$sort': {'avg_price': 1}},
    ]
    yield from (row['avg_price'] for row in item_info.aggregate(stages))
# Collect the ascending per-group average prices for '北京二手手机' and plot
# them against the fixed list of x-axis categories below.
data = list(data_gen2('2015.12.24', '2016.01.10', ['北京二手手机']))

options = dict(
    title={'text': '新旧-价格'},
    xAxis={'categories': [
        '报废机/尸体',
        '7成新及以下',
        '8成新',
        '9成新',
        '95成新',
        '99成新',
        '全新',
    ]},
    yAxis={'title': {'text': '价格'}},
)

charts.plot(data, options=options, show='inline')

