课时9 解析网页中的元素

from bs4 import BeautifulSoup
# Local copy of the page to be parsed.
path = 'C:/Users/Google/Desktop/web/index.html'

# Open in binary mode so BeautifulSoup detects the document's encoding itself
# rather than relying on the platform's default text encoding.
with open(path, 'rb') as wb_data:
    soup = BeautifulSoup(wb_data, 'lxml')
    # Link inside the <h4> of each thumbnail caption.
    titles = soup.select('body > div.container > div.row > div.col-md-9 > div.row > div.col-sm-4.col-lg-4.col-md-4 > div.thumbnail > div.caption > h4 > a')
    # <img> directly inside each thumbnail.
    images = soup.select('body > div.container > div.row > div.col-md-9 > div.row > div.col-sm-4.col-lg-4.col-md-4 > div.thumbnail > img')
    reviews = soup.select('div.ratings > p.pull-right')
    prices = soup.select('div.caption > h4.pull-right')
    # Second <p> of each ratings block; the star <span> elements are searched
    # inside it below.
    stars = soup.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p:nth-of-type(2)')

# zip() stops at the shortest list, so an item missing any one of the five
# fields silently shortens the output.
for title, image, review, price, star in zip(titles, images, reviews, prices, stars):
    data = {
        'title': title.get_text(),
        'image': image.get('src'),
        'review': review.get_text(),
        'price': price.get_text(),
        # Number of <span> tags whose class attribute is exactly
        # "glyphicon glyphicon-star" inside this ratings paragraph.
        'star': len(star.find_all("span", class_='glyphicon glyphicon-star'))
    }
    print(data)
  • 需要注意的点
'star': len(star.find_all("span", class_='glyphicon glyphicon-star'))

这行代码的作用是：在下图所示的这段元素

1.jpg

这里面，统计出 class 值等于 glyphicon glyphicon-star 的 span 标签的个数。

from bs4 import BeautifulSoup
# Local copy of the page to be parsed.
path = 'C:/Users/Google/Desktop/web/index.html'

# Binary mode lets BeautifulSoup work out the document's encoding on its own;
# text mode would decode with the platform default, which may not match the
# encoding the HTML file was saved in.
with open(path, 'rb') as wb_data:
    soup = BeautifulSoup(wb_data, 'lxml')
    # Link inside the <h4> of each thumbnail caption.
    titles = soup.select('body > div.container > div.row > div.col-md-9 > div.row > div.col-sm-4.col-lg-4.col-md-4 > div.thumbnail > div.caption > h4 > a')
    # <img> directly inside each thumbnail.
    images = soup.select('body > div.container > div.row > div.col-md-9 > div.row > div.col-sm-4.col-lg-4.col-md-4 > div.thumbnail > img')
    reviews = soup.select('div.ratings > p.pull-right')
    prices = soup.select('div.caption > h4.pull-right')
    # Second <p> of each ratings block; the star <span> elements are searched
    # inside it below.
    stars = soup.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p:nth-of-type(2)')
    # Show the raw matched elements so the markup being counted is visible.
    print(stars)

# zip() stops at the shortest list, so an item missing any one of the five
# fields silently shortens the output.
for title, image, review, price, star in zip(titles, images, reviews, prices, stars):
    data = {
        'title': title.get_text(),
        'image': image.get('src'),
        'review': review.get_text(),
        'price': price.get_text(),
        # Number of <span> tags whose class attribute is exactly
        # "glyphicon glyphicon-star" inside this ratings paragraph.
        'star': len(star.find_all("span", class_='glyphicon glyphicon-star'))
    }
    print(data)
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容