linux cpu占用率如何看
558
2022-09-27
爬取唯品会口红数据,这次哪家打折力度最大?我看到0.8折的
前言
双十一期间,各大平台的商品都在打折,哪些店铺的折扣是最低的?
本次目标
爬取唯品会口红商品数据
每一页有120个商品的ID值。问题它又双叒叕来了:总不能只爬取一页的数据吧?所以还要分析获取ID值的接口每一页的url变化。还是一样,想知道url的变化规律,多看几页就知道了~
这里就省略了~
第一页
第二页
pageOffset参数的变化规律是每120个数据翻一页。ID都获取到了,前面也看到每个商品数据接口对应的是50条数据,经过分析就知道,120个商品要划分为三组:50、50、20,分别传入相对应的商品ID就可以了。
开始爬虫的代码
导入工具
import csv
import re

import requests
请求网页
# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the crawler indefinitely.
REQUEST_TIMEOUT = 10


def get_data(num_id):
    """Request the product-detail endpoint for the given product ID(s).

    Args:
        num_id: Value formatted into the ``productIds`` query parameter.
            NOTE(review): the accompanying article says this endpoint serves
            up to 50 products per call, so this is presumably a
            comma-separated ID list -- confirm against the caller.

    Raises:
        requests.exceptions.RequestException: On connection errors, or when
            the server does not answer within ``REQUEST_TIMEOUT`` seconds.

    The response is bound to ``response_2``; this snippet ends after the
    request and does not return it (the article shows the parsing step that
    reads ``response_2.text`` in its next section).
    """
    data_url = 'https://mapi.vip.com/vips-mobile/rest/shopping/pc/product/module/list/v2'
    # NOTE: the captured browser 'cookie' header was disabled (commented out)
    # in the original code, so it is not sent here either.
    headers = {
        'referer': 'https://category.vip.com/suggest.php?keyword=%E5%8F%A3%E7%BA%A2&ff=235%7C12%7C1%7C1&page=3',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
    }
    params = {
        'callback': 'getMerchandiseDroplets2',
        'app_name': 'shop_pc',
        'app_version': '4.0',
        'warehouse': 'VIP_NH',
        'fdc_area_id': '104104101',
        'client': 'pc',
        'mobile_platform': '1',
        'province_id': '104104',
        'api_key': '70f71280d5d547b2a7bb370a529aeea1',
        'user_id': '',
        'mars_cid': '1602569282048_0b4beb3d18306a0a0143c359ddb34fae',
        'wap_consumer': 'a',
        'productIds': str(num_id),
        'scene': 'search',
        'standby_id': 'nature',
        'extParams': '{"stdSizeVids":"","preheatTipsVer":"3","couponVer":"v2","exclusivePrice":"1","iconSpec":"2x"}',
        'context': '',
        '_': '1603721644366',
    }
    response_2 = requests.get(
        url=data_url, params=params, headers=headers, timeout=REQUEST_TIMEOUT
    )


# The endpoint and headers are the same for every page, so they are built
# once instead of on each loop iteration.
url = 'https://mapi.vip.com/vips-mobile/rest/shopping/pc/search/product/rank'
# NOTE: as in get_data(), the disabled 'cookie' header is not sent.
headers = {
    'referer': 'https://category.vip.com/suggest.php?keyword=%E5%8F%A3%E7%BA%A2&ff=235%7C12%7C1%7C1&page=3',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
}

# range(0, 1201, 120) yields the offsets 0, 120, ..., 1200: eleven requests,
# each asking for a batch of 120 entries (see 'batchSize' below).
for page in range(0, 1201, 120):
    params = {
        'callback': 'getMerchandiseIds',
        'app_name': 'shop_pc',
        'app_version': '4.0',
        'warehouse': 'VIP_NH',
        'fdc_area_id': '104104101',
        'client': 'pc',
        'mobile_platform': '1',
        'province_id': '104104',
        'api_key': '70f71280d5d547b2a7bb370a529aeea1',
        'user_id': '',
        'mars_cid': '1602569282048_0b4beb3d18306a0a0143c359ddb34fae',
        'wap_consumer': 'a',
        'standby_id': 'nature',
        'keyword': '口红',
        'lv3CatIds': '',
        'lv2CatIds': '',
        'lv1CatIds': '',
        'brandStoreSns': '',
        'props': '',
        'priceMin': '',
        'priceMax': '',
        'vipService': '',
        'sort': '0',
        'pageOffset': str(page),
        'channelId': '1',
        'gPlatform': 'PC',
        'batchSize': '120',
        '_': '1603721644362',
    }
    response = requests.get(
        url=url, params=params, headers=headers, timeout=REQUEST_TIMEOUT
    )
    # NOTE(review): nothing in this snippet consumes `response`; the article
    # omits the step that extracts the product IDs and passes them to
    # get_data().
解析网页数据
# Pull every field out of the raw response text with non-greedy patterns;
# re.S lets "." match across newlines as well.
raw_text = response_2.text
titles = re.findall('"title":"(.*?)"', raw_text, re.S)  # product title
salePrice = re.findall(',"salePrice":"(.*?)",', raw_text, re.S)  # sale price
marketPrice = re.findall('"marketPrice":"(.*?)"', raw_text, re.S)  # original price
saleDiscount = re.findall('"saleDiscount":"(.*?)"', raw_text, re.S)  # discount
smallImage = re.findall('"smallImage":"(.*?)"', raw_text, re.S)  # product image URL

# CSV column names, in the same order as the value lists zipped below.
columns = ('商品名字', '售价', '原价', '折扣', '商品图片地址')

# zip() stops at the shortest list, so one row is produced per position
# that is present in all five field lists.
dit = {}
for values in zip(titles, salePrice, marketPrice, saleDiscount, smallImage):
    dit.update(zip(columns, values))
    csv_writer.writerow(dit)
    print(dit)
保存数据
def open_csv_writer(path, fieldnames):
    """Open *path* for appending and return ``(file_object, DictWriter)``.

    The header row is written only when the file is still empty. The file is
    opened in append mode, so writing the header unconditionally would insert
    a duplicate header row in the middle of the data on every re-run.

    Args:
        path: Path of the CSV file to append to (created if missing).
        fieldnames: Column names, in output order.

    Returns:
        A ``(file_object, csv.DictWriter)`` tuple. The caller owns the file
        object and should close it once all rows are written.
    """
    handle = open(path, mode='a', encoding='utf-8-sig', newline='')
    writer = csv.DictWriter(handle, fieldnames=fieldnames)
    # An append-mode stream is positioned at the end of the file, so a
    # position of 0 means the file is new or empty.
    if handle.tell() == 0:
        writer.writeheader()
    return handle, writer


# `f` is kept open at module level because `csv_writer` is used by the
# scraping code; call f.close() once scraping has finished.
f, csv_writer = open_csv_writer(
    '唯品会商品数据.csv',
    ['商品名字', '售价', '原价', '折扣', '商品图片地址'],
)
运行代码,效果如下图
看到有几家原价100多,折后价是10元的,这种你确定是口红不是画笔?
版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。
发表评论
暂时没有评论,来抢沙发吧~