文章目录[隐藏]
应朋友需要,想将某客户的数据从某站里导出,先去某站搞个账号,建几条数据观察一番,心里有底后开搞。
1.Python环境搭建
之前电脑有安装过PyCharm Community 2019.1,具体安装过程就不写了,先跑个HelloWorld,输出正常后正式开整。
2.利用抓包工具或者Google浏览器调试模式拿到请求参数
Cookies参数如下:
# Cookies sent with the request. The 'XXX' values are placeholders for the
# values captured from a logged-in browser session.
cookies = {
    'JSESSIONID': 'XXX',
    'phone': 'XXX',
    'password': 'XXX',
    'isAuto': '0',
    # Fixed: the key previously had a stray leading space (' loginAccess'),
    # which is not the cookie name the site sets.
    'loginAccess': 'XXX',
}
headers请求头信息构造:
# Request headers mirroring what the browser sends for the XHR call.
headers = {
    'Connection': 'keep-alive',
    'sec-ch-ua': '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'X-Requested-With': 'XMLHttpRequest',
    'sec-ch-ua-mobile': '?0',
    # Fixed: an HTML watermark fragment had been injected into the middle of
    # "KHTML", corrupting the value and adding characters that cannot be
    # encoded in an HTTP header.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.90 Safari/537.36'
    ),
    'Content-Type': 'application/json',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Dest': 'empty',
    'Referer': 'https://xxx.xxx.xxx',
    'Accept-Language': 'zh-CN,zh;q=0.9',
}
请求路径参数(URL 查询参数):
# URL query parameters as a sequence of (name, value) pairs.
# Fixed: without the trailing comma the outer parentheses are only grouping,
# which made `params` the flat tuple ('method', 'getGoodsList') instead of a
# one-element tuple of pairs.
params = (
    ('method', 'getGoodsList'),
)
请求参数组装:
# Raw JSON request body, sent as-is. Adjacent string literals are joined by
# the parser, so the resulting payload is one unbroken string.
data = (
    '{ "pageSize":1000, '
    '"pageNumber":1, '
    '"searchText":"", '
    '"sortOrder":"asc", '
    '"isAdvancedSearch":false}'
)
pageSize:每页数据数量
pageNumber:页码
searchText:搜索条件
sortOrder:排序
3.利用Requests模拟请求并获取数据
# Send the captured request and decode the JSON reply into `res`.
response = requests.post(
    'https://xxx.xxx.xxx',
    headers=headers,
    params=params,
    cookies=cookies,
    data=data,
    # Requests never times out by default; bound the wait so a stalled
    # server cannot hang the script forever.
    timeout=30,
)
# Fail early with a clear HTTP error instead of trying to parse an error
# page as JSON below.
response.raise_for_status()
print(response.text.encode('utf8'))
res = json.loads(response.text)
4.创建Excel表格
# Build an output file name from the current time in milliseconds, then
# create the workbook and the sheet that the rows are written to.
t = time.time()
randStr = int(round(t * 1000))
tSheetName = "".join(("a_", str(randStr), ".xlsx"))

workbook = xlsxwriter.Workbook(tSheetName)
worksheet = workbook.add_worksheet()
5.表头及数据组装
# Row 0 is the header row; calling writeExcel with only `row` writes it.
cursor = 0
writeExcel(row=cursor)

# One spreadsheet row per record in the response, starting at row 1.
for cursor, obj in enumerate(res["rows"], start=1):
    # Fixed: the URL literal previously had a space after '?', which makes
    # the image URL invalid. Records without an image key get an empty URL.
    picurl = ''
    if obj['ImageKID']:
        picurl = 'https://xxx.xxx.xxx?imageKid=' + obj['ImageKID']

    writeExcel(
        row=cursor,
        Description=obj['Description'],
        Category=obj['Category'],
        Series=obj['Series'],
        BaseUnit=obj['BaseUnit'],
        Qty=obj['Qty'],
        CostPrice=obj['CostPrice'],
        SalePrice=obj['SalePrice'],
        RetailPrice=obj['RetailPrice'],
        Barcode=obj['Barcode'],
        Remark=obj['Remark'],
        ImageKID=picurl,
    )

# NOTE(review): XlsxWriter writes the file when workbook.close() is called;
# no close() call is visible in this excerpt - confirm it happens after this
# loop.
6.将数据写入Excel表格中
def writeExcel(row=0, Description='', Category='', Series='', BaseUnit='',
               Qty='', CostPrice='', SalePrice='', RetailPrice='',
               Barcode='', Remark='', ImageKID=''):
    """Write one row of the goods sheet to the module-level ``worksheet``.

    When ``row`` is 0 the header row is written and all other arguments are
    ignored. For any other ``row`` the field values are written to columns
    0-9 in parameter order, and, if ``ImageKID`` is a non-empty URL, the
    image is downloaded and embedded at column 10 of that row.

    Args:
        row: Zero-based worksheet row index; 0 means "write the header".
        Description, Category, Series, BaseUnit, Qty, CostPrice, SalePrice,
            RetailPrice, Barcode, Remark: Cell values for columns 0-9.
        ImageKID: URL of the image to embed, or '' for no image.

    Raises:
        Whatever ``urlopen`` raises if the image cannot be fetched.
    """
    if row == 0:
        titles = ('名称', '货号', '规格', '单位', '库存', '成本',
                  '批发价', '零售价', '条码', '备注', '图片')
        for col, title in enumerate(titles):
            worksheet.write(row, col, title)
        return

    if ImageKID != '':
        # `urllib.urlopen` only exists on Python 2; on Python 3 the function
        # lives in `urllib.request`. Import locally so either works.
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib import urlopen
        from contextlib import closing

        # Close the HTTP response once its bytes are read, so connections
        # are not leaked while exporting many rows.
        with closing(urlopen(ImageKID)) as image_response:
            image_data = io.BytesIO(image_response.read())
        # The URL doubles as the image name; the bytes come from image_data.
        worksheet.insert_image(row, 10, ImageKID, {'image_data': image_data})

    values = (Description, Category, Series, BaseUnit, Qty,
              CostPrice, SalePrice, RetailPrice, Barcode, Remark)
    for col, value in enumerate(values):
        worksheet.write(row, col, value)

    # Size the image column and this row (width 23, height 150) to leave
    # room for the embedded picture.
    worksheet.set_column(10, 10, 23)
    worksheet.set_row(row, 150)