# -*- coding=utf-8 -*-
import json
import requests
# HTTP headers passed to every requests.get() call made by Business.
# NOTE(review): Content-Encoding normally describes a request body, which a
# GET does not carry; Accept-Encoding may have been intended -- confirm
# before changing, since the server's reaction to it is not visible here.
header = {
    "User-Agent": "googlespider",
    "Content-Encoding": "gzip",
    "X-Forwarded-For": "202.101.43.22",
}
class Business(object):
    """Demonstrate a JSONDecodeError caused by response decoding, and its fix.

    Both methods fetch the same dealer-info JSON document. One parses the
    text as decoded by ``requests`` (the failing variant), the other decodes
    the raw bytes itself before parsing (the working variant).
    """

    # Endpoint queried by both demo methods.
    DEALER_INFO_URL = "https://dealer.autohome.com.cn/Ajax/GetDealerInfo?DealerId=2052112"

    # Seconds to wait for the server. requests applies no timeout by default,
    # so without this a stalled connection would block forever.
    REQUEST_TIMEOUT = 10

    # Sample of the mis-decoded payload that breaks json.loads: the rare
    # character '碶' shows up as a replacement character followed by a
    # backslash, and that backslash starts an invalid JSON escape.
    # Presumably the second byte of its GBK/GB18030 encoding is 0x5C
    # (a backslash) -- not verified here.
    # Written as a raw string so the backslash is kept literally without
    # relying on Python's deprecated handling of unknown escape sequences.
    error_json_data_example = r'{"Address":"北仑区新�\街道明州西路555号1幢1号-1"}'

    @staticmethod
    def _decode_body(raw: bytes) -> str:
        """Decode response bytes as UTF-8, falling back to GB18030.

        Args:
            raw: The undecoded response body.

        Returns:
            The decoded text.

        Raises:
            UnicodeDecodeError: If the bytes are valid in neither encoding.
        """
        try:
            return raw.decode('utf-8')
        except UnicodeDecodeError:
            return raw.decode('GB18030')

    def my_JSONDecodeError(self):
        """Reproduce the failure by parsing ``r.text`` as decoded by requests.

        This method is intentionally left un-fixed: it exists to show the
        code that produced the JSONDecodeError. Prints the parsed dict and
        its type when parsing happens to succeed.

        Raises:
            json.JSONDecodeError: When the decoded text is not valid JSON.
            requests.RequestException: On network failure or timeout.
        """
        r = requests.get(
            url=self.DEALER_INFO_URL,
            headers=header,
            timeout=self.REQUEST_TIMEOUT,
        )
        response = r.text
        # Convert the JSON string into a Python dict.
        body = json.loads(response)
        print(body, '\n', type(body))

    def my_problem_and_solution(self):
        """Fetch the same document, decoding the raw bytes explicitly.

        The problem was an encoding/decoding issue in the data returned by
        the request, centred on the rare Chinese character '碶'. The fix is
        to decode ``r.content`` ourselves (UTF-8 first, GB18030 as fallback)
        instead of trusting ``r.text``. Prints the parsed dict and its type.

        Raises:
            json.JSONDecodeError: When the decoded text is not valid JSON.
            UnicodeDecodeError: If the body is valid in neither encoding.
            requests.RequestException: On network failure or timeout.
        """
        r = requests.get(
            url=self.DEALER_INFO_URL,
            headers=header,
            timeout=self.REQUEST_TIMEOUT,
        )
        response = self._decode_body(r.content)
        # Convert the JSON string into a Python dict.
        body = json.loads(response)
        print(body, '\n', type(body))
# Script entry point: run the working variant first, then the failing one.
if __name__ == "__main__":
    demo = Business()

    # After the fix.
    demo.my_problem_and_solution()

    # Before the fix.
    demo.my_JSONDecodeError()
运行以上代码:
解决后
{'DealerId': 2052112, 'DealerInfoId': 2052106, 'MinistieName': None, 'MinistieSimpleName': None, 'KindId': 1, 'Is24h': True, 'IsAuth': False, 'IsCurrent': False, 'SiteTemplateID': 0, 'BsnsLcncCmpynName': None, 'BsnsLcncExpire': None, 'BsnsLcncNo': None, 'ContractType': 0, 'MaintainState': 0, 'IsCPL': False, 'PayType': 16, 'IsCPL_2019': False, 'MainBrandImgUrl': None, 'BrandName': None, 'BrandsNameString': None, 'BusinessArea': '售本市', 'GroupHotSeriesModel': None, 'HasCloseTestDriver': False, 'HasLowerPrice': False, 'HasUseVerificationCode': False, 'LeadsRangeType': 0, 'LeadsRangeTypeTittle': None, 'Phone_400': None, 'refStylePhone': None, 'SellPhone': '4008307068', 'ServicePhone': '0574-86968822', 'RescuePhone': None, 'IconStyle': None, 'AcceptCIds': None, 'AcceptPIds': None, 'MapLatBaidu': 29.915025, 'MapLonBaidu': 121.821191, 'LeadsRatingScore': 1, 'CallRate400': 1.0, 'StarLevel': 4, 'MainBrands': None, 'DealerType': 0, 'CompanyId': 0, 'CompanyName': None, 'Company': '宁波宝利行丰田汽车销售服务有限公司', 'CompanySimpleName': None, 'CompanySimple': '宁波宝利行', 'CID': 330200, 'CityName': '宁波', 'CityPinyin': None, 'SID': 330206, 'CountyName': None, 'PID': 330000, 'ProvinceName': None, 'GroupID': 0, 'GroupSimpleName': None, 'HasCloseOrder': False, 'CompanyDesc': None, 'Address': '宁波北仑区新碶街道大港中路29号1幢1号1层-3', 'Fax': None, 'CompanyURL': None}
解决前
Traceback (most recent call last):
File "/mac/project/webcrawler/service/analysis/boker.py", line 52, in <module>
b.my_JSONDecodeError()
File "/mac/project/webcrawler/service/analysis/boker.py", line 25, in my_JSONDecodeError
body = json.loads(response)
File "/mac/anaconda3/lib/python3.6/json/__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "/mac/anaconda3/lib/python3.6/json/decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/mac/anaconda3/lib/python3.6/json/decoder.py", line 355, in raw_decode
obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 1 column 1169 (char 1168)
问题解决。