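# An Alibaba recruitment-site scraper. This version stores the results in a
# CSV file, so readers can compare it with the earlier version.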
import requests,json,csv
# Scrape Alibaba job postings that match a keyword and save them to a CSV file.

# --- CSV output settings ---
OUTPUT_PATH = '阿里巴巴招聘.csv'
OUTPUT_ENCODING = 'utf-8'  # the original 'utf=8' is not a codec name and made open() fail
CSV_HEADER = ['职位名称', '部门', '是否招聘', '学历', '大类', '小类', '工作地', '工作经历', '要求', '职责']

# --- Scraper settings ---
SEARCH_URL = 'https://job.alibaba.com/zhaopin/socialPositionList/doList.json'
MAX_PAGES = 10
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs forever
REQUEST_HEADERS = {
    'Referer': 'https://job.alibaba.com/zhaopin/positionList.htm?spm=a2obv.11875463.0.0.2ac23a5dSb7Wbw',
}
# Raw "Cookie" request-header value copied from a browser session.
# NOTE(review): this is a hard-coded session cookie; it will go stale and
# should not live in source control -- consider loading it from the environment.
RAW_COOKIE_HEADER = ' isg=BM7OlPeDH1cyaKt12PikCz_-HaKQT5JJOm4ATvgXOlGNW2y1at3VWbNdkwc32Iph; l=cBLBoTMuqFP2KvuXBOCanurza77OSId4YuPzaNbMi_5BG6L1xV_Okk5T4Fp6csWFGhLk4nXUfzeT6FibJPDjtQPl2eGJXZ5..; CNZZDATA1000004769=1840857478-1567157075-%7C1567549123; tmp0=6vzoNBZKbkA%2BBLyFAxc%2F%2B5%2FiZdeVqJ%2BQe1wqEFgcolw%2Bv%2BNF%2BSOMmjyEAbaxmDieCW6DN%2FW1NgF3ABlBHs%2BQVVV2hLJ2KNwMGtDuyycdGSJ6QuN6svOgAsp7j9AZ0WOupUzUlbSrvipNMHgwXXNByNszrL0xUXa2GZogaVwPGKi7SGNCFAAV69vqvj6X2DlVaPMaTpJ3iqH20Tnd%2FLw7QVdbcEn3PhXzGaBl3yQmiW9LYRA8MDOBcbOWVZJ1lrwS8QCt4%2FQhkY70vFtrIvKAQhtb4iXr6OkIZI793Ao1syY%3D; _hvn_login=0; csg=87ae08c0; _tb_token_=eeed45e87e5e7; cookie2=14659eb4996e47c896f304074d300557; JSESSIONID=JFYJXFOV-FDC82NZA4NVS98Y8CX7N3-KRA0E30K-9MTB2; t=81b209fbe498754d386d92ccdaa8ff67; UM_distinctid=16ce1eb6c4342c-05f3012e7a8dcb8-3f616f4a-13c680-16ce1eb6c443e1; cna=ft7vFfKyZQACAbeW36y9ZReA'


def parse_cookie_header(raw):
    """Split a raw ``Cookie`` header value into a ``{name: value}`` dict.

    Pairs are separated by ``;``; each pair is split on its first ``=`` so
    values that themselves contain ``=`` are kept intact. Fragments without
    a name or without ``=`` are ignored.
    """
    cookies = {}
    for fragment in raw.split(';'):
        name, separator, value = fragment.strip().partition('=')
        if separator and name:
            cookies[name] = value
    return cookies


def build_form_data(keyword, page):
    """Return the form fields posted to the search endpoint for one page.

    Args:
        keyword: Job keyword typed by the user.
        page: 1-based page number.
    """
    # The original literal listed 'location' 'second' 'first' 'pageIndex'
    # without colons/commas, which Python concatenated into the single key
    # 'locationsecondfirstpageIndex' -- so the page index was never sent.
    # The bogus 'MIME 类型' field (a browser dev-tools label, not a form
    # parameter) is dropped; requests sets the form Content-Type itself.
    return {
        'pageSize': '10',
        't': '0.9218667109769481',
        'keyWord': keyword,
        'location': '',
        'second': '',
        'first': '',
        'pageIndex': str(page),
    }


def position_to_row(position):
    """Convert one position record (a dict) into a CSV row matching CSV_HEADER.

    Missing fields become ``None``, which ``csv.writer`` emits as an empty
    cell, so one incomplete record does not abort the whole run.
    """
    # The first five characters of the department name are discarded, as in
    # the original script (presumably a fixed company prefix -- TODO confirm).
    department = (position.get('departmentName') or '')[5:]
    return [
        position.get('name'),
        department,
        position.get('status'),
        position.get('degree'),
        position.get('firstCategory'),
        position.get('secondCategory'),
        position.get('workLocation'),
        position.get('workExperience'),
        position.get('requirement'),
        position.get('description'),
    ]


def fetch_positions(keyword, page, cookies):
    """POST one search request and return the list of position records.

    Returns an empty list when the response carries no ``returnValue`` or
    no ``datas``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the response body is not valid JSON.
    """
    response = requests.post(
        SEARCH_URL,
        headers=REQUEST_HEADERS,
        data=build_form_data(keyword, page),
        cookies=cookies,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    payload = json.loads(response.text)
    return_value = payload.get('returnValue') or {}
    return return_value.get('datas') or []


def main():
    """Prompt for a keyword, scrape up to MAX_PAGES pages, and write the CSV."""
    keyword = input('请输入职位关键字:')
    # Parse the cookie header once; it does not change between pages.
    cookies = parse_cookie_header(RAW_COOKIE_HEADER)
    # 'with' guarantees the file is closed even if a request fails midway.
    with open(OUTPUT_PATH, 'w', newline='', encoding=OUTPUT_ENCODING) as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(CSV_HEADER)
        for page in range(1, MAX_PAGES + 1):
            positions = fetch_positions(keyword, page, cookies)
            if not positions:
                # An empty page means there are no further results.
                break
            writer.writerows(position_to_row(position) for position in positions)
    print('爬取成功!')


if __name__ == '__main__':
    main()