# Example: scraping job postings from Lagou (lagou.com) with Python and Selenium
# -*- coding: utf-8 -*-
# __author__ = 'xiaobai'
# Email: 517840374@qq.com
import re
import time

from selenium.webdriver import Chrome  # browser driver class
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Search lagou.com for "python", open every result in turn, and save the text
# of each posting's job-detail section to a numbered file 需求_<n>.txt.

# Create the browser.
web = Chrome()
try:
    # Let element lookups poll for up to 10 seconds instead of failing
    # immediately while a page (search results, detail tab) is still loading.
    web.implicitly_wait(10)

    # Open the Lagou home page.
    web.get("https://www.lagou.com")

    # Close the pop-up overlay (element id "cboxClose") if it is present.
    # find_elements returns an empty list instead of raising when it is not.
    close_buttons = web.find_elements(By.XPATH, '//*[@id="cboxClose"]')
    if close_buttons:
        close_buttons[0].click()
    time.sleep(1)

    # Find the search box, type "python", and submit with ENTER.
    web.find_element(By.XPATH, '//*[@id="search_input"]').send_keys(
        'python', Keys.ENTER
    )

    # Every search result is wrapped in an element with class "position_link".
    alst = web.find_elements(By.CLASS_NAME, "position_link")
    for n, a in enumerate(alst, start=1):
        # Click the result's H3 title, then move to the newest window handle,
        # which is where the script expects the posting to have opened.
        a.find_element(By.TAG_NAME, "h3").click()
        web.switch_to.window(web.window_handles[-1])

        # Grab the text of the job-detail section.
        text = web.find_element(By.XPATH, '//*[@id="job_detail"]/dd[2]').text

        # Save the posting text to its own numbered file.
        with open(f'需求_{n}.txt', mode='w', encoding='utf-8') as f:
            f.write(text)

        # Close the detail window and return to the first (results) window.
        web.close()
        web.switch_to.window(web.window_handles[0])
        time.sleep(1)
finally:
    # Always shut down the browser and its driver process, even when a lookup
    # or file write fails part-way through.
    web.quit()