文章目录
- 全部高亮
- 单个高亮
- mysql高亮查询
- 存html
- 存excel
全部高亮
# Sample keyword and sentence for the "highlight every occurrence" demo.
w, t = '比赛', '比赛开始没多久就结束了比赛,现在没有比赛'
def replace_color(text, word):
    """Wrap every occurrence of ``word`` in ``text`` with ANSI red escape codes.

    Candidate positions are examined from the rightmost one down to index 0,
    so inserting the escape codes never shifts a position that still has to
    be checked.

    Args:
        text: The string to search.
        word: The substring to highlight.

    Returns:
        ``text`` with each match surrounded by the red / reset sequences.
    """
    painted = '\033[031m' + word + '\033[0m'  # red
    width = len(word)
    # Highest index at which ``word`` could still fit (negative -> no scan).
    last_start = len(text) - width
    for start in reversed(range(last_start + 1)):
        stop = start + width
        if text[start:stop] != word:
            continue
        text = ''.join((text[:start], painted, text[stop:]))
    return text
# Show the plain sentence first, then the same sentence with every match in red.
print(t, replace_color(t, w), sep='\n')
单个高亮
from
jieba
import
tokenize
# Demo sentence and the entity that should be highlighted one occurrence at a time.
text, entity = '我用小米手机订购了一袋小米', '小米'
def replace_color(word):
    """Return ``word`` wrapped in the ANSI escape codes ``\\033[033m`` ... ``\\033[0m``."""
    return ''.join(('\033[033m', word, '\033[0m'))
def replace_word(sentence, word, head, tail):
    """Return ``sentence`` with the slice ``[head:tail]`` replaced by ``word``."""
    prefix, suffix = sentence[:head], sentence[tail:]
    return prefix + word + suffix
# Print one copy of the sentence for each token equal to the entity,
# colouring only that single token.
for word, head, tail in tokenize(text):
    if word != entity:
        continue
    word = replace_color(word)
    print(replace_word(text, word, head, tail))
mysql高亮查询
def highlight(self, field, table, keyword, n=99):
    """Print values of ``field`` that contain ``keyword``, with the keyword in red.

    A ``SELECT ... WHERE INSTR(...)>0`` statement is built and handed to
    ``self.fetchone(sql, n)``; the first element of every returned row is
    printed with each occurrence of ``keyword`` wrapped in ANSI red codes.

    Args:
        field: Column name interpolated into the SELECT and INSTR clauses.
        table: Table name interpolated into the FROM clause.
        keyword: Substring to search for and highlight.
        n: Forwarded to ``self.fetchone``; presumably a row limit -- TODO confirm.
    """
    # NOTE(review): the statement is assembled by plain string interpolation,
    # so every argument must come from a trusted source.
    sql = "SELECT %s FROM %s WHERE INSTR(%s,'%s')>0;" % (field, table, field, keyword)
    for row in self.fetchone(sql, n):
        text = row[0]
        painted = '\033[031m' + keyword + '\033[0m'  # red
        width = len(keyword)
        # Scan right-to-left so insertions never shift unchecked positions.
        for start in reversed(range(len(text) - width + 1)):
            stop = start + width
            if text[start:stop] != keyword:
                continue
            text = text[:start] + painted + text[stop:]
        print(text)
存html
- py文件
def replace_html_tag(text, word):
    """Wrap every occurrence of ``word`` in ``text`` with a red ``<font>`` tag.

    Neither ``text`` nor ``word`` is HTML-escaped.

    Args:
        text: The string to search.
        word: The substring to highlight.

    Returns:
        ``text`` with each match replaced by
        ``<font color="red">`` + word + ``</font>``.
    """
    # NOTE(review): the tag literals were missing from the extracted snippet
    # and are restored from the sample output shown in the article -- confirm.
    new_word = '<font color="red">' + word + '</font>'
    len_w = len(word)
    len_t = len(text)
    # Scan right-to-left so inserting markup never shifts unchecked positions.
    for i in range(len_t - len_w, -1, -1):
        if text[i:i + len_w] == word:
            text = text[:i] + new_word + text[i + len_w:]
    return text
def save_html(ls_of_ls, prefix):
    """Write a list of rows to ``prefix + '.html'`` as a bordered HTML table.

    Each inner list becomes one ``<tr>``; each item is formatted into a
    ``<td><font size="4">...</font></td>`` cell without escaping, so items may
    carry their own markup (e.g. highlighted text).

    Args:
        ls_of_ls: Iterable of rows, each an iterable of cell values.
        prefix: Output path without the ``.html`` extension.
    """
    # NOTE(review): the markup literals were lost from the extracted snippet;
    # they are reconstructed from the generated-HTML sample in the article.
    # The exact placement of newlines between tags is a best guess -- confirm.
    fname = prefix + '.html'
    with open(fname, 'w', encoding='utf-8') as f:
        f.write('<html><head><meta charset="UTF-8"></head><body><table border="1">\n')
        for ls in ls_of_ls:
            f.write('<tr>')
            for i in ls:
                f.write('<td><font size="4">{}</font></td>'.format(i))
            f.write('</tr>\n')
        f.write('</table></body></html>')
# Demo: highlight the keyword in every line and save the rows as '<word>.html'.
word = '菊花'
texts = ['深扣菊花舔指笑', '菊花菊花一闪闪', '接天莲叶无穷碧', '硬日菊花别样红']
# Each row is [keyword, line with the keyword wrapped in markup].
ls_of_ls = [[word, replace_html_tag(text, word)] for text in texts]
save_html(ls_of_ls, word)
- 生成的html代码
<html><head><meta charset="UTF-8"></head><body><table border="1">
<tr><td><font size="4">菊花</font></td><td><font size="4">深扣<font color="red">菊花</font>舔指笑</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4"><font color="red">菊花</font><font color="red">菊花</font>一闪闪</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4">接天莲叶无穷碧</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4">硬日<font color="red">菊花</font>别样红</font></td></tr>
</table></body></html>
存excel
from
pandas
import
DataFrame
from
jieba
import
tokenize
from
xlwings
import
App
def replace_word(word):
    """Return ``word`` enclosed in the full-width brackets ``【`` and ``】``."""
    return ''.join(('【', word, '】'))
def replace_sentence(sentence, word, head, tail):
    """Return ``sentence`` with the slice ``[head:tail]`` replaced by ``word``."""
    before, after = sentence[:head], sentence[tail:]
    return before + word + after
def ner(text):
    """Yield one ``[text, phrase, word]`` row per recognised entity in ``text``.

    ``text`` is split on ASCII commas and each piece is tokenised with
    ``tokenize``. Every token that belongs to the hard-coded entity set
    produces a row: the full text, the piece with that token wrapped by
    ``replace_word``, and the token itself.
    """
    entities = {'小米', '苹果'}  # NER: the entity vocabulary
    for sentence in text.split(','):  # split into clauses
        for word, head, tail in tokenize(sentence):  # token plus its position
            if word not in entities:
                continue
            marked = replace_sentence(sentence, replace_word(word), head, tail)
            yield [text, marked, word]
def lss2excel(ls_of_ls, columns, fname):
    """Write a list of rows to the Excel file ``fname`` without an index column.

    Args:
        ls_of_ls: Row data passed to ``DataFrame``.
        columns: Column labels for the frame.
        fname: Destination path handed to ``DataFrame.to_excel``.
    """
    frame = DataFrame(ls_of_ls, columns=columns)
    frame.to_excel(fname, index=False)
def _merge_first_column(sheet):
    """Format the region around A1 and merge runs of equal values in column A."""
    # Region of contiguous data around A1
    current_region = sheet.cells(1, 1).current_region
    # Column width
    current_region.column_width = 16
    # Font size
    current_region.api.Font.Size = 9
    # Row of the region's last cell
    last_row_index = current_region.last_cell.row
    # Row 1 is skipped (presumably the header written by the exporter).
    if last_row_index <= 2:
        return
    start = 2
    # A single pass is enough: ``start`` marks the first row of the current
    # run, and the scan goes one row past the region so that the final run is
    # closed by the cell below it. Unlike the earlier ``while`` wrapper, this
    # cannot spin forever when the trailing run equals that cell.
    for row in range(start + 1, last_row_index + 2):
        if sheet.cells(start, 1).value != sheet.cells(row, 1).value:
            cells = sheet.range('A{}:A{}'.format(start, row - 1)).api
            cells.MergeCells = True  # merge
            cells.WrapText = True  # wrap text
            start = row


def merge_cells(fname):
    """Merge runs of equal values in column A of every sheet of a workbook.

    Opens ``fname`` in a hidden Excel instance, formats and merges each sheet,
    saves the workbook and quits Excel. An error raised while processing the
    sheets is printed in red and the workbook is still saved; Excel is quit
    even when opening or saving fails.

    Args:
        fname: Path of the workbook to modify in place.
    """
    # Start Excel
    app = App(add_book=False, visible=False)
    try:
        # Turn alerts off
        app.display_alerts = False
        # Open the workbook
        book = app.books.open(fname)
        try:
            for sheet in book.sheets:
                _merge_first_column(sheet)
        except Exception as e:
            # Best effort: report the problem and still save what was done.
            print('\033[031m{}\033[0m'.format(e))
        # Turn alerts back on
        app.display_alerts = True
        # Save
        book.save()
    finally:
        # Always shut the hidden Excel instance down so it is not left running.
        app.quit()
# Demo: extract entity rows from a few sentences, export them to Excel and
# merge the repeated source texts in the first column.
fname = '手机.xlsx'
fields = ['text', 'phrase', 'word']
texts = ['买小米机,送了袋小米和苹果', '诺基亚', '买华为送苹果']
ls_of_ls = []
for text in texts:
    ls_of_ls.extend(ner(text))
lss2excel(ls_of_ls, fields, fname)
merge_cells(fname)