统计学习方法——朴素贝叶斯法原理
1. 贝叶斯估计
1.1 为什么要用贝叶斯估计(极大似然的缺点)
1.2 贝叶斯估计原理
贝叶斯估计的算法过程和极大似然估计的算法过程一模一样,代码也几乎一模一样,只是加了一个λ。
2. Python代码
def priorProbability(labelList, Lambda):
    """Return the Bayesian (Laplace-smoothed) prior probability of each label.

    For each distinct label y, computes
        P(y) = (count(y) + Lambda) / (len(labelList) + K * Lambda)
    where K is the number of distinct labels. Lambda = 0 reduces to the
    maximum-likelihood estimate; Lambda = 1 is Laplace smoothing.

    Args:
        labelList: sequence of (hashable) class labels, one per sample.
        Lambda: smoothing constant (the λ of Bayesian estimation).

    Returns:
        dict mapping each distinct label to its smoothed prior probability.
    """
    # Local import: this file has no top-level import section.
    from collections import Counter

    labelCountDict = Counter(labelList)  # idiomatic one-pass frequency count
    numLabels = len(labelCountDict)      # K distinct classes
    total = len(labelList)
    return {
        label: (count + Lambda) / (total + numLabels * Lambda)
        for label, count in labelCountDict.items()
    }
def
conditionProbability
(
dataSet
,
labelList
,
Lambda
)
:
dimNum
=
len
(
dataSet
[
0
]
)
characterVal
=
[
]
for
i
in
range
(
dimNum
)
:
temp
=
[
]
for
j
in
range
(
len
(
dataSet
)
)
:
if
dataSet
[
j
]
[
i
]
not
in
temp
:
temp
.
append
(
dataSet
[
j
]
[
i
]
)
characterVal
.
append
(
temp
)
probability
=
[
]
labelSet
=
list
(
set
(
labelList
)
)
for
dim
in
range
(
dimNum
)
:
tempMemories
=
{
}
for
val
in
characterVal
[
dim
]
:
for
label
in
labelSet
:
labelCount
=
0
mixCount
=
0
for
i
in
range
(
len
(
labelList
)
)
:
if
labelList
[
i
]
==
label
:
labelCount
+=
1
if
dataSet
[
i
]
[
dim
]
==
val
:
mixCount
+=
1
tempMemories
[
str
(
val
)
+
"|"
+
str
(
label
)
]
=
(
mixCount
+
Lambda
)
/
(
labelCount
+
len
(
characterVal
[
dim
]
)
*
Lambda
)
probability
.
append
(
tempMemories
)
return
probability
def naiveBayes(x, dataSet, labelList, Lambda):
    """Classify sample `x` with a (Bayesian-estimated) naive Bayes model.

    Trains on (dataSet, labelList) by computing smoothed priors and
    conditional probabilities, then returns the label maximizing
        P(y) * prod_dim P(x[dim] | y).

    Args:
        x: feature vector to classify; len(x) == number of feature dims.
        dataSet: training feature rows.
        labelList: training labels, one per row of dataSet.
        Lambda: smoothing constant λ passed to the estimators.

    Returns:
        The predicted label (argmax of the unnormalized posterior).

    Raises:
        KeyError: if some x[dim] value never appears in the training data
        for that dimension (no smoothing entry exists for unseen values).
    """
    priorProbabilityDict = priorProbability(labelList, Lambda)
    probability = conditionProbability(dataSet, labelList, Lambda)

    bayesProbability = {}
    for label in set(labelList):
        tempProb = priorProbabilityDict[label]
        for dim in range(len(x)):
            # Conditional tables are keyed by "val|label" strings.
            tempProb *= probability[dim][str(x[dim]) + "|" + str(label)]
        bayesProbability[label] = tempProb

    # max() replaces the full sort; the shadowing `lambda x: x[1]` is gone.
    # Ties resolve to the first-seen maximum, same as the stable sort did.
    return max(bayesProbability.items(), key=lambda item: item[1])[0]