- 逻辑回归的推导过程:https://blog.csdn.net/ACM_hades/article/details/90448785
 - 代码主要实现了下面公式(其中 $\lambda$ 为学习率,对应代码中的 alpha):$W^{k+1} = W^k + \lambda X \left(Y - f_{W^k}(X^T)\right)$
 - 数据集 :我们选择MNIST数据集进行实验,它包含各种手写数字(0-9)图片,图片大小28*28。MNIST数据集本身有10个类别,为了将其变成二分类问题我们进行如下处理:label等于0的继续等于0,label大于0改为1。这样就将十分类的数据改为二分类的数据。
 - 特征选择:可选择的特征有很多,包括:
    
              
- 自己提取特征
 - 将整个图片作为特征向量
 - HOG特征
 
 - 我们将整个图片作为特征(784=28×28)。
 
            
              
                import
              
               time
              
                import
              
               numpy 
              
                as
              
               np
              
                import
              
               pandas 
              
                as
              
               pd
              
                from
              
               sklearn
              
                .
              
              model_selection 
              
                import
              
               train_test_split
              
                from
              
               sklearn
              
                .
              
              metrics 
              
                import
              
               accuracy_score
              
                class
              
              
                Logistic
              
              
                :
              
              
                def
              
              
                __init__
              
              
                (
              
              self
              
                ,
              
              feature_len
              
                )
              
              
                :
              
              
        self
              
                .
              
              weights 
              
                =
              
               np
              
                .
              
              ones
              
                (
              
              
                (
              
              feature_len
              
                ,
              
              
                1
              
              
                )
              
              
                )
              
              
                def
              
              
                model_function
              
              
                (
              
              self
              
                ,
              
               X
              
                )
              
              
                :
              
              
        W_X
              
                =
              
              np
              
                .
              
              matmul
              
                (
              
              X
              
                ,
              
               self
              
                .
              
              weights
              
                )
              
              
        temp_1
              
                =
              
              
                (
              
              W_X
              
                >=
              
              
                0
              
              
                )
              
              
                .
              
              astype
              
                (
              
              np
              
                .
              
              
                float
              
              
                )
              
              
        temp_0 
              
                =
              
              
                (
              
              W_X 
              
                <
              
              
                0
              
              
                )
              
              
                .
              
              astype
              
                (
              
              np
              
                .
              
              
                float
              
              
                )
              
              
        resut_1
              
                =
              
              
                1.0
              
              
                /
              
              
                (
              
              
                1
              
              
                +
              
               np
              
                .
              
              exp
              
                (
              
              
                -
              
              temp_1
              
                *
              
              W_X
              
                )
              
              
                )
              
              
                *
              
              temp_1
        
              
                #W_X为负数是,因为参数值inx很大时,exp(inx)可能会发生溢出,所以修改计算方式
              
              
        resut_0 
              
                =
              
               np
              
                .
              
              exp
              
                (
              
              temp_0
              
                *
              
              W_X
              
                )
              
              
                /
              
              
                (
              
              
                1
              
              
                +
              
               np
              
                .
              
              exp
              
                (
              
              temp_0
              
                *
              
              W_X
              
                )
              
              
                )
              
              
                *
              
              temp_0
        
              
                return
              
               resut_1
              
                +
              
              resut_0
    
              
                def
              
              
                train
              
              
                (
              
              self
              
                ,
              
               Data
              
                ,
              
               label
              
                )
              
              
                :
              
              
                #训练
              
              
        label 
              
                =
              
               label
              
                .
              
              reshape
              
                (
              
              
                (
              
              
                -
              
              
                1
              
              
                ,
              
              
                1
              
              
                )
              
              
                )
              
              
        alpha 
              
                =
              
              
                0.01
              
              
        max_iter 
              
                =
              
              
                500
              
              
                for
              
               i 
              
                in
              
              
                range
              
              
                (
              
              max_iter
              
                )
              
              
                :
              
              
                #迭代
              
              
            pres 
              
                =
              
               self
              
                .
              
              model_function
              
                (
              
              Data
              
                )
              
              
            error 
              
                =
              
               label
              
                -
              
              pres    
              
                #预测值和标签值所形成的误差
              
              
            self
              
                .
              
              weights 
              
                =
              
               self
              
                .
              
              weights 
              
                +
              
                alpha 
              
                *
              
               np
              
                .
              
              matmul
              
                (
              
              Data
              
                .
              
              T 
              
                ,
              
               error
              
                )
              
              
                #权重的更新
              
              
                def
              
              
                predict
              
              
                (
              
              self
              
                ,
              
               Data
              
                )
              
              
                :
              
              
                return
              
               self
              
                .
              
              model_function
              
                (
              
              Data
              
                )
              
              
                .
              
              reshape
              
                (
              
              
                -
              
              
                1
              
              
                )
              
              
                if
              
               __name__ 
              
                ==
              
              
                '__main__'
              
              
                :
              
              
                print
              
              
                (
              
              
                'Start read data'
              
              
                )
              
              
    S 
              
                =
              
               time
              
                .
              
              time
              
                (
              
              
                )
              
              
    raw_data 
              
                =
              
               pd
              
                .
              
              read_csv
              
                (
              
              
                './lihang_book_algorithm-master/data/train_binary.csv'
              
              
                )
              
              
                # 读取数据
              
              
    data 
              
                =
              
               raw_data
              
                .
              
              values  
              
                # 获取数据
              
              
                print
              
              
                (
              
              
                "data shape:"
              
              
                ,
              
               data
              
                .
              
              shape
              
                )
              
              
    imgs 
              
                =
              
               data
              
                [
              
              
                :
              
              
                ,
              
              
                1
              
              
                :
              
              
                ]
              
              
    labels 
              
                =
              
               data
              
                [
              
              
                :
              
              
                ,
              
              
                0
              
              
                ]
              
              
                print
              
              
                (
              
              
                "imgs shape:"
              
              
                ,
              
               imgs
              
                .
              
              shape
              
                )
              
              
    imgs 
              
                =
              
               np
              
                .
              
              concatenate
              
                (
              
              
                (
              
              imgs
              
                ,
              
               np
              
                .
              
              ones
              
                (
              
              
                (
              
              imgs
              
                .
              
              shape
              
                [
              
              
                0
              
              
                ]
              
              
                ,
              
              
                1
              
              
                )
              
              
                )
              
              
                )
              
              
                ,
              
               axis
              
                =
              
              
                1
              
              
                )
              
              
                #拼接常数项
              
              
                print
              
              
                (
              
              
                "imgs shape:"
              
              
                ,
              
               imgs
              
                .
              
              shape
              
                )
              
              
                print
              
              
                (
              
              
                "labels shape:"
              
              
                ,
              
               labels
              
                .
              
              shape
              
                )
              
              
                print
              
              
                (
              
              
                "label:"
              
              
                ,
              
              
                list
              
              
                (
              
              
                set
              
              
                (
              
              labels
              
                )
              
              
                )
              
              
                )
              
              
    Model 
              
                =
              
               Logistic
              
                (
              
              imgs
              
                .
              
              shape
              
                [
              
              
                -
              
              
                1
              
              
                ]
              
              
                )
              
              
                # 选取 2/3 数据作为训练集, 1/3 数据作为测试集
              
              
    train_features
              
                ,
              
               test_features
              
                ,
              
               train_labels
              
                ,
              
               test_labels 
              
                =
              
               train_test_split
              
                (
              
              
        imgs
              
                ,
              
               labels
              
                ,
              
               test_size
              
                =
              
              
                0.33
              
              
                ,
              
               random_state
              
                =
              
              
                23323
              
              
                )
              
              
                print
              
              
                (
              
              
                "train data count :%d"
              
              
                %
              
              
                len
              
              
                (
              
              train_labels
              
                )
              
              
                )
              
              
                print
              
              
                (
              
              
                "test data count :%d"
              
              
                %
              
              
                len
              
              
                (
              
              test_labels
              
                )
              
              
                )
              
              
                print
              
              
                (
              
              
                'read data cost '
              
              
                ,
              
               time
              
                .
              
              time
              
                (
              
              
                )
              
              
                -
              
               S
              
                ,
              
              
                ' second'
              
              
                )
              
              
                print
              
              
                (
              
              
                'Start training'
              
              
                )
              
              
    S 
              
                =
              
               time
              
                .
              
              time
              
                (
              
              
                )
              
              
    Model
              
                .
              
              train
              
                (
              
              train_features
              
                ,
              
               train_labels
              
                )
              
              
                print
              
              
                (
              
              
                'training cost '
              
              
                ,
              
               time
              
                .
              
              time
              
                (
              
              
                )
              
              
                -
              
               S
              
                ,
              
              
                ' second'
              
              
                )
              
              
                print
              
              
                (
              
              
                'Start predicting'
              
              
                )
              
              
    S 
              
                =
              
               time
              
                .
              
              time
              
                (
              
              
                )
              
              
    test_predict 
              
                =
              
               Model
              
                .
              
              predict
              
                (
              
              test_features
              
                )
              
              
                print
              
              
                (
              
              
                'predicting cost '
              
              
                ,
              
               time
              
                .
              
              time
              
                (
              
              
                )
              
              
                -
              
               S
              
                ,
              
              
                ' second'
              
              
                )
              
              
    score 
              
                =
              
               accuracy_score
              
                (
              
              test_labels
              
                ,
              
               test_predict
              
                )
              
              
                print
              
              
                (
              
              
                "The accruacy socre is "
              
              
                ,
              
               score
              
                )
              
            
          
          
            
              结果:
	Start read data
	data shape: (42000, 785)
	imgs shape: (42000, 784)
	imgs shape: (42000, 785)
	labels shape: (42000,)
	label: [0, 1]
	train data count :28140
	test data count :13860
	read data cost  4.148890018463135  second
	Start training
	training cost  15.161401748657227  second
	Start predicting
	predicting cost  0.007978200912475586  second
	The accruacy socre is  0.9892496392496393
            
          
        

