LR共线性问题样例
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# Example: a collinearity problem in logistic regression (LR).
#
# Fits an L2-regularised logistic regression on a tiny hand-written binary
# feature matrix and prints the learned intercept and coefficients. The notes
# at the bottom record the original author's observation that the sign of one
# coefficient looks counter-intuitive.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import datasets

# NOTE: loaded by the original snippet but not referenced anywhere below.
iris = datasets.load_iris()

# 10 samples x 4 binary features. Column 0 is all zeros; column 1 is set for
# 8 of the 10 samples.
X = np.array([
    [0, 1, 1, 1],
    [0, 1, 1, 1],
    [0, 1, 1, 0],
    [0, 1, 1, 1],
    [0, 1, 1, 1],
    [0, 1, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 1, 0],
    [0, 1, 0, 0],
])
# Labels: the first five samples are class 1, the last five are class 0.
y = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

# `intercept_scaling` is only honoured by the liblinear solver, so the solver
# is pinned explicitly. Newer scikit-learn releases default to lbfgs, which
# would silently ignore intercept_scaling=0.01 and change the fitted weights.
lr = LogisticRegression(
    C=1,
    penalty='l2',
    intercept_scaling=0.01,
    solver='liblinear',
)
# Variant without the intercept_scaling override, kept for comparison:
# lr = LogisticRegression(C=1, penalty='l2')
lr.fit(X, y)
print(lr.intercept_, lr.coef_)

# Output recorded by the original author (exact digits may differ between
# library versions):
# [-0.00012842] [[ 0. -0.14693214 0.59779321 0.85362485]]
#
# Notes:
# 1. In these notes, "positive sample" means label 0 and "negative sample"
#    means label 1.
# 2. For the second feature column, 8 samples have the feature set: 3 are
#    positive (label 0) and 5 are negative (label 1). Since the negatives
#    outnumber the positives, looking at this feature alone it should support
#    predicting 1, i.e. its weight should be greater than 0.
#    However, the fitted weight is -0.14693214: it is negative, which supports
#    predicting 0.