머신러닝

머신 러닝 XGboost

J.H_DA 2022. 4. 14. 15:05
import xgboost as xgb
from xgboost import plot_importance

import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score, roc_auc_score
from xgboost import XGBClassifier

import warnings
warnings.filterwarnings('ignore')

dataset = load_breast_cancer()
X_features= dataset.data
y_label = dataset.target

cancer_df = pd.DataFrame(data=X_features, columns=dataset.feature_names)
cancer_df['target']= y_label
cancer_df.head(3)
Out[28]:
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimensiontarget012
17.99 10.38 122.8 1001.0 0.11840 0.27760 0.3001 0.14710 0.2419 0.07871 ... 17.33 184.6 2019.0 0.1622 0.6656 0.7119 0.2654 0.4601 0.11890 0
20.57 17.77 132.9 1326.0 0.08474 0.07864 0.0869 0.07017 0.1812 0.05667 ... 23.41 158.8 1956.0 0.1238 0.1866 0.2416 0.1860 0.2750 0.08902 0
19.69 21.25 130.0 1203.0 0.10960 0.15990 0.1974 0.12790 0.2069 0.05999 ... 25.53 152.5 1709.0 0.1444 0.4245 0.4504 0.2430 0.3613 0.08758 0

3 rows × 31 columns

In [11]:
print(dataset.target_names)
print(cancer_df['target'].value_counts())
['malignant' 'benign']
1    357
0    212
Name: target, dtype: int64
In [29]:
# 전체 데이터 중 80%는 학습용 데이터, 20%는 테스트용 데이터 추출
X_train, X_test, y_train, y_test=train_test_split(X_features, y_label,
                                         test_size=0.2, random_state=156 )
print(X_train.shape , X_test.shape)
(455, 30) (114, 30)
In [30]:
xgb_clt=XGBClassifier(n_estimators = 400, learning_rate=0.1, max_depth = 3, )
xgb_clt.fit(X_train, y_train)
pred = xgb_clt.predict(X_test)
pred_proba=xgb_clt.predict_proba(X_test)
[13:48:47] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.5.1/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
In [31]:
# 수정된 get_clf_eval() 함수 
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
def get_clf_eval(y_test, pred):
    confusion = confusion_matrix( y_test, pred)
    accuracy = accuracy_score(y_test , pred)
    precision = precision_score(y_test , pred)
    recall = recall_score(y_test , pred)
    f1 = f1_score(y_test,pred)
    # ROC-AUC 추가 
    roc_auc = roc_auc_score(y_test, pred)
    lr_probs=pred_proba[:,1]
    ns_probs = [0 for _ in range(len(y_test))]
    ns_fpr, ns_tpr, _ = roc_curve(y_test, ns_probs)
    lr_fpr, lr_tpr, _ = roc_curve(y_test, lr_probs)
    
    plt.plot(ns_fpr, ns_tpr, linestyle="--", label ="No Skill")
    plt.plot(lr_fpr, lr_tpr, marker=".", label="XGBOOST")
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    plt.show()

    print('오차 행렬')
    print(confusion)
    # ROC-AUC print 추가
    print('정확도: {0:.4f}, 정밀도: {1:.4f}, 재현율: {2:.4f},\
    F1: {3:.4f}, AUC:{4:.4f}'.format(accuracy, precision, recall, f1, roc_auc))
In [32]:
get_clf_eval(y_test, pred )
오차 행렬
[[35  2]
 [ 1 76]]
정확도: 0.9737, 정밀도: 0.9744, 재현율: 0.9870,    F1: 0.9806, AUC:0.9665
In [33]:
fig, ax = plt.subplots(figsize=(10,12))

plot_importance(xgb_clt, ax=ax)
Out[33]:
<AxesSubplot:title={'center':'Feature importance'}, xlabel='F score', ylabel='Features'>
In [43]:
xgb_clt=XGBClassifier(n_estimators = 400, learning_rate=0.1, max_depth = 3, )
xgb_clt.fit(X_train, y_train, early_stopping_rounds=200, eval_metric='logloss', eval_set=[(X_test,y_test)], verbose=True) # 400번 진행중 100번 동안 반응없으면 끝내기
pred = xgb_clt.predict(X_test)
pred_proba=xgb_clt.predict_proba(X_test)
ns_probs = [0 for _ in range(len(y_test))]
ns_fpr, ns_tpr, _ = roc_curve(y_test, ns_probs)
[0]	validation_0-logloss:0.61352
[1]	validation_0-logloss:0.54784
[2]	validation_0-logloss:0.49425
[3]	validation_0-logloss:0.44799
[4]	validation_0-logloss:0.40911
[5]	validation_0-logloss:0.37498
[6]	validation_0-logloss:0.34571
[7]	validation_0-logloss:0.32053
[8]	validation_0-logloss:0.29721
[9]	validation_0-logloss:0.27799
[10]	validation_0-logloss:0.26030
[11]	validation_0-logloss:0.24604
[12]	validation_0-logloss:0.23156
[13]	validation_0-logloss:0.22005
[14]	validation_0-logloss:0.20857
[15]	validation_0-logloss:0.19999
[16]	validation_0-logloss:0.19012
[17]	validation_0-logloss:0.18182
[18]	validation_0-logloss:0.17473
[19]	validation_0-logloss:0.16766
[20]	validation_0-logloss:0.15820
[21]	validation_0-logloss:0.15473
[22]	validation_0-logloss:0.14895
[23]	validation_0-logloss:0.14331
[24]	validation_0-logloss:0.13634
[25]	validation_0-logloss:0.13278
[26]	validation_0-logloss:0.12791
[27]	validation_0-logloss:0.12526
[28]	validation_0-logloss:0.11998
[29]	validation_0-logloss:0.11641
[30]	validation_0-logloss:0.11450
[31]	validation_0-logloss:0.11257
[32]	validation_0-logloss:0.11154
[33]	validation_0-logloss:0.10868
[34]	validation_0-logloss:0.10668
[35]	validation_0-logloss:0.10421
[36]	validation_0-logloss:0.10296
[37]	validation_0-logloss:0.10058
[38]	validation_0-logloss:0.09868
[39]	validation_0-logloss:0.09644
[40]	validation_0-logloss:0.09587
[41]	validation_0-logloss:0.09424
[42]	validation_0-logloss:0.09471
[43]	validation_0-logloss:0.09427
[44]	validation_0-logloss:0.09389
[45]	validation_0-logloss:0.09418
[46]	validation_0-logloss:0.09402
[47]	validation_0-logloss:0.09236
[48]	validation_0-logloss:0.09301
[49]	validation_0-logloss:0.09127
[50]	validation_0-logloss:0.09005
[51]	validation_0-logloss:0.08961
[52]	validation_0-logloss:0.08958
[53]	validation_0-logloss:0.09070
[54]	validation_0-logloss:0.08958
[55]	validation_0-logloss:0.09036
[56]	validation_0-logloss:0.09159
[57]	validation_0-logloss:0.09153
[58]	validation_0-logloss:0.09199
[59]	validation_0-logloss:0.09195
[60]	validation_0-logloss:0.09194
[61]	validation_0-logloss:0.09146
[62]	validation_0-logloss:0.09031
[63]	validation_0-logloss:0.08941
[64]	validation_0-logloss:0.08972
[65]	validation_0-logloss:0.08974
[66]	validation_0-logloss:0.08962
[67]	validation_0-logloss:0.08873
[68]	validation_0-logloss:0.08862
[69]	validation_0-logloss:0.08974
[70]	validation_0-logloss:0.08998
[71]	validation_0-logloss:0.08978
[72]	validation_0-logloss:0.08958
[73]	validation_0-logloss:0.08953
[74]	validation_0-logloss:0.08875
[75]	validation_0-logloss:0.08860
[76]	validation_0-logloss:0.08812
[77]	validation_0-logloss:0.08840
[78]	validation_0-logloss:0.08874
[79]	validation_0-logloss:0.08815
[80]	validation_0-logloss:0.08758
[81]	validation_0-logloss:0.08741
[82]	validation_0-logloss:0.08849
[83]	validation_0-logloss:0.08858
[84]	validation_0-logloss:0.08807
[85]	validation_0-logloss:0.08764
[86]	validation_0-logloss:0.08742
[87]	validation_0-logloss:0.08761
[88]	validation_0-logloss:0.08707
[89]	validation_0-logloss:0.08727
[90]	validation_0-logloss:0.08716
[91]	validation_0-logloss:0.08696
[92]	validation_0-logloss:0.08717
[93]	validation_0-logloss:0.08707
[94]	validation_0-logloss:0.08659
[95]	validation_0-logloss:0.08612
[96]	validation_0-logloss:0.08714
[97]	validation_0-logloss:0.08677
[98]	validation_0-logloss:0.08669
[99]	validation_0-logloss:0.08655
[100]	validation_0-logloss:0.08650
[101]	validation_0-logloss:0.08641
[102]	validation_0-logloss:0.08629
[103]	validation_0-logloss:0.08626
[104]	validation_0-logloss:0.08683
[105]	validation_0-logloss:0.08677
[106]	validation_0-logloss:0.08732
[107]	validation_0-logloss:0.08730
[108]	validation_0-logloss:0.08728
[109]	validation_0-logloss:0.08730
[110]	validation_0-logloss:0.08729
[111]	validation_0-logloss:0.08800
[112]	validation_0-logloss:0.08794
[113]	validation_0-logloss:0.08784
[114]	validation_0-logloss:0.08807
[115]	validation_0-logloss:0.08765
[116]	validation_0-logloss:0.08730
[117]	validation_0-logloss:0.08780
[118]	validation_0-logloss:0.08775
[119]	validation_0-logloss:0.08768
[120]	validation_0-logloss:0.08763
[121]	validation_0-logloss:0.08757
[122]	validation_0-logloss:0.08755
[123]	validation_0-logloss:0.08716
[124]	validation_0-logloss:0.08767
[125]	validation_0-logloss:0.08774
[126]	validation_0-logloss:0.08828
[127]	validation_0-logloss:0.08831
[128]	validation_0-logloss:0.08827
[129]	validation_0-logloss:0.08789
[130]	validation_0-logloss:0.08886
[131]	validation_0-logloss:0.08868
[132]	validation_0-logloss:0.08874
[133]	validation_0-logloss:0.08922
[134]	validation_0-logloss:0.08918
[135]	validation_0-logloss:0.08882
[136]	validation_0-logloss:0.08851
[137]	validation_0-logloss:0.08848
[138]	validation_0-logloss:0.08839
[139]	validation_0-logloss:0.08915
[140]	validation_0-logloss:0.08911
[141]	validation_0-logloss:0.08876
[142]	validation_0-logloss:0.08868
[143]	validation_0-logloss:0.08839
[144]	validation_0-logloss:0.08927
[145]	validation_0-logloss:0.08924
[146]	validation_0-logloss:0.08914
[147]	validation_0-logloss:0.08891
[148]	validation_0-logloss:0.08942
[149]	validation_0-logloss:0.08939
[150]	validation_0-logloss:0.08911
[151]	validation_0-logloss:0.08873
[152]	validation_0-logloss:0.08872
[153]	validation_0-logloss:0.08848
[154]	validation_0-logloss:0.08847
[155]	validation_0-logloss:0.08854
[156]	validation_0-logloss:0.08852
[157]	validation_0-logloss:0.08855
[158]	validation_0-logloss:0.08828
[159]	validation_0-logloss:0.08830
[160]	validation_0-logloss:0.08828
[161]	validation_0-logloss:0.08801
[162]	validation_0-logloss:0.08776
[163]	validation_0-logloss:0.08778
[164]	validation_0-logloss:0.08778
[165]	validation_0-logloss:0.08752
[166]	validation_0-logloss:0.08754
[167]	validation_0-logloss:0.08764
[168]	validation_0-logloss:0.08739
[169]	validation_0-logloss:0.08738
[170]	validation_0-logloss:0.08730
[171]	validation_0-logloss:0.08737
[172]	validation_0-logloss:0.08740
[173]	validation_0-logloss:0.08739
[174]	validation_0-logloss:0.08713
[175]	validation_0-logloss:0.08716
[176]	validation_0-logloss:0.08696
[177]	validation_0-logloss:0.08705
[178]	validation_0-logloss:0.08697
[179]	validation_0-logloss:0.08697
[180]	validation_0-logloss:0.08704
[181]	validation_0-logloss:0.08680
[182]	validation_0-logloss:0.08683
[183]	validation_0-logloss:0.08658
[184]	validation_0-logloss:0.08659
[185]	validation_0-logloss:0.08661
[186]	validation_0-logloss:0.08637
[187]	validation_0-logloss:0.08637
[188]	validation_0-logloss:0.08630
[189]	validation_0-logloss:0.08610
[190]	validation_0-logloss:0.08602
[191]	validation_0-logloss:0.08605
[192]	validation_0-logloss:0.08615
[193]	validation_0-logloss:0.08592
[194]	validation_0-logloss:0.08592
[195]	validation_0-logloss:0.08598
[196]	validation_0-logloss:0.08601
[197]	validation_0-logloss:0.08592
[198]	validation_0-logloss:0.08585
[199]	validation_0-logloss:0.08587
[200]	validation_0-logloss:0.08589
[201]	validation_0-logloss:0.08595
[202]	validation_0-logloss:0.08573
[203]	validation_0-logloss:0.08573
[204]	validation_0-logloss:0.08575
[205]	validation_0-logloss:0.08582
[206]	validation_0-logloss:0.08584
[207]	validation_0-logloss:0.08578
[208]	validation_0-logloss:0.08569
[209]	validation_0-logloss:0.08571
[210]	validation_0-logloss:0.08581
[211]	validation_0-logloss:0.08559
[212]	validation_0-logloss:0.08580
[213]	validation_0-logloss:0.08581
[214]	validation_0-logloss:0.08574
[215]	validation_0-logloss:0.08566
[216]	validation_0-logloss:0.08584
[217]	validation_0-logloss:0.08563
[218]	validation_0-logloss:0.08573
[219]	validation_0-logloss:0.08578
[220]	validation_0-logloss:0.08579
[221]	validation_0-logloss:0.08582
[222]	validation_0-logloss:0.08576
[223]	validation_0-logloss:0.08567
[224]	validation_0-logloss:0.08586
[225]	validation_0-logloss:0.08587
[226]	validation_0-logloss:0.08593
[227]	validation_0-logloss:0.08595
[228]	validation_0-logloss:0.08587
[229]	validation_0-logloss:0.08606
[230]	validation_0-logloss:0.08600
[231]	validation_0-logloss:0.08592
[232]	validation_0-logloss:0.08610
[233]	validation_0-logloss:0.08611
[234]	validation_0-logloss:0.08617
[235]	validation_0-logloss:0.08626
[236]	validation_0-logloss:0.08629
[237]	validation_0-logloss:0.08622
[238]	validation_0-logloss:0.08639
[239]	validation_0-logloss:0.08634
[240]	validation_0-logloss:0.08618
[241]	validation_0-logloss:0.08619
[242]	validation_0-logloss:0.08625
[243]	validation_0-logloss:0.08626
[244]	validation_0-logloss:0.08629
[245]	validation_0-logloss:0.08622
[246]	validation_0-logloss:0.08640
[247]	validation_0-logloss:0.08635
[248]	validation_0-logloss:0.08628
[249]	validation_0-logloss:0.08645
[250]	validation_0-logloss:0.08629
[251]	validation_0-logloss:0.08631
[252]	validation_0-logloss:0.08636
[253]	validation_0-logloss:0.08639
[254]	validation_0-logloss:0.08649
[255]	validation_0-logloss:0.08644
[256]	validation_0-logloss:0.08629
[257]	validation_0-logloss:0.08646
[258]	validation_0-logloss:0.08639
[259]	validation_0-logloss:0.08644
[260]	validation_0-logloss:0.08646
[261]	validation_0-logloss:0.08649
[262]	validation_0-logloss:0.08644
[263]	validation_0-logloss:0.08647
[264]	validation_0-logloss:0.08632
[265]	validation_0-logloss:0.08649
[266]	validation_0-logloss:0.08654
[267]	validation_0-logloss:0.08647
[268]	validation_0-logloss:0.08650
[269]	validation_0-logloss:0.08652
[270]	validation_0-logloss:0.08669
[271]	validation_0-logloss:0.08674
[272]	validation_0-logloss:0.08683
[273]	validation_0-logloss:0.08668
[274]	validation_0-logloss:0.08664
[275]	validation_0-logloss:0.08650
[276]	validation_0-logloss:0.08636
[277]	validation_0-logloss:0.08652
[278]	validation_0-logloss:0.08657
[279]	validation_0-logloss:0.08659
[280]	validation_0-logloss:0.08668
[281]	validation_0-logloss:0.08664
[282]	validation_0-logloss:0.08650
[283]	validation_0-logloss:0.08636
[284]	validation_0-logloss:0.08640
[285]	validation_0-logloss:0.08643
[286]	validation_0-logloss:0.08646
[287]	validation_0-logloss:0.08650
[288]	validation_0-logloss:0.08637
[289]	validation_0-logloss:0.08646
[290]	validation_0-logloss:0.08645
[291]	validation_0-logloss:0.08632
[292]	validation_0-logloss:0.08628
[293]	validation_0-logloss:0.08615
[294]	validation_0-logloss:0.08620
[295]	validation_0-logloss:0.08622
[296]	validation_0-logloss:0.08631
[297]	validation_0-logloss:0.08618
[298]	validation_0-logloss:0.08626
[299]	validation_0-logloss:0.08613
[300]	validation_0-logloss:0.08618
[301]	validation_0-logloss:0.08605
[302]	validation_0-logloss:0.08602
[303]	validation_0-logloss:0.08610
[304]	validation_0-logloss:0.08598
[305]	validation_0-logloss:0.08606
[306]	validation_0-logloss:0.08597
[307]	validation_0-logloss:0.08600
[308]	validation_0-logloss:0.08600
[309]	validation_0-logloss:0.08588
[310]	validation_0-logloss:0.08592
[311]	validation_0-logloss:0.08595
[312]	validation_0-logloss:0.08603
[313]	validation_0-logloss:0.08611
[314]	validation_0-logloss:0.08599
[315]	validation_0-logloss:0.08590
[316]	validation_0-logloss:0.08595
[317]	validation_0-logloss:0.08598
[318]	validation_0-logloss:0.08600
[319]	validation_0-logloss:0.08588
[320]	validation_0-logloss:0.08597
[321]	validation_0-logloss:0.08605
[322]	validation_0-logloss:0.08609
[323]	validation_0-logloss:0.08598
[324]	validation_0-logloss:0.08598
[325]	validation_0-logloss:0.08590
[326]	validation_0-logloss:0.08578
[327]	validation_0-logloss:0.08586
[328]	validation_0-logloss:0.08594
[329]	validation_0-logloss:0.08582
[330]	validation_0-logloss:0.08587
[331]	validation_0-logloss:0.08589
[332]	validation_0-logloss:0.08592
[333]	validation_0-logloss:0.08584
[334]	validation_0-logloss:0.08574
[335]	validation_0-logloss:0.08582
[336]	validation_0-logloss:0.08589
[337]	validation_0-logloss:0.08594
[338]	validation_0-logloss:0.08583
[339]	validation_0-logloss:0.08591
[340]	validation_0-logloss:0.08583
[341]	validation_0-logloss:0.08573
[342]	validation_0-logloss:0.08568
[343]	validation_0-logloss:0.08572
[344]	validation_0-logloss:0.08580
[345]	validation_0-logloss:0.08582
[346]	validation_0-logloss:0.08571
[347]	validation_0-logloss:0.08579
[348]	validation_0-logloss:0.08583
[349]	validation_0-logloss:0.08573
[350]	validation_0-logloss:0.08566
[351]	validation_0-logloss:0.08573
[352]	validation_0-logloss:0.08581
[353]	validation_0-logloss:0.08571
[354]	validation_0-logloss:0.08566
[355]	validation_0-logloss:0.08570
[356]	validation_0-logloss:0.08563
[357]	validation_0-logloss:0.08553
[358]	validation_0-logloss:0.08560
[359]	validation_0-logloss:0.08568
[360]	validation_0-logloss:0.08558
[361]	validation_0-logloss:0.08560
[362]	validation_0-logloss:0.08564
[363]	validation_0-logloss:0.08571
[364]	validation_0-logloss:0.08579
[365]	validation_0-logloss:0.08569
[366]	validation_0-logloss:0.08573
[367]	validation_0-logloss:0.08568
[368]	validation_0-logloss:0.08559
[369]	validation_0-logloss:0.08552
[370]	validation_0-logloss:0.08559
[371]	validation_0-logloss:0.08550
[372]	validation_0-logloss:0.08556
[373]	validation_0-logloss:0.08561
[374]	validation_0-logloss:0.08563
[375]	validation_0-logloss:0.08553
[376]	validation_0-logloss:0.08561
[377]	validation_0-logloss:0.08567
[378]	validation_0-logloss:0.08571
[379]	validation_0-logloss:0.08562
[380]	validation_0-logloss:0.08558
[381]	validation_0-logloss:0.08562
[382]	validation_0-logloss:0.08564
[383]	validation_0-logloss:0.08555
[384]	validation_0-logloss:0.08562
[385]	validation_0-logloss:0.08562
[386]	validation_0-logloss:0.08555
[387]	validation_0-logloss:0.08546
[388]	validation_0-logloss:0.08550
[389]	validation_0-logloss:0.08546
[390]	validation_0-logloss:0.08532
[391]	validation_0-logloss:0.08539
[392]	validation_0-logloss:0.08530
[393]	validation_0-logloss:0.08537
[394]	validation_0-logloss:0.08530
[395]	validation_0-logloss:0.08537
[396]	validation_0-logloss:0.08528
[397]	validation_0-logloss:0.08532
[398]	validation_0-logloss:0.08528
[399]	validation_0-logloss:0.08520
In [44]:
get_clf_eval(y_test, pred )
오차 행렬
[[35  2]
 [ 1 76]]
정확도: 0.9737, 정밀도: 0.9744, 재현율: 0.9870,    F1: 0.9806, AUC:0.9665
In [ ]:
 
728x90