Project
항공사 만족도 예측 -python활용
J.H_DA
2022. 2. 28. 20:41
학부 시절 mysql로 kaggle의 항공사 만족도 데이터를 가지고 분석을 해본 경험을 바탕으로 데이콘의 대회 참여를 위해 본 프로젝트를 진행해 보았다.
https://dacon.io/competitions/official/235871/overview/description
항공사 고객 만족도 예측 경진대회 - DACON
좋아요는 1분 내에 한 번만 클릭 할 수 있습니다.
dacon.io
데이터의 EDA 및 예측 모델링은 구글의 colab으로 진행하였다.
EDA 및 시각화에 필요한 툴들을 import 해주고 train set의 학습을 통한 test set의 테스트를 위한 사이킷 런 등을 import 하였다.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
from sklearn import datasets
from sklearn.model_selection import train_test_split
%matplotlib inline
plt.style.use('seaborn')
import warnings
warnings.simplefilter("ignore")
In [ ]:
satisfy_test = pd.read_csv('/content/drive/MyDrive/test.csv')
satisfy_train = pd.read_csv('/content/drive/MyDrive/train.csv')
In [ ]:
satisfy_train.head()
Out[ ]:
idGenderCustomer TypeAgeType of TravelClassFlight DistanceSeat comfortDeparture/Arrival time convenientFood and drinkGate locationInflight wifi serviceInflight entertainmentOnline supportEase of Online bookingOn-board serviceLeg room serviceBaggage handlingCheckin serviceCleanlinessOnline boardingDeparture Delay in MinutesArrival Delay in Minutestarget01234
1 | Female | disloyal Customer | 22 | Business travel | Eco | 1599 | 3 | 0 | 3 | 3 | 4 | 3 | 4 | 4 | 5 | 4 | 4 | 4 | 5 | 4 | 0 | 0.0 | 0 |
2 | Female | Loyal Customer | 37 | Business travel | Business | 2810 | 2 | 4 | 4 | 4 | 1 | 4 | 3 | 5 | 5 | 4 | 2 | 1 | 5 | 2 | 18 | 18.0 | 0 |
3 | Male | Loyal Customer | 46 | Business travel | Business | 2622 | 1 | 1 | 1 | 1 | 4 | 5 | 5 | 4 | 4 | 4 | 4 | 5 | 4 | 3 | 0 | 0.0 | 1 |
4 | Female | disloyal Customer | 24 | Business travel | Eco | 2348 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 2 | 4 | 5 | 3 | 4 | 3 | 10 | 2.0 | 0 |
5 | Female | Loyal Customer | 58 | Business travel | Business | 105 | 3 | 3 | 3 | 3 | 4 | 4 | 5 | 4 | 4 | 4 | 4 | 4 | 4 | 5 | 0 | 0.0 | 1 |
결측치를 확인해 본다.
In [ ]:
# 결측치 확인
def check_missing_col(dataframe):
missing_col = []
for col in dataframe.columns:
missing_values = sum(dataframe[col].isna())
is_missing = True if missing_values >= 1 else False
if is_missing:
print(f'결측치가 있는 컬럼은: {col} 입니다')
print(f'해당 컬럼에 총 {missing_values} 개의 결측치가 존재합니다.')
missing_col.append([col, dataframe[col].dtype])
if missing_col == []:
print('결측치가 존재하지 않습니다')
return missing_col
missing_col = check_missing_col(satisfy_train)
결측치가 존재하지 않습니다
In [ ]:
# 데이터 타입 확인
satisfy_train.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 24 columns):
id 3000 non-null int64
Gender 3000 non-null object
Customer Type 3000 non-null object
Age 3000 non-null int64
Type of Travel 3000 non-null object
Class 3000 non-null object
Flight Distance 3000 non-null int64
Seat comfort 3000 non-null int64
Departure/Arrival time convenient 3000 non-null int64
Food and drink 3000 non-null int64
Gate location 3000 non-null int64
Inflight wifi service 3000 non-null int64
Inflight entertainment 3000 non-null int64
Online support 3000 non-null int64
Ease of Online booking 3000 non-null int64
On-board service 3000 non-null int64
Leg room service 3000 non-null int64
Baggage handling 3000 non-null int64
Checkin service 3000 non-null int64
Cleanliness 3000 non-null int64
Online boarding 3000 non-null int64
Departure Delay in Minutes 3000 non-null int64
Arrival Delay in Minutes 3000 non-null float64
target 3000 non-null int64
dtypes: float64(1), int64(19), object(4)
memory usage: 562.6+ KB
데이터를 숫자 데이터인 numeric_feature와 평가 점수인 ordinal_feature, 범주 feature인 categorical_feature로 분류하였다.
In [ ]:
numeric_feature = ['Age', 'Flight Distance', 'Departure Delay in Minutes', 'Arrival Delay in Minutes']
ordinal_feature = ['Seat comfort', 'Departure/Arrival time convenient', 'Food and drink', 'Gate location',
'Inflight wifi service', 'Inflight entertainment', 'Online support', 'Ease of Online booking',
'On-board service', 'Leg room service', 'Baggage handling', 'Checkin service', 'Cleanliness', 'Online boarding']
categorical_feature = ['Gender', 'Customer Type', 'Type of Travel', 'Class']
숫자가 아닌 범주형 데이터 들을 숫자로 변경해 주었다.
In [ ]:
## 범주형 데이터 숫자로 변경하기
## Gender column
gender_mapping = {"Female": 1, "Male": 0}
satisfy_train['Gender'] = satisfy_train['Gender'].map(gender_mapping)
satisfy_test['Gender'] = satisfy_test['Gender'].map(gender_mapping)
## Customer type
customer_type_mapping = {"Loyal Customer": 1, "disloyal Customer": 0}
satisfy_train['Customer Type'] = satisfy_train['Customer Type'].map(customer_type_mapping)
satisfy_test['Customer Type'] = satisfy_test['Customer Type'].map(customer_type_mapping)
## Type of travel
travel_type_mapping = {"Business travel": 2, "Personal Travel": 1}
satisfy_train['Type of Travel'] = satisfy_train['Type of Travel'].map(travel_type_mapping)
satisfy_test['Type of Travel'] = satisfy_test['Type of Travel'].map(travel_type_mapping)
## Class column
class_mapping = {"Business": 3, "Eco Plus": 2, "Eco": 1}
satisfy_train['Class'] = satisfy_train['Class'].map(class_mapping)
satisfy_test['Class'] = satisfy_test['Class'].map(class_mapping)
In [ ]:
# id 제거
satisfy_train=satisfy_train.drop('id', axis=1)
satisfy_test=satisfy_test.drop('id', axis=1)
In [ ]:
df=satisfy_train.corr()
fig, ax = plt.subplots(figsize=(15,15))
mask = np.zeros_like(df, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
sns.heatmap(df,
cmap = 'RdYlBu_r',
annot = True,
mask=mask,
linewidths=.5,
cbar_kws={"shrink": .5},
vmin = -1,vmax = 1
)
plt.show()

In [ ]:
ordinal_feature = ['Seat comfort', 'Departure/Arrival time convenient', 'Food and drink', 'Gate location',
'Inflight wifi service', 'Inflight entertainment', 'Online support', 'Ease of Online booking',
'On-board service', 'Leg room service', 'Baggage handling', 'Checkin service', 'Cleanliness', 'Online boarding']
In [ ]:
print("Train data missing value")
for feature in ordinal_feature:
if len(satisfy_train[satisfy_train[feature] == 0]) > 0:
print(feature, len(satisfy_train[satisfy_train[feature] == 0]))
print("=======================")
print("Test data missing value")
for feature in ordinal_feature:
if len(satisfy_test[satisfy_test[feature] == 0]) > 0:
print(feature, len(satisfy_test[satisfy_test[feature] == 0]))
Train data missing value
Seat comfort 106
Departure/Arrival time convenient 154
Food and drink 129
Inflight wifi service 2
Inflight entertainment 73
Ease of Online booking 1
Leg room service 11
Online boarding 1
=======================
Test data missing value
Seat comfort 64
Departure/Arrival time convenient 90
Food and drink 80
Inflight wifi service 1
Inflight entertainment 39
Leg room service 10
In [ ]:
satisfy_train["Ease of Online booking"][satisfy_train["Ease of Online booking"] == 0] = 3
satisfy_train["Online boarding"][satisfy_train["Online boarding"] == 0] = 3
In [ ]:
for i in range(len(satisfy_train)):
for feature in ordinal_feature:
if satisfy_train[feature].iloc[i] == 0:
satisfy_train[feature].iloc[i] = np.nan
for i in range(len(satisfy_test)):
for feature in ordinal_feature:
if satisfy_test[feature].iloc[i] == 0:
satisfy_test[feature].iloc[i] = np.nan
In [ ]:
corr_matrix = satisfy_train.corr()
corr_matrix["Inflight wifi service"].sort_values(ascending=False)[:5]
Out[ ]:
Inflight wifi service 1.000000
Online boarding 0.630754
Ease of Online booking 0.602117
Online support 0.562900
Inflight entertainment 0.290907
Name: Inflight wifi service, dtype: float64
In [ ]:
!pip install datawig
Requirement already satisfied: datawig in /usr/local/lib/python3.7/dist-packages (0.2.0)
Requirement already satisfied: scikit-learn[alldeps]==0.22.1 in /usr/local/lib/python3.7/dist-packages (from datawig) (0.22.1)
Requirement already satisfied: typing==3.6.6 in /usr/local/lib/python3.7/dist-packages (from datawig) (3.6.6)
Requirement already satisfied: pandas==0.25.3 in /usr/local/lib/python3.7/dist-packages (from datawig) (0.25.3)
Requirement already satisfied: mxnet==1.4.0 in /usr/local/lib/python3.7/dist-packages (from datawig) (1.4.0)
Requirement already satisfied: requests>=2.20.0 in /usr/local/lib/python3.7/dist-packages (from mxnet==1.4.0->datawig) (2.27.1)
Requirement already satisfied: graphviz<0.9.0,>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from mxnet==1.4.0->datawig) (0.8.4)
Requirement already satisfied: numpy<1.15.0,>=1.8.2 in /usr/local/lib/python3.7/dist-packages (from mxnet==1.4.0->datawig) (1.14.6)
Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.7/dist-packages (from pandas==0.25.3->datawig) (2.8.2)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas==0.25.3->datawig) (2018.9)
Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn[alldeps]==0.22.1->datawig) (1.5.4)
Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn[alldeps]==0.22.1->datawig) (1.0.1)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.6.1->pandas==0.25.3->datawig) (1.15.0)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20.0->mxnet==1.4.0->datawig) (2021.10.8)
Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20.0->mxnet==1.4.0->datawig) (2.0.11)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20.0->mxnet==1.4.0->datawig) (1.24.3)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20.0->mxnet==1.4.0->datawig) (2.10)
In [ ]:
import datawig
target_feature = "Inflight wifi service"
imputer = datawig.SimpleImputer(input_columns=['Online boarding', 'Ease of Online booking', 'Online support'],
output_column=target_feature)
imputer.fit(train_df=satisfy_train, num_epochs=50)
null_train =satisfy_train[satisfy_train[target_feature].isnull()]
null_imputed = imputer.predict(null_train)
imputed_train = pd.DataFrame(null_imputed)
n = 0
for i in range(len(satisfy_train)):
if np.isnan(satisfy_train[target_feature].iloc[i]) == True:
satisfy_train[target_feature].iloc[i] = int(round(imputed_train[target_feature+"_imputed"].iloc[n]))
n += 1
null_train = satisfy_test[satisfy_test[target_feature].isnull()]
null_imputed = imputer.predict(null_train)
imputed_train = pd.DataFrame(null_imputed)
n = 0
for i in range(len(satisfy_test)):
if np.isnan(satisfy_test[target_feature].iloc[i]) == True:
satisfy_test[target_feature].iloc[i] = int(round(imputed_train[target_feature+"_imputed"].iloc[n]))
n += 1
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-48-7aac68fff42e> in <module>()
8
9 null_train =satisfy_train[satisfy_train[target_feature].isnull()]
---> 10 null_imputed = imputer.predict(null_train)
11 imputed_train = pd.DataFrame(null_imputed)
12
/usr/local/lib/python3.7/dist-packages/datawig/simple_imputer.py in predict(self, data_frame, precision_threshold, imputation_suffix, score_suffix, inplace)
418 """
419 imputations = self.imputer.predict(data_frame, precision_threshold, imputation_suffix,
--> 420 score_suffix, inplace=inplace)
421
422 return imputations
/usr/local/lib/python3.7/dist-packages/datawig/imputer.py in predict(self, data_frame, precision_threshold, imputation_suffix, score_suffix, inplace)
827 *[c.input_columns for c in self.label_encoders if isinstance(c, NumericalEncoder)]))
828
--> 829 predictions = self.predict_above_precision(data_frame, precision_threshold).items()
830 for label, imputations in predictions:
831 imputation_col = label + imputation_suffix
/usr/local/lib/python3.7/dist-packages/datawig/imputer.py in predict_above_precision(self, data_frame, precision_threshold)
878
879 """
--> 880 mxnet_iter = self.__mxnet_iter_from_df(data_frame)
881 return self.__predict_above_precision_mxnet_iter(mxnet_iter,
882 precision_threshold=precision_threshold)
/usr/local/lib/python3.7/dist-packages/datawig/imputer.py in __mxnet_iter_from_df(self, data_frame)
1046 data_columns=self.data_encoders,
1047 label_columns=self.label_encoders,
-> 1048 batch_size=self.batch_size
1049 )
1050
/usr/local/lib/python3.7/dist-packages/datawig/iterators.py in __init__(self, data_frame, data_columns, label_columns, batch_size)
229 # custom padding for having to discard the last batch in mxnet for sparse data
230 padding_n_rows = self._n_rows_padding(data_frame)
--> 231 self.start_padding_idx = int(data_frame.index.max() + 1)
232 for idx in range(self.start_padding_idx, self.start_padding_idx + padding_n_rows):
233 data_frame.loc[idx, :] = data_frame.loc[self.start_padding_idx - 1, :]
ValueError: cannot convert float NaN to integer
In [ ]:
import datawig
target_feature = "Leg room service"
imputer = datawig.SimpleImputer(input_columns=['Baggage handling', 'On-board service', 'Cleanliness'],
output_column=target_feature)
imputer.fit(train_df=satisfy_train, num_epochs=50)
null_train =satisfy_train[satisfy_train[target_feature].isnull()]
null_imputed = imputer.predict(null_train)
imputed_train = pd.DataFrame(null_imputed)
n = 0
for i in range(len(satisfy_train)):
if np.isnan(satisfy_train[target_feature].iloc[i]) == True:
satisfy_train[target_feature].iloc[i] = int(round(imputed_train[target_feature+"_imputed"].iloc[n]))
n += 1
null_train = satisfy_test[satisfy_test[target_feature].isnull()]
null_imputed = imputer.predict(null_train)
imputed_train = pd.DataFrame(null_imputed)
n = 0
for i in range(len(satisfy_test)):
if np.isnan(satisfy_test[target_feature].iloc[i]) == True:
satisfy_test[target_feature].iloc[i] = int(round(imputed_train[target_feature+"_imputed"].iloc[n]))
n += 1
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-49-aba194bd9f1f> in <module>()
8
9 null_train =satisfy_train[satisfy_train[target_feature].isnull()]
---> 10 null_imputed = imputer.predict(null_train)
11 imputed_train = pd.DataFrame(null_imputed)
12
/usr/local/lib/python3.7/dist-packages/datawig/simple_imputer.py in predict(self, data_frame, precision_threshold, imputation_suffix, score_suffix, inplace)
418 """
419 imputations = self.imputer.predict(data_frame, precision_threshold, imputation_suffix,
--> 420 score_suffix, inplace=inplace)
421
422 return imputations
/usr/local/lib/python3.7/dist-packages/datawig/imputer.py in predict(self, data_frame, precision_threshold, imputation_suffix, score_suffix, inplace)
827 *[c.input_columns for c in self.label_encoders if isinstance(c, NumericalEncoder)]))
828
--> 829 predictions = self.predict_above_precision(data_frame, precision_threshold).items()
830 for label, imputations in predictions:
831 imputation_col = label + imputation_suffix
/usr/local/lib/python3.7/dist-packages/datawig/imputer.py in predict_above_precision(self, data_frame, precision_threshold)
878
879 """
--> 880 mxnet_iter = self.__mxnet_iter_from_df(data_frame)
881 return self.__predict_above_precision_mxnet_iter(mxnet_iter,
882 precision_threshold=precision_threshold)
/usr/local/lib/python3.7/dist-packages/datawig/imputer.py in __mxnet_iter_from_df(self, data_frame)
1046 data_columns=self.data_encoders,
1047 label_columns=self.label_encoders,
-> 1048 batch_size=self.batch_size
1049 )
1050
/usr/local/lib/python3.7/dist-packages/datawig/iterators.py in __init__(self, data_frame, data_columns, label_columns, batch_size)
229 # custom padding for having to discard the last batch in mxnet for sparse data
230 padding_n_rows = self._n_rows_padding(data_frame)
--> 231 self.start_padding_idx = int(data_frame.index.max() + 1)
232 for idx in range(self.start_padding_idx, self.start_padding_idx + padding_n_rows):
233 data_frame.loc[idx, :] = data_frame.loc[self.start_padding_idx - 1, :]
ValueError: cannot convert float NaN to integer
In [ ]:
# 다중공선성이 높을 것으로 예상되는 Food and drink, Depature Delay in minutes 제거
satisfy_train.drop('Departure Delay in Minutes', axis = 1, inplace = True)
satisfy_train.drop('Food and drink', axis = 1, inplace = True)
satisfy_train.drop('Departure/Arrival time convenient', axis = 1, inplace = True)
satisfy_test.drop('Departure Delay in Minutes', axis = 1, inplace = True)
satisfy_test.drop('Food and drink', axis = 1, inplace = True)
satisfy_test.drop('Departure/Arrival time convenient', axis = 1, inplace = True)
In [ ]:
corr_matrix = satisfy_train.corr()
corr_matrix["Seat comfort"].sort_values(ascending=False)[:5]
Out[ ]:
Seat comfort 1.000000
Food and drink 0.692309
Departure/Arrival time convenient 0.497028
Gate location 0.451671
Inflight entertainment 0.447442
Name: Seat comfort, dtype: float64
In [ ]:
import datawig
target_feature = "Seat comfort"
imputer = datawig.SimpleImputer(input_columns=['Gate location', 'Inflight entertainment'],
output_column=target_feature)
imputer.fit(train_df=satisfy_train, num_epochs=50)
null_train =satisfy_train[satisfy_train[target_feature].isnull()]
null_imputed = imputer.predict(null_train)
imputed_train = pd.DataFrame(null_imputed)
n = 0
for i in range(len(satisfy_train)):
if np.isnan(satisfy_train[target_feature].iloc[i]) == True:
satisfy_train[target_feature].iloc[i] = int(round(imputed_train[target_feature+"_imputed"].iloc[n]))
n += 1
null_train = satisfy_test[satisfy_test[target_feature].isnull()]
null_imputed = imputer.predict(null_train)
imputed_train = pd.DataFrame(null_imputed)
n = 0
for i in range(len(satisfy_test)):
if np.isnan(satisfy_test[target_feature].iloc[i]) == True:
satisfy_test[target_feature].iloc[i] = int(round(imputed_train[target_feature+"_imputed"].iloc[n]))
n += 1
In [ ]:
satisfy_train.drop(['Food and drink', 'Departure/Arrival time convenient', 'Inflight entertainment', 'Online support', 'Ease of Online booking'], axis=1, inplace=True)
satisfy_test.drop(['Food and drink', 'Departure/Arrival time convenient', 'Inflight entertainment', 'Online support', 'Ease of Online booking'], axis=1, inplace=True)
In [ ]:
satisfy_train.head()
Out[ ]:
GenderCustomer TypeAgeType of TravelClassFlight DistanceSeat comfortGate locationInflight wifi serviceOn-board serviceLeg room serviceBaggage handlingCheckin serviceCleanlinessOnline boardingDeparture Delay in MinutesArrival Delay in Minutestarget01234
Female | disloyal Customer | 22 | Business travel | Eco | 1599 | 3.0 | 3 | 4.0 | 5 | 4.0 | 4 | 4 | 5 | 4 | 0 | 0.0 | 0 |
Female | Loyal Customer | 37 | Business travel | Business | 2810 | 2.0 | 4 | 1.0 | 5 | 4.0 | 2 | 1 | 5 | 2 | 18 | 18.0 | 0 |
Male | Loyal Customer | 46 | Business travel | Business | 2622 | 1.0 | 1 | 4.0 | 4 | 4.0 | 4 | 5 | 4 | 3 | 0 | 0.0 | 1 |
Female | disloyal Customer | 24 | Business travel | Eco | 2348 | 3.0 | 3 | 3.0 | 2 | 4.0 | 5 | 3 | 4 | 3 | 10 | 2.0 | 0 |
Female | Loyal Customer | 58 | Business travel | Business | 105 | 3.0 | 3 | 4.0 | 4 | 4.0 | 4 | 4 | 4 | 5 | 0 | 0.0 | 1 |
In [ ]:
satisfy_train.drop('Departure Delay in Minutes', axis = 1, inplace = True)
satisfy_test.drop('Departure Delay in Minutes', axis = 1, inplace = True)
In [ ]:
!pip install pycaret
Requirement already satisfied: pycaret in /usr/local/lib/python3.7/dist-packages (2.3.6)
Requirement already satisfied: imbalanced-learn==0.7.0 in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.7.0)
Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from pycaret) (3.2.2)
Requirement already satisfied: textblob in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.15.3)
Requirement already satisfied: mlxtend>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.19.0)
Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.25.3)
Requirement already satisfied: Boruta in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.3)
Requirement already satisfied: pandas-profiling>=2.8.0 in /usr/local/lib/python3.7/dist-packages (from pycaret) (3.1.0)
Requirement already satisfied: kmodes>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.11.1)
Requirement already satisfied: pyLDAvis in /usr/local/lib/python3.7/dist-packages (from pycaret) (3.2.2)
Requirement already satisfied: nltk in /usr/local/lib/python3.7/dist-packages (from pycaret) (3.2.5)
Requirement already satisfied: pyyaml<6.0.0 in /usr/local/lib/python3.7/dist-packages (from pycaret) (5.4.1)
Requirement already satisfied: wordcloud in /usr/local/lib/python3.7/dist-packages (from pycaret) (1.5.0)
Requirement already satisfied: pyod in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.9.7)
Requirement already satisfied: seaborn in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.11.2)
Requirement already satisfied: IPython in /usr/local/lib/python3.7/dist-packages (from pycaret) (5.5.0)
Requirement already satisfied: scikit-plot in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.3.7)
Requirement already satisfied: umap-learn in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.5.2)
Requirement already satisfied: yellowbrick>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from pycaret) (1.3.post1)
Requirement already satisfied: ipywidgets in /usr/local/lib/python3.7/dist-packages (from pycaret) (7.6.5)
Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from pycaret) (1.0.1)
Requirement already satisfied: spacy<2.4.0 in /usr/local/lib/python3.7/dist-packages (from pycaret) (2.2.4)
Requirement already satisfied: gensim<4.0.0 in /usr/local/lib/python3.7/dist-packages (from pycaret) (3.6.0)
Requirement already satisfied: cufflinks>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.17.3)
Requirement already satisfied: lightgbm>=2.3.1 in /usr/local/lib/python3.7/dist-packages (from pycaret) (3.3.2)
Requirement already satisfied: mlflow in /usr/local/lib/python3.7/dist-packages (from pycaret) (1.23.1)
Requirement already satisfied: plotly>=4.4.1 in /usr/local/lib/python3.7/dist-packages (from pycaret) (5.5.0)
Requirement already satisfied: scipy<=1.5.4 in /usr/local/lib/python3.7/dist-packages (from pycaret) (1.5.4)
Requirement already satisfied: scikit-learn==0.23.2 in /usr/local/lib/python3.7/dist-packages (from pycaret) (0.23.2)
Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.7/dist-packages (from imbalanced-learn==0.7.0->pycaret) (1.19.5)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn==0.23.2->pycaret) (3.1.0)
Requirement already satisfied: colorlover>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from cufflinks>=0.17.0->pycaret) (0.3.0)
Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.7/dist-packages (from cufflinks>=0.17.0->pycaret) (1.15.0)
Requirement already satisfied: setuptools>=34.4.1 in /usr/local/lib/python3.7/dist-packages (from cufflinks>=0.17.0->pycaret) (57.4.0)
Requirement already satisfied: smart-open>=1.2.1 in /usr/local/lib/python3.7/dist-packages (from gensim<4.0.0->pycaret) (5.2.1)
Requirement already satisfied: pexpect in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret) (4.8.0)
Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret) (1.0.18)
Requirement already satisfied: decorator in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret) (4.4.2)
Requirement already satisfied: pickleshare in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret) (0.7.5)
Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret) (5.1.1)
Requirement already satisfied: pygments in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret) (2.6.1)
Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.7/dist-packages (from IPython->pycaret) (0.8.1)
Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets->pycaret) (5.1.3)
Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets->pycaret) (1.0.2)
Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets->pycaret) (0.2.0)
Requirement already satisfied: widgetsnbextension~=3.5.0 in /usr/local/lib/python3.7/dist-packages (from ipywidgets->pycaret) (3.5.2)
Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.7/dist-packages (from ipywidgets->pycaret) (4.10.1)
Requirement already satisfied: jupyter-client in /usr/local/lib/python3.7/dist-packages (from ipykernel>=4.5.1->ipywidgets->pycaret) (5.3.5)
Requirement already satisfied: tornado>=4.0 in /usr/local/lib/python3.7/dist-packages (from ipykernel>=4.5.1->ipywidgets->pycaret) (5.1.1)
Requirement already satisfied: wheel in /usr/local/lib/python3.7/dist-packages (from lightgbm>=2.3.1->pycaret) (0.37.1)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pycaret) (0.11.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pycaret) (1.3.2)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pycaret) (3.0.7)
Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->pycaret) (2.8.2)
Requirement already satisfied: jupyter-core in /usr/local/lib/python3.7/dist-packages (from nbformat>=4.2.0->ipywidgets->pycaret) (4.9.1)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.7/dist-packages (from nbformat>=4.2.0->ipywidgets->pycaret) (4.3.3)
Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets->pycaret) (3.10.0.2)
Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets->pycaret) (4.10.1)
Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets->pycaret) (0.18.1)
Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets->pycaret) (21.4.0)
Requirement already satisfied: importlib-resources>=1.4.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets->pycaret) (5.4.0)
Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.7/dist-packages (from importlib-resources>=1.4.0->jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets->pycaret) (3.7.0)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->pycaret) (2018.9)
Requirement already satisfied: tangled-up-in-unicode==0.1.0 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret) (0.1.0)
Requirement already satisfied: pydantic>=1.8.1 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret) (1.9.0)
Requirement already satisfied: markupsafe~=2.0.1 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret) (2.0.1)
Requirement already satisfied: missingno>=0.4.2 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret) (0.5.0)
Requirement already satisfied: tqdm>=4.48.2 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret) (4.62.3)
Requirement already satisfied: multimethod>=1.4 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret) (1.7)
Requirement already satisfied: jinja2>=2.11.1 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret) (2.11.3)
Requirement already satisfied: requests>=2.24.0 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret) (2.27.1)
Requirement already satisfied: phik>=0.11.1 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret) (0.12.0)
Requirement already satisfied: htmlmin>=0.1.12 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret) (0.1.12)
Requirement already satisfied: visions[type_image_path]==0.7.4 in /usr/local/lib/python3.7/dist-packages (from pandas-profiling>=2.8.0->pycaret) (0.7.4)
Requirement already satisfied: networkx>=2.4 in /usr/local/lib/python3.7/dist-packages (from visions[type_image_path]==0.7.4->pandas-profiling>=2.8.0->pycaret) (2.6.3)
Requirement already satisfied: Pillow in /usr/local/lib/python3.7/dist-packages (from visions[type_image_path]==0.7.4->pandas-profiling>=2.8.0->pycaret) (7.1.2)
Requirement already satisfied: imagehash in /usr/local/lib/python3.7/dist-packages (from visions[type_image_path]==0.7.4->pandas-profiling>=2.8.0->pycaret) (4.2.1)
Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.7/dist-packages (from plotly>=4.4.1->pycaret) (8.0.1)
Requirement already satisfied: wcwidth in /usr/local/lib/python3.7/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->IPython->pycaret) (0.2.5)
Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.7/dist-packages (from requests>=2.24.0->pandas-profiling>=2.8.0->pycaret) (2.0.11)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.24.0->pandas-profiling>=2.8.0->pycaret) (1.24.3)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.24.0->pandas-profiling>=2.8.0->pycaret) (2.10)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.24.0->pandas-profiling>=2.8.0->pycaret) (2021.10.8)
Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret) (1.0.6)
Requirement already satisfied: thinc==7.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret) (7.4.0)
Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret) (1.0.5)
Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret) (1.1.3)
Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret) (0.9.0)
Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret) (1.0.0)
Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret) (3.0.6)
Requirement already satisfied: blis<0.5.0,>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret) (0.4.1)
Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from spacy<2.4.0->pycaret) (2.0.6)
Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.7/dist-packages (from widgetsnbextension~=3.5.0->ipywidgets->pycaret) (5.3.1)
Requirement already satisfied: Send2Trash in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (1.8.0)
Requirement already satisfied: terminado>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.13.1)
Requirement already satisfied: nbconvert in /usr/local/lib/python3.7/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (5.6.1)
Requirement already satisfied: pyzmq>=13 in /usr/local/lib/python3.7/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets->pycaret) (22.3.0)
Requirement already satisfied: ptyprocess in /usr/local/lib/python3.7/dist-packages (from terminado>=0.8.1->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.7.0)
Requirement already satisfied: PyWavelets in /usr/local/lib/python3.7/dist-packages (from imagehash->visions[type_image_path]==0.7.4->pandas-profiling>=2.8.0->pycaret) (1.2.0)
Requirement already satisfied: databricks-cli>=0.8.7 in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (0.16.4)
Requirement already satisfied: gunicorn in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (20.1.0)
Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (21.3)
Requirement already satisfied: protobuf>=3.7.0 in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (3.17.3)
Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (7.1.2)
Requirement already satisfied: docker>=4.0.0 in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (5.0.3)
Requirement already satisfied: alembic in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (1.7.6)
Requirement already satisfied: sqlparse>=0.3.1 in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (0.4.2)
Requirement already satisfied: cloudpickle in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (1.3.0)
Requirement already satisfied: gitpython>=2.1.0 in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (3.1.26)
Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (1.4.31)
Requirement already satisfied: querystring-parser in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (1.2.4)
Requirement already satisfied: entrypoints in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (0.4)
Requirement already satisfied: prometheus-flask-exporter in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (0.18.7)
Requirement already satisfied: Flask in /usr/local/lib/python3.7/dist-packages (from mlflow->pycaret) (1.1.4)
Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.7/dist-packages (from databricks-cli>=0.8.7->mlflow->pycaret) (0.8.9)
Requirement already satisfied: websocket-client>=0.32.0 in /usr/local/lib/python3.7/dist-packages (from docker>=4.0.0->mlflow->pycaret) (1.2.3)
Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.7/dist-packages (from gitpython>=2.1.0->mlflow->pycaret) (4.0.9)
Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.7/dist-packages (from gitdb<5,>=4.0.1->gitpython>=2.1.0->mlflow->pycaret) (5.0.0)
Requirement already satisfied: Mako in /usr/local/lib/python3.7/dist-packages (from alembic->mlflow->pycaret) (1.1.6)
Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.7/dist-packages (from sqlalchemy->mlflow->pycaret) (1.1.2)
Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from Flask->mlflow->pycaret) (1.0.1)
Requirement already satisfied: itsdangerous<2.0,>=0.24 in /usr/local/lib/python3.7/dist-packages (from Flask->mlflow->pycaret) (1.1.0)
Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.8.4)
Requirement already satisfied: testpath in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.5.0)
Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (1.5.0)
Requirement already satisfied: defusedxml in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.7.1)
Requirement already satisfied: bleach in /usr/local/lib/python3.7/dist-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (4.1.0)
Requirement already satisfied: webencodings in /usr/local/lib/python3.7/dist-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets->pycaret) (0.5.1)
Requirement already satisfied: prometheus-client in /usr/local/lib/python3.7/dist-packages (from prometheus-flask-exporter->mlflow->pycaret) (0.13.1)
Requirement already satisfied: future in /usr/local/lib/python3.7/dist-packages (from pyLDAvis->pycaret) (0.16.0)
Requirement already satisfied: funcy in /usr/local/lib/python3.7/dist-packages (from pyLDAvis->pycaret) (1.17)
Requirement already satisfied: numexpr in /usr/local/lib/python3.7/dist-packages (from pyLDAvis->pycaret) (2.8.1)
Requirement already satisfied: numba>=0.35 in /usr/local/lib/python3.7/dist-packages (from pyod->pycaret) (0.51.2)
Requirement already satisfied: statsmodels in /usr/local/lib/python3.7/dist-packages (from pyod->pycaret) (0.10.2)
Requirement already satisfied: llvmlite<0.35,>=0.34.0.dev0 in /usr/local/lib/python3.7/dist-packages (from numba>=0.35->pyod->pycaret) (0.34.0)
Requirement already satisfied: patsy>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from statsmodels->pyod->pycaret) (0.5.2)
Requirement already satisfied: pynndescent>=0.5 in /usr/local/lib/python3.7/dist-packages (from umap-learn->pycaret) (0.5.6)
In [ ]:
from pycaret.classification import *
clf1 = setup(data = satisfy_train,
silent = True,
target = 'target',
session_id = 20201809,
n_jobs = 7)
DescriptionValue01234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
session_id | 20201809 |
Target | target |
Target Type | Binary |
Label Encoded | None |
Original Data | (3000, 17) |
Missing Values | False |
Numeric Features | 6 |
Categorical Features | 10 |
Ordinal Features | False |
High Cardinality Features | False |
High Cardinality Method | None |
Transformed Train Set | (2099, 42) |
Transformed Test Set | (901, 42) |
Shuffle Train-Test | True |
Stratify Train-Test | False |
Fold Generator | StratifiedKFold |
Fold Number | 10 |
CPU Jobs | 7 |
Use GPU | False |
Log Experiment | False |
Experiment Name | clf-default-name |
USI | 6028 |
Imputation Type | simple |
Iterative Imputation Iteration | None |
Numeric Imputer | mean |
Iterative Imputation Numeric Model | None |
Categorical Imputer | constant |
Iterative Imputation Categorical Model | None |
Unknown Categoricals Handling | least_frequent |
Normalize | False |
Normalize Method | None |
Transformation | False |
Transformation Method | None |
PCA | False |
PCA Method | None |
PCA Components | None |
Ignore Low Variance | False |
Combine Rare Levels | False |
Rare Level Threshold | None |
Numeric Binning | False |
Remove Outliers | False |
Outliers Threshold | None |
Remove Multicollinearity | False |
Multicollinearity Threshold | None |
Remove Perfect Collinearity | True |
Clustering | False |
Clustering Iteration | None |
Polynomial Features | False |
Polynomial Degree | None |
Trignometry Features | False |
Polynomial Threshold | None |
Group Features | False |
Feature Selection | False |
Feature Selection Method | classic |
Features Selection Threshold | None |
Feature Interaction | False |
Feature Ratio | False |
Interaction Threshold | None |
Fix Imbalance | False |
Fix Imbalance Method | SMOTE |
In [ ]:
compare_models(n_select=3, fold = 10) #상위 모델 3개를 선택합니다
ModelAccuracyAUCRecallPrec.F1KappaMCCTT (Sec)lightgbmrfgbcetadadtldaridgelrnbknndummyqdasvm
Light Gradient Boosting Machine | 0.9109 | 0.9706 | 0.9176 | 0.9255 | 0.9210 | 0.8188 | 0.8200 | 0.904 |
Random Forest Classifier | 0.9056 | 0.9609 | 0.9234 | 0.9122 | 0.9173 | 0.8074 | 0.8086 | 0.498 |
Gradient Boosting Classifier | 0.9028 | 0.9646 | 0.9235 | 0.9075 | 0.9150 | 0.8015 | 0.8027 | 0.353 |
Extra Trees Classifier | 0.8942 | 0.9590 | 0.9033 | 0.9100 | 0.9062 | 0.7849 | 0.7858 | 0.378 |
Ada Boost Classifier | 0.8538 | 0.9231 | 0.8697 | 0.8723 | 0.8707 | 0.7024 | 0.7029 | 0.187 |
Decision Tree Classifier | 0.8509 | 0.8464 | 0.8797 | 0.8612 | 0.8700 | 0.6951 | 0.6961 | 0.034 |
Linear Discriminant Analysis | 0.8457 | 0.9138 | 0.8840 | 0.8500 | 0.8664 | 0.6838 | 0.6851 | 0.033 |
Ridge Classifier | 0.8452 | 0.0000 | 0.8831 | 0.8498 | 0.8660 | 0.6829 | 0.6841 | 0.027 |
Logistic Regression | 0.8404 | 0.9140 | 0.8739 | 0.8493 | 0.8612 | 0.6736 | 0.6745 | 1.867 |
Naive Bayes | 0.7485 | 0.8421 | 0.7603 | 0.7886 | 0.7738 | 0.4907 | 0.4918 | 0.038 |
K Neighbors Classifier | 0.5894 | 0.5909 | 0.6906 | 0.6231 | 0.6541 | 0.1509 | 0.1542 | 0.093 |
Dummy Classifier | 0.5665 | 0.5000 | 1.0000 | 0.5665 | 0.7232 | 0.0000 | 0.0000 | 0.016 |
Quadratic Discriminant Analysis | 0.5546 | 0.5401 | 0.6495 | 0.5959 | 0.6181 | 0.0828 | 0.0875 | 0.049 |
SVM - Linear Kernel | 0.5488 | 0.0000 | 0.7975 | 0.6458 | 0.5932 | 0.0215 | 0.0675 | 0.055 |
Out[ ]:
[LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
importance_type='split', learning_rate=0.1, max_depth=-1,
min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
n_estimators=100, n_jobs=7, num_leaves=31, objective=None,
random_state=20201809, reg_alpha=0.0, reg_lambda=0.0,
silent='warn', subsample=1.0, subsample_for_bin=200000,
subsample_freq=0),
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
criterion='gini', max_depth=None, max_features='auto',
max_leaf_nodes=None, max_samples=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=7,
oob_score=False, random_state=20201809, verbose=0,
warm_start=False),
GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
learning_rate=0.1, loss='deviance', max_depth=3,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=100,
n_iter_no_change=None, presort='deprecated',
random_state=20201809, subsample=1.0, tol=0.0001,
validation_fraction=0.1, verbose=0,
warm_start=False)]
In [ ]:
blend_3_soft = blend_models(estimator_list=best, method='soft', choose_better = True)
predict_model(blend_3_soft).head(10)
InitiatedStatusEstimator
. . . . . . . . . . . . . . . . . . | 01:59:01 |
. . . . . . . . . . . . . . . . . . | Finalizing Model |
. . . . . . . . . . . . . . . . . . | Voting Classifier |
AccuracyAUCRecallPrec.F1KappaMCC
---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/joblib/externals/loky/process_executor.py", line 616, in wait_result_broken_or_wakeup
result_item = result_reader.recv()
File "/usr/lib/python3.7/multiprocessing/connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
File "/usr/local/lib/python3.7/dist-packages/numpy/random/_pickle.py", line 2, in <module>
from ._philox import Philox
File "_philox.pyx", line 1, in init numpy.random._philox
File "bit_generator.pyx", line 1, in init numpy.random.bit_generator
ValueError: numpy.ufunc size changed, may indicate binary incompatibility. Expected 216 from C header, got 192 from PyObject
"""
The above exception was the direct cause of the following exception:
BrokenProcessPool Traceback (most recent call last)
<ipython-input-60-36506378a0cd> in <module>()
----> 1 blend_3_soft = blend_models(estimator_list=best, method='soft', choose_better = True)
2 predict_model(blend_3_soft).head(10)
/usr/local/lib/python3.7/dist-packages/pycaret/classification.py in blend_models(estimator_list, fold, round, choose_better, optimize, method, weights, fit_kwargs, groups, probability_threshold, verbose)
1339 groups=groups,
1340 verbose=verbose,
-> 1341 probability_threshold=probability_threshold,
1342 )
1343
/usr/local/lib/python3.7/dist-packages/pycaret/internal/tabular.py in blend_models(estimator_list, fold, round, choose_better, optimize, method, weights, fit_kwargs, groups, probability_threshold, verbose, display)
5302 fit_kwargs=fit_kwargs,
5303 groups=groups,
-> 5304 probability_threshold=probability_threshold,
5305 )
5306 model_results = pull()
/usr/local/lib/python3.7/dist-packages/pycaret/internal/tabular.py in create_model_supervised(estimator, fold, round, cross_validation, predict, fit_kwargs, groups, refit, verbose, system, X_train_data, y_train_data, metrics, add_to_model_list, probability_threshold, display, **kwargs)
3217 logger.info("Finalizing model")
3218 with io.capture_output():
-> 3219 pipeline_with_model.fit(data_X, data_y, **fit_kwargs)
3220 model_fit_end = time.time()
3221
/usr/local/lib/python3.7/dist-packages/pycaret/internal/pipeline.py in fit(self, X, y, **fit_kwargs)
116
117 def fit(self, X, y=None, **fit_kwargs):
--> 118 result = super().fit(X, y=y, **fit_kwargs)
119
120 self._carry_over_final_estimator_fit_vars()
/usr/local/lib/python3.7/dist-packages/imblearn/pipeline.py in fit(self, X, y, **fit_params)
279 self._log_message(len(self.steps) - 1)):
280 if self._final_estimator != "passthrough":
--> 281 self._final_estimator.fit(Xt, yt, **fit_params)
282 return self
283
/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_voting.py in fit(self, X, y, sample_weight)
263 transformed_y = self.le_.transform(y)
264
--> 265 return super().fit(X, transformed_y, sample_weight)
266
267 def predict(self, X):
/usr/local/lib/python3.7/dist-packages/sklearn/ensemble/_voting.py in fit(self, X, y, sample_weight)
79 idx + 1, len(clfs))
80 )
---> 81 for idx, clf in enumerate(clfs) if clf not in (None, 'drop')
82 )
83
/usr/local/lib/python3.7/dist-packages/joblib/parallel.py in __call__(self, iterable)
1052
1053 with self._backend.retrieval_context():
-> 1054 self.retrieve()
1055 # Make sure that we get a last message telling us we are done
1056 elapsed_time = time.time() - self._start_time
/usr/local/lib/python3.7/dist-packages/joblib/parallel.py in retrieve(self)
931 try:
932 if getattr(self._backend, 'supports_timeout', False):
--> 933 self._output.extend(job.get(timeout=self.timeout))
934 else:
935 self._output.extend(job.get())
/usr/local/lib/python3.7/dist-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
540 AsyncResults.get from multiprocessing."""
541 try:
--> 542 return future.result(timeout=timeout)
543 except CfTimeoutError as e:
544 raise TimeoutError from e
/usr/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
433 raise CancelledError()
434 elif self._state == FINISHED:
--> 435 return self.__get_result()
436 else:
437 raise TimeoutError()
/usr/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
BrokenProcessPool: A result has failed to un-serialize. Please ensure that the objects returned by the function are always picklable.
In [ ]:
final_blend_3_soft = finalize_model(blend_3_soft)
prediction = predict_model(final_blend_3_soft, data = satisfy_test)
In [ ]:
print(prediction["Label"])
0 1
1 0
2 1
3 1
4 1
..
1995 0
1996 1
1997 0
1998 1
1999 1
Name: Label, Length: 2000, dtype: int64
In [ ]:
submission = pd.read_csv('/content/drive/MyDrive/sample_submission.csv')
In [ ]:
submission.head()
Out[ ]:
idtarget01234
1 | 0 |
2 | 0 |
3 | 0 |
4 | 0 |
5 | 0 |
In [ ]:
submission['target'] = prediction['Label']
submission.head()
Out[ ]:
idtarget01234
1 | 1 |
2 | 0 |
3 | 1 |
4 | 1 |
5 | 1 |
In [ ]:
submission.to_csv("Satisfy_predict-4.csv",index=False)
728x90