# Sanity-check that PyTorch can see a CUDA-capable GPU
# (the session prints True below, though the PyCaret setup
# later runs with "Use GPU: False").
import torch
torch.cuda.is_available()
True
# Load PyCaret's bundled 'juice' orange-juice purchase dataset
# (1070 rows x 19 columns per the setup summary below).
from pycaret.datasets import get_data
dataset = get_data('juice')
Id Purchase WeekofPurchase StoreID PriceCH PriceMM DiscCH DiscMM SpecialCH SpecialMM LoyalCH SalePriceMM SalePriceCH PriceDiff Store7 PctDiscMM PctDiscCH ListPriceDiff STORE
0 1 CH 237 1 1.75 1.99 0.00 0.0 0 0 0.500000 1.99 1.75 0.24 No 0.000000 0.000000 0.24 1
1 2 CH 239 1 1.75 1.99 0.00 0.3 0 1 0.600000 1.69 1.75 -0.06 No 0.150754 0.000000 0.24 1
2 3 CH 245 1 1.86 2.09 0.17 0.0 0 0 0.680000 2.09 1.69 0.40 No 0.000000 0.091398 0.23 1
3 4 MM 227 1 1.69 1.69 0.00 0.0 0 0 0.400000 1.69 1.69 0.00 No 0.000000 0.000000 0.00 1
4 5 CH 228 7 1.69 1.69 0.00 0.0 0 0 0.956535 1.69 1.69 0.00 Yes 0.000000 0.000000 0.00 0
# Initialize the PyCaret classification experiment.
# 'Purchase' is the binary target (label-encoded CH: 0, MM: 1).
# Defaults apply: ~70/30 train-test split (748/322 transformed rows),
# StratifiedKFold with 10 folds, simple mean/constant imputation.
from pycaret.classification import *
setup_clf = setup(data=dataset, target='Purchase')
Description Value
0 session_id 288
1 Target Purchase
2 Target Type Binary
3 Label Encoded CH: 0, MM: 1
4 Original Data (1070, 19)
5 Missing Values False
6 Numeric Features 13
7 Categorical Features 5
8 Ordinal Features False
9 High Cardinality Features False
10 High Cardinality Method None
11 Transformed Train Set (748, 17)
12 Transformed Test Set (322, 17)
13 Shuffle Train-Test True
14 Stratify Train-Test False
15 Fold Generator StratifiedKFold
16 Fold Number 10
17 CPU Jobs -1
18 Use GPU False
19 Log Experiment False
20 Experiment Name clf-default-name
21 USI ad07
22 Imputation Type simple
23 Iterative Imputation Iteration None
24 Numeric Imputer mean
25 Iterative Imputation Numeric Model None
26 Categorical Imputer constant
27 Iterative Imputation Categorical Model None
28 Unknown Categoricals Handling least_frequent
29 Normalize False
30 Normalize Method None
31 Transformation False
32 Transformation Method None
33 PCA False
34 PCA Method None
35 PCA Components None
36 Ignore Low Variance False
37 Combine Rare Levels False
38 Rare Level Threshold None
39 Numeric Binning False
40 Remove Outliers False
41 Outliers Threshold None
42 Remove Multicollinearity False
43 Multicollinearity Threshold None
44 Remove Perfect Collinearity True
45 Clustering False
46 Clustering Iteration None
47 Polynomial Features False
48 Polynomial Degree None
49 Trignometry Features False
50 Polynomial Threshold None
51 Group Features False
52 Feature Selection False
53 Feature Selection Method classic
54 Features Selection Threshold None
55 Feature Interaction False
56 Feature Ratio False
57 Interaction Threshold None
58 Fix Imbalance False
59 Fix Imbalance Method SMOTE
rf = create_model('rf',fold=5)
Accuracy AUC Recall Prec. F1 Kappa MCC
Fold
0 0.8000 0.8649 0.7018 0.7547 0.7273 0.5697 0.5706
1 0.7933 0.8453 0.7895 0.7031 0.7438 0.5716 0.5743
2 0.8000 0.8817 0.7018 0.7547 0.7273 0.5697 0.5706
3 0.7248 0.8169 0.6429 0.6316 0.6372 0.4156 0.4156
4 0.8054 0.8380 0.7679 0.7288 0.7478 0.5895 0.5901
Mean 0.7847 0.8494 0.7207 0.7146 0.7167 0.5432 0.5443
Std 0.0302 0.0223 0.0524 0.0457 0.0406 0.0643 0.0647
top5 = compare_models(sort='Accuracy', n_select=5)
Model Accuracy AUC Recall Prec. F1 Kappa MCC TT (Sec)
lr Logistic Regression 0.8155 0.8868 0.7277 0.7756 0.7491 0.6036 0.6061 0.1880
ridge Ridge Classifier 0.8155 0.0000 0.7523 0.7627 0.7557 0.6077 0.6095 0.0070
lda Linear Discriminant Analysis 0.8101 0.8876 0.7346 0.7596 0.7448 0.5940 0.5963 0.0060
gbc Gradient Boosting Classifier 0.8007 0.8756 0.7203 0.7459 0.7301 0.5725 0.5755 0.0230
ada Ada Boost Classifier 0.7968 0.8640 0.7060 0.7462 0.7228 0.5629 0.5661 0.0210
rf Random Forest Classifier 0.7888 0.8553 0.7346 0.7189 0.7218 0.5523 0.5571 0.1100
lightgbm Light Gradient Boosting Machine 0.7860 0.8620 0.7314 0.7149 0.7199 0.5473 0.5506 0.0160
et Extra Trees Classifier 0.7821 0.8178 0.7347 0.7053 0.7147 0.5391 0.5447 0.1090
dt Decision Tree Classifier 0.7540 0.7529 0.7031 0.6696 0.6821 0.4822 0.4863 0.0050
nb Naive Bayes 0.7499 0.8248 0.7527 0.6475 0.6946 0.4854 0.4913 0.0060
knn K Neighbors Classifier 0.7314 0.7596 0.5930 0.6646 0.6242 0.4166 0.4202 0.0280
dummy Dummy Classifier 0.6217 0.5000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0050
svm SVM - Linear Kernel 0.5240 0.0000 0.4000 0.1512 0.2194 0.0000 0.0000 0.0050
qda Quadratic Discriminant Analysis 0.3717 0.4771 0.9034 0.3591 0.5110 -0.0424 -0.0494 0.0060
top5
[LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                    intercept_scaling=1, l1_ratio=None, max_iter=1000,
                    multi_class='auto', n_jobs=None, penalty='l2',
                    random_state=288, solver='lbfgs', tol=0.0001, verbose=0,
                    warm_start=False),
 RidgeClassifier(alpha=1.0, class_weight=None, copy_X=True, fit_intercept=True,
                 max_iter=None, normalize=False, random_state=288, solver='auto',
                 tol=0.001),
 LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
                            solver='svd', store_covariance=False, tol=0.0001),
 GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                            learning_rate=0.1, loss='deviance', max_depth=3,
                            max_features=None, max_leaf_nodes=None,
                            min_impurity_decrease=0.0, min_impurity_split=None,
                            min_samples_leaf=1, min_samples_split=2,
                            min_weight_fraction_leaf=0.0, n_estimators=100,
                            n_iter_no_change=None, presort='deprecated',
                            random_state=288, subsample=1.0, tol=0.0001,
                            validation_fraction=0.1, verbose=0,
                            warm_start=False),
 AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                    n_estimators=50, random_state=288)]
tuned_top5 = [tune_model(i) for i in top5]
Accuracy AUC Recall Prec. F1 Kappa MCC
Fold
0 0.8267 0.9003 0.7586 0.7857 0.7719 0.6322 0.6325
1 0.8667 0.9067 0.7241 0.9130 0.8077 0.7077 0.7189
2 0.8533 0.9153 0.8276 0.8000 0.8136 0.6927 0.6930
3 0.7867 0.8894 0.7500 0.7000 0.7241 0.5506 0.5514
4 0.8667 0.9362 0.8929 0.7812 0.8333 0.7230 0.7275
5 0.8400 0.9236 0.7500 0.8077 0.7778 0.6530 0.6541
6 0.7467 0.8469 0.5714 0.6957 0.6275 0.4383 0.4432
7 0.7600 0.8100 0.6429 0.6923 0.6667 0.4796 0.4804
8 0.8649 0.8769 0.7500 0.8750 0.8077 0.7045 0.7094
9 0.7568 0.8688 0.7143 0.6667 0.6897 0.4900 0.4908
Mean 0.8168 0.8874 0.7382 0.7717 0.7520 0.6072 0.6101
Std 0.0468 0.0362 0.0839 0.0783 0.0672 0.1024 0.1035
blender_top5 = blend_models(estimator_list=tuned_top5)
Accuracy AUC Recall Prec. F1 Kappa MCC
Fold
0 0.8667 0.0000 0.8621 0.8065 0.8333 0.7224 0.7235
1 0.8267 0.0000 0.7586 0.7857 0.7719 0.6322 0.6325
2 0.8667 0.0000 0.8621 0.8065 0.8333 0.7224 0.7235
3 0.7867 0.0000 0.7857 0.6875 0.7333 0.5569 0.5603
4 0.8800 0.0000 0.8929 0.8065 0.8475 0.7490 0.7516
5 0.8533 0.0000 0.7857 0.8148 0.8000 0.6843 0.6846
6 0.8000 0.0000 0.7143 0.7407 0.7273 0.5695 0.5697
7 0.7333 0.0000 0.6786 0.6333 0.6552 0.4382 0.4389
8 0.8514 0.0000 0.7143 0.8696 0.7843 0.6726 0.6801
9 0.7568 0.0000 0.7143 0.6667 0.6897 0.4900 0.4908
Mean 0.8221 0.0000 0.7768 0.7618 0.7676 0.6237 0.6256
Std 0.0480 0.0000 0.0706 0.0725 0.0615 0.1005 0.1010
# Refit the blended ensemble on the entire dataset, then generate
# predictions for the last 100 rows.
# NOTE(review): those 100 rows were included in the data used by
# finalize_model, so the 0.80 accuracy reported below is an optimistic,
# in-sample estimate — confirm this is intentional (demo only).
final_model = finalize_model(blender_top5)
prediction = predict_model(final_model, data=dataset.iloc[-100:])
Model Accuracy AUC Recall Prec. F1 Kappa MCC
0 Voting Classifier 0 0.8000 0 0 0 0 0
prediction
Id Purchase WeekofPurchase StoreID PriceCH PriceMM DiscCH DiscMM SpecialCH SpecialMM LoyalCH SalePriceMM SalePriceCH PriceDiff Store7 PctDiscMM PctDiscCH ListPriceDiff STORE Label
970 971 MM 240 1 1.75 1.99 0.0 0.30 0 1 0.224526 1.69 1.75 -0.06 No 0.150754 0.000000 0.24 1 MM
971 972 MM 241 1 1.86 1.99 0.0 0.30 0 1 0.179621 1.69 1.86 -0.17 No 0.150754 0.000000 0.13 1 MM
972 973 CH 242 1 1.86 1.99 0.0 0.30 0 1 0.143697 1.69 1.86 -0.17 No 0.150754 0.000000 0.13 1 MM
973 974 MM 243 1 1.86 1.99 0.0 0.80 0 1 0.314957 1.19 1.86 -0.67 No 0.402010 0.000000 0.13 1 MM
974 975 MM 244 1 1.86 2.09 0.0 0.00 0 0 0.251966 2.09 1.86 0.23 No 0.000000 0.000000 0.23 1 MM
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1065 1066 CH 252 7 1.86 2.09 0.1 0.00 0 0 0.587822 2.09 1.76 0.33 Yes 0.000000 0.053763 0.23 0 CH
1066 1067 CH 256 7 1.86 2.18 0.0 0.00 0 0 0.670258 2.18 1.86 0.32 Yes 0.000000 0.000000 0.32 0 CH
1067 1068 MM 257 7 1.86 2.18 0.0 0.00 0 0 0.736206 2.18 1.86 0.32 Yes 0.000000 0.000000 0.32 0 CH
1068 1069 CH 261 7 1.86 2.13 0.0 0.24 0 0 0.588965 1.89 1.86 0.03 Yes 0.112676 0.000000 0.27 0 CH
1069 1070 CH 270 1 1.86 2.18 0.0 0.00 0 0 0.671172 2.18 1.86 0.32 No 0.000000 0.000000 0.32 1 CH

100 rows × 20 columns

# Score the predicted labels against the true 'Purchase' column;
# prints 0.81 accuracy on the 100-row slice.
from pycaret.utils import check_metric
check_metric(prediction['Purchase'],prediction['Label'],metric='Accuracy')
0.81

카테고리:

업데이트: