728x90

실전연습

In [1]:

# Widen the notebook's main container to 90% of the browser window.
# `display` comes from the public IPython.display module; importing it from
# IPython.core.display is deprecated.
from IPython.display import HTML, display

display(HTML("<style>.container { width:90% !important; }</style>"))

In [126]:

import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [127]:

# Load the wine dataset from the CSV file next to the notebook.
csv_path = "./wine.csv"
df = pd.read_csv(csv_path)

In [128]:

# Display the loaded DataFrame (pandas truncates the middle rows in the rendered table).
df

Out[128]:

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol	quality
0	7.4	0.700	0.00	1.9	0.076	11.0	34.0	0.99780	3.51	0.56	9.4	0
1	7.8	0.880	0.00	2.6	0.098	25.0	67.0	0.99680	3.20	0.68	9.8	0
2	7.8	0.760	0.04	2.3	0.092	15.0	54.0	0.99700	3.26	0.65	9.8	0
3	11.2	0.280	0.56	1.9	0.075	17.0	60.0	0.99800	3.16	0.58	9.8	1
4	7.4	0.700	0.00	1.9	0.076	11.0	34.0	0.99780	3.51	0.56	9.4	0
...	...	...	...	...	...	...	...	...	...	...	...	...
1594	6.2	0.600	0.08	2.0	0.090	32.0	44.0	0.99490	3.45	0.58	10.5	0
1595	5.9	0.550	0.10	2.2	0.062	39.0	51.0	0.99512	3.52	0.76	11.2	1
1596	6.3	0.510	0.13	2.3	0.076	29.0	40.0	0.99574	3.42	0.75	11.0	1
1597	5.9	0.645	0.12	2.0	0.075	32.0	44.0	0.99547	3.57	0.71	10.2	0
1598	6.0	0.310	0.47	3.6	0.067	18.0	42.0	0.99549	3.39	0.66	11.0	1

1599 rows × 12 columns

In [61]:

# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

Out[61]:

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64

In [62]:

# (rows, columns) of the full dataset.
df.shape

Out[62]:

(1599, 12)

In [64]:

df.iloc[:,:-1]   # x data (features): all rows, every column except the last

Out[64]:

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol
0	7.4	0.700	0.00	1.9	0.076	11.0	34.0	0.99780	3.51	0.56	9.4
1	7.8	0.880	0.00	2.6	0.098	25.0	67.0	0.99680	3.20	0.68	9.8
2	7.8	0.760	0.04	2.3	0.092	15.0	54.0	0.99700	3.26	0.65	9.8
3	11.2	0.280	0.56	1.9	0.075	17.0	60.0	0.99800	3.16	0.58	9.8
4	7.4	0.700	0.00	1.9	0.076	11.0	34.0	0.99780	3.51	0.56	9.4
...	...	...	...	...	...	...	...	...	...	...	...
1594	6.2	0.600	0.08	2.0	0.090	32.0	44.0	0.99490	3.45	0.58	10.5
1595	5.9	0.550	0.10	2.2	0.062	39.0	51.0	0.99512	3.52	0.76	11.2
1596	6.3	0.510	0.13	2.3	0.076	29.0	40.0	0.99574	3.42	0.75	11.0
1597	5.9	0.645	0.12	2.0	0.075	32.0	44.0	0.99547	3.57	0.71	10.2
1598	6.0	0.310	0.47	3.6	0.067	18.0	42.0	0.99549	3.39	0.66	11.0

1599 rows × 11 columns

In [65]:

df.iloc[:,-1:]   # y data (target): all rows, last column only, kept as a one-column DataFrame

Out[65]:

	quality
0	0
1	0
2	0
3	1
4	0
...	...
1594	0
1595	1
1596	1
1597	0
1598	1

1599 rows × 1 columns

In [66]:

# Split the frame by position: every column but the last is a feature, and
# the last column is the label (kept 2-D as a one-column DataFrame).
# Copies are taken so later edits cannot write back into `df`.
last_col = df.shape[1] - 1
df_x = df.iloc[:, :last_col].copy()
df_y = df.iloc[:, last_col:].copy()

In [86]:

# Hold out a test split using train_test_split's default proportions; the
# fixed random_state makes the shuffle reproducible.
split_parts = train_test_split(df_x, df_y, random_state=42)
train_input, test_input, train_target, test_target = split_parts

In [87]:

# Report the (rows, columns) of the training split, then of the test split.
for split in (train_input, test_input):
    print(split.shape)

(1199, 11)
(400, 11)

In [88]:

# Per-feature statistics of the training split, used for z-score scaling.
# ddof=0 (population standard deviation) matches what np.std computes.
mean = train_input.mean(axis=0)
std = train_input.std(axis=0, ddof=0)

In [89]:

# Per-feature mean computed from the training split.
mean

Out[89]:

fixed acidity            8.316931
volatile acidity         0.531902
citric acid              0.272244
residual sugar           2.547623
chlorides                0.088464
free sulfur dioxide     15.864887
total sulfur dioxide    46.766055
density                  0.996765
pH                       3.311259
sulphates                0.660017
alcohol                 10.420183
dtype: float64

In [90]:

# Per-feature standard deviation computed from the training split.
std

Out[90]:

fixed acidity            1.713657
volatile acidity         0.180142
citric acid              0.195369
residual sugar           1.411277
chlorides                0.049336
free sulfur dioxide     10.249824
total sulfur dioxide    33.012617
density                  0.001842
pH                       0.154192
sulphates                0.175158
alcohol                  1.058656
dtype: float64

In [91]:

# Standardise the training features column by column: z = (x - mean) / std.
train_scaled = train_input.sub(mean, axis="columns").div(std, axis="columns")

In [92]:

# Flatten the one-column target frame to a 1-D array and check its shape.
train_target.values.ravel().shape

Out[92]:

(1199,)

In [93]:

# Train a k-nearest-neighbours classifier with its default settings on the
# standardised features; the labels are flattened to 1-D first.
kn = KNeighborsClassifier()
train_labels_1d = train_target.to_numpy().ravel()
kn.fit(train_scaled, train_labels_1d)

Out[93]:

KNeighborsClassifier()

In [94]:

# Standardise the test features with the TRAINING mean/std so both splits
# share the same scale.
test_scaled = test_input.sub(mean, axis="columns").div(std, axis="columns")

In [95]:

# Mean accuracy of the fitted classifier on the held-out test split.
kn.score(test_scaled, test_target.values.ravel())

Out[95]:

0.7025

In [96]:

# Compare candidate k values with 5-fold cross-validation on the training
# split only. Scoring each k on the test split would leak test information
# into the choice of k, so the final test accuracy would be optimistically
# biased. A fresh classifier is built per k, so the fitted `kn` is untouched.
train_labels = train_target.values.ravel()
for n in range(3, 16):
    candidate = KNeighborsClassifier(n_neighbors=n)
    score = cross_val_score(candidate, train_scaled, train_labels, cv=5).mean()
    print(n, score)

In [97]:

# Set k to 10 on the existing classifier object; this is the estimator that gets pickled.
kn.n_neighbors = 10

In [110]:

# Bundle the classifier with the training-set scaling statistics so that new
# samples can be standardised the same way before prediction, then write the
# bundle to disk. (`pickle` is imported in the notebook's imports cell.)
ml_data = {
    'model': kn,
    'mean': mean,
    'std': std,
}
with open("./ml.pkl", "wb") as f:
    pickle.dump(ml_data, f)

In [111]:

# Inspect the dictionary that was written to ./ml.pkl.
ml_data

Out[111]:

{'model': KNeighborsClassifier(n_neighbors=10),
 'mean': fixed acidity            8.316931
 volatile acidity         0.531902
 citric acid              0.272244
 residual sugar           2.547623
 chlorides                0.088464
 free sulfur dioxide     15.864887
 total sulfur dioxide    46.766055
 density                  0.996765
 pH                       3.311259
 sulphates                0.660017
 alcohol                 10.420183
 dtype: float64,
 'std': fixed acidity            1.713657
 volatile acidity         0.180142
 citric acid              0.195369
 residual sugar           1.411277
 chlorides                0.049336
 free sulfur dioxide     10.249824
 total sulfur dioxide    33.012617
 density                  0.001842
 pH                       0.154192
 sulphates                0.175158
 alcohol                  1.058656
 dtype: float64}

In [108]:

test_scaled.iloc[10].values.reshape(1,-1)   # reshape(1, -1) turns the single row into a 2-D one-row array; reshape is used a lot

Out[108]:

array([[ 1.27392452e+00, -1.21579562e-01,  1.88236489e+00,
        -1.04602461e-01,  3.75717860e-01, -9.62444565e-01,
        -9.62239831e-01,  2.90765142e-01, -1.43496105e+00,
        -9.52314295e-05,  1.30336580e+00]])

In [109]:

# Predict the class of test row 10; iloc[[10]] keeps the row 2-D (one row,
# all feature columns), which is the shape predict expects.
kn.predict(test_scaled.iloc[[10]].values)

Out[109]:

array([1])

'두두의 IT > Machine Learning' 카테고리의 다른 글

[머신러닝3] K-최근접 이웃/표준점수(z-score) (0)	2022.01.05
[머신러닝2] 불확실성/샘플링편향/훈련세트/테스트세트 (0)	2022.01.05
[머신러닝1] 개념/통계와의 차이/이진분류/K-최근접 이웃(K-Nearest Neighbors) (0)	2022.01.05

두두의 頭

[머신러닝4] 실전연습/KNN 알고리즘으로 z-score 구하기

'두두의 IT > Machine Learning' 카테고리의 다른 글

티스토리툴바

[머신러닝4] 실전연습/KNN 알고리즘으로 z-score 구하기

'두두의 IT > Machine Learning' 카테고리의 다른 글

'두두의 IT/Machine Learning' Related Articles

티스토리툴바