import numpy as np
import pandas as pd
import sklearn.model_selection
import sklearn.linear_model
import sklearn.tree
import matplotlib.pyplot as plt
import seaborn as sns
07wk-35: 아이스크림(이상치) / 의사결정나무
1. 강의영상
2. Imports
3. Data
# Build the training set: 100 temperature readings and simulated ice-cream sales.
# Fix the RNG seed so the simulated noise below is reproducible.
np.random.seed(43052)
# Take the first 100 rows of column 3 of the remote CSV as the temperature vector.
# np.sort returns a sorted copy, so this also works when to_numpy() hands back
# a read-only array (pandas Copy-on-Write), where an in-place .sort() would fail.
temp = np.sort(
    pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/temp.csv').iloc[:100,3].to_numpy()
)
# Linear signal in temperature plus standard-normal noise.
ice_sales = 10 + temp * 0.5 + np.random.randn(100)
# Deliberately overwrite the first observation (lowest temperature) with an outlier.
ice_sales[0] = 200
df_train = pd.DataFrame({'temp':temp,'ice_sales':ice_sales})
# Notebook cell display of the resulting frame (no effect when run as a script).
df_train
| | temp | ice_sales |
---|---|---|
0 | -4.1 | 200.000000 |
1 | -3.7 | 9.234175 |
2 | -3.0 | 9.642778 |
3 | -1.3 | 9.657894 |
4 | -0.5 | 9.987787 |
... | ... | ... |
95 | 12.4 | 17.508688 |
96 | 13.4 | 17.105376 |
97 | 14.7 | 17.164930 |
98 | 15.0 | 18.555388 |
99 | 15.2 | 18.787014 |
100 rows × 2 columns
# Scatter plot of the full training data, including the outlier at index 0.
plt.plot(df_train.temp,df_train.ice_sales,'o')
4. 분석
- 분석: 의사결정나무
# Fit a decision-tree regressor on the training data and plot its fitted values.
# step1: feature matrix (2-D, single column) and target vector
X = df_train[['temp']]
y = df_train['ice_sales']
# step2: create the predictor with default hyperparameters
predictr = sklearn.tree.DecisionTreeRegressor()
# step3: fit on the training data (outlier included)
predictr.fit(X,y)
# step4: store the in-sample predictions next to the observations
df_train['ice_sales_hat'] = predictr.predict(X)
# Alternative that plots every row, outlier included (kept for reference):
# plt.plot(df_train.temp,df_train.ice_sales,'o')
# plt.plot(df_train.temp,df_train.ice_sales_hat,'--')
# Skip row 0 (the injected outlier, ice_sales == 200) when plotting —
# presumably so the remaining points stay readable on the y-axis.
plt.plot(df_train.temp[1:],df_train.ice_sales[1:],'o')
plt.plot(df_train.temp[1:],df_train.ice_sales_hat[1:],'--')
- 12.5~18 구간 사이의 unseen data를 가상으로 만들고 예측값을 살펴보자.
# Predict on a synthetic grid of 100 unseen temperatures between 12.5 and 18.
# NOTE: XX and df_test are the same DataFrame object; predict() must be called
# while it still has only the 'temp' column, i.e. before 'ice_sales_hat' is added.
XX = df_test = pd.DataFrame({'temp':np.linspace(12.5,18,100)})
df_test['ice_sales_hat'] = predictr.predict(XX)
# Training observations and fitted values (row 0, the outlier, is skipped), faded.
plt.plot(df_train.temp[1:],df_train.ice_sales[1:],'o',color='C0',alpha=0.5)
plt.plot(df_train.temp[1:],df_train.ice_sales_hat[1:],'--',color='C1',alpha=0.5)
# Predictions on the unseen grid, drawn with a thicker line.
plt.plot(df_test.temp,df_test.ice_sales_hat,'--',color='C2',linewidth=2)
- -15~0 구간 사이의 unseen data를 가상으로 만들고 예측값을 살펴보자.
# Predict on a synthetic grid of 100 unseen temperatures between -15 and 0.
# NOTE: XX and df_test are the same DataFrame object; predict() must be called
# while it still has only the 'temp' column, i.e. before 'ice_sales_hat' is added.
XX = df_test = pd.DataFrame({'temp':np.linspace(-15,0,100)})
df_test['ice_sales_hat'] = predictr.predict(XX)
# Training observations and fitted values (row 0, the outlier, is skipped), faded.
plt.plot(df_train.temp[1:],df_train.ice_sales[1:],'o',color='C0',alpha=0.5)
plt.plot(df_train.temp[1:],df_train.ice_sales_hat[1:],'--',color='C1',alpha=0.5)
# Predictions on the unseen grid, drawn with a thicker line.
plt.plot(df_test.temp,df_test.ice_sales_hat,'--',color='C2',linewidth=2)
- 뭐 이 데이터에서는 최선이지 않을까?
6. HW
- 없어요. 다른 과목 중간고사 준비 잘하세요!