2. Imports
import numpy as np
import pandas as pd
import sklearn.linear_model
import sklearn.tree
3. Data
# Load the weight-loss dataset from the remote CSV; the bare expression on the
# last line echoes the frame when run as a notebook cell.
df_train = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/guebin/MP2023/main/posts/weightloss.csv'
)
df_train
|  | Supplement | Exercise | Weight_Loss |
|---|---|---|---|
| 0 | False | False | -0.877103 |
| 1 | True | False | 1.604542 |
| 2 | True | True | 13.824148 |
| 3 | True | True | 13.004505 |
| 4 | True | True | 13.701128 |
| ... | ... | ... | ... |
| 9995 | True | False | 1.558841 |
| 9996 | False | False | -0.217816 |
| 9997 | False | True | 4.072701 |
| 9998 | True | False | -0.253796 |
| 9999 | False | False | -1.399092 |

10000 rows × 3 columns
# Aggregate observed Weight_Loss per Supplement (rows) x Exercise (columns) cell
# using pivot_table's default aggregation.
df_train.pivot_table(values='Weight_Loss', index='Supplement', columns='Exercise')
| Supplement | Exercise=False | Exercise=True |
|---|---|---|
| False | 0.021673 | 4.991314 |
| True | 0.497573 | 14.966363 |
- 운동과 체중감량보조제를 병행하면 시너지가 나는 것 같음
4. 분석
- 분석1: 선형회귀 (교호작용 고려 X)
# Step 1: feature matrix (the two treatment flags) and the target column.
X = df_train[['Supplement','Exercise']]
y = df_train['Weight_Loss']
# Step 2: additive linear model -- X carries no Supplement x Exercise product term.
predictr = sklearn.linear_model.LinearRegression()
# Step 3: fit on the full training frame.
predictr.fit(X=X, y=y)
# Step 4: store the in-sample predictions next to the observed values.
df_train['Weight_Loss_hat'] = predictr.predict(X)
# Observed per-cell aggregate, shown for comparison with the fitted values.
df_train.pivot_table(values='Weight_Loss', index='Supplement', columns='Exercise')
| Supplement | Exercise=False | Exercise=True |
|---|---|---|
| False | 0.021673 | 4.991314 |
| True | 0.497573 | 14.966363 |
# Aggregate the model's fitted values (Weight_Loss_hat) per Supplement (rows)
# x Exercise (columns) cell.
df_train.pivot_table(values='Weight_Loss_hat', index='Supplement', columns='Exercise')
| Supplement | Exercise=False | Exercise=True |
|---|---|---|
| False | -2.373106 | 7.374557 |
| True | 2.845934 | 12.593598 |
- 분석2: 의사결정나무
# Step 1: feature matrix (the two treatment flags) and the target column.
X = df_train[['Supplement','Exercise']]
y = df_train['Weight_Loss']
# Step 2: regression tree with default hyperparameters.
predictr = sklearn.tree.DecisionTreeRegressor()
# Step 3: fit on the full training frame.
predictr.fit(X=X, y=y)
# Step 4: write the tree's in-sample predictions into the Weight_Loss_hat column.
df_train['Weight_Loss_hat'] = predictr.predict(X)
|  | Supplement | Exercise | Weight_Loss | Weight_Loss_hat |
|---|---|---|---|---|
| 0 | False | False | -0.877103 | 0.021673 |
| 1 | True | False | 1.604542 | 0.497573 |
| 2 | True | True | 13.824148 | 14.966363 |
| 3 | True | True | 13.004505 | 14.966363 |
| 4 | True | True | 13.701128 | 14.966363 |
| ... | ... | ... | ... | ... |
| 9995 | True | False | 1.558841 | 0.497573 |
| 9996 | False | False | -0.217816 | 0.021673 |
| 9997 | False | True | 4.072701 | 4.991314 |
| 9998 | True | False | -0.253796 | 0.497573 |
| 9999 | False | False | -1.399092 | 0.021673 |

10000 rows × 4 columns
# Aggregate observed Weight_Loss per Supplement (rows) x Exercise (columns) cell
# using pivot_table's default aggregation.
df_train.pivot_table(values='Weight_Loss', index='Supplement', columns='Exercise')
| Supplement | Exercise=False | Exercise=True |
|---|---|---|
| False | 0.021673 | 4.991314 |
| True | 0.497573 | 14.966363 |
# Aggregate the model's fitted values (Weight_Loss_hat) per Supplement (rows)
# x Exercise (columns) cell.
df_train.pivot_table(values='Weight_Loss_hat', index='Supplement', columns='Exercise')
| Supplement | Exercise=False | Exercise=True |
|---|---|---|
| False | 0.021673 | 4.991314 |
| True | 0.497573 | 14.966363 |