2. Imports
import numpy as np
import pandas as pd
import sklearn.linear_model
import sklearn.tree
3. Data
# Load the weight-loss dataset directly from the course repository on GitHub
# (pd.read_csv fetches the URL, so this line needs network access).
df_train = pd.read_csv('https://raw.githubusercontent.com/guebin/MP2023/main/posts/weightloss.csv')
# Bare expression: shows the frame when it is the last statement of a notebook cell.
df_train
| | Supplement | Exercise | Weight_Loss |
|---|---|---|---|
| 0 | False | False | -0.877103 |
| 1 | True | False | 1.604542 |
| 2 | True | True | 13.824148 |
| 3 | True | True | 13.004505 |
| 4 | True | True | 13.701128 |
| ... | ... | ... | ... |
| 9995 | True | False | 1.558841 |
| 9996 | False | False | -0.217816 |
| 9997 | False | True | 4.072701 |
| 9998 | True | False | -0.253796 |
| 9999 | False | False | -1.399092 |

10000 rows × 3 columns
# Summarise Weight_Loss for each Supplement (rows) x Exercise (columns) combination;
# pivot_table aggregates with the mean unless another aggfunc is given.
df_train.pivot_table(index='Supplement',columns='Exercise',values='Weight_Loss')
| Supplement \ Exercise | False | True |
|---|---|---|
| False | 0.021673 | 4.991314 |
| True | 0.497573 | 14.966363 |
-
운동과 체중감량보조제를 병행하면 시너지가 나는 것 같음: 운동만 하면 평균 약 4.99, 보조제만 복용하면 약 0.50인데, 둘을 병행하면 약 14.97로 두 효과의 단순 합(약 5.49)보다 훨씬 큼
4. 분석
-
분석1: 선형회귀 (교호작용 고려 X)
# Step 1 - predictors are the Supplement and Exercise columns; the target is Weight_Loss.
X = df_train[['Supplement','Exercise']]
y = df_train['Weight_Loss']
# Steps 2-3 - create the linear regressor and fit it in a single expression
# (fit() returns the estimator itself, so predictr is bound to the fitted model).
# Only the two main effects enter the model; no Supplement x Exercise product column is supplied.
predictr = sklearn.linear_model.LinearRegression().fit(X, y)
# Step 4 - store the in-sample predictions next to the observed values.
df_train['Weight_Loss_hat'] = predictr.predict(X)
# Observed mean Weight_Loss per Supplement x Exercise cell, shown again as the
# reference to compare the fitted values against.
df_train.pivot_table(index='Supplement',columns='Exercise',values='Weight_Loss')
| Supplement \ Exercise | False | True |
|---|---|---|
| False | 0.021673 | 4.991314 |
| True | 0.497573 | 14.966363 |
# Same Supplement x Exercise table, but for the fitted values in Weight_Loss_hat,
# so each cell can be compared with the observed mean for that combination.
df_train.pivot_table(index='Supplement',columns='Exercise',values='Weight_Loss_hat')
| Supplement \ Exercise | False | True |
|---|---|---|
| False | -2.373106 | 7.374557 |
| True | 2.845934 | 12.593598 |
-
분석2: 의사결정나무
# Step 1 - same design as before: Supplement and Exercise as predictors, Weight_Loss as target.
X = df_train[['Supplement','Exercise']]
y = df_train['Weight_Loss']
# Steps 2-3 - create a regression tree with default settings and fit it right away
# (fit() returns the estimator itself, so predictr is bound to the fitted tree).
predictr = sklearn.tree.DecisionTreeRegressor().fit(X, y)
# Step 4 - write the tree's in-sample predictions into Weight_Loss_hat,
# replacing whatever that column held before.
df_train['Weight_Loss_hat'] = predictr.predict(X)
| | Supplement | Exercise | Weight_Loss | Weight_Loss_hat |
|---|---|---|---|---|
| 0 | False | False | -0.877103 | 0.021673 |
| 1 | True | False | 1.604542 | 0.497573 |
| 2 | True | True | 13.824148 | 14.966363 |
| 3 | True | True | 13.004505 | 14.966363 |
| 4 | True | True | 13.701128 | 14.966363 |
| ... | ... | ... | ... | ... |
| 9995 | True | False | 1.558841 | 0.497573 |
| 9996 | False | False | -0.217816 | 0.021673 |
| 9997 | False | True | 4.072701 | 4.991314 |
| 9998 | True | False | -0.253796 | 0.497573 |
| 9999 | False | False | -1.399092 | 0.021673 |

10000 rows × 4 columns
# Observed mean Weight_Loss per Supplement x Exercise cell, shown as the reference
# for the fitted values.
df_train.pivot_table(index='Supplement',columns='Exercise',values='Weight_Loss')
| Supplement \ Exercise | False | True |
|---|---|---|
| False | 0.021673 | 4.991314 |
| True | 0.497573 | 14.966363 |
# Same Supplement x Exercise table for the fitted values in Weight_Loss_hat,
# to compare cell by cell with the observed means.
df_train.pivot_table(index='Supplement',columns='Exercise',values='Weight_Loss_hat')
| Supplement \ Exercise | False | True |
|---|---|---|
| False | 0.021673 | 4.991314 |
| True | 0.497573 | 14.966363 |