05wk-1: Basics of Deep Learning (4)
Logistic (2) – Designing and training a logistic network, and comparing loss functions: the greatness of BCE loss
Lecture video
https://youtube.com/playlist?list=PLQqh36zP38-xTS1PLLmvw8ECtkv6M1RTN
Imports
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
Logistic regression intro
motive
- Cases like the following are common in practice
- As \(x\) gets larger (or smaller), the probability of success increases.
- What does (X, y) look like?
_df = pd.DataFrame({'x':range(-6,7),'y':[0,0,0,0,0,0,1,0,1,1,1,1,1]})
_df
  | x | y |
---|---|---|
0 | -6 | 0 |
1 | -5 | 0 |
2 | -4 | 0 |
3 | -3 | 0 |
4 | -2 | 0 |
5 | -1 | 0 |
6 | 0 | 1 |
7 | 1 | 0 |
8 | 2 | 1 |
9 | 3 | 1 |
10 | 4 | 1 |
11 | 5 | 1 |
12 | 6 | 1 |
plt.plot(_df.x,_df.y,'o')
- (Preliminary) There is a function called the sigmoid
_x = torch.linspace(-6,6,100)
def f(x):
    return torch.exp(x)/(1+torch.exp(x))
plt.plot(_df.x,_df.y,'o')
plt.plot(_x,f(_x))
model
- A model in which \(y=1\) becomes more likely as \(x\) increases can be designed as follows <-- memorize this!!!
\(y_i \sim Ber(\pi_i),\quad\) where \(\pi_i = \frac{\exp(w_0+w_1x_i)}{1+\exp(w_0+w_1x_i)}\)
\(\hat{y}_i= \frac{\exp(\hat{w}_0+\hat{w}_1x_i)}{1+\exp(\hat{w}_0+\hat{w}_1x_i)}=\frac{1}{1+\exp(-\hat{w}_0-\hat{w}_1x_i)}\)
\(loss= - \sum_{i=1}^{n} \big(y_i\log(\hat{y}_i)+(1-y_i)\log(1-\hat{y}_i)\big)\) <-- memorize this!!
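- Side note (not in the original lecture): the loss above is exactly the negative log-likelihood of the Bernoulli model. A minimal sketch checking this with torch.distributions, using made-up values for \(y_i\) and \(\hat{y}_i\):
_y    = torch.tensor([0., 1., 1.])     # hypothetical labels
_yhat = torch.tensor([0.2, 0.7, 0.9])  # hypothetical predicted probabilities
nll = -torch.distributions.Bernoulli(probs=_yhat).log_prob(_y).sum()  # negative log-likelihood
bce = -torch.sum(_y*torch.log(_yhat) + (1-_y)*torch.log(1-_yhat))     # the formula above
nll, bce  # the two values agree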
toy example
- Start of the example
x = torch.linspace(-1,1,2000).reshape(2000,1)
w0 = -1
w1 = 5
u = w0+x*w1
v = torch.exp(u)/(1+torch.exp(u)) # v = πi, i.e., the probability
y = torch.bernoulli(v)
plt.scatter(x,y,alpha=0.05)
plt.plot(x,v,'--r')
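- A small sanity check (not in the lecture): since y is sampled from Ber(v), the overall proportion of ones in y should be close to the mean of the true probabilities v.
y.mean().item(), v.mean().item()  # these two numbers should be close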
- Our goal: learn a mapping that, given \(x\), produces the value of the red curve \(\hat{y}\).
net design
- Initial curve
plt.scatter(x,y,alpha=0.05)
plt.plot(x,v,'--r')
w0hat = -0.8470
w1hat = -0.3467
plt.plot(x,f(x*w1hat+w0hat),'--b')
- Instead of f, it is also fine to use torch.nn.Sigmoid() to build the function
a1 = torch.nn.Sigmoid()
plt.scatter(x,y,alpha=0.05)
plt.plot(x,v,'--r')
w0hat = -0.8470
w1hat = -0.3467
plt.plot(x,a1(x*w1hat+w0hat),'--b')
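- A quick check (not in the original) that a1 and the hand-written f really are the same mapping:
torch.allclose(a1(_x), f(_x))  # True: torch.nn.Sigmoid() computes exp(x)/(1+exp(x))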
- Instead of x*w1hat + w0hat, it is also fine to use torch.nn.Linear()
torch.manual_seed(43052)
l1 = torch.nn.Linear(in_features=1,out_features=1,bias=True)
l1.weight
Parameter containing:
tensor([[-0.3467]], requires_grad=True)
l1.bias
Parameter containing:
tensor([-0.8470], requires_grad=True)
plt.scatter(x,y,alpha=0.05)
plt.plot(x,v,'--r')
plt.plot(x,a1(l1(x)).data,'--b')
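- One can also verify (this check is not in the original) that l1 computes the same affine map as x*w1hat + w0hat with the printed parameter values:
w0hat = l1.bias.data.item()    # -0.8470
w1hat = l1.weight.data.item()  # -0.3467
torch.allclose(l1(x).data, x*w1hat + w0hat)  # True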
- The current structure is \(x \overset{l1}{\to} u \overset{a1}{\to} v = \hat{y}\)
- Can't we chain l1 and a1 sequentially (in series) to get a single map \(x \to \hat{y}\)?
net = torch.nn.Sequential(l1,a1)
plt.scatter(x,y,alpha=0.05)
plt.plot(x,v,'--r')
plt.plot(x,net(x).data,'--b')
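- A one-line check (not in the lecture) that net is simply the composition of l1 followed by a1:
torch.allclose(net(x).data, a1(l1(x)).data)  # True: net(x) = a1(l1(x))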
training
- Now let's set up an optimizer and train.
optimizr = torch.optim.SGD(net.parameters(),lr=0.05)
- Steps 1~4
(train for 1000 epochs)
for epoc in range(1000):
    ## 1
    yhat = net(x)
    ## 2
    loss = torch.mean((y-yhat)**2) ## this really shouldn't be the loss here.. :(
    ## 3
    loss.backward()
    ## 4
    optimizr.step()
    optimizr.zero_grad()
plt.scatter(x,y,alpha=0.05)
plt.plot(x,v,'--r')
plt.plot(x,net(x).data,'--b')
(train for 5000 more epochs)
for epoc in range(5000):
    ## 1
    yhat = net(x)
    ## 2
    loss = torch.mean((y-yhat)**2) ## again, this really shouldn't be the loss.. :(
    ## 3
    loss.backward()
    ## 4
    optimizr.step()
    optimizr.zero_grad()
plt.scatter(x,y,alpha=0.05)
plt.plot(x,v,'--r')
plt.plot(x,net(x).data,'--b')
.. Success?
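- One way to judge whether this "succeeded" (this check is not in the original notes) is to compare the fitted parameters with the true values \(w_0=-1,\ w_1=5\) used to generate the data:
net[0].bias.data, net[0].weight.data  # compare with the true (w0, w1) = (-1, 5)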
The greatness of BCE loss
Comparing MSE loss and BCE loss
- loss_fn1, loss_fn2
def loss_fn1(y,yhat):
    return torch.mean((y-yhat)**2)
def loss_fn2(y,yhat):
    return -torch.mean(y*torch.log(yhat) + (1-y)*torch.log(1-yhat))
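- Aside (not in the lecture): loss_fn2 is exactly what torch.nn.BCELoss computes with its default mean reduction; a quick check on made-up values:
bce = torch.nn.BCELoss()                         # default reduction='mean'
_yhat = torch.tensor([0.1, 0.7, 0.8, 0.4, 0.9])  # hypothetical predicted probabilities
_y = torch.tensor([0., 1., 1., 0., 1.])          # hypothetical labels
torch.allclose(loss_fn2(_y, _yhat), bce(_yhat, _y))  # True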
- loss_fn1 (MSE), SGD, lr=0.05
torch.manual_seed(43052)
net = torch.nn.Sequential(torch.nn.Linear(1,1),torch.nn.Sigmoid())
optimizr = torch.optim.SGD(net.parameters(),lr=0.05)
for epoc in range(1000):
    ## step1
    yhat = net(x)
    ## step2
    loss = loss_fn1(y,yhat)
    ## step3
    loss.backward()
    ## step4
    optimizr.step()
    optimizr.zero_grad()
plt.plot(x,y,'o',alpha=0.05)
plt.plot(x,v,'--')
plt.plot(x,net(x).data,'--')
- loss_fn2 (BCE), SGD, lr=0.05
torch.manual_seed(43052)
net = torch.nn.Sequential(torch.nn.Linear(1,1),torch.nn.Sigmoid())
optimizr = torch.optim.SGD(net.parameters(),lr=0.05)
for epoc in range(1000):
    ## step1
    yhat = net(x)
    ## step2
    loss = loss_fn2(y,yhat)
    ## step3
    loss.backward()
    ## step4
    optimizr.step()
    optimizr.zero_grad()
plt.plot(x,y,'o',alpha=0.05)
plt.plot(x,v,'--')
plt.plot(x,net(x).data,'--')
!! With loss_fn2 the fit is better?
?? Why?????
- The shapes of the two loss functions are different..
fig = plt.figure()
ax1 = fig.add_subplot(1,2,1,projection='3d')
ax2 = fig.add_subplot(1,2,2,projection='3d')
ax1.elev=15; ax2.elev=15; ax1.azim=75; ax2.azim=75
fig.set_figwidth(8)
fig.set_figheight(8)
def plot_loss(loss_fn,ax):
    w0hat,w1hat = torch.meshgrid(torch.arange(-10,3,0.15),torch.arange(-1,10,0.15),indexing='ij')
    w0hat = w0hat.reshape(-1)
    w1hat = w1hat.reshape(-1)
    def l(w0hat,w1hat):
        yhat = torch.exp(w0hat+w1hat*x)/(1+torch.exp(w0hat+w1hat*x))
        return loss_fn(y,yhat)
    loss = list(map(l,w0hat,w1hat))
    ax.scatter(w0hat,w1hat,loss,s=0.1,alpha=0.2)
    ax.scatter(-1,5,l(-1,5),s=200,marker='*') # the minimum isn't actually exactly at (-1,5)..
plot_loss(loss_fn1,ax1)
plot_loss(loss_fn2,ax2)
ax1.set_title('MSE loss')
ax2.set_title('BCE loss')
fig
- In the left figure, the loss function is not convex.
- In the right figure, the loss function is convex (a 1-d slice is sketched below).
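- To see the difference without the 3-d plot, here is a sketch (not part of the original notes) that fixes w1hat at the true value 5 and traces each loss along w0hat only; the MSE curve flattens into plateaus while the BCE curve keeps a convex bowl shape.
_w0 = torch.linspace(-10, 3, 200)
mse = [loss_fn1(y, torch.sigmoid(w0 + 5*x)).item() for w0 in _w0]  # MSE slice at w1hat = 5
bce = [loss_fn2(y, torch.sigmoid(w0 + 5*x)).item() for w0 in _w0]  # BCE slice at w1hat = 5
plt.plot(_w0, mse, label='MSE loss')
plt.plot(_w0, bce, label='BCE loss')
plt.legend()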
Helper functions for visualization
Prep 1: let's write a function that runs the training for-loop for us and records the information we need at every epoch!
def learn_and_record(net, loss_fn, optimizr):
    yhat_history = []
    loss_history = []
    what_history = []
    for epoc in range(1000):
        ## step1
        yhat = net(x)
        ## step2
        loss = loss_fn(y,yhat)
        ## step3
        loss.backward()
        ## step4
        optimizr.step()
        optimizr.zero_grad()
        ## record
        if epoc % 20 == 0:
            yhat_history.append(yhat.reshape(-1).data.tolist())
            loss_history.append(loss.item())
            what_history.append([net[0].bias.data.item(), net[0].weight.data.item()])
    return yhat_history, loss_history, what_history
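- A minimal usage sketch (not in the original): information is recorded every 20 of the 1000 epochs, so each returned history should contain 50 entries. The names _net and _opt below are just throwaway objects for this check.
_net = torch.nn.Sequential(torch.nn.Linear(1,1), torch.nn.Sigmoid())
_opt = torch.optim.SGD(_net.parameters(), lr=0.05)
_yh, _lh, _wh = learn_and_record(_net, loss_fn2, _opt)
len(_yh), len(_lh), len(_wh)  # (50, 50, 50)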
Prep 2: let's write a function that builds the animation!
from matplotlib import animation
"animation.html"] = "jshtml" plt.rcParams[
def show_lrpr2(net,loss_fn,optimizr,suptitle=''):
    yhat_history,loss_history,what_history = learn_and_record(net,loss_fn,optimizr)

    fig = plt.figure(figsize=(7,2.5))
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2, projection='3d')
    ax1.set_xticks([]); ax1.set_yticks([])
    ax2.set_xticks([]); ax2.set_yticks([]); ax2.set_zticks([])
    ax2.elev = 15; ax2.azim = 75

    ## ax1: left panel
    ax1.plot(x,v,'--')
    ax1.scatter(x,y,alpha=0.05)
    line, = ax1.plot(x,yhat_history[0],'--')

    plot_loss(loss_fn,ax2)
    fig.suptitle(suptitle)
    fig.tight_layout()
    def animate(epoc):
        line.set_ydata(yhat_history[epoc])
        ax2.scatter(np.array(what_history)[epoc,0],np.array(what_history)[epoc,1],loss_history[epoc],color='grey')
        return line
    ani = animation.FuncAnimation(fig, animate, frames=30)
    plt.close()
    return ani
Visualization 1: MSE loss, SGD, good initial values
l1 = torch.nn.Linear(1,1)
a1 = torch.nn.Sigmoid()
net = torch.nn.Sequential(l1,a1)
optimizr = torch.optim.SGD(net.parameters(),lr=0.05)
l1.bias.data = torch.tensor([-3.0])
l1.weight.data = torch.tensor([[-1.0]])
show_lrpr2(net,loss_fn1,optimizr,'MSEloss, SGD, good_init')
Visualization 2: BCE loss, SGD, good initial values
l1 = torch.nn.Linear(1,1)
a1 = torch.nn.Sigmoid()
net = torch.nn.Sequential(l1,a1)
optimizr = torch.optim.SGD(net.parameters(),lr=0.05)
l1.bias.data = torch.tensor([-3.0])
l1.weight.data = torch.tensor([[-1.0]])
show_lrpr2(net,loss_fn2,optimizr)
Visualization 3: MSE loss, SGD, bad initial values
l1 = torch.nn.Linear(1,1)
a1 = torch.nn.Sigmoid()
net = torch.nn.Sequential(l1,a1)
optimizr = torch.optim.SGD(net.parameters(),lr=0.05)
l1.bias.data = torch.tensor([-10.0])
l1.weight.data = torch.tensor([[-1.0]])
show_lrpr2(net,loss_fn1,optimizr)
Visualization 4: BCE loss, SGD, bad initial values (?)
l1 = torch.nn.Linear(1,1)
a1 = torch.nn.Sigmoid()
net = torch.nn.Sequential(l1,a1)
optimizr = torch.optim.SGD(net.parameters(),lr=0.05)
l1.bias.data = torch.tensor([-10.0])
l1.weight.data = torch.tensor([[-1.0]])
show_lrpr2(net,loss_fn2,optimizr)
Homework
- In Visualizations 1~4, set the optimizer to Adam and run them again
(hint) For Visualization 1, modifying the code as below is enough.
Visualization 1: MSE loss, ADAM, good initial values
l1 = torch.nn.Linear(1,1)
a1 = torch.nn.Sigmoid()
net = torch.nn.Sequential(l1,a1)
optimizr = torch.optim.Adam(net.parameters(),lr=0.05)
l1.bias.data = torch.tensor([-3.0])
l1.weight.data = torch.tensor([[-1.0]])
show_lrpr2(net,loss_fn1,optimizr,'MSEloss, ADAM, good_init')