import torch
import matplotlib.pyplot as plt
import time
11wk-2: 순환신경망 (5)
순환신경망
LSTM (1)– GPU실험, abcabC예제, abcdabcD예제
강의영상
https://youtube.com/playlist?list=PLQqh36zP38-wCXvhHTVOdOLBFD5T5uscl
import
Define some functions
def f(txt, mapping):
    """Encode each element of *txt* as its integer code via *mapping*."""
    return [mapping[ch] for ch in txt]
# Activation helpers used throughout the notebook.
sig = torch.nn.Sigmoid()
soft = torch.nn.Softmax(dim=1)  # softmax over the class dimension
tanh = torch.nn.Tanh()
GPU 실험
실험결과 요약
| len | # of hidden nodes | backward | cpu | gpu | ratio |
|---|---|---|---|---|---|
20000 | 20 | O | 93.02 | 3.26 | 28.53 |
20000 | 20 | X | 18.85 | 1.29 | 14.61 |
2000 | 20 | O | 6.53 | 0.75 | 8.70 |
2000 | 20 | X | 1.25 | 0.14 | 8.93 |
2000 | 1000 | O | 58.99 | 4.75 | 12.41 |
2000 | 1000 | X | 13.16 | 2.29 | 5.74 |
Exam5: abcabC
data
# Build the abcabC example: x = one-hot inputs, y = next-character targets.
txt = list('abcabC')*100
txt[:8]
# -> ['a', 'b', 'c', 'a', 'b', 'C', 'a', 'b']
txt_x = txt[:-1]  # inputs: every character except the last
txt_y = txt[1:]   # targets: the character that follows each input
mapping = {'a':0,'b':1,'c':2,'C':3}
x = torch.nn.functional.one_hot(torch.tensor(f(txt_x,mapping))).float()
y = torch.nn.functional.one_hot(torch.tensor(f(txt_y,mapping))).float()
x = x.to("cuda:0")
y = y.to("cuda:0")
x.shape
# -> torch.Size([599, 4])
RNN
# RNN model: 4-dim one-hot input -> 3 hidden units -> linear read-out to 4 classes.
torch.manual_seed(43052)
rnn = torch.nn.RNN(4,3)
linr = torch.nn.Linear(3,4)
loss_fn = torch.nn.CrossEntropyLoss()
optimizr = torch.optim.Adam(list(rnn.parameters())+list(linr.parameters()))
rnn.to("cuda:0")
linr.to("cuda:0")
# -> Linear(in_features=3, out_features=4, bias=True)
-
3000 epochs
# Train 3000 epochs, then visualise the last 6 hidden states + predictions.
for epoc in range(3000):
    ## 1: forward (fresh zero initial hidden state each epoch)
    _water = torch.zeros(1,3).to("cuda:0")
    hidden, hT = rnn(x,_water)
    output = linr(hidden)
    ## 2: loss
    loss = loss_fn(output,y)
    ## 3: backward
    loss.backward()
    ## 4: update
    optimizr.step()
    optimizr.zero_grad()
yhat = soft(output)
combinded = torch.concat([hidden,yhat],axis=1).data.to("cpu")
plt.matshow(combinded[-6:],cmap='bwr')
# -> <matplotlib.image.AxesImage at 0x7f47e032f890>
-
6000 epochs
# 3000 more epochs (6000 cumulative), then re-plot.
for epoc in range(3000):
    ## 1: forward (fresh zero initial hidden state each epoch)
    _water = torch.zeros(1,3).to("cuda:0")
    hidden, hT = rnn(x,_water)
    output = linr(hidden)
    ## 2: loss
    loss = loss_fn(output,y)
    ## 3: backward
    loss.backward()
    ## 4: update
    optimizr.step()
    optimizr.zero_grad()
yhat = soft(output)
combinded = torch.concat([hidden,yhat],axis=1).data.to("cpu")
plt.matshow(combinded[-6:],cmap='bwr')
# -> <matplotlib.image.AxesImage at 0x7f47e1078b90>
-
9000 epochs
# 3000 more epochs (9000 cumulative), then re-plot.
for epoc in range(3000):
    ## 1: forward (fresh zero initial hidden state each epoch)
    _water = torch.zeros(1,3).to("cuda:0")
    hidden, hT = rnn(x,_water)
    output = linr(hidden)
    ## 2: loss
    loss = loss_fn(output,y)
    ## 3: backward
    loss.backward()
    ## 4: update
    optimizr.step()
    optimizr.zero_grad()
yhat = soft(output)
combinded = torch.concat([hidden,yhat],axis=1).data.to("cpu")
plt.matshow(combinded[-6:],cmap='bwr')
# -> <matplotlib.image.AxesImage at 0x7f47e0358590>
-
12000 epochs
# 3000 more epochs (12000 cumulative), then re-plot.
for epoc in range(3000):
    ## 1: forward (fresh zero initial hidden state each epoch)
    _water = torch.zeros(1,3).to("cuda:0")
    hidden, hT = rnn(x,_water)
    output = linr(hidden)
    ## 2: loss
    loss = loss_fn(output,y)
    ## 3: backward
    loss.backward()
    ## 4: update
    optimizr.step()
    optimizr.zero_grad()
yhat = soft(output)
combinded = torch.concat([hidden,yhat],axis=1).data.to("cpu")
plt.matshow(combinded[-6:],cmap='bwr')
# -> <matplotlib.image.AxesImage at 0x7f47e2de6f10>
-
15000 epochs
# 3000 more epochs (15000 cumulative), then re-plot.
for epoc in range(3000):
    ## 1: forward (fresh zero initial hidden state each epoch)
    _water = torch.zeros(1,3).to("cuda:0")
    hidden, hT = rnn(x,_water)
    output = linr(hidden)
    ## 2: loss
    loss = loss_fn(output,y)
    ## 3: backward
    loss.backward()
    ## 4: update
    optimizr.step()
    optimizr.zero_grad()
yhat = soft(output)
combinded = torch.concat([hidden,yhat],axis=1).data.to("cpu")
plt.matshow(combinded[-6:],cmap='bwr')
# -> <matplotlib.image.AxesImage at 0x7f47cc12ae50>
LSTM
-
LSTM
# LSTM model: same sizes as the RNN above (4 -> 3 hidden -> 4 classes).
torch.manual_seed(43052)
lstm = torch.nn.LSTM(4,3)
linr = torch.nn.Linear(3,4)
loss_fn = torch.nn.CrossEntropyLoss()
optimizr = torch.optim.Adam(list(lstm.parameters())+list(linr.parameters()))
lstm.to("cuda:0")
linr.to("cuda:0")
# -> Linear(in_features=3, out_features=4, bias=True)
-
3000 epochs
# Train the LSTM 3000 epochs, then visualise hidden states + predictions.
for epoc in range(3000):
    ## 1: forward (zero initial hidden AND cell state each epoch)
    _water = torch.zeros(1,3).to("cuda:0")
    hidden, (hT,cT) = lstm(x,(_water,_water))
    output = linr(hidden)
    ## 2: loss
    loss = loss_fn(output,y)
    ## 3: backward
    loss.backward()
    ## 4: update
    optimizr.step()
    optimizr.zero_grad()
yhat = soft(output)
combinded = torch.concat([hidden,yhat],axis=1).data.to("cpu")
plt.matshow(combinded[-6:],cmap='bwr',vmin=-1,vmax=1)
# -> <matplotlib.image.AxesImage at 0x7f47cc0608d0>
-
6000 epochs
# 3000 more epochs (6000 cumulative), then re-plot.
for epoc in range(3000):
    ## 1: forward (zero initial hidden AND cell state each epoch)
    _water = torch.zeros(1,3).to("cuda:0")
    hidden, (hT,cT) = lstm(x,(_water,_water))
    output = linr(hidden)
    ## 2: loss
    loss = loss_fn(output,y)
    ## 3: backward
    loss.backward()
    ## 4: update
    optimizr.step()
    optimizr.zero_grad()
yhat = soft(output)
combinded = torch.concat([hidden,yhat],axis=1).data.to("cpu")
plt.matshow(combinded[-6:],cmap='bwr',vmin=-1,vmax=1)
# -> <matplotlib.image.AxesImage at 0x7f47c61dd750>
RNN vs LSTM 성능비교실험
-
RNN
# Repeat the RNN experiment 25 times with fresh random inits; plot a 5x5 grid.
fig, ax = plt.subplots(5,5,figsize=(10,10))
for i in range(5):
    for j in range(5):
        rnn = torch.nn.RNN(4,3).to("cuda:0")
        linr = torch.nn.Linear(3,4).to("cuda:0")
        loss_fn = torch.nn.CrossEntropyLoss()
        optimizr = torch.optim.Adam(list(rnn.parameters())+list(linr.parameters()),lr=0.1)
        _water = torch.zeros(1,3).to("cuda:0")
        for epoc in range(3000):
            ## 1: forward
            hidden, hT = rnn(x,_water)
            output = linr(hidden)
            ## 2: loss
            loss = loss_fn(output,y)
            ## 3: backward
            loss.backward()
            ## 4: update
            optimizr.step()
            optimizr.zero_grad()
        yhat = soft(output)
        combind = torch.concat([hidden,yhat],axis=1)
        ax[i][j].matshow(combind.to("cpu").data[-6:],cmap='bwr',vmin=-1,vmax=1)
fig.suptitle(r"$RNN$",size=20)
fig.tight_layout()
-
LSTM
# Same 25-run experiment with an LSTM, for comparison against the RNN grid.
fig, ax = plt.subplots(5,5,figsize=(10,10))
for i in range(5):
    for j in range(5):
        lstm = torch.nn.LSTM(4,3).to("cuda:0")
        linr = torch.nn.Linear(3,4).to("cuda:0")
        loss_fn = torch.nn.CrossEntropyLoss()
        optimizr = torch.optim.Adam(list(lstm.parameters())+list(linr.parameters()),lr=0.1)
        _water = torch.zeros(1,3).to("cuda:0")
        for epoc in range(3000):
            ## 1: forward
            hidden, (hT,cT) = lstm(x,(_water,_water))
            output = linr(hidden)
            ## 2: loss
            loss = loss_fn(output,y)
            ## 3: backward
            loss.backward()
            ## 4: update
            optimizr.step()
            optimizr.zero_grad()
        yhat = soft(output)
        combind = torch.concat([hidden,yhat],axis=1)
        ax[i][j].matshow(combind.to("cpu").data[-6:],cmap='bwr',vmin=-1,vmax=1)
fig.suptitle(r"$LSTM$",size=20)
fig.tight_layout()
Exam6: abcdabcD
data
# Build the abcdabcD example (5 symbols, longer-range dependency than abcabC).
txt = list('abcdabcD')*100
txt[:8]
# -> ['a', 'b', 'c', 'd', 'a', 'b', 'c', 'D']
txt_x = txt[:-1]  # inputs: every character except the last
txt_y = txt[1:]   # targets: the character that follows each input
mapping = {'a':0, 'b':1, 'c':2, 'd':3, 'D':4}
x = torch.nn.functional.one_hot(torch.tensor(f(txt_x,mapping))).float()
y = torch.nn.functional.one_hot(torch.tensor(f(txt_y,mapping))).float()
x = x.to("cuda:0")
y = y.to("cuda:0")
RNN vs LSTM 성능비교실험
-
RNN
# 25-run RNN experiment on abcdabcD (5-dim input, 4 hidden units); 5x5 grid.
fig, ax = plt.subplots(5,5,figsize=(10,10))
for i in range(5):
    for j in range(5):
        rnn = torch.nn.RNN(5,4).to("cuda:0")
        linr = torch.nn.Linear(4,5).to("cuda:0")
        loss_fn = torch.nn.CrossEntropyLoss()
        optimizr = torch.optim.Adam(list(rnn.parameters())+list(linr.parameters()),lr=0.1)
        _water = torch.zeros(1,4).to("cuda:0")
        for epoc in range(3000):
            ## 1: forward
            hidden, hT = rnn(x,_water)
            output = linr(hidden)
            ## 2: loss
            loss = loss_fn(output,y)
            ## 3: backward
            loss.backward()
            ## 4: update
            optimizr.step()
            optimizr.zero_grad()
        yhat = soft(output)
        combind = torch.concat([hidden,yhat],axis=1)
        ax[i][j].matshow(combind.to("cpu").data[-8:],cmap='bwr',vmin=-1,vmax=1)
fig.suptitle(r"$RNN$",size=20)
fig.tight_layout()
-
LSTM
# 25-run LSTM experiment on abcdabcD, matching the RNN grid above.
fig, ax = plt.subplots(5,5,figsize=(10,10))
for i in range(5):
    for j in range(5):
        lstm = torch.nn.LSTM(5,4).to("cuda:0")
        linr = torch.nn.Linear(4,5).to("cuda:0")
        loss_fn = torch.nn.CrossEntropyLoss()
        optimizr = torch.optim.Adam(list(lstm.parameters())+list(linr.parameters()),lr=0.1)
        _water = torch.zeros(1,4).to("cuda:0")
        for epoc in range(3000):
            ## 1: forward
            hidden, (hT,cT) = lstm(x,(_water,_water))
            output = linr(hidden)
            ## 2: loss
            loss = loss_fn(output,y)
            ## 3: backward
            loss.backward()
            ## 4: update
            optimizr.step()
            optimizr.zero_grad()
        yhat = soft(output)
        combind = torch.concat([hidden,yhat],axis=1)
        ax[i][j].matshow(combind.to("cpu").data[-8:],cmap='bwr',vmin=-1,vmax=1)
fig.suptitle(r"$LSTM$",size=20)
fig.tight_layout()
-
관찰1: LSTM이 확실히 장기기억에 강하다.
-
관찰2: LSTM은 hidden에 0이 잘 나온다.
- 사실 확실히 구분되는 특징을 판별할때는 -1,1 로 히든레이어 값들이 설정되면 명확하다.
- 히든레이어에 -1~1사이의 값이 나온다면 애매한 판단이 내려지게 된다.
- 그런데 이 애매한 판단이 어떻게 보면 문맥의 뉘앙스를 이해하는데 더 잘 맞다.
- 그런데 RNN은 -1,1로 셋팅된 상황에서 -1~1로의 변화가 더디다는 것이 문제임.