Neural networks are function approximators built by stacking affine transformations, each followed by a nonlinear activation.
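Concretely, a two-layer MLP with an elementwise nonlinearity σ (e.g. ReLU) computes

    h = σ(W_1 x + b_1),    y = W_2 h + b_2,

which is exactly the structure implemented in section 1.4 below.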
1.1. Linear Neural Networks
1.2. Multi-Dimensional Input/Output
1.4. Multi-Layer Perceptron
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class MultiLayerPerceptronClass(nn.Module):
    """
    Multilayer Perceptron (MLP) Class
    """
    def __init__(self, name='mlp', xdim=784, hdim=256, ydim=10):
        # xdim = 28*28 flattened pixel input, hdim = 256 hidden units (hyperparameter),
        # ydim = 10 output logits for the digit classes 0-9
        super(MultiLayerPerceptronClass, self).__init__()
        self.name = name
        self.xdim = xdim
        self.hdim = hdim
        self.ydim = ydim
        self.lin_1 = nn.Linear(self.xdim, self.hdim)  # h = W_1 x + b_1
        self.lin_2 = nn.Linear(self.hdim, self.ydim)  # y = W_2 h + b_2
        self.init_param()  # initialize parameters

    def init_param(self):
        nn.init.kaiming_normal_(self.lin_1.weight)  # He (Kaiming) initialization
        nn.init.zeros_(self.lin_1.bias)
        nn.init.kaiming_normal_(self.lin_2.weight)
        nn.init.zeros_(self.lin_2.bias)

    def forward(self, x):
        net = x
        net = self.lin_1(net)
        net = F.relu(net)  # activation function
        net = self.lin_2(net)
        return net
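The cells below assume that device, train_iter, and test_iter were defined earlier in the notebook; a minimal sketch of that setup using torchvision's MNIST loaders (the batch size of 256 is an arbitrary choice here):

    import torch
    from torch.utils.data import DataLoader
    from torchvision import datasets, transforms

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    mnist_train = datasets.MNIST(root='./data', train=True, download=True,
                                 transform=transforms.ToTensor())
    mnist_test = datasets.MNIST(root='./data', train=False, download=True,
                                transform=transforms.ToTensor())
    train_iter = DataLoader(mnist_train, batch_size=256, shuffle=True)
    test_iter = DataLoader(mnist_test, batch_size=256, shuffle=False)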
M = MultiLayerPerceptronClass(name='mlp', xdim=784, hdim=256, ydim=10).to(device)
loss = nn.CrossEntropyLoss()
optm = optim.Adam(M.parameters(), lr=1e-3)
print("Done.")
print ("Start training.")
M.init_param() # initialize parameters
M.train()
EPOCHS,print_every = 10,1
for epoch in range(EPOCHS):
loss_val_sum = 0
for batch_in, batch_out in train_iter:
# Forward path
y_pred = M.forward(batch_in.view(-1, 28*28).to(device))
loss_out = loss(y_pred,batch_out.to(device)) # nn.CrossEntropyLoss()
# Update
# FILL IN HERE # reset gradient
optm.zero_grad()
# FILL IN HERE # backpropagate
loss_out.backward() # 미분해서 각각의 그래디언트 백터를 구해주고
# FILL IN HERE # optimizer update
optm.step() # 원래 파라미터의 그래디언트를 적용
loss_val_sum += loss_out
loss_val_avg = loss_val_sum/len(train_iter)
# Print
if ((epoch%print_every)==0) or (epoch==(EPOCHS-1)):
train_accr = func_eval(M,train_iter,device)
test_accr = func_eval(M,test_iter,device)
print ("epoch:[%d] loss:[%.3f] train_accr:[%.3f] test_accr:[%.3f]."%
(epoch,loss_val_avg,train_accr,test_accr))
print ("Done")