import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
torch.empty(2), torch.ones(2), torch.zeros(2)
(tensor([1.1422e-40, 4.5703e-35]), tensor([1., 1.]), tensor([0., 0.]))
torch.rand(5), torch.randn(5)
(tensor([0.9261, 0.7819, 0.3198, 0.2448, 0.3161]), tensor([ 0.0261, -1.8937, -1.3276, 2.1708, -1.8366]))
t = torch.tensor(1)
t
tensor(1)
t = torch.tensor([1])
t
tensor([1])
torch.tensor([1, 2, 3]).dtype
torch.int64
torch.tensor([1.0, 2.0, 3.0]).dtype
torch.float32
torch.tensor([1], dtype=torch.int32).dtype
torch.int32
v = np.array([1, 2, 3])
t = torch.from_numpy(v)
t
tensor([1, 2, 3])
v += 1
t
tensor([2, 3, 4])
u = t.numpy()
u
array([2, 3, 4])
t.add_(1)
u
array([3, 4, 5])
t = torch.tensor(range(1,7)).view(-1, 3)
t
tensor([[1, 2, 3],
[4, 5, 6]])
t[0]
tensor([1, 2, 3])
t[0, 1]
tensor(2)
s = t[0, 1].item()
s, type(s)
(2, int)
t[:, 2], t[1, :]
(tensor([3, 6]), tensor([4, 5, 6]))
t + t
tensor([[ 2, 4, 6],
[ 8, 10, 12]])
t * t
tensor([[ 1, 4, 9],
[16, 25, 36]])
t ** 2
tensor([[ 1, 4, 9],
[16, 25, 36]])
t @ t.T # 2x3 . 3x2 = 2x2
tensor([[14, 32],
[32, 77]])
t.add(2)
tensor([[3, 4, 5],
[6, 7, 8]])
t.add_(2), t
(tensor([[3, 4, 5],
[6, 7, 8]]),
tensor([[3, 4, 5],
[6, 7, 8]]))
t
tensor([[3, 4, 5],
[6, 7, 8]])
t.max(), torch.max(t)
(tensor(8), tensor(8))
t.max(dim=0), t.min(dim=0)
(torch.return_types.max( values=tensor([6, 7, 8]), indices=tensor([1, 1, 1])), torch.return_types.min( values=tensor([3, 4, 5]), indices=tensor([0, 0, 0])))
torch.max(t, dim=1).values, t.max(dim=1).indices
(tensor([5, 8]), tensor([2, 2]))
t.argmax(dim=1), t.argmax(dim=1, keepdim=True)
(tensor([2, 2]),
tensor([[2],
[2]]))
t = torch.tensor(1.0, requires_grad=True)
t
tensor(1., requires_grad=True)
t.detach(), t
(tensor(1.), tensor(1., requires_grad=True))
t.requires_grad_(False), t
(tensor(1.), tensor(1.))
x = torch.tensor(1.5, requires_grad=True)
y = 3*x**2
y
tensor(6.7500, grad_fn=<MulBackward0>)
y.backward()
x.grad
tensor(9.)
x.grad.zero_(), x.grad
(tensor(0.), tensor(0.))
with torch.no_grad():
z = 0.5*x
try:
z.backward()
except:
print('Exception')
z, x.grad
Exception
(tensor(0.7500), tensor(0.))
logits = torch.tensor([[1.0, 2.0, 3.0]])
probs = torch.softmax(logits, dim=-1)
probs
tensor([[0.0900, 0.2447, 0.6652]])
ce_loss = nn.CrossEntropyLoss()
y0 = torch.tensor([0])
y2 = torch.tensor([2])
# CrossEntropyLoss()() takes in *logits (NOT probs)* and *class labels*
ce_loss(logits, y0), ce_loss(logits, y2)
(tensor(2.4076), tensor(0.4076))
-np.log(probs.numpy())
array([[2.408, 1.408, 0.408]], dtype=float32)
nll_loss = nn.NLLLoss()
# NOTE: NLLLoss expects *log*-probabilities (e.g. log_softmax output); passing raw
# probs as below just returns -probs[target], not a proper loss
nll_loss(probs, y0), nll_loss(probs, y2)
(tensor(-0.0900), tensor(-0.6652))
bce_loss = nn.BCELoss()
bce_loss(torch.tensor([0.9]), torch.tensor([0.0]))
tensor(2.3026)
-(1-0)*np.log(1 - 0.9)
2.303
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32).view(-1, 1)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32).view(-1, 1)
w = torch.tensor([[0.0]], dtype=torch.float32, requires_grad=True)
def forward(X, w):
    """Linear model: predict Y = X·w via matrix product (e.g. 4x1 @ 1x1 -> 4x1)."""
    return torch.matmul(X, w)
def loss(Y_pred, Y):
    """Mean-squared-error between predictions and targets."""
    diff = Y_pred - Y
    return torch.mean(diff * diff)
learning_rate = 0.01
for epoch in range(40):
Y_pred = forward(X, w)
j = loss(Y_pred, Y)
j.backward()
with torch.no_grad():
w -= learning_rate * w.grad
w.grad.zero_()
if epoch % 10 == 9:
print(f'epoch {epoch+1}: w = {w.item():.3f}, loss = {j.item():.8f}')
epoch 10: w = 1.606, loss = 1.60939169 epoch 20: w = 1.922, loss = 0.06237914 epoch 30: w = 1.985, loss = 0.00241778 epoch 40: w = 1.997, loss = 0.00009371
w = torch.tensor([[0.0]], dtype=torch.float32, requires_grad=True)
optimizer = torch.optim.SGD([w], lr=learning_rate)
loss = nn.MSELoss()
for epoch in range(40):
Y_pred = forward(X, w)
j = loss(Y_pred, Y)
j.backward()
optimizer.step()
optimizer.zero_grad()
if epoch % 10 == 9:
print(f'epoch {epoch+1}: w = {w.item():.3f}, loss = {j.item():.8f}')
epoch 10: w = 1.606, loss = 1.60939169 epoch 20: w = 1.922, loss = 0.06237914 epoch 30: w = 1.985, loss = 0.00241778 epoch 40: w = 1.997, loss = 0.00009371
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
for epoch in range(30):
Y_pred = forward(X, w)
j = loss(Y_pred, Y)
j.backward()
optimizer.step()
optimizer.zero_grad()
lr_scheduler.step()
if epoch % 10 == 9:
lr = optimizer.state_dict()['param_groups'][0]['lr']
print(f'epoch {epoch+1}: w = {w.item():.3f}, lr = {lr}, loss = {j.item():.8f}')
epoch 10: w = 1.999, lr = 0.005, loss = 0.00000363 epoch 20: w = 2.000, lr = 0.0025, loss = 0.00000064 epoch 30: w = 2.000, lr = 0.00125, loss = 0.00000028
model = nn.Linear(in_features=1, out_features=1)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for epoch in range(40):
Y_pred = model(X)
j = loss(Y_pred, Y)
j.backward()
optimizer.step()
optimizer.zero_grad()
if epoch % 10 == 9:
w, b = model.parameters() # unpack parameters
print(f'epoch {epoch+1}: w = {w.item():.3f}, loss = {j.item():.8f}')
epoch 10: w = 1.948, loss = 0.65129113 epoch 20: w = 2.122, loss = 0.05157164 epoch 30: w = 2.147, loss = 0.03403451 epoch 40: w = 2.147, loss = 0.03167749
predicted = model(X).detach()
predicted
tensor([[1.7123],
[3.8591],
[6.0059],
[8.1526]])
with torch.no_grad():
predicted = model(X)
predicted
tensor([[1.7123],
[3.8591],
[6.0059],
[8.1526]])
X = torch.tensor([[2, 1], [1, 2]], dtype=torch.float32)
Y = torch.tensor([0, 1], dtype=torch.float32).view(-1, 1)
class LogR(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Maps `in_features` inputs to a single probability in (0, 1).
    """

    def __init__(self, in_features):
        super().__init__()
        self.linear = nn.Linear(in_features, 1)

    def forward(self, x):
        # Affine projection to one logit, squashed to a probability.
        return torch.sigmoid(self.linear(x))
model = LogR(X.shape[-1])
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss = nn.BCELoss()
for epoch in range(50):
Y_pred = model(X)
j = loss(Y_pred, Y)
j.backward()
optimizer.step()
optimizer.zero_grad()
if epoch % 10 == 9:
print(f'epoch {epoch+1}: loss = {j.item():.8f}')
epoch 10: loss = 0.58357650 epoch 20: loss = 0.47398025 epoch 30: loss = 0.40872616 epoch 40: loss = 0.35784590 epoch 50: loss = 0.31676430
with torch.no_grad():
Y_pred = model(X)
Y_pred
tensor([[0.2693],
[0.7317]])
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
torch.tensor([1], device=device), torch.tensor([2]).to(device)
(tensor([1]), tensor([2]))
class Model(nn.Module):
    """Binary classifier over 2 features: Linear(2 -> 1) + sigmoid output."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(2, 1)

    def forward(self, x):
        logit = self.linear(x)
        return torch.sigmoid(logit)
model = Model()
torch.save(model, 'model.pth')
# ---
model = torch.load('model.pth')
model.eval()
for param in model.parameters():
print(param)
Parameter containing: tensor([[-0.0505, 0.6279]], requires_grad=True) Parameter containing: tensor([0.0782], requires_grad=True)
model = Model()
torch.save(model.state_dict(), 'model_state.pth')
print(model.state_dict())
# ---
model = Model()
model.load_state_dict(torch.load('model_state.pth'))
# model.load_state_dict(torch.load('model_state.pth', map_location=device))
model.eval()
print(model.state_dict())
OrderedDict([('linear.weight', tensor([[ 0.3975, -0.2836]])), ('linear.bias', tensor([0.2074]))])
OrderedDict([('linear.weight', tensor([[ 0.3975, -0.2836]])), ('linear.bias', tensor([0.2074]))])
checkpoint = {
"model_state": model.state_dict(),
"optim_state": optimizer.state_dict()
}
torch.save(checkpoint, 'checkpoint.pth')
# ---
model = Model()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state'])
optimizer.load_state_dict(checkpoint['optim_state'])
optimizer
SGD (
Parameter Group 0
dampening: 0
lr: 0.1
momentum: 0
nesterov: False
weight_decay: 0
)
class MyDataset(Dataset):
    """Tiny in-memory dataset of two (feature, label) pairs for DataLoader demos."""

    def __init__(self):
        super().__init__()
        # Features: two 2-D points; labels: one float target per sample, shape (2, 1).
        self.X = torch.tensor([[2, 1], [1, 2]], dtype=torch.float32)
        self.Y = torch.tensor([0, 1], dtype=torch.float32).view(-1, 1)

    def __getitem__(self, idx):
        # One (sample, target) pair.
        return self.X[idx], self.Y[idx]

    def __len__(self):
        return self.X.shape[0]
ds = MyDataset()
len(ds), ds[0]
(2, (tensor([2., 1.]), tensor([0.])))
dl = DataLoader(ds, batch_size=2, shuffle=True)
next(iter(dl))
[tensor([[2., 1.],
[1., 2.]]),
tensor([[0.],
[1.]])]
for epoch in range(50):
for X, Y in dl:
X = X.to(device)
Y = Y.to(device)
Y_pred = model(X)
j = loss(Y_pred, Y)
optimizer.zero_grad()
j.backward()
optimizer.step()
if epoch % 10 == 9:
print(f'epoch {epoch+1}: loss = {j.item():.8f}')
epoch 10: loss = 0.74077058 epoch 20: loss = 0.61800981 epoch 30: loss = 0.52327919 epoch 40: loss = 0.44903362 epoch 50: loss = 0.39019936
# ds = torchvision.datasets.ImageFolder(path)
transform = transforms.Compose([transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
train_dataset = torchvision.datasets.CIFAR10(
root='~/pytorch_datasets', download=True, train=True, transform=transform)
Files already downloaded and verified
train_dataset[0][0].shape # NOTE: PyTorch uses Channel-FIRST (TF uses Channel-Last)
torch.Size([3, 32, 32])
# Transfer learning: load a pretrained backbone, freeze it, then attach a new
# trainable classification head. The backbone must be frozen BEFORE the head is
# replaced — freezing after (as originally written) also freezes the brand-new
# fc layer, leaving the model with zero trainable parameters.
model = torchvision.models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False  # freeze all pretrained weights
# Fresh head for 10 classes; a new nn.Linear has requires_grad=True by default,
# so only this layer will be updated during training.
model.fc = nn.Linear(model.fc.in_features, 10)