What exactly is the FL (plain fine-tuning) that LoRA and Adapter papers use as their comparison baseline?
[FL]
what
The fine-tuning used as the comparison baseline usually means "traditional" fine-tuning, i.e., tuning that updates some or all of the model's weights.
Using the four methods covered on the page above, freezing parameters should still give a memory benefit!?
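As a quick sanity check on that memory claim, here is a small sketch I added myself (not from the page above) that counts trainable vs. total parameters of a timm ViT before and after freezing everything except the head:

import timm

# Hypothetical helper for comparing parameter counts (my addition, not from the notes above)
def count_params(model):
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total, trainable

model = timm.create_model('vit_base_patch16_224', pretrained=False, num_classes=8)

# Before freezing: everything is trainable (traditional full fine-tuning)
print(count_params(model))   # roughly (86M, 86M) for ViT-Base

# Freeze everything except the classification head
for name, param in model.named_parameters():
    if 'head' not in name:
        param.requires_grad = False

# After freezing: only the head's weight and bias remain trainable (768 * 8 + 8 params)
print(count_params(model))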
code → add FC layer
Code I got by asking GPT
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import Subset, DataLoader
from torch.utils.data.dataset import Dataset
from PIL import Image
import timm

# Load the ViT model and its preprocessing
def load_vit_model():
    model = timm.create_model('vit_base_patch16_224', pretrained=True)
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return model, preprocess

class FilteredCIFAR10(Dataset):
    def __init__(self, root, train=True, transform=None, download=False):
        self.cifar10 = datasets.CIFAR10(root=root, train=train, transform=transform, download=download)
        self.data = []
        self.targets = []
        for img, target in zip(self.cifar10.data, self.cifar10.targets):
            if target < 8:  # Only keep classes 0-7
                self.data.append(img)
                self.targets.append(target)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img, target = self.data[idx], self.targets[idx]
        img = Image.fromarray(img)
        if self.cifar10.transform:
            img = self.cifar10.transform(img)
        return img, target

class ViTFineTuning(nn.Module):
    def __init__(self, base_model, num_classes):
        super(ViTFineTuning, self).__init__()
        self.base_model = base_model
        self.fc = nn.Linear(self.base_model.head.in_features, num_classes)  # Add a new fully connected layer
        self.base_model.head = self.fc  # Replace the model's head with the new FC layer

    def forward(self, x):
        x = self.base_model.forward_features(x)
        x = x[:, 0]  # Use only the first (class) token
        x = self.fc(x)
        return x

# Use the GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the ViT model and the preprocessing function
base_model, preprocess = load_vit_model()
model = ViTFineTuning(base_model, num_classes=8).to(device)  # num_classes changed

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # Learning rate scheduler

# Load the data with the preprocessing function
train_dataset = FilteredCIFAR10(root='./data', train=True, download=True, transform=preprocess)
test_dataset = FilteredCIFAR10(root='./data', train=False, download=True, transform=preprocess)

# Select 100 samples each from the training and test datasets
train_subset = Subset(train_dataset, range(100))
test_subset = Subset(test_dataset, range(100))

# Data loaders
train_loader = DataLoader(train_subset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_subset, batch_size=64, shuffle=False, num_workers=2)

# Training loop (kept simple)
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    scheduler.step()  # Learning rate scheduler step
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    # Compute accuracy after each epoch
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        print(f'Accuracy after epoch {epoch+1}: {accuracy:.2f} %')

print('Finished Training')

# Final accuracy
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print(f'Final Test Accuracy: {100 * correct / total:.2f} %')
Code modified so that some layers are frozen and only the rest are trained
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import Subset, DataLoader
from torch.utils.data.dataset import Dataset
from PIL import Image
import timm

# Load the ViT model and its preprocessing
def load_vit_model():
    model = timm.create_model('vit_base_patch16_224', pretrained=True)
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return model, preprocess

class FilteredCIFAR10(Dataset):
    def __init__(self, root, train=True, transform=None, download=False):
        self.cifar10 = datasets.CIFAR10(root=root, train=train, transform=transform, download=download)
        self.data = []
        self.targets = []
        for img, target in zip(self.cifar10.data, self.cifar10.targets):
            if target < 8:  # Only keep classes 0-7
                self.data.append(img)
                self.targets.append(target)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img, target = self.data[idx], self.targets[idx]
        img = Image.fromarray(img)
        if self.cifar10.transform:
            img = self.cifar10.transform(img)
        return img, target

class ViTFineTuning(nn.Module):
    def __init__(self, base_model, num_classes):
        super(ViTFineTuning, self).__init__()
        self.base_model = base_model
        self.fc = nn.Linear(self.base_model.head.in_features, num_classes)  # Add a new fully connected layer
        self.base_model.head = self.fc  # Replace the model's head with the new FC layer

    def forward(self, x):
        x = self.base_model.forward_features(x)
        x = x[:, 0]  # Use only the first (class) token
        x = self.fc(x)
        return x

# Use the GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the ViT model and the preprocessing function
base_model, preprocess = load_vit_model()
model = ViTFineTuning(base_model, num_classes=8).to(device)  # num_classes changed

# Train only some layers and freeze the rest
for name, param in model.named_parameters():
    if 'head' not in name:  # Freeze every parameter except the head
        param.requires_grad = False

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # Learning rate scheduler

# Load the data with the preprocessing function
train_dataset = FilteredCIFAR10(root='./data', train=True, download=True, transform=preprocess)
test_dataset = FilteredCIFAR10(root='./data', train=False, download=True, transform=preprocess)

# Select 100 samples each from the training and test datasets
train_subset = Subset(train_dataset, range(100))
test_subset = Subset(test_dataset, range(100))

# Data loaders
train_loader = DataLoader(train_subset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_subset, batch_size=64, shuffle=False, num_workers=2)

# Training loop (kept simple)
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    scheduler.step()  # Learning rate scheduler step
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    # Compute accuracy after each epoch
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        print(f'Accuracy after epoch {epoch+1}: {accuracy:.2f} %')

print('Finished Training')

# Final accuracy
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print(f'Final Test Accuracy: {100 * correct / total:.2f} %')
Brief explanation of the code
: The ViT model's head is replaced with a new FC layer, and that layer is trained.
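(Side note I added: assuming timm's ViT, the same head swap can be done without the wrapper class by assigning a new nn.Linear to model.head directly. A minimal sketch, not the code actually used above:)

import timm
import torch
import torch.nn as nn

model = timm.create_model('vit_base_patch16_224', pretrained=True)

# Replace only the classification head with a fresh FC layer for 8 classes
model.head = nn.Linear(model.head.in_features, 8)

# The rest of the network is untouched; timm's own forward() now ends in the new head
x = torch.randn(1, 3, 224, 224)
print(model(x).shape)  # torch.Size([1, 8])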
result → add FC layer
It actually works well?
It works really well..
Epoch [1/20], Loss: 1.7136    Accuracy after epoch 1: 45.00 %
Epoch [2/20], Loss: 1.1403    Accuracy after epoch 2: 73.00 %
Epoch [3/20], Loss: 0.2301    Accuracy after epoch 3: 89.00 %
Epoch [4/20], Loss: 0.0677    Accuracy after epoch 4: 93.00 %
Epoch [5/20], Loss: 0.0132    Accuracy after epoch 5: 95.00 %
Epoch [6/20], Loss: 0.0053    Accuracy after epoch 6: 93.00 %
Epoch [7/20], Loss: 0.0082    Accuracy after epoch 7: 93.00 %
Epoch [8/20], Loss: 0.0074    Accuracy after epoch 8: 93.00 %
Epoch [9/20], Loss: 0.0075    Accuracy after epoch 9: 93.00 %
Epoch [10/20], Loss: 0.0085   Accuracy after epoch 10: 93.00 %
Epoch [11/20], Loss: 0.0031   Accuracy after epoch 11: 93.00 %
Epoch [12/20], Loss: 0.0016   Accuracy after epoch 12: 93.00 %
Epoch [13/20], Loss: 0.0075   Accuracy after epoch 13: 94.00 %
Epoch [14/20], Loss: 0.0049   Accuracy after epoch 14: 94.00 %
Epoch [15/20], Loss: 0.0034   Accuracy after epoch 15: 94.00 %
Epoch [16/20], Loss: 0.0045   Accuracy after epoch 16: 94.00 %
Epoch [17/20], Loss: 0.0067   Accuracy after epoch 17: 94.00 %
Epoch [18/20], Loss: 0.0019   Accuracy after epoch 18: 94.00 %
Epoch [19/20], Loss: 0.0058   Accuracy after epoch 19: 94.00 %
Epoch [20/20], Loss: 0.0061   Accuracy after epoch 20: 94.00 %
Finished Training
Final Test Accuracy: 94.00 %
→ Here I replaced and trained only the FC layer, i.e., the output layer. This high accuracy is presumably possible because the ImageNet and CIFAR-10 images are fairly similar to each other.
→ And since only the FC layer was trained, training was naturally bound to be faster than with LoRA!
Q
Is FL always worse than LoRA or Adapter?
Nope. If the dataset is similar to the pretrained model's dataset, FL can give better results.
In this case, the new dataset (CIFAR-10) is similar to the original ImageNet data, which seems to be why this result came out.
Why was FL faster than LoRA? / Why did it use less memory?
Probably because only the FC layer was trained this time.
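For contrast with the Q&A above, a minimal LoRA-style linear layer sketch I wrote myself (a simplified illustration, not the code from the LoRA page): the pretrained weight stays frozen and only the small low-rank matrices A and B are trained, so the trainable parameter count stays small even when many layers are adapted.

import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    """Wrap a frozen pretrained linear layer with a trainable low-rank update (simplified sketch)."""
    def __init__(self, base_linear: nn.Linear, r: int = 4, alpha: float = 8.0):
        super().__init__()
        self.base = base_linear
        self.base.weight.requires_grad = False          # freeze the pretrained weight
        if self.base.bias is not None:
            self.base.bias.requires_grad = False        # freeze the pretrained bias
        in_f, out_f = base_linear.in_features, base_linear.out_features
        self.lora_A = nn.Parameter(torch.randn(r, in_f) * 0.01)  # trainable
        self.lora_B = nn.Parameter(torch.zeros(out_f, r))        # trainable, zero-initialized
        self.scale = alpha / r

    def forward(self, x):
        # y = frozen base output + scaled low-rank update (x A^T B^T)
        return self.base(x) + self.scale * (x @ self.lora_A.T @ self.lora_B.T)

# Example: adapt a 768 -> 768 projection like the ones inside a ViT block (hypothetical usage)
layer = LoRALinear(nn.Linear(768, 768), r=4)
print(sum(p.numel() for p in layer.parameters() if p.requires_grad))  # 2 * 4 * 768 = 6144 trainable params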
code → add FC layer and train some layer
Train some of the layers!
What's the difference between a block and a layer?
I don't know the details yet…
A block seems to be a term used in transformer models..
A bundle of several layers seems to be called a block.. (in ViT-Base there are 12 blocks, and each block contains a multi-head self-attention layer and an MLP)
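A quick way to check what timm means by a "block" (a small sketch I added, not from the notes): each entry of model.blocks is one transformer encoder block, and its parameter names show which prefixes the freezing loop below matches on.

import timm

model = timm.create_model('vit_base_patch16_224', pretrained=False)

print(len(model.blocks))   # 12 blocks in ViT-Base
print(model.blocks[11])    # the last block: norm1 -> attention -> norm2 -> MLP

# Full parameter names of the last block, as seen by model.named_parameters()
for name, _ in model.blocks[11].named_parameters():
    print('blocks.11.' + name)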
This is the part that trains only some of the blocks:
# Train only some layers and freeze the rest
for name, param in model.named_parameters():
    if 'head' not in name and 'blocks.10' not in name and 'blocks.11' not in name:
        # Freeze everything except the last two blocks and the head
        param.requires_grad = False
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import Subset, DataLoader
from torch.utils.data.dataset import Dataset
from PIL import Image
import timm

# Load the ViT model and its preprocessing
def load_vit_model():
    model = timm.create_model('vit_base_patch16_224', pretrained=True)
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return model, preprocess

class FilteredCIFAR10(Dataset):
    def __init__(self, root, train=True, transform=None, download=False):
        self.cifar10 = datasets.CIFAR10(root=root, train=train, transform=transform, download=download)
        self.data = []
        self.targets = []
        for img, target in zip(self.cifar10.data, self.cifar10.targets):
            if target < 8:  # Only keep classes 0-7
                self.data.append(img)
                self.targets.append(target)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img, target = self.data[idx], self.targets[idx]
        img = Image.fromarray(img)
        if self.cifar10.transform:
            img = self.cifar10.transform(img)
        return img, target

class ViTFineTuning(nn.Module):
    def __init__(self, base_model, num_classes):
        super(ViTFineTuning, self).__init__()
        self.base_model = base_model
        self.fc = nn.Linear(self.base_model.head.in_features, num_classes)  # Add a new fully connected layer
        self.base_model.head = self.fc  # Replace the model's head with the new FC layer

    def forward(self, x):
        x = self.base_model.forward_features(x)
        x = x[:, 0]  # Use only the first (class) token
        x = self.fc(x)
        return x

# Use the GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the ViT model and the preprocessing function
base_model, preprocess = load_vit_model()
model = ViTFineTuning(base_model, num_classes=8).to(device)  # num_classes changed

# Train only some layers and freeze the rest
for name, param in model.named_parameters():
    if 'head' not in name and 'blocks.10' not in name and 'blocks.11' not in name:
        # Freeze everything except the last two blocks and the head
        param.requires_grad = False

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # Learning rate scheduler

# Load the data with the preprocessing function
train_dataset = FilteredCIFAR10(root='./data', train=True, download=True, transform=preprocess)
test_dataset = FilteredCIFAR10(root='./data', train=False, download=True, transform=preprocess)

# Select 100 samples each from the training and test datasets
train_subset = Subset(train_dataset, range(100))
test_subset = Subset(test_dataset, range(100))

# Data loaders
train_loader = DataLoader(train_subset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_subset, batch_size=64, shuffle=False, num_workers=2)

# Training loop (kept simple)
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    scheduler.step()  # Learning rate scheduler step
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    # Compute accuracy after each epoch
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        print(f'Accuracy after epoch {epoch+1}: {accuracy:.2f} %')

print('Finished Training')

# Final accuracy
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print(f'Final Test Accuracy: {100 * correct / total:.2f} %')
result → add FC layer and train some layer
Good!
Epoch [1/20], Loss: 1.8082    Accuracy after epoch 1: 52.00 %
Epoch [2/20], Loss: 0.6429    Accuracy after epoch 2: 93.00 %
Epoch [3/20], Loss: 0.0665    Accuracy after epoch 3: 92.00 %
Epoch [4/20], Loss: 0.0219    Accuracy after epoch 4: 94.00 %
Epoch [5/20], Loss: 0.0014    Accuracy after epoch 5: 94.00 %
Epoch [6/20], Loss: 0.0002    Accuracy after epoch 6: 94.00 %
Epoch [7/20], Loss: 0.0000    Accuracy after epoch 7: 95.00 %
Epoch [8/20], Loss: 0.0002    Accuracy after epoch 8: 95.00 %
Epoch [9/20], Loss: 0.0000    Accuracy after epoch 9: 95.00 %
Epoch [10/20], Loss: 0.0003   Accuracy after epoch 10: 95.00 %
Epoch [11/20], Loss: 0.0001   Accuracy after epoch 11: 95.00 %
Epoch [12/20], Loss: 0.0002   Accuracy after epoch 12: 95.00 %
Epoch [13/20], Loss: 0.0000   Accuracy after epoch 13: 95.00 %
Epoch [14/20], Loss: 0.0000   Accuracy after epoch 14: 95.00 %
Epoch [15/20], Loss: 0.0000   Accuracy after epoch 15: 95.00 %
Epoch [16/20], Loss: 0.0000   Accuracy after epoch 16: 95.00 %
Epoch [17/20], Loss: 0.0000   Accuracy after epoch 17: 95.00 %
Epoch [18/20], Loss: 0.0003   Accuracy after epoch 18: 95.00 %
Epoch [19/20], Loss: 0.0004   Accuracy after epoch 19: 95.00 %
Epoch [20/20], Loss: 0.0002   Accuracy after epoch 20: 95.00 %
Finished Training
code without GPT…
I really should try writing the code without GPT too…