#32. ToDo

Tags: Research
Date: Nov 12, 2024
Status: Done

EmoSet MonetStyle

Model training that Taewan had mostly finished..
Code
Tip: the Colab runtime type must be set to GPU, or this won't run.
import torch
import os
from transformers import AutoModelForImageClassification, AutoImageProcessor
from datasets import load_dataset
from torch.utils.data import DataLoader
from torch.optim import Adam
import torch.nn as nn
from peft import get_peft_model, LoraConfig

# GPU setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device:", device)

# Load the datasets
train_dataset = load_dataset("xodhks/EmoSet118K_MonetStyle", split="train")
test_dataset = load_dataset("xodhks/Children_Sketch", split="train")

# Valid labels for the test dataset
test_valid_label_indices = [0, 1, 4, 5]  # only label indices that exist in Children_Sketch

# Load the image processor and model
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True)
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=6,  # number of emotion classes in the dataset
    ignore_mismatched_sizes=True
).to(device)

# Configure and apply LoRA
config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"],
)
model = get_peft_model(model, config)

# Create a directory for saving models
os.makedirs("top_models", exist_ok=True)
top_models = []

# DataLoader setup
def collate_fn(batch):
    images = [item['image'] for item in batch]
    labels = [item['label'] for item in batch]
    inputs = processor(images=images, return_tensors="pt")
    inputs['labels'] = torch.tensor(labels, dtype=torch.long)
    return inputs

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn, num_workers=4)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)

# Evaluation function
def evaluate(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            inputs = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**inputs)
            _, preds = torch.max(outputs.logits, 1)
            for pred, label in zip(preds, inputs['labels']):
                if pred.item() in test_valid_label_indices:
                    if pred.item() == label.item():
                        correct += 1
                    total += 1
    accuracy = 100 * correct / total
    return accuracy

# Model saving function: keep the 10 best checkpoints by accuracy
def save_top_models(epoch, accuracy, model, top_models):
    model_filename = f"model_epoch_{epoch + 1}_accuracy_{accuracy:.2f}.pth"
    model_path = os.path.join("top_models", model_filename)
    top_models.append((accuracy, model_path))
    top_models = sorted(top_models, key=lambda x: x[0], reverse=True)[:10]
    torch.save(model.state_dict(), model_path)
    print("\nTop 10 Models (by accuracy):")
    for i, (acc, path) in enumerate(top_models, 1):
        print(f"Rank {i}: Accuracy = {acc:.2f}%, Model Path = {path}")
    return top_models

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        optimizer.zero_grad()
        inputs = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**inputs)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

    test_accuracy = evaluate(model, test_loader)
    print(f"Test Accuracy after Epoch {epoch+1}: {test_accuracy:.2f}%")
    top_models = save_top_models(epoch, test_accuracy, model, top_models)

print("Finished Training")
 
Attached checkpoint: model_epoch_5_accuracy_43.09.pth (337,051.3 KB)
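The test code below pulls this checkpoint back down from the JANGJIWON/EmoSet118K_MonetStyle_student repo with hf_hub_download, so the locally saved .pth has to be uploaded to the Hub first. A minimal sketch of that step, assuming you are logged in (huggingface-cli login) and the repo already exists:

# Sketch: upload the saved checkpoint so hf_hub_download can fetch it later.
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj="top_models/model_epoch_5_accuracy_43.09.pth",
    path_in_repo="model_epoch_5_accuracy_43.09.pth",
    repo_id="JANGJIWON/EmoSet118K_MonetStyle_student",
)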
 

Test

  1. Test directly, without further training
    1. Code

import torch
import os
from transformers import AutoModelForImageClassification, AutoImageProcessor
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
from torchvision import transforms
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from huggingface_hub import hf_hub_download

# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Set up the model and preprocessing
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True)
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=8,
    ignore_mismatched_sizes=True
).to(device)

# Load the weights
model_weights_path = hf_hub_download(
    repo_id="JANGJIWON/EmoSet118K_MonetStyle_student",
    filename="model_epoch_5_accuracy_43.09.pth"
)
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)

# Apply LoRA
config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"]
)
model = get_peft_model(model, config)

# Preprocessing function
def preprocess_image(image):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return transform(image)

# Custom Dataset definition
class CustomDataset(Dataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        if self.transform:
            img = self.transform(img)
        label = self.label_encoder.transform([label])[0]
        return img, torch.tensor(label, dtype=torch.long)

# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")
dataset_list = [dict(item) for item in dataset]
_, test_data = train_test_split(dataset_list, test_size=0.2, random_state=42)

# Create the test data loader
test_dataset = CustomDataset(test_data, transform=preprocess_image)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Testing"):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy: {accuracy:.2f}%')
  2. Train, then test
    1. Code
      Learning rates of 0.001 and 0.0001 ⇒ overfit.
import torch
import os
from transformers import AutoModelForImageClassification, AutoImageProcessor
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
import requests
import io
from torchvision import transforms
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Load the ViT model and preprocessing
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True)
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=8,
    ignore_mismatched_sizes=True
).to(device)

# Preprocessing function
def preprocess_image(image):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return transform(image)

class CustomDataset(Dataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        if self.transform:
            img = self.transform(img)
        label = self.label_encoder.transform([label])[0]
        return img, torch.tensor(label, dtype=torch.long)

# Load the model weights and check the dataset
# model_url = "https://huggingface.co/JANGJIWON/EmoSet118K_MonetStyle_student/blob/main/model_epoch_5_accuracy_43.09.pth"
# response = requests.get(model_url)
# model_weights = io.BytesIO(response.content)
from huggingface_hub import hf_hub_download
model_weights_path = hf_hub_download(
    repo_id="JANGJIWON/EmoSet118K_MonetStyle_student",
    filename="model_epoch_5_accuracy_43.09.pth"
)
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)
# try:
#     model.load_state_dict(torch.load(model_weights, map_location='cpu', weights_only=False), strict=False)
# except RuntimeError as e:
#     print(f"Error loading state_dict: {e}")

# Configure and apply a second LoRA
config2 = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["query", "key", "value"]
)
model = get_peft_model(model, config2)

# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")

# Convert the dataset to a list of dictionaries for splitting
dataset_list = [dict(item) for item in dataset]

# Split the dataset into train and test sets (70% train, 30% test)
train_data, test_data = train_test_split(dataset_list, test_size=0.3, random_state=42)

# Create datasets and dataloaders
train_dataset = CustomDataset(train_data, transform=preprocess_image)
test_dataset = CustomDataset(test_data, transform=preprocess_image)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2)

# Optimizer and loss function
learning_rate = 0.001  # set a slightly higher learning rate
optimizer = Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Train the model
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs.logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    epoch_accuracy = 100 * correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:  # use test_loader here
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy after second LoRA tuning: {accuracy:.2f}%')
Check whether LoRA is actually being applied correctly in this code (see the sketch below).
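One quick way to check is PEFT's built-in parameter summary plus a scan of the module tree for injected adapters. A minimal sketch, assuming model is the PeftModel returned by get_peft_model above:

# Sketch: sanity-check LoRA injection. print_trainable_parameters() should
# report a small trainable fraction, and the module names should include
# lora_A / lora_B entries under the attention projections.
model.print_trainable_parameters()

lora_modules = [name for name, _ in model.named_modules() if "lora" in name.lower()]
print(f"{len(lora_modules)} LoRA submodules, e.g.:", lora_modules[:5])

# Trainable parameters should be (almost) exclusively LoRA weights.
for name, p in model.named_parameters():
    if p.requires_grad and "lora" not in name.lower():
        print("non-LoRA trainable param:", name)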
 
→ Let's try a CLIP model. (It is trained on unlabeled data → reportedly performs well at zero-shot.)
Let's try a foundation model.
An LLM or a vision foundation model.
But is the ViT we loaded actually pre-trained? (google/vit-base-patch16-224 is pre-trained on ImageNet-21k, so yes.)
Question: CLIP was said to have no labels, so can LoRA layers be applied to it? → Does PEFT not apply here? ⇒ It does work (see the sketch below).
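CLIP is trained contrastively on image-text pairs rather than class labels, but its vision tower is still a stack of attention blocks, so PEFT can inject LoRA the same way. A minimal sketch; q_proj/k_proj/v_proj are the attention projection names transformers uses for CLIP, and the fine-tuning objective (contrastive loss, or a classification head on the image features) is still up to us:

# Sketch: applying LoRA to CLIP's attention projections with PEFT.
# CLIP has no label head; only the LoRA adapter weights become trainable.
from transformers import CLIPModel, CLIPProcessor
from peft import get_peft_model, LoraConfig

clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

clip_lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "k_proj", "v_proj"],  # CLIP attention module names
)
clip = get_peft_model(clip, clip_lora_config)
clip.print_trainable_parameters()  # only the LoRA adapters are trainable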
 
Train once more with this: openai/clip-vit-large-patch14 · Hugging Face (https://huggingface.co/openai/clip-vit-large-patch14)
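Before that extra training pass, CLIP's zero-shot scoring (the reason for trying it, per the note above) can be checked directly by comparing image embeddings against text prompts. A minimal sketch; the emotion labels, prompt template, and input file name are placeholders, not the project's actual label set:

# Sketch: zero-shot emotion classification with CLIP. The candidate labels
# below are placeholders; swap in the project's actual classes.
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

labels = ["happiness", "sadness", "anger", "fear"]  # placeholder classes
prompts = [f"a children's sketch expressing {l}" for l in labels]

image = Image.open("example_sketch.png")  # hypothetical input file
inputs = processor(text=prompts, images=image, return_tensors="pt", padding=True).to(device)

with torch.no_grad():
    outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)  # image-text similarity -> class probabilities
print(dict(zip(labels, probs[0].tolist())))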
 

Test Set Split

Reference: the Hugging Face Datasets "Load" documentation page.
⇒ Just split it by hand and upload the splits separately (see the sketch below).
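The datasets library can do that split reproducibly and push both splits to the Hub in one go, so everyone tests on the same 20%. A minimal sketch; the target repo name is hypothetical:

# Sketch: split once with a fixed seed and push both splits to the Hub,
# freezing train/test membership for the whole team.
from datasets import load_dataset

dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")
splits = dataset.train_test_split(test_size=0.2, seed=42)  # DatasetDict with 'train'/'test'
splits.push_to_hub("JANGJIWON/UGRP_sketchset_textbook_split")  # hypothetical repo name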
 
 
Purchase Colab directly.