크롤링으로 데이터 수집 해보기
- 설문 조사 데이터 셋 json 파일 만들어 보기
- 태완님 학습시킨 모델 → sketch 데이터로 한 번 더 학습 시켜보기 → 테스트 해보기
- 태완님 학습시킨 모델 → 크롤링 데이터로 학습 시켜 보기 → sketch 데이터로 테스트 해보기
- 태완님 학습시킨 모델 → 크롤링 데이터로 학습 시켜 보기 → sketch 데이터로 한 번 더 학습 시켜보기 → sketch 데이터로 테스트 해보기
그림을 스케치로 바로 바꿔주는 generator(생성 모델)가 잘 없다..
Task1
완전히 자동으로는 못하겠다.. → 내가 모아서 적당히 이상한 건 없애고 200개를 모아보자!
ToDo Data 거르기 + 파일 자동 이름 바꿔주는 코드 작성하기
- hugging에 올려보기
Task3
3. 진행 → only test
# ------------------------------------------------------------------
# Task3 step 3: test-only evaluation script.
# Loads a ViT classifier, wraps it with LoRA adapters, restores
# fine-tuned weights from the Hugging Face Hub, and measures accuracy
# on a random subset of the Children_Sketch dataset.
# ------------------------------------------------------------------
import io
import os
import random

import requests
import torch
import torch.nn as nn
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from sklearn.preprocessing import LabelEncoder
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import transforms
from transformers import AutoImageProcessor, AutoModelForImageClassification

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

processor = AutoImageProcessor.from_pretrained(
    "google/vit-base-patch16-224", use_fast=True
)
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=8,                  # total number of labels
    ignore_mismatched_sizes=True,  # replace the 1000-class ImageNet head
).to(device)

# Build the transform pipeline once; the original re-created the
# Compose object on every single image.
_TRANSFORM = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])


def preprocess_image(image):
    """Resize to 224x224, convert to tensor, and normalize to [-1, 1]."""
    return _TRANSFORM(image)


class CustomDataset(Dataset):
    """Wrap an HF dataset and encode its string labels as integer ids.

    NOTE(review): the LabelEncoder is fitted on whatever split it is
    given; two instances built from different splits may disagree on
    the label->id mapping if a split is missing a class.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]  # collect every label
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        if self.transform:
            img = self.transform(img)
        # Map the string label to its integer id.
        label = self.label_encoder.transform([label])[0]
        return img, torch.tensor(label, dtype=torch.long)


# Download the fine-tuned checkpoint. Fail loudly on HTTP errors
# instead of silently handing an error page to torch.load.
model_url = "https://huggingface.co/JANGJIWON/UGRP1.0/resolve/main/model_epoch_3_accuracy_23.17.pth"
response = requests.get(model_url)
response.raise_for_status()
model_weights = io.BytesIO(response.content)

test_dataset = load_dataset("xodhks/Children_Sketch", split="train")

# LoRA adapters on the attention projection matrices.
config = LoraConfig(
    r=8,  # rank
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"],
)
model = get_peft_model(model, config)

# strict=False: the checkpoint may not cover every adapter key.
try:
    model.load_state_dict(torch.load(model_weights, map_location='cpu'), strict=False)
except RuntimeError as e:
    print(f"Error loading state_dict: {e}")

model.eval()

test_dataset = CustomDataset(test_dataset, transform=preprocess_image)

# Sample at most 100 images; the original random.sample crashed with
# ValueError whenever the dataset held fewer than 100 items.
test_size = min(100, len(test_dataset))
test_indices = random.sample(range(len(test_dataset)), test_size)
test_subset = Subset(test_dataset, test_indices)
test_loader = DataLoader(test_subset, batch_size=64, shuffle=False, num_workers=2)

model.to(device)
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy: {accuracy:.2f} %')
train도 같이 하는 코드 → fine tuning이 아닌 전체 training
# ------------------------------------------------------------------
# Train + evaluate script: full LoRA training (not just inference) of
# the ViT classifier on Children_Sketch, with an 80/20 train/val
# split, followed by a final accuracy check on a random subset.
# ------------------------------------------------------------------
import io
import os
import random

import requests
import torch
import torch.nn as nn
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from sklearn.preprocessing import LabelEncoder
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import transforms
from transformers import AutoImageProcessor, AutoModelForImageClassification

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

processor = AutoImageProcessor.from_pretrained(
    "google/vit-base-patch16-224", use_fast=True
)
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=8,                  # total number of labels
    ignore_mismatched_sizes=True,  # replace the 1000-class ImageNet head
).to(device)

# Build the transform pipeline once instead of per image.
_TRANSFORM = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])


def preprocess_image(image):
    """Resize to 224x224, convert to tensor, and normalize to [-1, 1]."""
    return _TRANSFORM(image)


class CustomDataset(Dataset):
    """Wrap an HF dataset and encode its string labels as integer ids.

    Pass a pre-fitted ``label_encoder`` so that multiple splits share
    one label->id mapping. Fitting a fresh encoder per split (the old
    behavior, kept as the default for backward compatibility) can give
    train and val *different* mappings when a split misses a class.
    """

    def __init__(self, dataset, transform=None, label_encoder=None):
        self.dataset = dataset
        self.transform = transform
        if label_encoder is None:
            self.label_encoder = LabelEncoder()
            self.label_encoder.fit([item['label'] for item in dataset])
        else:
            self.label_encoder = label_encoder

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        if self.transform:
            img = self.transform(img)
        # Map the string label to its integer id.
        label = self.label_encoder.transform([label])[0]
        return img, torch.tensor(label, dtype=torch.long)


# Download the pre-trained checkpoint. Fail loudly on HTTP errors
# instead of silently handing an error page to torch.load.
model_url = "https://huggingface.co/JANGJIWON/UGRP1.0/resolve/main/model_epoch_3_accuracy_23.17.pth"
response = requests.get(model_url)
response.raise_for_status()
model_weights = io.BytesIO(response.content)

dataset = load_dataset("xodhks/Children_Sketch", split="train")

# LoRA adapters on the attention projection matrices.
config = LoraConfig(
    r=8,  # rank
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"],
)
model = get_peft_model(model, config)

# strict=False: the checkpoint may not cover every adapter key.
try:
    model.load_state_dict(torch.load(model_weights, map_location='cpu'), strict=False)
except RuntimeError as e:
    print(f"Error loading state_dict: {e}")

model.train()

# Fit ONE label encoder on the full dataset so both splits agree on
# the label->id mapping (bug fix: the original fitted one per split).
shared_encoder = LabelEncoder()
shared_encoder.fit([item['label'] for item in dataset])

# 80/20 train/validation split.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_dataset = CustomDataset(train_dataset, transform=preprocess_image, label_encoder=shared_encoder)
val_dataset = CustomDataset(val_dataset, transform=preprocess_image, label_encoder=shared_encoder)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=5e-5)

num_epochs = 50
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs.logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs.logits, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    # ---- validation pass ----
    model.eval()
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.logits, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    train_accuracy = 100 * correct_train / total_train
    val_accuracy = 100 * correct_val / total_val
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss:.4f}, Train Accuracy: {train_accuracy:.2f} %, Val Accuracy: {val_accuracy:.2f} %")

# ---- final evaluation ----
# NOTE(review): this "test" subset is drawn from the validation split,
# so it is not an independent held-out set — consider a true 3-way
# split if this number is reported anywhere.
model.eval()
correct = 0
total = 0
# Sample at most 100 images; the original random.sample crashed with
# ValueError whenever the validation split held fewer than 100 items.
test_size = min(100, len(val_dataset))
test_indices = random.sample(range(len(val_dataset)), test_size)
test_subset = Subset(val_dataset, test_indices)
test_loader = DataLoader(test_subset, batch_size=32, shuffle=False, num_workers=2)
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy: {accuracy:.2f} %')
정확도
Epoch [1/50], Loss: 68.7508, Train Accuracy: 19.21 %, Val Accuracy: 25.00 % Epoch [2/50], Loss: 42.8771, Train Accuracy: 30.18 %, Val Accuracy: 35.37 % Epoch [3/50], Loss: 30.8693, Train Accuracy: 37.80 %, Val Accuracy: 37.80 % Epoch [4/50], Loss: 25.8834, Train Accuracy: 44.82 %, Val Accuracy: 42.68 % Epoch [5/50], Loss: 23.0340, Train Accuracy: 51.98 %, Val Accuracy: 48.78 % Epoch [6/50], Loss: 21.0744, Train Accuracy: 56.10 %, Val Accuracy: 52.44 % Epoch [7/50], Loss: 19.8018, Train Accuracy: 59.60 %, Val Accuracy: 56.71 % Epoch [8/50], Loss: 18.6397, Train Accuracy: 64.63 %, Val Accuracy: 57.32 % Epoch [9/50], Loss: 17.5901, Train Accuracy: 67.23 %, Val Accuracy: 60.37 % Epoch [10/50], Loss: 16.7490, Train Accuracy: 69.51 %, Val Accuracy: 62.20 % Epoch [11/50], Loss: 15.9372, Train Accuracy: 71.34 %, Val Accuracy: 65.85 % Epoch [12/50], Loss: 14.9945, Train Accuracy: 74.24 %, Val Accuracy: 64.02 % Epoch [13/50], Loss: 14.3015, Train Accuracy: 75.00 %, Val Accuracy: 64.63 % Epoch [14/50], Loss: 13.7278, Train Accuracy: 76.37 %, Val Accuracy: 67.07 % Epoch [15/50], Loss: 12.9560, Train Accuracy: 77.44 %, Val Accuracy: 64.02 % Epoch [16/50], Loss: 12.1400, Train Accuracy: 79.42 %, Val Accuracy: 66.46 % Epoch [17/50], Loss: 11.5139, Train Accuracy: 80.95 %, Val Accuracy: 66.46 % Epoch [18/50], Loss: 10.7460, Train Accuracy: 81.25 %, Val Accuracy: 68.90 % Epoch [19/50], Loss: 10.2181, Train Accuracy: 83.38 %, Val Accuracy: 68.29 % Epoch [20/50], Loss: 9.4378, Train Accuracy: 83.84 %, Val Accuracy: 68.90 % Epoch [21/50], Loss: 8.6059, Train Accuracy: 87.35 %, Val Accuracy: 69.51 % Epoch [22/50], Loss: 8.1183, Train Accuracy: 88.72 %, Val Accuracy: 71.34 % Epoch [23/50], Loss: 7.5660, Train Accuracy: 89.79 %, Val Accuracy: 67.07 % Epoch [24/50], Loss: 7.2096, Train Accuracy: 89.94 %, Val Accuracy: 68.90 % Epoch [25/50], Loss: 6.5739, Train Accuracy: 91.31 %, Val Accuracy: 71.95 % Epoch [26/50], Loss: 5.9693, Train Accuracy: 91.46 %, Val Accuracy: 68.90 % Epoch 
[27/50], Loss: 5.5421, Train Accuracy: 91.62 %, Val Accuracy: 73.17 % Epoch [28/50], Loss: 5.1165, Train Accuracy: 93.29 %, Val Accuracy: 70.12 % Epoch [29/50], Loss: 4.7574, Train Accuracy: 93.60 %, Val Accuracy: 71.34 % Epoch [30/50], Loss: 4.5132, Train Accuracy: 93.29 %, Val Accuracy: 70.73 % Epoch [31/50], Loss: 4.1620, Train Accuracy: 93.60 %, Val Accuracy: 70.73 % Epoch [32/50], Loss: 3.8691, Train Accuracy: 93.75 %, Val Accuracy: 71.34 % Epoch [33/50], Loss: 3.6897, Train Accuracy: 93.75 %, Val Accuracy: 71.34 % Epoch [34/50], Loss: 3.5394, Train Accuracy: 93.14 %, Val Accuracy: 71.34 % Epoch [35/50], Loss: 3.4095, Train Accuracy: 94.05 %, Val Accuracy: 71.95 % Epoch [36/50], Loss: 3.2041, Train Accuracy: 93.90 %, Val Accuracy: 72.56 % Epoch [37/50], Loss: 3.1759, Train Accuracy: 93.75 %, Val Accuracy: 71.34 % Epoch [38/50], Loss: 3.0086, Train Accuracy: 93.90 %, Val Accuracy: 71.95 % Epoch [39/50], Loss: 2.9050, Train Accuracy: 93.60 %, Val Accuracy: 70.73 % Epoch [40/50], Loss: 2.9457, Train Accuracy: 92.68 %, Val Accuracy: 72.56 % Epoch [41/50], Loss: 2.5150, Train Accuracy: 94.05 %, Val Accuracy: 72.56 % Epoch [42/50], Loss: 2.5224, Train Accuracy: 93.75 %, Val Accuracy: 73.17 % Epoch [43/50], Loss: 2.5806, Train Accuracy: 93.60 %, Val Accuracy: 72.56 % Epoch [44/50], Loss: 2.5361, Train Accuracy: 94.36 %, Val Accuracy: 71.95 % Epoch [45/50], Loss: 2.4434, Train Accuracy: 94.05 %, Val Accuracy: 71.95 % Epoch [46/50], Loss: 2.5080, Train Accuracy: 93.29 %, Val Accuracy: 71.95 % Epoch [47/50], Loss: 2.5710, Train Accuracy: 92.68 %, Val Accuracy: 71.95 % Epoch [48/50], Loss: 2.4221, Train Accuracy: 94.51 %, Val Accuracy: 72.56 % Epoch [49/50], Loss: 2.3234, Train Accuracy: 93.75 %, Val Accuracy: 73.17 % Epoch [50/50], Loss: 2.3947, Train Accuracy: 93.90 %, Val Accuracy: 72.56 % Final Test Accuracy: 77.00 %
→ 오 괜찮군…
Task4
질문 모음
- label 숫자로 되어 있는 거 → 내 코드와의 차이 뭔지 생각해 보기
- 마지막 fc 레이어 안 바꿔도 되는지?
- label len()으로 설정한 이유?
지피티 질문
- 점진적으로 학습 시키는 것?: Curriculum Learning
→ 약간 이런 걸 연구 목적으로 정해봐도 될 듯..? 점진적으로 데이터 바꿔가면서 학습시키는 사례는 못 찾겠음… 우리가 첫 번째로 시도하는 게 아닐까? 라는 망상…
ToDo → emotion folder 유지하고 추가하도록 코드 짜기
나중에 사람으로도 테스트 해보기