2024/11/12 - dataset learning


Re-uploading EmoSet118K

Re-labeling each image

import os
import json

# Base folder containing the images
base_folder_path = './image'

# Emotion list and the label value assigned to each emotion
emotions = ['happiness', 'anger', 'surprise', 'disgust', 'fear', 'sadness']
emotion_to_label = {emotion: idx for idx, emotion in enumerate(emotions)}

# Create a JSON file for one emotion
def create_json_for_emotion(emotion):
    # Path of the emotion folder
    src_dir = os.path.join(base_folder_path, emotion)
    if not os.path.exists(src_dir):
        print(f"Directory {src_dir} does not exist.")
        return

    # Process the images for this emotion (3,000 per emotion)
    image_files = sorted([f for f in os.listdir(src_dir) if f.endswith('.jpg') or f.endswith('.png')])

    json_data_list = []  # list of JSON records
    for filename in image_files:
        # Original file name without its extension
        base_filename = os.path.splitext(filename)[0]

        # Image file name including the new relative path
        new_image_path = f"image/{emotion}/{filename}"

        # Numeric label for this emotion
        label = emotion_to_label[emotion]

        # JSON record for this image
        json_data = {
            "image": new_image_path,
            "emotion": emotion,
            "label": label,
            "image_id": base_filename
        }

        # Add the record to the list
        json_data_list.append(json_data)

    # Write the JSON file into the emotion folder so the upload script below can find it
    json_filename = os.path.join(src_dir, f"{emotion}_data.json")
    with open(json_filename, 'w') as json_file:
        json.dump(json_data_list, json_file, indent=4)

    print(f"Created JSON for {emotion} with {len(json_data_list)} images.")

# Create a JSON file for every emotion
for emotion in emotions:
    create_json_for_emotion(emotion)

print("JSON creation completed.")
 

Push_to_hub()

import os
import json
from datasets import Dataset, Features, Image, Value, concatenate_datasets, load_dataset
from huggingface_hub import HfApi

# Read the Hugging Face token from the environment instead of hard-coding it
api_token = os.environ.get("HF_TOKEN")
api = HfApi()

# Emotions to upload
emotions = ['happiness', 'anger', 'surprise', 'disgust', 'fear', 'sadness']

# Target repository
repo_id = "xodhks/EmoSet118K"

# Load the existing dataset if it is already on the Hub (only the 'train' split)
try:
    existing_dataset = load_dataset(repo_id, split='train')
except Exception:
    existing_dataset = None

# Process the data for each emotion
for emotion in emotions:
    json_path = f"./image/{emotion}/{emotion}_data.json"

    # Load the JSON file created in the previous step
    with open(json_path, 'r') as f:
        emotion_data = json.load(f)

    # Combine the image paths with the metadata read from the JSON
    data = {
        "image": [],
        "emotion": [],
        "label": [],
        "image_id": []
    }
    for item in emotion_data:
        image_file = item["image"]  # image path as stored in the JSON
        data["image"].append(image_file)
        data["emotion"].append(item["emotion"])
        data["label"].append(item["label"])  # label is already numeric
        data["image_id"].append(item["image_id"])

    # Build a new Dataset
    features = Features({
        "image": Image(),        # image field
        "emotion": Value("string"),  # emotion field
        "label": Value("int32"),     # numeric label field
        "image_id": Value("string")  # image ID field
    })
    new_dataset = Dataset.from_dict(data, features=features)

    # Merge with whatever has been accumulated so far
    if existing_dataset:
        combined_dataset = concatenate_datasets([existing_dataset, new_dataset])
    else:
        combined_dataset = new_dataset
    existing_dataset = combined_dataset

# Upload the merged dataset to the Hugging Face Hub
existing_dataset.push_to_hub(
    repo_id=repo_id,  # upload to the same repository
    token=api_token,
    max_shard_size="1GB"  # shard the dataset if it is large
)

print("All emotion datasets have been merged and uploaded successfully.")
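As a quick sanity check, the pushed split can be pulled back down and inspected. This is a minimal sketch that is not part of the original notebook; it only assumes the same repo_id as above.

from datasets import load_dataset

# Re-download the split that was just pushed and spot-check it
ds = load_dataset("xodhks/EmoSet118K", split="train")
print(ds)                                # number of rows and column names
print(ds.features)                       # should list image / emotion / label / image_id
print(ds[0]["emotion"], ds[0]["label"])  # emotion string and numeric label of the first record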

Children_sketch dataset re-labeling

This dataset will also be used for training and testing, so it is re-labeled as well.

import os
import json

# Path of the CombinedArts folder
combined_folder = './CombinedArts'

# Emotion folders present in this dataset
emotions = ['anger', 'fear', 'sadness', 'happiness']
# Keep the same label numbering as EmoSet118K
emotion_to_label = {emotion: idx for idx, emotion in enumerate(['happiness', 'anger', 'surprise', 'disgust', 'fear', 'sadness'])}

# One record list per emotion
emotion_data = {emotion: [] for emotion in emotions}

# Build the records for each emotion folder
for emotion in emotions:
    # Skip emotions that have no label mapping
    if emotion not in emotion_to_label:
        continue

    # Folder for this emotion
    emotion_folder = os.path.join(combined_folder, emotion)

    # Iterate over the files in the emotion folder
    for file_name in os.listdir(emotion_folder):
        if file_name.lower().endswith('.jpg'):
            # image_id is the file name without its extension
            image_id = os.path.splitext(file_name)[0]

            # JSON record for this image
            item = {
                "image": os.path.join("image", emotion, file_name),
                "emotion": emotion,
                "label": emotion_to_label[emotion],
                "image_id": image_id
            }

            # Add the record to this emotion's list
            emotion_data[emotion].append(item)

# Save one JSON file per emotion, next to the emotion folders so the upload script below can find them
for emotion, data in emotion_data.items():
    output_file = os.path.join(combined_folder, f'{emotion}.json')
    with open(output_file, 'w') as f:
        json.dump(data, f, indent=4)
    print(f"Saved JSON file to '{output_file}'.")

push_to_hub()

import os
import json
from datasets import Dataset, Features, Image, Value
from huggingface_hub import HfApi

# Read the Hugging Face token from the environment instead of hard-coding it
api_token = os.environ.get("HF_TOKEN")
api = HfApi()

# Path of the CombinedArts folder
combined_folder = "./CombinedArts"

# Emotions to upload
emotions = ["anger", "fear", "sadness", "happiness"]

# Target repository
repo_id = "xodhks/Children_Sketch"

# Dictionary that accumulates the data for every emotion
data = {
    "image": [],
    "emotion": [],
    "label": [],
    "image_id": []
}

# Prepare the data for each emotion
for emotion in emotions:
    json_path = os.path.join(combined_folder, f"{emotion}.json")
    image_folder = os.path.join(combined_folder, emotion)

    # Load the JSON file created in the previous step
    with open(json_path, 'r') as f:
        emotion_data = json.load(f)

    # Combine the image paths with the metadata read from the JSON
    for item in emotion_data:
        image_file = os.path.join(image_folder, os.path.basename(item["image"]))
        data["image"].append(image_file)
        data["emotion"].append(item["emotion"])
        data["label"].append(item["label"])
        data["image_id"].append(item["image_id"])

# Build a single Dataset from all of the data
features = Features({
    "image": Image(),
    "emotion": Value("string"),
    "label": Value("int32"),
    "image_id": Value("string")
})
combined_dataset = Dataset.from_dict(data, features=features)

# Upload the Dataset to the Hugging Face Hub
combined_dataset.push_to_hub(
    repo_id=repo_id,  # upload to the new repository
    token=api_token,
    max_shard_size="1GB"
)

print("All emotion datasets have been combined and uploaded successfully.")

sketch of {emotions} - crawling the dataset

  • After collecting the datasets, we expected that a simple GAN-style approach on plain images would not yield much of a training effect, so we decided to crawl sketch-format emotion images (drawn with pencils and similar tools) from Google. The code for this is sketched below.
  • Constraint: when Google detects automated image crawling, it blocks it with user verification such as CAPTCHA. To get around this, the crawling attempts need to happen at random intervals (see the sketch after this list).
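A minimal crawling sketch, assuming the icrawler package (the notes do not name the tool actually used); the search keywords and folder layout are illustrative. A random delay is inserted between queries to lower the chance of triggering a CAPTCHA.

import random
import time
from icrawler.builtin import GoogleImageCrawler  # assumption: icrawler is used for the crawling

emotions = ['happiness', 'anger', 'surprise', 'disgust', 'fear', 'sadness']

for emotion in emotions:
    # one folder per emotion, e.g. ./crawled/happiness (hypothetical layout)
    crawler = GoogleImageCrawler(storage={'root_dir': f'./crawled/{emotion}'})
    # search for pencil-sketch style images of this emotion
    crawler.crawl(keyword=f'{emotion} pencil sketch drawing', max_num=100)
    # wait a random amount of time before the next query to look less like a bot
    time.sleep(random.uniform(30, 120))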
 
 
 

Training

  • Training was based on the training code used on the 09/09 page.
  • What changed from before is the structure and size of the dataset.
import torch
import os
from transformers import AutoModelForImageClassification, AutoImageProcessor
from datasets import load_dataset
from torch.utils.data import DataLoader
from torch.optim import Adam
import torch.nn as nn
from peft import get_peft_model, LoraConfig

# GPU setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device:", device)

# Load the datasets
train_dataset = load_dataset("xodhks/EmoSet118K", split="train")
# train_dataset = load_dataset("xodhks/EmoSet118K_MonetStyle", split="train")
test_dataset = load_dataset("xodhks/Children_Sketch", split="train")

# Labels that actually exist in the test set
test_valid_label_indices = [0, 1, 4, 5]  # only the label indices present in Children_Sketch

# Image processor and model
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True)
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=6,  # number of emotion classes in the dataset
    ignore_mismatched_sizes=True
).to(device)

# Configure and apply LoRA
config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"],
)
model = get_peft_model(model, config)

# Directory for saved checkpoints
os.makedirs("top_models", exist_ok=True)
top_models = []

# DataLoader setup
def collate_fn(batch):
    images = [item['image'] for item in batch]
    labels = [item['label'] for item in batch]
    inputs = processor(images=images, return_tensors="pt")
    inputs['labels'] = torch.tensor(labels, dtype=torch.long)
    return inputs

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn, num_workers=4)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)

# Evaluation function
def evaluate(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            inputs = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**inputs)
            _, preds = torch.max(outputs.logits, 1)
            for pred, label in zip(preds, inputs['labels']):
                # predictions outside the labels present in the test set count as wrong
                if pred.item() in test_valid_label_indices:
                    if pred.item() == label.item():
                        correct += 1
                total += 1
    accuracy = 100 * correct / total
    return accuracy

# Checkpoint saving function
def save_top_models(epoch, accuracy, model, top_models):
    model_filename = f"model_epoch_{epoch + 1}_accuracy_{accuracy:.2f}.pth"
    model_path = os.path.join("top_models", model_filename)
    top_models.append((accuracy, model_path))
    top_models = sorted(top_models, key=lambda x: x[0], reverse=True)[:10]
    torch.save(model.state_dict(), model_path)
    print("\nTop 10 Models (by accuracy):")
    for i, (acc, path) in enumerate(top_models, 1):
        print(f"Rank {i}: Accuracy = {acc:.2f}%, Model Path = {path}")
    return top_models

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        optimizer.zero_grad()
        inputs = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**inputs)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")
    test_accuracy = evaluate(model, test_loader)
    print(f"Test Accuracy after Epoch {epoch+1}: {test_accuracy:.2f}%")
    top_models = save_top_models(epoch, test_accuracy, model, top_models)

print("Finished Training")
  • Results
    • model_epoch_1_accuracy_41.07.pth (337051.3KB): this checkpoint appears to have the highest accuracy, but its loss also looks too high, so it would be better to use a different one
    • model_epoch_54_accuracy_40.61.pth (337051.6KB)
    • model_epoch_5_accuracy_41.44.pth (337051.3KB)

Testing on the real dataset

1. Test without additional training

import torch
import os
from transformers import AutoModelForImageClassification, AutoImageProcessor
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
from torchvision import transforms
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from huggingface_hub import hf_hub_download

# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model and preprocessing setup
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True)
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=8,
    ignore_mismatched_sizes=True
).to(device)

# Load the trained weights
model_weights_path = './saved_models/model_epoch_54_accuracy_40.61.pth'
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)

# Apply LoRA
config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"]
)
model = get_peft_model(model, config)

# Preprocessing function
def preprocess_image(image):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return transform(image)

# Custom Dataset definition
class CustomDataset(Dataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        if self.transform:
            img = self.transform(img)
        label = self.label_encoder.transform([label])[0]
        return img, torch.tensor(label, dtype=torch.long)

# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")
dataset_list = [dict(item) for item in dataset]
_, test_data = train_test_split(dataset_list, test_size=0.2, random_state=42)

# Build the test data loader
test_dataset = CustomDataset(test_data, transform=preprocess_image)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Testing"):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy: {accuracy:.2f}%')
result
/home/rnjsxodhks/anaconda3/envs/UGRP/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([8]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([8, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
/tmp/ipykernel_1538/3340183218.py:26: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)
Downloading data: 100%|██████████| 48/48 [00:04<00:00, 10.71files/s]
Generating train split: 100%|██████████| 48/48 [00:00<00:00, 4500.93 examples/s]
Testing: 100%|██████████| 1/1 [00:00<00:00, 1.44it/s]
Final Test Accuracy: 0.00%
 

2. Test with additional training

import torch
import os
from transformers import AutoModelForImageClassification, AutoImageProcessor
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
import requests
import io
from torchvision import transforms
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from huggingface_hub import hf_hub_download

# Load the ViT model and set up preprocessing
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True)
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=8,
    ignore_mismatched_sizes=True
).to(device)

# Preprocessing function
def preprocess_image(image):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return transform(image)

class CustomDataset(Dataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        if self.transform:
            img = self.transform(img)
        label = self.label_encoder.transform([label])[0]
        return img, torch.tensor(label, dtype=torch.long)

# Load the trained weights and check the dataset
# model_url = "https://huggingface.co/JANGJIWON/EmoSet118K_MonetStyle_student/blob/main/model_epoch_5_accuracy_43.09.pth"
# response = requests.get(model_url)
# model_weights = io.BytesIO(response.content)
model_weights_path = "./saved_models/model_epoch_54_accuracy_40.61.pth"
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)

# First LoRA configuration and application
config1 = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"]
)
model = get_peft_model(model, config1)

# try:
#     model.load_state_dict(torch.load(model_weights, map_location='cpu', weights_only=False), strict=False)
# except RuntimeError as e:
#     print(f"Error loading state_dict: {e}")

# Second LoRA configuration and application
config2 = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["query", "key", "value"]
)
model = get_peft_model(model, config2)

# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")

# Convert the dataset to a list of dictionaries for splitting
dataset_list = [dict(item) for item in dataset]

# Split the dataset into train and test sets (70% train, 30% test)
train_data, test_data = train_test_split(dataset_list, test_size=0.3, random_state=42)

# Create datasets and dataloaders
train_dataset = CustomDataset(train_data, transform=preprocess_image)
test_dataset = CustomDataset(test_data, transform=preprocess_image)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2)

# Optimizer and loss function
learning_rate = 0.001  # set a slightly higher learning rate
optimizer = Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Train the model
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs.logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:  # use test_loader here
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy after second LoRA tuning: {accuracy:.2f}%')
result
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([8]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([8, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
/tmp/ipykernel_1538/827046990.py:63: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)
100%|██████████| 33/33 [00:00<00:00, 35.45it/s]
Epoch [1/10], Loss: 56.6235, Accuracy: 48.48%
100%|██████████| 33/33 [00:00<00:00, 57.74it/s]
Epoch [2/10], Loss: 27.6023, Accuracy: 66.67%
100%|██████████| 33/33 [00:00<00:00, 54.12it/s]
Epoch [3/10], Loss: 14.1222, Accuracy: 90.91%
100%|██████████| 33/33 [00:00<00:00, 55.03it/s]
Epoch [4/10], Loss: 5.8903, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 54.82it/s]
Epoch [5/10], Loss: 2.2460, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 56.98it/s]
Epoch [6/10], Loss: 1.0786, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 55.77it/s]
Epoch [7/10], Loss: 0.6976, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 53.64it/s]
Epoch [8/10], Loss: 0.5067, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 57.19it/s]
Epoch [9/10], Loss: 0.3936, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 56.64it/s]
Epoch [10/10], Loss: 0.3180, Accuracy: 100.00%
Final Test Accuracy after second LoRA tuning: 13.33%
 
 

clip_vit

import torch
import os
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from torch.utils.data import DataLoader
from torch.optim import Adam
import torch.nn as nn
from peft import get_peft_model, LoraConfig, TaskType
from PIL import Image

# GPU setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device:", device)

# Load the datasets
train_dataset = load_dataset("xodhks/EmoSet118K", split="train")
test_dataset = load_dataset("xodhks/Children_Sketch", split="train")

# Labels that actually exist in the test set
test_valid_label_indices = [0, 1, 4, 5]  # only the label indices present in Children_Sketch

# Image processor and model
model_name = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(model_name)
model = CLIPModel.from_pretrained(
    model_name,
    num_labels=6,  # number of emotion classes in the dataset
    ignore_mismatched_sizes=True
).to(device)

# Configure and apply LoRA
config = LoraConfig(
    task_type=TaskType.FEATURE_EXTRACTION,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["visual_projection"],  # apply LoRA to the visual side
)
model = get_peft_model(model, config)

# Directory for saved checkpoints
os.makedirs("top_models", exist_ok=True)
top_models = []

def collate_fn(batch):
    # If the images in the batch are already loaded PIL Images, only convert; otherwise open from the path
    images = [item['image'].convert("RGB") if isinstance(item['image'], Image.Image)
              else Image.open(item['image']).convert("RGB") for item in batch]
    labels = [item['label'] for item in batch]
    inputs = processor(images=images, return_tensors="pt", padding=True)
    inputs['labels'] = torch.tensor(labels, dtype=torch.long)
    return inputs

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn, num_workers=4)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)

# Evaluation function
def evaluate(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            inputs = {k: v.to(device) for k, v in batch.items() if k != "labels"}
            labels = batch['labels'].to(device)
            outputs = model.get_image_features(pixel_values=inputs["pixel_values"])
            # predictions for the classification loss:
            # pass the features through the text_projection layer instead of computing logits directly
            logits = model.text_projection(outputs)
            _, preds = torch.max(logits, 1)
            for pred, label in zip(preds, labels):
                if pred.item() in test_valid_label_indices:
                    if pred.item() == label.item():
                        correct += 1
                total += 1
    accuracy = 100 * correct / total
    return accuracy

# Checkpoint saving function
def save_top_models(epoch, accuracy, model, top_models):
    model_filename = f"model_epoch_{epoch + 1}_accuracy_{accuracy:.2f}.pth"
    model_path = os.path.join("top_models", model_filename)
    top_models.append((accuracy, model_path))
    top_models = sorted(top_models, key=lambda x: x[0], reverse=True)[:10]
    torch.save(model.state_dict(), model_path)
    print("\nTop 10 Models (by accuracy):")
    for i, (acc, path) in enumerate(top_models, 1):
        print(f"Rank {i}: Accuracy = {acc:.2f}%, Model Path = {path}")
    return top_models

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        optimizer.zero_grad()
        inputs = {k: v.to(device) for k, v in batch.items() if k != "labels"}
        labels = batch['labels'].to(device)
        outputs = model.get_image_features(pixel_values=inputs["pixel_values"])
        logits = model.text_projection(outputs)  # pass through the text_projection layer instead of computing directly
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")
    test_accuracy = evaluate(model, test_loader)
    print(f"Test Accuracy after Epoch {epoch+1}: {test_accuracy:.2f}%")
    top_models = save_top_models(epoch, test_accuracy, model, top_models)

print("Finished Training")
model_epoch_85_accuracy_58.84.pth (591139.3KB)