2024/11/12 - dataset learning


Re-uploading EmoSet118K

Re-labeling each image

import os
import json

# Base folder containing the images
base_folder_path = './image'

# Emotion list and the label value assigned to each emotion
emotions = ['happiness', 'anger', 'surprise', 'disgust', 'fear', 'sadness']
emotion_to_label = {emotion: idx for idx, emotion in enumerate(emotions)}

# Create a JSON file for one emotion
def create_json_for_emotion(emotion):
    # Path of the emotion folder
    src_dir = os.path.join(base_folder_path, emotion)
    if not os.path.exists(src_dir):
        print(f"Directory {src_dir} does not exist.")
        return

    # Process the images for this emotion (3,000 per emotion)
    image_files = sorted([f for f in os.listdir(src_dir) if f.endswith('.jpg') or f.endswith('.png')])

    json_data_list = []  # list of JSON records
    for filename in image_files:
        # Original file name without its extension
        base_filename = os.path.splitext(filename)[0]

        # Image file name including the new relative path
        new_image_path = f"image/{emotion}/{filename}"

        # Numeric label for this emotion
        label = emotion_to_label[emotion]

        # JSON record for this image
        json_data = {
            "image": new_image_path,
            "emotion": emotion,
            "label": label,
            "image_id": base_filename
        }

        # Add the record to the list
        json_data_list.append(json_data)

    # Write the JSON file into the emotion folder so the upload script below can find it
    json_filename = os.path.join(src_dir, f"{emotion}_data.json")
    with open(json_filename, 'w') as json_file:
        json.dump(json_data_list, json_file, indent=4)

    print(f"Created JSON for {emotion} with {len(json_data_list)} images.")

# Create a JSON file for every emotion
for emotion in emotions:
    create_json_for_emotion(emotion)

print("JSON creation completed.")
 

Push_to_hub()

import os
import json
from datasets import Dataset, Features, Image, Value, concatenate_datasets, load_dataset
from huggingface_hub import HfApi

# Read the Hugging Face token from the environment instead of hard-coding it
api_token = os.environ.get("HF_TOKEN")
api = HfApi()

# Emotions to upload
emotions = ['happiness', 'anger', 'surprise', 'disgust', 'fear', 'sadness']

# Target repository
repo_id = "xodhks/EmoSet118K"

# Load the existing dataset if it is already on the Hub (only the 'train' split)
try:
    existing_dataset = load_dataset(repo_id, split='train')
except Exception:
    existing_dataset = None

# Process the data for each emotion
for emotion in emotions:
    json_path = f"./image/{emotion}/{emotion}_data.json"

    # Load the JSON file created in the previous step
    with open(json_path, 'r') as f:
        emotion_data = json.load(f)

    # Combine the image paths with the metadata read from the JSON
    data = {
        "image": [],
        "emotion": [],
        "label": [],
        "image_id": []
    }
    for item in emotion_data:
        image_file = item["image"]  # image path as stored in the JSON
        data["image"].append(image_file)
        data["emotion"].append(item["emotion"])
        data["label"].append(item["label"])  # label is already numeric
        data["image_id"].append(item["image_id"])

    # Build a new Dataset
    features = Features({
        "image": Image(),        # image field
        "emotion": Value("string"),  # emotion field
        "label": Value("int32"),     # numeric label field
        "image_id": Value("string")  # image ID field
    })
    new_dataset = Dataset.from_dict(data, features=features)

    # Merge with whatever has been accumulated so far
    if existing_dataset:
        combined_dataset = concatenate_datasets([existing_dataset, new_dataset])
    else:
        combined_dataset = new_dataset
    existing_dataset = combined_dataset

# Upload the merged dataset to the Hugging Face Hub
existing_dataset.push_to_hub(
    repo_id=repo_id,  # upload to the same repository
    token=api_token,
    max_shard_size="1GB"  # shard the dataset if it is large
)

print("All emotion datasets have been merged and uploaded successfully.")
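As a quick sanity check, the pushed split can be pulled back down and inspected. This is a minimal sketch that is not part of the original notebook; it only assumes the same repo_id as above.

from datasets import load_dataset

# Re-download the split that was just pushed and spot-check it
ds = load_dataset("xodhks/EmoSet118K", split="train")
print(ds)                                # number of rows and column names
print(ds.features)                       # should list image / emotion / label / image_id
print(ds[0]["emotion"], ds[0]["label"])  # emotion string and numeric label of the first record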

Children_sketch dataset re-labeling

This dataset will also be used for training and testing, so it is re-labeled as well.

import os
import json

# Path of the CombinedArts folder
combined_folder = './CombinedArts'

# Emotion folders present in this dataset
emotions = ['anger', 'fear', 'sadness', 'happiness']
# Keep the same label numbering as EmoSet118K
emotion_to_label = {emotion: idx for idx, emotion in enumerate(['happiness', 'anger', 'surprise', 'disgust', 'fear', 'sadness'])}

# One record list per emotion
emotion_data = {emotion: [] for emotion in emotions}

# Build the records for each emotion folder
for emotion in emotions:
    # Skip emotions that have no label mapping
    if emotion not in emotion_to_label:
        continue

    # Folder for this emotion
    emotion_folder = os.path.join(combined_folder, emotion)

    # Iterate over the files in the emotion folder
    for file_name in os.listdir(emotion_folder):
        if file_name.lower().endswith('.jpg'):
            # image_id is the file name without its extension
            image_id = os.path.splitext(file_name)[0]

            # JSON record for this image
            item = {
                "image": os.path.join("image", emotion, file_name),
                "emotion": emotion,
                "label": emotion_to_label[emotion],
                "image_id": image_id
            }

            # Add the record to this emotion's list
            emotion_data[emotion].append(item)

# Save one JSON file per emotion, next to the emotion folders so the upload script below can find them
for emotion, data in emotion_data.items():
    output_file = os.path.join(combined_folder, f'{emotion}.json')
    with open(output_file, 'w') as f:
        json.dump(data, f, indent=4)
    print(f"Saved JSON file to '{output_file}'.")

push_to_hub()

import os
import json
from datasets import Dataset, Features, Image, Value
from huggingface_hub import HfApi

# Read the Hugging Face token from the environment instead of hard-coding it
api_token = os.environ.get("HF_TOKEN")
api = HfApi()

# Path of the CombinedArts folder
combined_folder = "./CombinedArts"

# Emotions to upload
emotions = ["anger", "fear", "sadness", "happiness"]

# Target repository
repo_id = "xodhks/Children_Sketch"

# Dictionary that accumulates the data for every emotion
data = {
    "image": [],
    "emotion": [],
    "label": [],
    "image_id": []
}

# Prepare the data for each emotion
for emotion in emotions:
    json_path = os.path.join(combined_folder, f"{emotion}.json")
    image_folder = os.path.join(combined_folder, emotion)

    # Load the JSON file created in the previous step
    with open(json_path, 'r') as f:
        emotion_data = json.load(f)

    # Combine the image paths with the metadata read from the JSON
    for item in emotion_data:
        image_file = os.path.join(image_folder, os.path.basename(item["image"]))
        data["image"].append(image_file)
        data["emotion"].append(item["emotion"])
        data["label"].append(item["label"])
        data["image_id"].append(item["image_id"])

# Build a single Dataset from all of the data
features = Features({
    "image": Image(),
    "emotion": Value("string"),
    "label": Value("int32"),
    "image_id": Value("string")
})
combined_dataset = Dataset.from_dict(data, features=features)

# Upload the Dataset to the Hugging Face Hub
combined_dataset.push_to_hub(
    repo_id=repo_id,  # upload to the new repository
    token=api_token,
    max_shard_size="1GB"
)

print("All emotion datasets have been combined and uploaded successfully.")

sketch of {emotions} - crawling the dataset

  • After collecting the datasets, we expected that a simple GAN-style approach on plain images would not yield much of a training effect, so we decided to crawl sketch-format emotion images (drawn with pencils and similar tools) from Google. The code for this is sketched below.
  • Constraint: when Google detects automated image crawling, it blocks it with user verification such as CAPTCHA. To get around this, the crawling attempts need to happen at random intervals (see the sketch after this list).
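A minimal crawling sketch, assuming the icrawler package (the notes do not name the tool actually used); the search keywords and folder layout are illustrative. A random delay is inserted between queries to lower the chance of triggering a CAPTCHA.

import random
import time
from icrawler.builtin import GoogleImageCrawler  # assumption: icrawler is used for the crawling

emotions = ['happiness', 'anger', 'surprise', 'disgust', 'fear', 'sadness']

for emotion in emotions:
    # one folder per emotion, e.g. ./crawled/happiness (hypothetical layout)
    crawler = GoogleImageCrawler(storage={'root_dir': f'./crawled/{emotion}'})
    # search for pencil-sketch style images of this emotion
    crawler.crawl(keyword=f'{emotion} pencil sketch drawing', max_num=100)
    # wait a random amount of time before the next query to look less like a bot
    time.sleep(random.uniform(30, 120))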
 
 
 

Training

  • Training was based on the training code used on the 09/09 page.
  • What changed from before is the structure and size of the dataset.
import torch
import os
from transformers import AutoModelForImageClassification, AutoImageProcessor
from datasets import load_dataset
from torch.utils.data import DataLoader
from torch.optim import Adam
import torch.nn as nn
from peft import get_peft_model, LoraConfig

# GPU setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device:", device)

# Load the datasets
train_dataset = load_dataset("xodhks/EmoSet118K", split="train")
# train_dataset = load_dataset("xodhks/EmoSet118K_MonetStyle", split="train")
test_dataset = load_dataset("xodhks/Children_Sketch", split="train")

# Labels that actually exist in the test set
test_valid_label_indices = [0, 1, 4, 5]  # only the label indices present in Children_Sketch

# Image processor and model
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True)
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=6,  # number of emotion classes in the dataset
    ignore_mismatched_sizes=True
).to(device)

# Configure and apply LoRA
config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"],
)
model = get_peft_model(model, config)

# Directory for saved checkpoints
os.makedirs("top_models", exist_ok=True)
top_models = []

# DataLoader setup
def collate_fn(batch):
    images = [item['image'] for item in batch]
    labels = [item['label'] for item in batch]
    inputs = processor(images=images, return_tensors="pt")
    inputs['labels'] = torch.tensor(labels, dtype=torch.long)
    return inputs

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn, num_workers=4)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)

# Evaluation function
def evaluate(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            inputs = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**inputs)
            _, preds = torch.max(outputs.logits, 1)
            for pred, label in zip(preds, inputs['labels']):
                # predictions outside the labels present in the test set count as wrong
                if pred.item() in test_valid_label_indices:
                    if pred.item() == label.item():
                        correct += 1
                total += 1
    accuracy = 100 * correct / total
    return accuracy

# Checkpoint saving function
def save_top_models(epoch, accuracy, model, top_models):
    model_filename = f"model_epoch_{epoch + 1}_accuracy_{accuracy:.2f}.pth"
    model_path = os.path.join("top_models", model_filename)
    top_models.append((accuracy, model_path))
    top_models = sorted(top_models, key=lambda x: x[0], reverse=True)[:10]
    torch.save(model.state_dict(), model_path)
    print("\nTop 10 Models (by accuracy):")
    for i, (acc, path) in enumerate(top_models, 1):
        print(f"Rank {i}: Accuracy = {acc:.2f}%, Model Path = {path}")
    return top_models

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        optimizer.zero_grad()
        inputs = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**inputs)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")
    test_accuracy = evaluate(model, test_loader)
    print(f"Test Accuracy after Epoch {epoch+1}: {test_accuracy:.2f}%")
    top_models = save_top_models(epoch, test_accuracy, model, top_models)

print("Finished Training")
  • Results
    • model_epoch_1_accuracy_41.07.pth (337051.3KB): this checkpoint appears to have the highest accuracy, but its loss also looks too high, so it would be better to use a different one
    • model_epoch_54_accuracy_40.61.pth (337051.6KB)
    • model_epoch_5_accuracy_41.44.pth (337051.3KB)

Testing on the real dataset

1. Test without additional training

import torch
import os
from transformers import AutoModelForImageClassification, AutoImageProcessor
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
from torchvision import transforms
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from huggingface_hub import hf_hub_download

# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model and preprocessing setup
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True)
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=8,
    ignore_mismatched_sizes=True
).to(device)

# Load the trained weights
model_weights_path = './saved_models/model_epoch_54_accuracy_40.61.pth'
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)

# Apply LoRA
config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"]
)
model = get_peft_model(model, config)

# Preprocessing function
def preprocess_image(image):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return transform(image)

# Custom Dataset definition
class CustomDataset(Dataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        if self.transform:
            img = self.transform(img)
        label = self.label_encoder.transform([label])[0]
        return img, torch.tensor(label, dtype=torch.long)

# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")
dataset_list = [dict(item) for item in dataset]
_, test_data = train_test_split(dataset_list, test_size=0.2, random_state=42)

# Build the test data loader
test_dataset = CustomDataset(test_data, transform=preprocess_image)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Testing"):
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy: {accuracy:.2f}%')
result
/home/rnjsxodhks/anaconda3/envs/UGRP/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([8]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([8, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
/tmp/ipykernel_1538/3340183218.py:26: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)
Downloading data: 100%|██████████| 48/48 [00:04<00:00, 10.71files/s]
Generating train split: 100%|██████████| 48/48 [00:00<00:00, 4500.93 examples/s]
Testing: 100%|██████████| 1/1 [00:00<00:00, 1.44it/s]
Final Test Accuracy: 0.00%
 

2. Test with additional training

import torch
import os
from transformers import AutoModelForImageClassification, AutoImageProcessor
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
import requests
import io
from torchvision import transforms
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from huggingface_hub import hf_hub_download

# Load the ViT model and set up preprocessing
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224", use_fast=True)
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=8,
    ignore_mismatched_sizes=True
).to(device)

# Preprocessing function
def preprocess_image(image):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return transform(image)

class CustomDataset(Dataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        if self.transform:
            img = self.transform(img)
        label = self.label_encoder.transform([label])[0]
        return img, torch.tensor(label, dtype=torch.long)

# Load the trained weights and check the dataset
# model_url = "https://huggingface.co/JANGJIWON/EmoSet118K_MonetStyle_student/blob/main/model_epoch_5_accuracy_43.09.pth"
# response = requests.get(model_url)
# model_weights = io.BytesIO(response.content)
model_weights_path = "./saved_models/model_epoch_54_accuracy_40.61.pth"
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)

# First LoRA configuration and application
config1 = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"]
)
model = get_peft_model(model, config1)

# try:
#     model.load_state_dict(torch.load(model_weights, map_location='cpu', weights_only=False), strict=False)
# except RuntimeError as e:
#     print(f"Error loading state_dict: {e}")

# Second LoRA configuration and application
config2 = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["query", "key", "value"]
)
model = get_peft_model(model, config2)

# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")

# Convert the dataset to a list of dictionaries for splitting
dataset_list = [dict(item) for item in dataset]

# Split the dataset into train and test sets (70% train, 30% test)
train_data, test_data = train_test_split(dataset_list, test_size=0.3, random_state=42)

# Create datasets and dataloaders
train_dataset = CustomDataset(train_data, transform=preprocess_image)
test_dataset = CustomDataset(test_data, transform=preprocess_image)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2)

# Optimizer and loss function
learning_rate = 0.001  # set a slightly higher learning rate
optimizer = Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Train the model
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs.logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:  # use test_loader here
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy after second LoRA tuning: {accuracy:.2f}%')
result
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([8]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([8, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
/tmp/ipykernel_1538/827046990.py:63: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)
100%|██████████| 33/33 [00:00<00:00, 35.45it/s]
Epoch [1/10], Loss: 56.6235, Accuracy: 48.48%
100%|██████████| 33/33 [00:00<00:00, 57.74it/s]
Epoch [2/10], Loss: 27.6023, Accuracy: 66.67%
100%|██████████| 33/33 [00:00<00:00, 54.12it/s]
Epoch [3/10], Loss: 14.1222, Accuracy: 90.91%
100%|██████████| 33/33 [00:00<00:00, 55.03it/s]
Epoch [4/10], Loss: 5.8903, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 54.82it/s]
Epoch [5/10], Loss: 2.2460, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 56.98it/s]
Epoch [6/10], Loss: 1.0786, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 55.77it/s]
Epoch [7/10], Loss: 0.6976, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 53.64it/s]
Epoch [8/10], Loss: 0.5067, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 57.19it/s]
Epoch [9/10], Loss: 0.3936, Accuracy: 100.00%
100%|██████████| 33/33 [00:00<00:00, 56.64it/s]
Epoch [10/10], Loss: 0.3180, Accuracy: 100.00%
Final Test Accuracy after second LoRA tuning: 13.33%
 
 

clip_vit

import torch
import os
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from torch.utils.data import DataLoader
from torch.optim import Adam
import torch.nn as nn
from peft import get_peft_model, LoraConfig, TaskType
from PIL import Image

# GPU setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device:", device)

# Load the datasets
train_dataset = load_dataset("xodhks/EmoSet118K", split="train")
test_dataset = load_dataset("xodhks/Children_Sketch", split="train")

# Labels that actually exist in the test set
test_valid_label_indices = [0, 1, 4, 5]  # only the label indices present in Children_Sketch

# Image processor and model
model_name = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(model_name)
model = CLIPModel.from_pretrained(
    model_name,
    num_labels=6,  # number of emotion classes in the dataset
    ignore_mismatched_sizes=True
).to(device)

# Configure and apply LoRA
config = LoraConfig(
    task_type=TaskType.FEATURE_EXTRACTION,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["visual_projection"],  # apply LoRA to the visual side
)
model = get_peft_model(model, config)

# Directory for saved checkpoints
os.makedirs("top_models", exist_ok=True)
top_models = []

def collate_fn(batch):
    # If the images in the batch are already loaded PIL Images, only convert; otherwise open from the path
    images = [item['image'].convert("RGB") if isinstance(item['image'], Image.Image)
              else Image.open(item['image']).convert("RGB") for item in batch]
    labels = [item['label'] for item in batch]
    inputs = processor(images=images, return_tensors="pt", padding=True)
    inputs['labels'] = torch.tensor(labels, dtype=torch.long)
    return inputs

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn, num_workers=4)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4)

# Evaluation function
def evaluate(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in data_loader:
            inputs = {k: v.to(device) for k, v in batch.items() if k != "labels"}
            labels = batch['labels'].to(device)
            outputs = model.get_image_features(pixel_values=inputs["pixel_values"])
            # predictions for the classification loss:
            # pass the features through the text_projection layer instead of computing logits directly
            logits = model.text_projection(outputs)
            _, preds = torch.max(logits, 1)
            for pred, label in zip(preds, labels):
                if pred.item() in test_valid_label_indices:
                    if pred.item() == label.item():
                        correct += 1
                total += 1
    accuracy = 100 * correct / total
    return accuracy

# Checkpoint saving function
def save_top_models(epoch, accuracy, model, top_models):
    model_filename = f"model_epoch_{epoch + 1}_accuracy_{accuracy:.2f}.pth"
    model_path = os.path.join("top_models", model_filename)
    top_models.append((accuracy, model_path))
    top_models = sorted(top_models, key=lambda x: x[0], reverse=True)[:10]
    torch.save(model.state_dict(), model_path)
    print("\nTop 10 Models (by accuracy):")
    for i, (acc, path) in enumerate(top_models, 1):
        print(f"Rank {i}: Accuracy = {acc:.2f}%, Model Path = {path}")
    return top_models

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        optimizer.zero_grad()
        inputs = {k: v.to(device) for k, v in batch.items() if k != "labels"}
        labels = batch['labels'].to(device)
        outputs = model.get_image_features(pixel_values=inputs["pixel_values"])
        logits = model.text_projection(outputs)  # pass through the text_projection layer instead of computing directly
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")
    test_accuracy = evaluate(model, test_loader)
    print(f"Test Accuracy after Epoch {epoch+1}: {test_accuracy:.2f}%")
    top_models = save_top_models(epoch, test_accuracy, model, top_models)

print("Finished Training")
model_epoch_85_accuracy_58.84.pth (591139.3KB)