2024/7/21 - ViT

Transfer learning ViT with PEFT

Dataset used: FastJobs/Visual_Emotional_Analysis (Hugging Face)
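The dataset has 800 labeled images across 8 emotion classes. A quick way to inspect the label names before training (a sketch, assuming the datasets library is installed):

from datasets import load_dataset

# Peek at the label set; per the test runs below, index 1 is contempt
# and index 7 is surprise.
ds = load_dataset("FastJobs/Visual_Emotional_Analysis")
print(ds["train"].features["label"].names)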
Model used: google/vit-base-patch16-224-in21k (Hugging Face)
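For orientation (a sketch, not part of the training run): the processor for this checkpoint normalizes every input to 3×224×224, which the ViT-Base/16 backbone then splits into 14×14 = 196 patches of 16×16 pixels.

from transformers import AutoImageProcessor
from PIL import Image
import numpy as np

# Any input resolution comes out as a 1x3x224x224 tensor.
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
dummy = Image.fromarray(np.zeros((300, 400, 3), dtype=np.uint8))
print(processor(images=dummy, return_tensors="pt")["pixel_values"].shape)
# torch.Size([1, 3, 224, 224])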
from datasets import load_dataset
from transformers import AutoImageProcessor, AutoModelForImageClassification, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
import torch
from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize,
    ToTensor,
)
from PIL import Image
import logging

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load dataset and build the label mappings
dataset = load_dataset("FastJobs/Visual_Emotional_Analysis")
labels = dataset["train"].features["label"].names
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for i, label in enumerate(labels)}

# Image processor and transformations
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
train_transforms = Compose(
    [
        RandomResizedCrop(image_processor.size["height"]),
        RandomHorizontalFlip(),
        ToTensor(),
        normalize,
    ]
)

def preprocess_train(example_batch):
    example_batch["pixel_values"] = [train_transforms(image.convert("RGB")) for image in example_batch["image"]]
    return {"pixel_values": example_batch["pixel_values"], "labels": example_batch["label"]}

# Preprocess train dataset
train_dataset = dataset["train"].map(preprocess_train, batched=True)

# LoRA configuration and model application
model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=len(labels),  # set the correct number of classes
    label2id=label2id,
    id2label=id2label,
)
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["attention.query", "attention.key", "attention.value", "intermediate.dense"],
    lora_dropout=0.1,
    bias="none",
    modules_to_save=["classifier"],
)
model = get_peft_model(model, lora_config)

# Training arguments without validation
training_args = TrainingArguments(
    output_dir="./results",
    save_strategy="epoch",
    learning_rate=5e-4,
    per_device_train_batch_size=4,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_steps=10,
    save_total_limit=2,
    load_best_model_at_end=False,
    logging_dir="./logs",
)

# Data collator definition
def collate_fn(examples):
    pixel_values = torch.stack([torch.tensor(example["pixel_values"]) for example in examples])
    labels = torch.tensor([example["labels"] for example in examples], dtype=torch.long)
    return {"pixel_values": pixel_values, "labels": labels}

# Trainer instance without validation
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=collate_fn,
)

# Training
trainer.train()

# Save the model and image processor locally
model.save_pretrained("./trained_model")
image_processor.save_pretrained("./trained_model")
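Before calling trainer.train(), it is worth confirming that LoRA actually froze the backbone; the PEFT-wrapped model exposes print_trainable_parameters() for exactly this. A sanity-check sketch (not part of the recorded run):

# Right after get_peft_model(): only the LoRA matrices and the
# modules_to_save (the classifier head) should remain trainable.
model.print_trainable_parameters()
# prints something like: trainable params: ... || all params: ... || trainable%: ...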
Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[400/400 03:46, Epoch 2/2]

Step    Training Loss
10      2.080800
20      2.068200
30      2.092600
40      2.059000
50      2.028400
60      1.947200
70      1.854000
80      1.811000
90      1.730300
100     1.693300
110     1.604800
120     1.507600
130     1.536400
140     1.460100
150     1.719700
160     1.417700
170     1.613800
180     1.611200
190     1.522200
200     1.502700
210     1.392300
220     1.323600
230     1.257000
240     1.249800
250     1.394300
260     1.299300
270     1.270400
280     1.374700
290     1.579900
300     1.415400
310     1.183400
320     1.261300
330     1.248900
340     1.101700
350     1.174200
360     1.209000
370     1.104300
380     1.265800
390     1.269100
400     1.173700
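The loss drops from about 2.08 to about 1.17 over two epochs, though noisily, which is expected with batch size 4 and no validation split. To plot the curve, the same numbers can be pulled from the Trainer's log history (a sketch; assumes matplotlib is available):

import matplotlib.pyplot as plt

# trainer.state.log_history holds the entries shown in the table above.
logs = [e for e in trainer.state.log_history if "loss" in e]
plt.plot([e["step"] for e in logs], [e["loss"] for e in logs])
plt.xlabel("step")
plt.ylabel("training loss")
plt.show()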
['./trained_model/preprocessor_config.json']
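Because the model is PEFT-wrapped, save_pretrained() stores the LoRA adapter (adapter_config.json plus the adapter weights) rather than a full ViT checkpoint; the list above is just the processor file saved alongside it. A quick check of what actually landed on disk:

import os

# Expect the PEFT adapter files plus preprocessor_config.json.
print(sorted(os.listdir("./trained_model")))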

Test

from transformers import AutoImageProcessor, AutoModelForImageClassification
from PIL import Image
import torch
import os

# Log test - the logging library was not producing output here, so print statements are used instead!
print("Log test - if this message prints, logging is working correctly.")

# Load the model and image processor
try:
    print("Loading model...")
    model_path = "./trained_model"
    if not os.path.exists(model_path):
        print("Model path does not exist:", model_path)
        raise FileNotFoundError(f"Model path does not exist: {model_path}")
    model = AutoModelForImageClassification.from_pretrained(model_path, num_labels=8)  # omitting num_labels causes an error!!!
    image_processor = AutoImageProcessor.from_pretrained(model_path)
    print("Model and image processor loaded")
except Exception as e:
    print("Model load failed:", e)
    raise e

# Image file path
uploaded_image_path = "./content/amusement.jpg"

# Load and preprocess the image
try:
    print("Loading image...")
    if not os.path.exists(uploaded_image_path):
        print("Image file does not exist:", uploaded_image_path)
        raise FileNotFoundError(f"Image file does not exist: {uploaded_image_path}")
    image = Image.open(uploaded_image_path)
    encoding = image_processor(images=image.convert("RGB"), return_tensors="pt")
    print("Image preprocessing complete")
except Exception as e:
    print("Image load/preprocessing failed:", e)
    raise e

# Move the model and inputs to the same device
try:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")
    model.to(device)
    encoding = {k: v.to(device) for k, v in encoding.items()}
    print("encoding moved to device:", encoding)
except Exception as e:
    print("Device move failed:", e)
    raise e

# Run the prediction
try:
    print("Running model inference...")
    model.eval()
    with torch.no_grad():
        outputs = model(**encoding)
        logits = outputs.logits
    print("Prediction complete")
except Exception as e:
    print("Model inference failed:", e)
    raise e

# Print the predicted class
try:
    predicted_class_idx = logits.argmax(-1).item()
    predicted_class = model.config.id2label[predicted_class_idx]
    print("Predicted class:", predicted_class)
except Exception as e:
    print("Failed to print the predicted class:", e)
    raise e
[test image: ./content/amusement.jpg]
Log test - if this message prints, logging is working correctly.
Loading model...
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Model and image processor loaded
Loading image...
Image preprocessing complete
Device: cuda
encoding moved to device: {'pixel_values': tensor([[[[0.7882, 0.8275, 0.8510, ...]]]], device='cuda:0')}
Running model inference...
Prediction complete
Predicted class: LABEL_7
LABEL_7 : surprise
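The generic LABEL_7 shows up because the reloaded config only knows num_labels=8, not the label strings, so the mapping to "surprise" above had to be done by hand. One way to get readable predictions directly (a sketch that rebuilds the mapping from the dataset, exactly as the training script did):

from datasets import load_dataset

# Reattach human-readable class names to the reloaded model's config.
labels = load_dataset("FastJobs/Visual_Emotional_Analysis")["train"].features["label"].names
model.config.id2label = {i: name for i, name in enumerate(labels)}
model.config.label2id = {name: i for i, name in enumerate(labels)}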
 

[test image]
Log test - if this message prints, logging is working correctly.
Loading model...
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Model and image processor loaded
Loading image...
Image preprocessing complete
Device: cuda
encoding moved to device: {'pixel_values': tensor([[[[0.1373, 0.0980, 0.0510, ...]]]], device='cuda:0')}
Running model inference...
Prediction complete
Predicted class: LABEL_1
LABEL_1 : contempt
 

Final file upload!!!

ViT_LoRA.ipynb (99.6KB)
Google Colab