There is a model from OpenAI called CLIP ViT, and it is said to be well suited to few-shot learning.
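For context, the standard zero-shot recipe with CLIP needs no classifier head at all: it scores each image against a set of text prompts. A minimal sketch, where the prompt wording and emotion names are placeholders, not the actual labels of the dataset used below:

import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Placeholder prompts; substitute the real class names.
prompts = [f"a sketch of a {e} face" for e in
           ["happy", "sad", "angry", "surprised", "scared", "neutral"]]
image = Image.new("RGB", (224, 224))  # dummy image; replace with a real one

inputs = processor(text=prompts, images=image, return_tensors="pt", padding=True)
with torch.no_grad():
    outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)  # one probability per prompt
print(probs)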
First, running it without any additional training:
import torch
import os
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch.nn as nn
# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Model and processor setup
model_name = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(model_name)
model = CLIPModel.from_pretrained(model_name).to(device)
# Additional classifier head
model.classifier = nn.Linear(model.config.projection_dim, 6).to(device)  # 6 is the number of emotion classes
# Load the previously trained weights
model_weights_path = './top_models/model_epoch_85_accuracy_58.84.pth'
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)
# Apply LoRA
config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["visual_projection"]
)
model = get_peft_model(model, config)
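# Optional sanity check (an addition, not part of the original script): a
# peft-wrapped model can report how many parameters remain trainable
# after the LoRA adapter is attached.
# model.print_trainable_parameters()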
# Build the test data loader
class CLIPCustomDataset(Dataset):
    def __init__(self, dataset, processor):
        self.dataset = dataset
        self.processor = processor
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        inputs = self.processor(images=img, return_tensors="pt")  # preprocess the image only
        inputs = {k: v.squeeze() for k, v in inputs.items()}  # drop the batch dimension
        label = self.label_encoder.transform([label])[0]
        return inputs, torch.tensor(label, dtype=torch.long)
# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")
dataset_list = [dict(item) for item in dataset]
_, test_data = train_test_split(dataset_list, test_size=0.2, random_state=42)
# Build the dataset with CLIPCustomDataset
test_dataset = CLIPCustomDataset(test_data, processor=processor)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)
# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Testing"):
        images, labels = batch
        images = {k: v.to(device) for k, v in images.items()}
        labels = labels.to(device)
        # Extract image features, then classify
        image_features = model.get_image_features(**images)
        logits = model.classifier(image_features)  # pass through the classifier head
        _, predicted = torch.max(logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy: {accuracy:.2f}%')
Result:
Testing: 100%|██████████| 1/1 [00:00<00:00, 2.41it/s]
Final Test Accuracy: 10.00%
Without any training on this dataset, accuracy is 10.00%, no better than chance for a 6-class problem.
Running again after additional training:
import torch
import os
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
import requests
import io
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torchvision import transforms
# Load the CLIP model and processor
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_name = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(model_name)
model = CLIPModel.from_pretrained(model_name).to(device)
# Additional classifier head
model.classifier = nn.Linear(model.config.projection_dim, 6).to(device)  # 6 is the number of emotion classes
# Load the previously trained weights
model_weights_path = './top_models/model_epoch_85_accuracy_58.84.pth'
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)
# First LoRA config, applied to the model
config1 = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["visual_projection"]
)
model = get_peft_model(model, config1)
# Second LoRA config, applied on top of the first
config2 = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["visual_projection"]
)
model = get_peft_model(model, config2)
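# Note (added): calling get_peft_model on an already-wrapped PeftModel nests one
# adapter inside the other. If two independent adapters were intended, peft can
# also register them by name on a single wrapper, roughly as below (an
# assumption; check the installed peft version's API before relying on it):
# model.add_adapter("second", config2)
# model.set_adapter("second")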
# Preprocessing function (defined here but never used below; the CLIPProcessor
# already handles resizing and normalization)
def preprocess_image(image):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return transform(image)
# Define the custom dataset
class CustomDataset(Dataset):
    def __init__(self, dataset, processor):
        self.dataset = dataset
        self.processor = processor
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        inputs = self.processor(images=img, return_tensors="pt")  # preprocess the image only
        inputs = {k: v.squeeze() for k, v in inputs.items()}  # drop the batch dimension
        label = self.label_encoder.transform([label])[0]
        return inputs, torch.tensor(label, dtype=torch.long)
# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")
# Convert the dataset to a list and split into train/test sets
dataset_list = [dict(item) for item in dataset]
train_data, test_data = train_test_split(dataset_list, test_size=0.3, random_state=42)
# Build the data loaders
train_dataset = CustomDataset(train_data, processor=processor)
test_dataset = CustomDataset(test_data, processor=processor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)
# Optimizer and loss function
learning_rate = 0.001  # deliberately high learning rate
optimizer = Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
# Train the model
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = batch
        images = {k: v.to(device) for k, v in images.items()}
        labels = labels.to(device)
        optimizer.zero_grad()
        # Extract image features and produce logits
        image_features = model.get_image_features(**images)
        logits = model.classifier(image_features)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    epoch_accuracy = 100 * correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')
# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in test_loader:
        images, labels = batch
        images = {k: v.to(device) for k, v in images.items()}
        labels = labels.to(device)
        # Extract image features and produce logits
        image_features = model.get_image_features(**images)
        logits = model.classifier(image_features)
        _, predicted = torch.max(logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy after second LoRA tuning: {accuracy:.2f}%')
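If the tuned adapter is worth keeping, peft can persist just the LoRA weights rather than a full checkpoint. A minimal sketch, assuming the script above has finished; the ./lora_adapter path is a placeholder, and since the classifier head is a plain nn.Linear attached outside the adapter, it has to be saved separately:

model.save_pretrained("./lora_adapter")  # saves only the adapter weights, not the full CLIP model
torch.save(model.classifier.state_dict(), "./lora_adapter/classifier.pth")

# Reload later on top of a fresh base model:
from peft import PeftModel
base = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
base.classifier = nn.Linear(base.config.projection_dim, 6)
base.classifier.load_state_dict(torch.load("./lora_adapter/classifier.pth"))
tuned = PeftModel.from_pretrained(base, "./lora_adapter")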
Separately, the Selenium script used to crawl images from Google Images:
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
import urllib.request
import os
import random
chrome_options = Options()
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36"
)
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://www.google.co.kr/imghp")
search = input("Enter the search term: ")
elem = driver.find_element(By.NAME,"q")
elem.send_keys(search)
elem.send_keys(Keys.RETURN)
elem = driver.find_element(By.TAG_NAME, "body")
for i in range(60):
    elem.send_keys(Keys.PAGE_DOWN)
    time.sleep(random.uniform(1.0, 3.0))
#rso > div > div > div.wH6SXe.u32vCb > div > div > div:nth-child(2) > div.czzyk.XOEbc > h3 > a > div > div > div > g-img
images = driver.find_elements(By.CSS_SELECTOR,"#rso > div > div > div.wH6SXe.u32vCb > div > div > div > div.czzyk.XOEbc > h3 > a > div > div > div > g-img > img")
print('found images:', len(images))
random.shuffle(images)
base_dir = r"E:\Crawling_images"  # raw string so the backslash is not treated as an escape
folder_dir = os.path.join(base_dir, search)
os.makedirs(folder_dir, exist_ok=True)  # create the folder if it doesn't already exist
count = 1
max_retries = 3
for image in images:
    retries = 0
    while retries < max_retries:
        try:
            ActionChains(driver).move_to_element(image).click().perform()
            # image.click()
            time.sleep(random.uniform(2, 4))
            imgUrl = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, '//*[@id="Sva75c"]/div[2]/div[2]/div/div[2]/c-wiz/div/div[3]/div[1]/a/img'))
            ).get_attribute('src')
            if imgUrl and imgUrl.startswith("http"):
                # save into the folder created above (the original wrote to ./{search}/, which may not exist)
                urllib.request.urlretrieve(imgUrl, os.path.join(folder_dir, f"{search}_{count}.jpg"))
                print(f"Image saved: {search}_{count}.jpg")
                count += 1
                break
            else:
                print("Invalid URL, retrying...")
                retries += 1  # count invalid URLs as attempts too, otherwise this loop never terminates
        except Exception as e:
            print(f"retry {retries + 1}/{max_retries} failed. Error: {e}")
            retries += 1
driver.close()
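Crawled files are sometimes truncated or not real images at all. A small cleanup pass with Pillow, a sketch added here assuming the folder_dir from the script above, can remove anything that fails to parse before it reaches a dataset:

import os
from PIL import Image

def prune_broken_images(folder):
    # Delete any file in the folder that Pillow cannot parse as an image.
    for name in os.listdir(folder):
        path = os.path.join(folder, name)
        try:
            with Image.open(path) as img:
                img.verify()  # raises an exception on corrupt/truncated files
        except Exception:
            print(f"Removing unreadable file: {path}")
            os.remove(path)

prune_broken_images(folder_dir)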