There is a model from OpenAI called CLIP ViT, and it is said to be well suited to few-shot learning.
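For context, the standard zero-shot recipe with CLIP needs no classifier head at all: it scores each image against a set of text prompts. A minimal sketch, where the prompt wording and emotion names are placeholders, not the actual labels of the dataset used below:

import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Placeholder prompts; substitute the real class names.
prompts = [f"a sketch of a {e} face" for e in
           ["happy", "sad", "angry", "surprised", "scared", "neutral"]]
image = Image.new("RGB", (224, 224))  # dummy image; replace with a real one

inputs = processor(text=prompts, images=image, return_tensors="pt", padding=True)
with torch.no_grad():
    outputs = model(**inputs)
probs = outputs.logits_per_image.softmax(dim=-1)  # one probability per prompt
print(probs)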
First, running it without any additional training:
import torch
import os
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch.nn as nn
# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Model and processor setup
model_name = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(model_name)
model = CLIPModel.from_pretrained(model_name).to(device)
# Additional classifier head
model.classifier = nn.Linear(model.config.projection_dim, 6).to(device)  # 6 is the number of emotion classes
# Load the previously trained weights
model_weights_path = './top_models/model_epoch_85_accuracy_58.84.pth'
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)
# Apply LoRA
config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["visual_projection"]
)
model = get_peft_model(model, config)
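# Optional sanity check (an addition, not part of the original script): a
# peft-wrapped model can report how many parameters remain trainable
# after the LoRA adapter is attached.
# model.print_trainable_parameters()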
# Build the test data loader
class CLIPCustomDataset(Dataset):
    def __init__(self, dataset, processor):
        self.dataset = dataset
        self.processor = processor
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        inputs = self.processor(images=img, return_tensors="pt")  # preprocess the image only
        inputs = {k: v.squeeze() for k, v in inputs.items()}  # drop the batch dimension
        label = self.label_encoder.transform([label])[0]
        return inputs, torch.tensor(label, dtype=torch.long)
# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")
dataset_list = [dict(item) for item in dataset]
_, test_data = train_test_split(dataset_list, test_size=0.2, random_state=42)
# Build the dataset with CLIPCustomDataset
test_dataset = CLIPCustomDataset(test_data, processor=processor)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)
# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Testing"):
        images, labels = batch
        images = {k: v.to(device) for k, v in images.items()}
        labels = labels.to(device)
        # Extract image features, then classify
        image_features = model.get_image_features(**images)
        logits = model.classifier(image_features)  # pass through the classifier head
        _, predicted = torch.max(logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy: {accuracy:.2f}%')
Result:
Testing: 100%|██████████| 1/1 [00:00<00:00, 2.41it/s]
Final Test Accuracy: 10.00%
Without any training on this dataset, accuracy is 10.00%, no better than chance for a 6-class problem.
Running again after additional training:
import torch
import os
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
import requests
import io
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torchvision import transforms
# Load the CLIP model and processor
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_name = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(model_name)
model = CLIPModel.from_pretrained(model_name).to(device)
# Additional classifier head
model.classifier = nn.Linear(model.config.projection_dim, 6).to(device)  # 6 is the number of emotion classes
# Load the previously trained weights
model_weights_path = './top_models/model_epoch_85_accuracy_58.84.pth'
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)
# First LoRA config, applied to the model
config1 = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["visual_projection"]
)
model = get_peft_model(model, config1)
# Second LoRA config, applied on top of the first
config2 = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["visual_projection"]
)
model = get_peft_model(model, config2)
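# Note (added): calling get_peft_model on an already-wrapped PeftModel nests one
# adapter inside the other. If two independent adapters were intended, peft can
# also register them by name on a single wrapper, roughly as below (an
# assumption; check the installed peft version's API before relying on it):
# model.add_adapter("second", config2)
# model.set_adapter("second")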
# Preprocessing function (defined here but never used below; the CLIPProcessor
# already handles resizing and normalization)
def preprocess_image(image):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return transform(image)
# Define the custom dataset
class CustomDataset(Dataset):
    def __init__(self, dataset, processor):
        self.dataset = dataset
        self.processor = processor
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        inputs = self.processor(images=img, return_tensors="pt")  # preprocess the image only
        inputs = {k: v.squeeze() for k, v in inputs.items()}  # drop the batch dimension
        label = self.label_encoder.transform([label])[0]
        return inputs, torch.tensor(label, dtype=torch.long)
# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")
# Convert the dataset to a list and split into train/test sets
dataset_list = [dict(item) for item in dataset]
train_data, test_data = train_test_split(dataset_list, test_size=0.3, random_state=42)
# Build the data loaders
train_dataset = CustomDataset(train_data, processor=processor)
test_dataset = CustomDataset(test_data, processor=processor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)
# Optimizer and loss function
learning_rate = 0.001  # deliberately high learning rate
optimizer = Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
# Train the model
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = batch
        images = {k: v.to(device) for k, v in images.items()}
        labels = labels.to(device)
        optimizer.zero_grad()
        # Extract image features and produce logits
        image_features = model.get_image_features(**images)
        logits = model.classifier(image_features)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    epoch_accuracy = 100 * correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')
# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in test_loader:
        images, labels = batch
        images = {k: v.to(device) for k, v in images.items()}
        labels = labels.to(device)
        # Extract image features and produce logits
        image_features = model.get_image_features(**images)
        logits = model.classifier(image_features)
        _, predicted = torch.max(logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy after second LoRA tuning: {accuracy:.2f}%')
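If the tuned adapter is worth keeping, peft can persist just the LoRA weights rather than a full checkpoint. A minimal sketch, assuming the script above has finished; the ./lora_adapter path is a placeholder, and since the classifier head is a plain nn.Linear attached outside the adapter, it has to be saved separately:

model.save_pretrained("./lora_adapter")  # saves only the adapter weights, not the full CLIP model
torch.save(model.classifier.state_dict(), "./lora_adapter/classifier.pth")

# Reload later on top of a fresh base model:
from peft import PeftModel
base = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
base.classifier = nn.Linear(base.config.projection_dim, 6)
base.classifier.load_state_dict(torch.load("./lora_adapter/classifier.pth"))
tuned = PeftModel.from_pretrained(base, "./lora_adapter")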
Separately, the Selenium script used to crawl images from Google Images:
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
import urllib.request
import os
import random
chrome_options = Options()
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36"
)
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://www.google.co.kr/imghp")
search = input("Enter the search term: ")
elem = driver.find_element(By.NAME,"q")
elem.send_keys(search)
elem.send_keys(Keys.RETURN)
elem = driver.find_element(By.TAG_NAME, "body")
for i in range(60):
    elem.send_keys(Keys.PAGE_DOWN)
    time.sleep(random.uniform(1.0, 3.0))
#rso > div > div > div.wH6SXe.u32vCb > div > div > div:nth-child(2) > div.czzyk.XOEbc > h3 > a > div > div > div > g-img
images = driver.find_elements(By.CSS_SELECTOR,"#rso > div > div > div.wH6SXe.u32vCb > div > div > div > div.czzyk.XOEbc > h3 > a > div > div > div > g-img > img")
print('found images:', len(images))
random.shuffle(images)
base_dir = r"E:\Crawling_images"  # raw string so the backslash is not treated as an escape
folder_dir = os.path.join(base_dir, search)
os.makedirs(folder_dir, exist_ok=True)  # create the folder if it doesn't already exist
count = 1
max_retries = 3
for image in images:
    retries = 0
    while retries < max_retries:
        try:
            ActionChains(driver).move_to_element(image).click().perform()
            # image.click()
            time.sleep(random.uniform(2, 4))
            imgUrl = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, '//*[@id="Sva75c"]/div[2]/div[2]/div/div[2]/c-wiz/div/div[3]/div[1]/a/img'))
            ).get_attribute('src')
            if imgUrl and imgUrl.startswith("http"):
                # save into the folder created above (the original wrote to ./{search}/, which may not exist)
                urllib.request.urlretrieve(imgUrl, os.path.join(folder_dir, f"{search}_{count}.jpg"))
                print(f"Image saved: {search}_{count}.jpg")
                count += 1
                break
            else:
                print("Invalid URL, retrying...")
                retries += 1  # count invalid URLs as attempts too, otherwise this loop never terminates
        except Exception as e:
            print(f"retry {retries + 1}/{max_retries} failed. Error: {e}")
            retries += 1
driver.close()
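Crawled files are sometimes truncated or not real images at all. A small cleanup pass with Pillow, a sketch added here assuming the folder_dir from the script above, can remove anything that fails to parse before it reaches a dataset:

import os
from PIL import Image

def prune_broken_images(folder):
    # Delete any file in the folder that Pillow cannot parse as an image.
    for name in os.listdir(folder):
        path = os.path.join(folder, name)
        try:
            with Image.open(path) as img:
                img.verify()  # raises an exception on corrupt/truncated files
        except Exception:
            print(f"Removing unreadable file: {path}")
            os.remove(path)

prune_broken_images(folder_dir)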