2024 UGRP / Member Page / 권태완
2024/11/19 - testing

Tags: clip_vit

  • OpenAI released a model called CLIP ViT. Using this model is said to be advantageous for few-shot learning.
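For context, this is a minimal sketch of how CLIP can be used zero-shot, which is the property that makes it attractive in few-shot settings: the image is scored against text prompts instead of a trained classifier head. The emotion labels, prompt template, and image path below are placeholders for illustration, not the actual classes or files of the UGRP sketch dataset.

import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

model_name = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(model_name)
model = CLIPModel.from_pretrained(model_name)

# Placeholder label set and image path -- swap in the real dataset classes and images
candidate_labels = ["happy", "sad", "angry", "surprised", "scared", "neutral"]
prompts = [f"a sketch of a {label} face" for label in candidate_labels]
image = Image.open("example_sketch.png")

inputs = processor(text=prompts, images=image, return_tensors="pt", padding=True)
with torch.no_grad():
    outputs = model(**inputs)

# logits_per_image holds image-text similarity scores; softmax turns them into class probabilities
probs = outputs.logits_per_image.softmax(dim=-1)
print(candidate_labels[probs.argmax().item()])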

Run without additional training

import torch
import os
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch.nn as nn

# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model and processor setup
model_name = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(model_name)
model = CLIPModel.from_pretrained(model_name).to(device)

# Extra classifier layer (6 = number of emotion classes)
model.classifier = nn.Linear(model.config.projection_dim, 6).to(device)

# Load weights
model_weights_path = './top_models/model_epoch_85_accuracy_58.84.pth'
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)

# Apply LoRA
config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["visual_projection"]
)
model = get_peft_model(model, config)

# Dataset wrapper for the test data loader
class CLIPCustomDataset(Dataset):
    def __init__(self, dataset, processor):
        self.dataset = dataset
        self.processor = processor
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        inputs = self.processor(images=img, return_tensors="pt")  # preprocess the image only
        inputs = {k: v.squeeze() for k, v in inputs.items()}  # drop the batch dimension
        label = self.label_encoder.transform([label])[0]
        return inputs, torch.tensor(label, dtype=torch.long)

# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")
dataset_list = [dict(item) for item in dataset]
_, test_data = train_test_split(dataset_list, test_size=0.2, random_state=42)

# Build the test set with CLIPCustomDataset
test_dataset = CLIPCustomDataset(test_data, processor=processor)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Testing"):
        images, labels = batch
        images = {k: v.to(device) for k, v in images.items()}
        labels = labels.to(device)

        # Extract image features, then classify
        image_features = model.get_image_features(**images)
        logits = model.classifier(image_features)  # pass through the classifier head
        _, predicted = torch.max(logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy: {accuracy:.2f}%')
result
Testing: 100%|██████████| 1/1 [00:00<00:00, 2.41it/s]
Final Test Accuracy: 10.00%

Run after additional training

import torch
import os
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from peft import get_peft_model, LoraConfig
import requests
import io
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torchvision import transforms

# Load the CLIP model and processor
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_name = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(model_name)
model = CLIPModel.from_pretrained(model_name).to(device)

# Extra classifier layer (6 = number of emotion classes)
model.classifier = nn.Linear(model.config.projection_dim, 6).to(device)

# Load weights
model_weights_path = './top_models/model_epoch_85_accuracy_58.84.pth'
model.load_state_dict(torch.load(model_weights_path, map_location='cpu'), strict=False)

# First LoRA configuration and application
config1 = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["visual_projection"]
)
model = get_peft_model(model, config1)

# Second LoRA configuration and application
config2 = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["visual_projection"]
)
model = get_peft_model(model, config2)

# Preprocessing function (defined here but unused below; the CLIPProcessor handles preprocessing)
def preprocess_image(image):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    return transform(image)

# Custom Dataset definition
class CustomDataset(Dataset):
    def __init__(self, dataset, processor):
        self.dataset = dataset
        self.processor = processor
        self.label_encoder = LabelEncoder()
        labels = [item['label'] for item in dataset]
        self.label_encoder.fit(labels)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        img = item['image']
        label = item['label']
        inputs = self.processor(images=img, return_tensors="pt")  # preprocess the image only
        inputs = {k: v.squeeze() for k, v in inputs.items()}  # drop the batch dimension
        label = self.label_encoder.transform([label])[0]
        return inputs, torch.tensor(label, dtype=torch.long)

# Prepare the dataset
dataset = load_dataset("JANGJIWON/UGRP_sketchset_textbook", split="train")

# Convert to a list and split into train/test sets
dataset_list = [dict(item) for item in dataset]
train_data, test_data = train_test_split(dataset_list, test_size=0.3, random_state=42)

# Data loaders
train_dataset = CustomDataset(train_data, processor=processor)
test_dataset = CustomDataset(test_data, processor=processor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

# Optimizer and loss function
learning_rate = 0.001  # deliberately high learning rate
optimizer = Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Train the model
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = batch
        images = {k: v.to(device) for k, v in images.items()}
        labels = labels.to(device)

        optimizer.zero_grad()

        # Extract image features and compute logits
        image_features = model.get_image_features(**images)
        logits = model.classifier(image_features)

        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_accuracy = 100 * correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

# Run the test and compute accuracy
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in test_loader:
        images, labels = batch
        images = {k: v.to(device) for k, v in images.items()}
        labels = labels.to(device)

        # Extract image features and compute logits
        image_features = model.get_image_features(**images)
        logits = model.classifier(image_features)
        _, predicted = torch.max(logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Final Test Accuracy after second LoRA tuning: {accuracy:.2f}%')
result
Epoch 1/10: 100%|██████████| 3/3 [00:00<00:00, 4.21it/s] Epoch [1/10], Loss: 5.3525, Accuracy: 15.15%
Epoch 2/10: 100%|██████████| 3/3 [00:00<00:00, 4.18it/s] Epoch [2/10], Loss: 4.7298, Accuracy: 54.55%
Epoch 3/10: 100%|██████████| 3/3 [00:00<00:00, 3.82it/s] Epoch [3/10], Loss: 5.0558, Accuracy: 60.61%
Epoch 4/10: 100%|██████████| 3/3 [00:00<00:00, 5.20it/s] Epoch [4/10], Loss: 2.8268, Accuracy: 60.61%
Epoch 5/10: 100%|██████████| 3/3 [00:00<00:00, 4.85it/s] Epoch [5/10], Loss: 2.6705, Accuracy: 60.61%
Epoch 6/10: 100%|██████████| 3/3 [00:00<00:00, 4.20it/s] Epoch [6/10], Loss: 4.6043, Accuracy: 60.61%
Epoch 7/10: 100%|██████████| 3/3 [00:00<00:00, 4.51it/s] Epoch [7/10], Loss: 2.6707, Accuracy: 60.61%
Epoch 8/10: 100%|██████████| 3/3 [00:00<00:00, 4.23it/s] Epoch [8/10], Loss: 3.8665, Accuracy: 60.61%
Epoch 9/10: 100%|██████████| 3/3 [00:00<00:00, 5.11it/s] Epoch [9/10], Loss: 4.2060, Accuracy: 63.64%
Epoch 10/10: 100%|██████████| 3/3 [00:00<00:00, 4.13it/s] Epoch [10/10], Loss: 2.5665, Accuracy: 66.67%
Final Test Accuracy after second LoRA tuning: 13.33%
result 2 (same script with num_epochs = 30)
Epoch 1/30: 100%|██████████| 3/3 [00:00<00:00, 3.78it/s] Epoch [1/30], Loss: 5.6222, Accuracy: 9.09%
Epoch 2/30: 100%|██████████| 3/3 [00:00<00:00, 5.17it/s] Epoch [2/30], Loss: 4.9761, Accuracy: 15.15%
Epoch 3/30: 100%|██████████| 3/3 [00:00<00:00, 4.71it/s] Epoch [3/30], Loss: 3.9942, Accuracy: 57.58%
Epoch 4/30: 100%|██████████| 3/3 [00:00<00:00, 4.59it/s] Epoch [4/30], Loss: 4.6032, Accuracy: 60.61%
Epoch 5/30: 100%|██████████| 3/3 [00:00<00:00, 4.69it/s] Epoch [5/30], Loss: 2.7958, Accuracy: 60.61%
Epoch 6/30: 100%|██████████| 3/3 [00:00<00:00, 4.53it/s] Epoch [6/30], Loss: 4.7401, Accuracy: 60.61%
Epoch 7/30: 100%|██████████| 3/3 [00:00<00:00, 4.49it/s] Epoch [7/30], Loss: 2.7414, Accuracy: 60.61%
Epoch 8/30: 100%|██████████| 3/3 [00:00<00:00, 4.35it/s] Epoch [8/30], Loss: 3.8171, Accuracy: 63.64%
Epoch 9/30: 100%|██████████| 3/3 [00:00<00:00, 4.90it/s] Epoch [9/30], Loss: 2.3093, Accuracy: 63.64%
Epoch 10/30: 100%|██████████| 3/3 [00:00<00:00, 4.58it/s] Epoch [10/30], Loss: 2.2179, Accuracy: 63.64%
Epoch 11/30: 100%|██████████| 3/3 [00:00<00:00, 4.85it/s] Epoch [11/30], Loss: 2.9166, Accuracy: 69.70%
Epoch 12/30: 100%|██████████| 3/3 [00:00<00:00, 3.97it/s] Epoch [12/30], Loss: 3.3085, Accuracy: 72.73%
Epoch 13/30: 100%|██████████| 3/3 [00:00<00:00, 4.88it/s] Epoch [13/30], Loss: 1.8047, Accuracy: 72.73%
Epoch 14/30: 100%|██████████| 3/3 [00:00<00:00, 4.71it/s] Epoch [14/30], Loss: 1.5259, Accuracy: 75.76%
Epoch 15/30: 100%|██████████| 3/3 [00:00<00:00, 5.11it/s] Epoch [15/30], Loss: 1.5759, Accuracy: 75.76%
Epoch 16/30: 100%|██████████| 3/3 [00:00<00:00, 4.50it/s] Epoch [16/30], Loss: 1.4162, Accuracy: 75.76%
Epoch 17/30: 100%|██████████| 3/3 [00:00<00:00, 4.12it/s] Epoch [17/30], Loss: 3.4429, Accuracy: 75.76%
Epoch 18/30: 100%|██████████| 3/3 [00:00<00:00, 4.84it/s] Epoch [18/30], Loss: 1.1535, Accuracy: 75.76%
Epoch 19/30: 100%|██████████| 3/3 [00:00<00:00, 3.88it/s] Epoch [19/30], Loss: 1.0986, Accuracy: 87.88%
Epoch 20/30: 100%|██████████| 3/3 [00:00<00:00, 3.42it/s] Epoch [20/30], Loss: 1.1869, Accuracy: 87.88%
Epoch 21/30: 100%|██████████| 3/3 [00:00<00:00, 4.28it/s] Epoch [21/30], Loss: 1.7801, Accuracy: 90.91%
Epoch 22/30: 100%|██████████| 3/3 [00:00<00:00, 4.35it/s] Epoch [22/30], Loss: 0.8713, Accuracy: 90.91%
Epoch 23/30: 100%|██████████| 3/3 [00:00<00:00, 4.23it/s] Epoch [23/30], Loss: 0.8126, Accuracy: 84.85%
Epoch 24/30: 100%|██████████| 3/3 [00:00<00:00, 4.92it/s] Epoch [24/30], Loss: 0.7799, Accuracy: 84.85%
Epoch 25/30: 100%|██████████| 3/3 [00:00<00:00, 3.33it/s] Epoch [25/30], Loss: 2.7608, Accuracy: 84.85%
Epoch 26/30: 100%|██████████| 3/3 [00:00<00:00, 4.76it/s] Epoch [26/30], Loss: 0.6572, Accuracy: 84.85%
Epoch 27/30: 100%|██████████| 3/3 [00:00<00:00, 4.87it/s] Epoch [27/30], Loss: 0.6374, Accuracy: 90.91%
Epoch 28/30: 100%|██████████| 3/3 [00:00<00:00, 5.30it/s] Epoch [28/30], Loss: 0.5551, Accuracy: 90.91%
Epoch 29/30: 100%|██████████| 3/3 [00:00<00:00, 4.91it/s] Epoch [29/30], Loss: 0.5121, Accuracy: 93.94%
Epoch 30/30: 100%|██████████| 3/3 [00:00<00:00, 4.20it/s] Epoch [30/30], Loss: 0.5336, Accuracy: 96.97%
Final Test Accuracy after second LoRA tuning: 6.67%

Crawling

from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
import urllib.request
import os
import random

# Browser options: hide automation flags and set a desktop user agent
chrome_options = Options()
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36"
)
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://www.google.co.kr/imghp")

# Enter the search term and submit
search = input("Enter the search term: ")
elem = driver.find_element(By.NAME, "q")
elem.send_keys(search)
elem.send_keys(Keys.RETURN)

# Scroll down to load more thumbnails
elem = driver.find_element(By.TAG_NAME, "body")
for i in range(60):
    elem.send_keys(Keys.PAGE_DOWN)
    time.sleep(random.uniform(1.0, 3.0))

# #rso > div > div > div.wH6SXe.u32vCb > div > div > div:nth-child(2) > div.czzyk.XOEbc > h3 > a > div > div > div > g-img
images = driver.find_elements(
    By.CSS_SELECTOR,
    "#rso > div > div > div.wH6SXe.u32vCb > div > div > div > div.czzyk.XOEbc > h3 > a > div > div > div > g-img > img"
)
print('find images: ', len(images))
random.shuffle(images)

base_dir = r"E:\Crawling_images"
folder_dir = f"{base_dir}\\{search}"
os.makedirs(folder_dir, exist_ok=True)  # create the folder if it does not already exist

count = 1
max_retries = 3
for image in images:
    retries = 0
    while retries < max_retries:
        try:
            ActionChains(driver).move_to_element(image).click().perform()
            # image.click()
            time.sleep(random.uniform(2, 4))
            imgUrl = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, '//*[@id="Sva75c"]/div[2]/div[2]/div/div[2]/c-wiz/div/div[3]/div[1]/a/img'))
            ).get_attribute('src')
            if imgUrl and imgUrl.startswith("http"):
                # Save into the folder created above (the original wrote to ./{search}/, which may not exist)
                urllib.request.urlretrieve(imgUrl, f"{folder_dir}\\{search}_{count}.jpg")
                print(f"Image saved: {search}_{count}.jpg")
                count += 1
                break
            else:
                print("Invalid URL, retrying...")
                retries += 1  # count invalid URLs as failed attempts to avoid an infinite loop
        except Exception as e:
            print(f"retry {retries + 1}/{max_retries} failed. Error {e}")
            retries += 1

driver.close()
result