Portfolio

TISTORY · Stable Diffusion AI 모델을 다운로드할 수 있는 무료 웹사이트

Stable Diffusion AI 모델을 다운로드할 수 있는 무료 웹사이트

Stable Diffusion은 다양한 방식의 파인튜닝을 통해 자신만의 색감과 스타일로 이미지를 생성할 수 있습니다. 그렇게 특정한 방식으로 튜닝된 모델을 다운로드해서 적용하면 원하는 이미지를 보다 쉽게 만들 수 있습니다. 아래의 4가지 사이트는 다양한 방식으로 튜닝된 모델을 무료로 다운로드할 수 있는 사이트들입니다. 1. Hugging Face: https://huggingface.co/models?pipeline_tag=stable-diffusion&sort=downloads&training_task=&search=stable%20diffusion&page=1&type= Hugging Face는 머신 러닝에서 참조하는 오픈 소스를 이용해 최첨단 모델을 만들고 훈련하고 배포하는 웹사이트입니다. …

Dataset Re-Labeling

주요 감정 개수가 너무 많아 구분이 힘들다는 판단이 있어 새로운 감정 기준과 함께 기존의 데이터셋에 있던 데이터들을 병합하기로 했다.

Re-Labeling


import os
import shutil

# Emotion mapping: each original folder name -> the merged folder it joins.
emotion_map = {
    "amusement": "happiness",
    "excitement": "happiness",
    "contentment": "happiness",
    "awe": "surprise",
    "sadness": "sadness",
    "anger": "anger",
    "fear": "fear",
    "disgust": "disgust"
}

# Root directory that holds the original per-emotion folders.
source_path = "path/to/your/dataset"  # must be edited before running
# Root directory that receives the merged per-emotion folders.
target_path = "path/to/your/organized_dataset"

# Step 1: collect the .jpg paths of every source folder, grouped by the
# merged emotion they map to. Nothing is moved until every folder was listed.
file_data = {}
for folder_name, new_emotion in emotion_map.items():
    old_folder = os.path.join(source_path, folder_name)
    collected = file_data.setdefault(new_emotion, [])
    collected.extend(
        os.path.join(old_folder, filename)
        for filename in sorted(os.listdir(old_folder))
        if filename.endswith(".jpg")
    )

# Step 2: move the collected files into the merged folders, renaming them
# to "{emotion}_00000.jpg", "{emotion}_00001.jpg", ... in collection order.
for emotion, files in file_data.items():
    new_folder = os.path.join(target_path, emotion)
    os.makedirs(new_folder, exist_ok=True)  # create the folder on first use

    for index, src in enumerate(files):
        shutil.move(src, os.path.join(new_folder, f"{emotion}_{index:05}.jpg"))

print("폴더와 파일 정리가 완료되었습니다.")

결과 이미지

Create json file

이번에도 이 폴더 내에 JSON 파일을 만들어 준다.


import os
import json

# Base folder that contains one sub-folder of images per emotion.
base_folder_path = 'E:\\EmoSet-118K_MonetStyle\\image'

# Emotion names; an emotion's position in this list is its numeric label.
emotions = ['happiness', 'anger', 'surprise', 'disgust', 'fear', 'sadness']
emotion_to_label = {emotion: idx for idx, emotion in enumerate(emotions)}


def create_json_for_emotion(emotion, max_images=3000, base_folder=None):
    """Write ``{emotion}_data.json`` describing the images of one emotion.

    The images are read from ``<base_folder>/<emotion>``; file names ending
    in ``.jpg`` or ``.png`` (case-sensitive) are sorted and the first
    ``max_images`` of them are recorded. The JSON file is written directly
    into ``base_folder`` (next to the emotion folders, not inside them).

    Args:
        emotion: Name of the emotion folder; must be a key of
            ``emotion_to_label`` when the folder contains images.
        max_images: Maximum number of images to record. ``None`` records
            every image found.
        base_folder: Folder that holds the emotion sub-folders. Defaults to
            the module-level ``base_folder_path``.

    Returns:
        None. If the emotion folder is missing (or is not a directory) a
        message is printed and no file is written.

    Raises:
        KeyError: If ``emotion`` has no entry in ``emotion_to_label``.
    """
    if base_folder is None:
        base_folder = base_folder_path

    src_dir = os.path.join(base_folder, emotion)

    # isdir (rather than exists) so that a stray regular file with the
    # emotion's name is reported instead of crashing in os.listdir.
    if not os.path.isdir(src_dir):
        print(f"Directory {src_dir} does not exist.")
        return

    image_files = sorted(
        f for f in os.listdir(src_dir) if f.endswith(('.jpg', '.png'))
    )

    # One record per image: path relative to the dataset root, the emotion
    # name, its numeric label, and the file name without extension as id.
    json_data_list = [
        {
            "image": f"image/{emotion}/{filename}",
            "emotion": emotion,
            "label": emotion_to_label[emotion],
            "image_id": os.path.splitext(filename)[0]
        }
        for filename in image_files[:max_images]
    ]

    json_filename = os.path.join(base_folder, f"{emotion}_data.json")

    with open(json_filename, 'w', encoding='utf-8') as json_file:
        json.dump(json_data_list, json_file, indent=4)

    print(f"Created JSON for {emotion} with {len(json_data_list)} images.")


# Create the JSON file for every emotion.
for emotion in emotions:
    create_json_for_emotion(emotion)

print("JSON creation completed.")

['happiness', 'anger', 'surprise', 'disgust', 'fear', 'sadness'] 순서대로 0부터 5까지의 숫자 레이블이 부여된다.

Push_to_hub()


import json
import os

from datasets import Dataset, Features, Image, Value, concatenate_datasets, load_dataset
from huggingface_hub import HfApi

# Hugging Face access token, read from the HF_TOKEN environment variable so
# that no secret lives in the source. When the variable is unset this is
# None and push_to_hub uses the credentials saved by `huggingface-cli login`.
# NOTE(review): the token that used to be hard-coded on this line was
# published in plain text and should be revoked in the account settings.
api_token = os.environ.get("HF_TOKEN")
api = HfApi()

# Emotions to upload; one JSON metadata file is expected per emotion.
emotions = ['happiness', 'anger', 'surprise', 'disgust', 'fear', 'sadness']

# Target dataset repository on the Hugging Face Hub.
repo_id = "xodhks/EmoSet118K_MonetStyle"

# Local folder that contains the `image/` directory. The image paths stored
# in the JSON files ("image/<emotion>/<file>") are relative to this folder.
dataset_root = "E:/EmoSet-118K_MonetStyle"

# Schema shared by every per-emotion dataset (built once, not per loop).
features = Features({
    "image": Image(),            # image file, given here as a path
    "emotion": Value("string"),  # emotion name
    "label": Value("int32"),     # numeric label
    "image_id": Value("string")  # file name without extension
})

# Optionally start from the data already on the Hub ('train' split only).
# This stays best-effort, but the reason for a failure is now printed
# instead of being swallowed silently.
try:
    existing_dataset = load_dataset(repo_id, split='train')
except Exception as err:
    print(f"Could not load existing dataset {repo_id} ({err!r}); starting from scratch.")
    existing_dataset = None

parts = [] if existing_dataset is None else [existing_dataset]

for emotion in emotions:
    # The JSON generator writes "<root>/image/<emotion>_data.json"; the
    # per-emotion folder is accepted as well in case the file was moved.
    candidates = [
        f"{dataset_root}/image/{emotion}_data.json",
        f"{dataset_root}/image/{emotion}/{emotion}_data.json",
    ]
    json_path = next((p for p in candidates if os.path.isfile(p)), None)
    if json_path is None:
        raise FileNotFoundError(
            f"No metadata file for {emotion}; looked in: {', '.join(candidates)}"
        )

    with open(json_path, 'r', encoding='utf-8') as f:
        emotion_data = json.load(f)

    # Column-oriented data for Dataset.from_dict. os.path.join leaves an
    # already-absolute image path untouched and anchors a relative one at
    # dataset_root, so the script no longer depends on the working directory.
    data = {
        "image": [os.path.join(dataset_root, item["image"]) for item in emotion_data],
        "emotion": [item["emotion"] for item in emotion_data],
        "label": [item["label"] for item in emotion_data],
        "image_id": [item["image_id"] for item in emotion_data]
    }

    parts.append(Dataset.from_dict(data, features=features))

# Merge the existing data (if any) with all per-emotion datasets.
combined_dataset = concatenate_datasets(parts)

# Upload the merged dataset to the same repository.
combined_dataset.push_to_hub(
    repo_id=repo_id,
    token=api_token,
    max_shard_size="1GB"  # shard large datasets
)

print("All emotion datasets have been merged and uploaded successfully.")

결과

화면에 보이는 것은 anger까지지만, 실제로는 sadness까지 성공적으로 업로드되었다.

업로드 확인하기

정확하게 업로드 되었는지 확인해보자

2024/10/29 - dataset re-Labeling

Dataset Re-Labeling

Re-Labeling

Create json file

Push_to_hub()

업로드 확인하기