PhotoMaker と modernDisneyXL で遊んでみた - パソコン関連もろもろ

目的

1枚の顔写真からディズニー風画像を作成することが目的です。

結果

ただの image-to-image（img2img）ではないです。PhotoMaker で顔写真から人物の特徴を取り出し、それを条件にして新しい画像を生成しています。

左の画像から右の画像を作成しました。

よーく見ると、なんとなく特徴を捉えているような気もします。
どうでしょう？

Pythonスクリプト

import torch
from pathlib import Path
from diffusers.utils import load_image
from diffusers import DDIMScheduler
from photomaker.pipeline import PhotoMakerStableDiffusionXLPipeline

# Build the PhotoMaker SDXL pipeline from the modernDisneyXL checkpoint path,
# requesting the fp16 weight variant in safetensors format and bfloat16 as
# the torch dtype.
pipe = PhotoMakerStableDiffusionXLPipeline.from_pretrained(
    "model/modernDisneyXL_v3",
    variant="fp16",
    use_safetensors=True,
    torch_dtype=torch.bfloat16,
)
# Move the pipeline to the GPU.
pipe = pipe.to("cuda")

# Attach the PhotoMaker adapter weights; "img" is registered as the trigger
# word for the adapter.
pipe.load_photomaker_adapter(
    "weights",
    weight_name="photomaker-v1.bin",
    subfolder="",
    trigger_word="img",
)

# Swap the default scheduler for DDIM, reusing the existing scheduler config.
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

import random
# Folder holding the reference face image(s); outputs go to "<folder>/result".
input_folder_name = "face_one"
input_dir = Path(input_folder_name)

# Path.glob yields entries in a filesystem-dependent order, so sort the
# matches to feed the ID images to the pipeline in a reproducible order.
image_path_list = sorted(input_dir.glob("*.png"))
if not image_path_list:
    # Fail early with a clear message instead of handing the pipeline an
    # empty list of ID images (also covers a missing input folder).
    raise FileNotFoundError(f"No .png images found in '{input_folder_name}'")

input_id_images = [
    load_image(image_path.as_posix()) for image_path in image_path_list
]

result_dir = input_dir / "result"
result_dir.mkdir(parents=True, exist_ok=True)

# Alternative prompt: "modisn disney style, A photo of a woman img"
# The prompt contains "img", the trigger word passed to
# load_photomaker_adapter.
prompt = "A photo of a woman img, disney anime style"
negative_prompt = "worst quality, low quality, illustration, 3d, 2d, painting, sketch, grayscale"

# Sweep start_merge_step while keeping everything else fixed. Note that
# `steps` is the start_merge_step value, not the number of inference steps
# (which stays at 35).
for steps in [5, 10, 15, 20]:
    # Re-seed on every iteration so each run starts from the same random
    # state and only start_merge_step differs between the saved images.
    generator = torch.manual_seed(2024)
    gen_image = pipe(
        prompt=prompt,
        input_id_images=input_id_images,
        negative_prompt=negative_prompt,
        num_images_per_prompt=1,
        num_inference_steps=35,
        start_merge_step=steps,
        generator=generator,
    ).images[0]
    gen_image.save((result_dir / f"result_steps{steps}.png").as_posix())