PC Environment
Windows 11
CUDA 11.8
Python 3.11
Python Environment Setup
pip install torch==2.0.1+cu118 --index-url https://download.pytorch.org/whl/cu118
pip install git+https://github.com/huggingface/diffusers
pip install accelerate transformers einops
pip install git+https://github.com/tencent-ailab/IP-Adapter.git
pip install onnxruntime-gpu insightface
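Before moving on, it can be worth confirming that both PyTorch and onnxruntime actually see the GPU. This is a minimal sanity-check sketch, assuming the packages installed above:

import torch
import onnxruntime

# Expected output: "2.0.1+cu118 True" and a provider list containing CUDAExecutionProvider.
print(torch.__version__, torch.cuda.is_available())
print(onnxruntime.get_available_providers())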
Results
Prepared Image
I prepared only one image. It was created with "fudukiMix_v2.0".
Generated Images
These were meant to depict the person in the photo above, but the resemblance is not very strong.
That said, the four generated images are consistent enough to pass as the same person.
Python Script
import os

import cv2
import torch
from insightface.app import FaceAnalysis
from diffusers import StableDiffusionXLPipeline, DPMSolverMultistepScheduler
from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDXL

# Extract the face-ID embedding from the single reference photo with insightface.
app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider'])
app.prepare(ctx_id=0, det_size=(640, 640))

image = cv2.imread("face.png")
faces = app.get(image)

faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)

base_model_path = "model/fudukiMix_v20"
ip_ckpt = "adapter/ip-adapter-faceid_sdxl.bin"
device = "cuda"

# DPM++ 2M SDE scheduler with Karras sigmas.
noise_scheduler = DPMSolverMultistepScheduler(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    steps_offset=1,
    algorithm_type="sde-dpmsolver++",
    use_karras_sigmas=True,
    final_sigmas_type="denoise_to_zero"
)

pipe = StableDiffusionXLPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    scheduler=noise_scheduler,
    variant="fp16"
)

# Wrap the SDXL pipeline with the FaceID IP-Adapter.
ip_model = IPAdapterFaceIDXL(pipe, ip_ckpt, device)

prompt = "japanese woman, close-up, natural lighting, wavy hair, from side, white sweater, dynamic posing, see-through curtain, bright room"
negative_prompt = "cleavage, illustration, 3d, 2d, painting, cartoons, sketch, watercolor, monotone, kimono, crossed eyes, strabismus"

os.makedirs("results", exist_ok=True)

# Two batches of two samples each (four images in total), each batch with its own seed.
for i in range(2):
    seed = 20240111 + 2024 * i
    images = ip_model.generate(
        prompt=prompt,
        negative_prompt=negative_prompt,
        faceid_embeds=faceid_embeds,
        num_samples=2,
        width=1024,
        height=1024,
        num_inference_steps=40,
        guidance_scale=7.5,
        seed=seed
    )
    for j in range(2):
        images[j].save(os.path.join("results", f"{seed}_{j}.png"))
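If the likeness from a single reference photo is weak, one parameter worth experimenting with is the adapter weight. The sketch below assumes that IPAdapterFaceIDXL.generate() accepts a scale argument (default 1.0), as in the upstream tencent-ailab/IP-Adapter repository, and reuses ip_model, faceid_embeds, and the prompts from the script above.

# Hedged tweak: raise the adapter weight to pull the output closer to the
# reference identity; much larger values tend to hurt prompt adherence.
images = ip_model.generate(
    prompt=prompt,
    negative_prompt=negative_prompt,
    faceid_embeds=faceid_embeds,
    scale=1.2,                 # adapter influence (the script above used the default 1.0)
    num_samples=2,
    width=1024,
    height=1024,
    num_inference_steps=40,
    guidance_scale=7.5,
    seed=20240111
)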
Supplement 1
For FaceID for SD 1.5, see this article: touch-sp.hatenablog.com
Comparing with the same face photo, my impression is that FaceID-PlusV2 for SD 1.5 gives better results.
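For reference, here is a minimal sketch of that SD 1.5 FaceID-PlusV2 route, following the usage shown in the tencent-ailab/IP-Adapter repository and the h94/IP-Adapter-FaceID model card. The base-model and checkpoint paths are placeholders; see the linked article for the actual settings used.

import cv2
import torch
from insightface.app import FaceAnalysis
from insightface.utils import face_align
from diffusers import StableDiffusionPipeline, DDIMScheduler
from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus

# The Plus variants use an aligned face crop in addition to the ID embedding.
app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider'])
app.prepare(ctx_id=0, det_size=(640, 640))
image = cv2.imread("face.png")
faces = app.get(image)
faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
face_image = face_align.norm_crop(image, landmark=faces[0].kps, image_size=224)

pipe = StableDiffusionPipeline.from_pretrained(
    "model/sd15_base",                                  # placeholder: any SD 1.5 checkpoint
    torch_dtype=torch.float16,
    scheduler=DDIMScheduler(
        num_train_timesteps=1000, beta_start=0.00085, beta_end=0.012,
        beta_schedule="scaled_linear", clip_sample=False,
        set_alpha_to_one=False, steps_offset=1
    )
)

ip_model = IPAdapterFaceIDPlus(
    pipe,
    "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",            # image encoder used by FaceID-Plus
    "adapter/ip-adapter-faceid-plusv2_sd15.bin",        # placeholder path to the V2 weights
    "cuda"
)

images = ip_model.generate(
    prompt="japanese woman, close-up, natural lighting",
    negative_prompt="illustration, 3d, cartoons, sketch",
    face_image=face_image,
    faceid_embeds=faceid_embeds,
    shortcut=True,        # True selects the PlusV2 behavior
    s_scale=1.0,
    num_samples=2,
    width=512,
    height=768,
    num_inference_steps=30,
    seed=2024
)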
Supplement 2
The script used to create the source face photo is shown below.

import torch
from diffusers import StableDiffusionXLKDiffusionPipeline

pipe = StableDiffusionXLKDiffusionPipeline.from_pretrained(
    "model/fudukiMix_v20",
    torch_dtype=torch.float16,
    variant="fp16"
)
# DPM++ 2M SDE sampler from k-diffusion.
pipe.set_scheduler("sample_dpmpp_2m_sde")
pipe.to("cuda")

prompt = "japanese woman, close-up, natural lighting, wavy hair, forehead, dark brown eyes, downward slanting eyes, pouty, white sweater, dynamic posing, see-through curtain, bright room"
n_prompt = "cleavage, illustration, 3d, 2d, painting, cartoons, sketch, watercolor, monotone, kimono, crossed eyes, strabismus"

generator = torch.manual_seed(10000)

image = pipe(
    prompt=prompt,
    negative_prompt=n_prompt,
    generator=generator,
    num_inference_steps=35,
    width=896,
    height=1152,
    use_karras_sigmas=True
).images[0]

image.save("kdiffusion.png")
Supplement 3
I have written a new article about IP-Adapter-FaceID-PlusV2: touch-sp.hatenablog.com