はじめに
こちらの拡張です。touch-sp.hatenablog.com
「FaceID」が使えるようになったので、「Plus」「Plus Face」「FaceID」の3つを同時に使用してみました。
命題
この女性をイラスト化して下さい。
結果
左から
「Plus」と「Plus Face」の組み合わせ
「Plus」と「FaceID」の組み合わせ
「Plus」と「Plus Face」と「FaceID」の組み合わせ
方法
最初にそれぞれのImage Embeddingsを計算して保存しました。
Image Embeddingsの作成、保存
「Plus」のImage Embeddings
# Compute the image embeddings for the "Plus" IP-Adapter from the style
# reference images and store them in "xl_plus.ipadpt" so that later scripts
# can load them with torch.load instead of re-encoding the images.
import torch
from diffusers import AutoPipelineForText2Image
from diffusers.utils import load_image
from pathlib import Path

pipe = AutoPipelineForText2Image.from_pretrained(
    "model/modernDisneyXL_v3",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.load_ip_adapter(
    "IP-Adapter",
    subfolder="sdxl_models",
    weight_name="ip-adapter-plus_sdxl_vit-h.safetensors",
    image_encoder_folder="models/image_encoder",
)
pipe.to("cuda")

# Every PNG found in the "style_ziggy" folder is used as a style reference.
style_refs = []
for png_path in Path("style_ziggy").glob("*.png"):
    style_refs.append(load_image(png_path.as_posix()))

# The reference images are passed as one inner list, i.e. as the input for
# the single adapter loaded above.
plus_embeds = pipe.prepare_ip_adapter_image_embeds(
    ip_adapter_image=[style_refs],
    ip_adapter_image_embeds=None,
    device="cuda",
    num_images_per_prompt=1,
    do_classifier_free_guidance=True,
)

torch.save(plus_embeds, "xl_plus.ipadpt")
「Plus Face」のImage Embeddings
# Compute the image embeddings for the "Plus Face" IP-Adapter from the face
# reference photos and store them in "xl_plusface.ipadpt" for later reuse.
import torch
from diffusers import AutoPipelineForText2Image
from diffusers.utils import load_image
from pathlib import Path

pipe = AutoPipelineForText2Image.from_pretrained(
    "model/modernDisneyXL_v3",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.load_ip_adapter(
    "IP-Adapter",
    subfolder="sdxl_models",
    weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors",
    image_encoder_folder="models/image_encoder",
)
pipe.to("cuda")

# Every JPG found in the "woman" folder is used as a face reference.
face_refs = []
for jpg_path in Path("woman").glob("*.jpg"):
    face_refs.append(load_image(jpg_path.as_posix()))

# The reference images are passed as one inner list, i.e. as the input for
# the single adapter loaded above.
plusface_embeds = pipe.prepare_ip_adapter_image_embeds(
    ip_adapter_image=[face_refs],
    ip_adapter_image_embeds=None,
    device="cuda",
    num_images_per_prompt=1,
    do_classifier_free_guidance=True,
)

torch.save(plusface_embeds, "xl_plusface.ipadpt")
「FaceID」のImage Embeddings
# Compute "FaceID" embeddings with insightface from the face reference photos
# and store them in "xl_faceid.ipadpt".
#
# Raises RuntimeError when no face is detected in any of the photos. An
# explicit raise is used instead of `assert` so that the check still runs
# under `python -O`.
import torch
from diffusers.utils import load_image
from pathlib import Path
from insightface.app import FaceAnalysis
import cv2
import numpy as np

face_images = [load_image(x.as_posix()) for x in Path("woman").glob("*.jpg")]

app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(640, 640))

ref_images_embeds = []
for im in face_images:
    # Swap the first and third colour channels before detection.
    # NOTE(review): presumably this turns the RGB image returned by load_image
    # into the BGR order insightface expects - confirm.
    image = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
    faces = app.get(image)
    if len(faces) == 0:
        # No face found in this photo; skip it.
        continue
    # Only the first detected face of each photo is used.
    embedding = torch.from_numpy(faces[0].normed_embedding)
    ref_images_embeds.append(embedding.unsqueeze(0))

print(f"{len(face_images)} face images, {len(ref_images_embeds)} faces detected")
if not ref_images_embeds:
    # torch.stack on an empty list would fail with an unrelated message.
    raise RuntimeError("face detection failed.")

# Stack the per-photo embeddings along a new leading axis, then add one more
# leading axis on top.
ref_images_embeds = torch.stack(ref_images_embeds, dim=0).unsqueeze(0)
# All-zero tensor of the same shape, concatenated in front of the real
# embeddings along dim 0.
neg_ref_images_embeds = torch.zeros_like(ref_images_embeds)
id_embeds = torch.cat([neg_ref_images_embeds, ref_images_embeds]).to(dtype=torch.float16, device="cuda")

# Saved wrapped in a one-element list.
torch.save([id_embeds], "xl_faceid.ipadpt")
実行
「Plus」と「Plus Face」の組み合わせ
# Generate one image with the "Plus" and "Plus Face" IP-Adapters combined,
# using the precomputed embeddings, and save it as "plusface.png".
import torch
from diffusers.utils import load_image
from diffusers import AutoPipelineForText2Image, DPMSolverMultistepScheduler

pipe = AutoPipelineForText2Image.from_pretrained(
    "model/modernDisneyXL_v3",
    torch_dtype=torch.float16,
    variant="fp16",
)

scheduler_options = dict(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    steps_offset=1,
    algorithm_type="sde-dpmsolver++",
    use_karras_sigmas=True,
)
pipe.scheduler = DPMSolverMultistepScheduler(**scheduler_options)
pipe.to("cuda")

adapter_weights = [
    "ip-adapter-plus_sdxl_vit-h.safetensors",
    "ip-adapter-plus-face_sdxl_vit-h.safetensors",
]
# No image encoder is loaded: the embeddings are read from disk below.
pipe.load_ip_adapter(
    "IP-Adapter",
    subfolder="sdxl_models",
    weight_name=adapter_weights,
    image_encoder_folder=None,
)
pipe.set_ip_adapter_scale([0.7, 0.8])

# Saved embedding files, listed in the same order as the adapter weights.
# Element 0 of each saved object is taken.
embed_files = ["xl_plus.ipadpt", "xl_plusface.ipadpt"]
adapter_embeds = [torch.load(path)[0] for path in embed_files]

# Fixed seed on a CPU generator.
seeded = torch.Generator(device="cpu").manual_seed(0)

result = pipe(
    prompt="a woman",
    ip_adapter_image_embeds=adapter_embeds,
    negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
    num_inference_steps=50,
    num_images_per_prompt=1,
    guidance_scale=7.5,
    width=1024,
    height=1024,
    generator=seeded,
)
result.images[0].save("plusface.png")
「Plus」と「FaceID」の組み合わせ
# Generate one image with the "Plus" and "FaceID" IP-Adapters combined,
# using the precomputed embeddings, and save it as "faceid.png".
import torch
from diffusers.utils import load_image
from diffusers import AutoPipelineForText2Image, DPMSolverMultistepScheduler

pipe = AutoPipelineForText2Image.from_pretrained(
    "model/modernDisneyXL_v3",
    torch_dtype=torch.float16,
    variant="fp16",
)

scheduler_options = dict(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    steps_offset=1,
    algorithm_type="sde-dpmsolver++",
    use_karras_sigmas=True,
)
pipe.scheduler = DPMSolverMultistepScheduler(**scheduler_options)
pipe.to("cuda")

# The three lists below are parallel: repository, subfolder and weight file
# for each adapter, in the same order.
adapter_repos = ["IP-Adapter", "IP-Adapter-FaceID"]
adapter_subfolders = ["sdxl_models", None]
adapter_weights = [
    "ip-adapter-plus_sdxl_vit-h.safetensors",
    "ip-adapter-faceid_sdxl.bin",
]
# No image encoder is loaded: the embeddings are read from disk below.
pipe.load_ip_adapter(
    adapter_repos,
    subfolder=adapter_subfolders,
    weight_name=adapter_weights,
    image_encoder_folder=None,
)
pipe.set_ip_adapter_scale([0.3, 0.8])

# Saved embedding files, listed in the same order as the adapters.
# Element 0 of each saved object is taken.
embed_files = ["xl_plus.ipadpt", "xl_faceid.ipadpt"]
adapter_embeds = [torch.load(path)[0] for path in embed_files]

# Fixed seed on a CPU generator.
seeded = torch.Generator(device="cpu").manual_seed(0)

result = pipe(
    prompt="a woman",
    ip_adapter_image_embeds=adapter_embeds,
    negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
    num_inference_steps=50,
    num_images_per_prompt=1,
    guidance_scale=7.5,
    width=1024,
    height=1024,
    generator=seeded,
)
result.images[0].save("faceid.png")
「Plus」と「Plus Face」と「FaceID」の組み合わせ
# Generate one image with the "Plus", "Plus Face" and "FaceID" IP-Adapters
# combined, using the precomputed embeddings, and save it as
# "plusface_faceid.png".
import torch
from diffusers.utils import load_image
from diffusers import AutoPipelineForText2Image, DPMSolverMultistepScheduler

pipe = AutoPipelineForText2Image.from_pretrained(
    "model/modernDisneyXL_v3",
    torch_dtype=torch.float16,
    variant="fp16",
)

scheduler_options = dict(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    steps_offset=1,
    algorithm_type="sde-dpmsolver++",
    use_karras_sigmas=True,
)
pipe.scheduler = DPMSolverMultistepScheduler(**scheduler_options)
pipe.to("cuda")

# The three lists below are parallel: repository, subfolder and weight file
# for each adapter, in the same order.
adapter_repos = ["IP-Adapter", "IP-Adapter", "IP-Adapter-FaceID"]
adapter_subfolders = ["sdxl_models", "sdxl_models", None]
adapter_weights = [
    "ip-adapter-plus_sdxl_vit-h.safetensors",
    "ip-adapter-plus-face_sdxl_vit-h.safetensors",
    "ip-adapter-faceid_sdxl.bin",
]
# No image encoder is loaded: the embeddings are read from disk below.
pipe.load_ip_adapter(
    adapter_repos,
    subfolder=adapter_subfolders,
    weight_name=adapter_weights,
    image_encoder_folder=None,
)
pipe.set_ip_adapter_scale([0.5, 0.5, 0.5])

# Saved embedding files, listed in the same order as the adapters.
# Element 0 of each saved object is taken.
embed_files = ["xl_plus.ipadpt", "xl_plusface.ipadpt", "xl_faceid.ipadpt"]
adapter_embeds = [torch.load(path)[0] for path in embed_files]

# Fixed seed on a CPU generator.
seeded = torch.Generator(device="cpu").manual_seed(0)

result = pipe(
    prompt="a woman",
    ip_adapter_image_embeds=adapter_embeds,
    negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
    num_inference_steps=50,
    num_images_per_prompt=1,
    guidance_scale=7.5,
    width=1024,
    height=1024,
    generator=seeded,
)
result.images[0].save("plusface_faceid.png")