IP-Adapter用の画像を作成
まずは「yabalMixTrue25D」というモデルを使って女性の画像を作成しました。なんとなく作ったのですが手の描写が完璧すぎて驚きました。
# Render a single still image and save it to disk; the saved file name
# ("1girl.png") is what the follow-up AnimateDiff script loads as its
# IP-Adapter reference image.
from diffusers import AutoencoderKL, DiffusionPipeline, DPMSolverMultistepScheduler

# Local checkpoint locations and the output file.
BASE_MODEL_DIR = "model/yabalMixTrue25Dv5_ema"
VAE_FILE = "vae/vae-ft-mse-840000-ema-pruned.safetensors"
OUTPUT_FILE = "1girl.png"

# The VAE is loaded from a standalone file and handed to the pipeline.
vae = AutoencoderKL.from_single_file(VAE_FILE)

# custom_pipeline selects the "lpw_stable_diffusion" pipeline, which provides
# the text2img() entry point used below. No safety checker is attached.
pipe = DiffusionPipeline.from_pretrained(
    BASE_MODEL_DIR,
    custom_pipeline="lpw_stable_diffusion",
    vae=vae,
    safety_checker=None,
)

# Swap the default scheduler for DPM-Solver multistep (SDE DPM-Solver++ with
# Karras sigmas), reusing the configuration of the scheduler it replaces.
scheduler_overrides = {
    "algorithm_type": "sde-dpmsolver++",
    "use_karras_sigmas": True,
}
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config,
    **scheduler_overrides,
)
pipe.to("cuda")

prompt = (
    "(masterpiece:1.2), (absurdres:1.2), (best quality:1.2), "
    "(looking at viewer), shiny skin, pantyhose, cowboy shot, skirt, "
    "outdoors, day, countryside, hand on hip, (white|black theme), "
    "medium breasts, happy face"
)
negative = (
    "(worst quality), (low quality), (bad quality), (bad anatomy), "
    "(hat:1.2), (cap:1.2), greyscale"
)

# Portrait 512x768 generation; only the first returned image is kept.
result = pipe.text2img(
    prompt=prompt,
    negative_prompt=negative,
    width=512,
    height=768,
    max_embeddings_multiples=3,
)
still = result.images[0]
still.save(OUTPUT_FILE)
AnimateDiffの実行
# For some reason, using LCM-LoRA gave better results. It also shortens the
# generation time, so it kills two birds with one stone.
import torch
from diffusers import AnimateDiffPipeline, AutoencoderKL, LCMScheduler, MotionAdapter
from diffusers.utils import export_to_gif, load_image

# Tunable settings; the two weights are also embedded in the output file name.
lcm_weight = 1.0
ipadapter_scale = 0.6
seed = 10000000

# Local checkpoint locations.
model_id = "model/yabalMixTrue25Dv5_ema"
motion_adapter_dir = "animatediff-motion-adapter-v1-5-2"
vae_file = "vae/vae-ft-mse-840000-ema-pruned.safetensors"

# Build the AnimateDiff pipeline from the base model, the motion adapter and
# a VAE loaded from a standalone file.
adapter = MotionAdapter.from_pretrained(motion_adapter_dir)
vae = AutoencoderKL.from_single_file(vae_file)
pipe = AnimateDiffPipeline.from_pretrained(
    model_id,
    motion_adapter=adapter,
    vae=vae,
)

# Replace the scheduler with LCMScheduler, reusing the existing scheduler
# configuration but with a linear beta schedule.
pipe.scheduler = LCMScheduler.from_config(
    pipe.scheduler.config,
    beta_schedule="linear",
)

# Load the LCM-LoRA weights under the adapter name "lcm" and activate them
# with the configured weight.
pipe.load_lora_weights("lora/lcm-lora-sdv1-5", adapter_name="lcm")
pipe.set_adapters(["lcm"], adapter_weights=[lcm_weight])

# Load the IP-Adapter weights and set how strongly the reference image is
# applied.
pipe.load_ip_adapter(
    "IP-Adapter",
    subfolder="models",
    weight_name="ip-adapter-plus_sd15.safetensors",
)
pipe.set_ip_adapter_scale(ipadapter_scale)

# Reference image written by the text-to-image step.
ip_image = load_image("1girl.png")

# enable memory savings
pipe.enable_vae_slicing()
pipe.enable_model_cpu_offload()

positive_prompt = "a girl, walking, best quality, extremely detailed"
negative_prompt = (
    "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, "
    "fewer digits, cropped, worst quality, low quality"
)

# Seed the generator immediately before sampling so the run is repeatable.
generator = torch.manual_seed(seed)
output = pipe(
    prompt=positive_prompt,
    negative_prompt=negative_prompt,
    ip_adapter_image=ip_image,
    num_frames=16,
    guidance_scale=1.0,
    num_inference_steps=5,
    generator=generator,
    width=512,
    height=768,
)

# Export the frames of the first (only) generated clip as an animated GIF.
frames = output.frames[0]
export_to_gif(frames, f"adapter{ipadapter_scale}_lcm{lcm_weight}.gif")
結果
同一人物には見えませんがなんとなく特徴は継承しているかなと感じます。