はじめに
Image2Image + LoRAは以前にも試しています(過去記事: touch-sp.hatenablog.com)。
今回は「SDXL Caricaturized Lora」というLoRAを使って顔写真から似顔絵を作成してみます。
結果
一番左が元の顔写真で、右の3枚が今回作成した画像です。3年ほど前に「WarpGAN」を使って同じようなことをしました。
GANから拡散モデルに変わって比べ物にならないくらい画像の質が高くなっています。
touch-sp.hatenablog.com
Pythonスクリプト
# Turn a face photo into caricature-style images using SDXL image-to-image
# with a LoRA, writing one output image per (LoRA scale, strength) setting.
import os

import torch
from diffusers import AutoPipelineForImage2Image
from diffusers.utils import load_image

# Load the SDXL base model with float16 weights and move it to the GPU.
pipe = AutoPipelineForImage2Image.from_pretrained(
    "model/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")
pipe.load_lora_weights("lora/SDXL-Caricaturized-Lora.safetensors")

prompt = "young japanese woman, Caricaturized"
negative_prompt = "worst quality, low quality"

# The input photo is resized to 1024x1024 before being handed to the pipeline.
source_photo = load_image("seed10000.png")
init_image = source_photo.resize((1024, 1024))

lora_scale_list = [1.0]
strength_list = [0.5, 0.6, 0.7]
seed = 20000

output_dir = "results"
os.makedirs(output_dir, exist_ok=True)

# Enumerate every combination up front; the order matches a nested loop with
# the LoRA scale on the outside and the strength on the inside.
settings = [
    (lora_scale, strength)
    for lora_scale in lora_scale_list
    for strength in strength_list
]

for lora_scale, strength in settings:
    # Re-seed before every run so each setting starts from the same seed.
    generator = torch.manual_seed(seed)
    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=init_image,
        generator=generator,
        strength=strength,
        cross_attention_kwargs={"scale": lora_scale},
    )
    image = result.images[0]
    output_path = os.path.join(
        output_dir, f"scale{lora_scale}_strength{strength}.png"
    )
    image.save(output_path)
Appendix 1(顔写真の作り方)
Civitaiからダウンロードさせて頂いた「firsttunnerxl_v10.safetensors」を使っています。
# Generate a portrait with an SDXL single-file checkpoint, using Compel to
# build the prompt embeddings, and save it as "seed<seed>.png" in a folder
# named after the checkpoint.
import os

import torch
from compel import Compel, ReturnedEmbeddingsType
from diffusers import DPMSolverMultistepScheduler, StableDiffusionXLPipeline

model_id = "firsttunnerxl_v10.safetensors"

# Load the checkpoint from a single .safetensors file with float16 weights.
pipe = StableDiffusionXLPipeline.from_single_file(
    f"safetensors/{model_id}",
    extract_ema=True,
    variant="fp16",
    torch_dtype=torch.float16,
)

# Replace the default scheduler with SDE DPM-Solver++ using Karras sigmas,
# reusing the configuration of the scheduler that came with the checkpoint.
scheduler_options = {
    "algorithm_type": "sde-dpmsolver++",
    "use_karras_sigmas": True,
}
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, **scheduler_options
)
pipe.to("cuda")

# Compel is given both SDXL tokenizers/text encoders; the pooled output is
# requested from the second encoder only.
compel_options = {
    "tokenizer": [pipe.tokenizer, pipe.tokenizer_2],
    "text_encoder": [pipe.text_encoder, pipe.text_encoder_2],
    "returned_embeddings_type": (
        ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED
    ),
    "requires_pooled": [False, True],
}
base_compel = Compel(**compel_options)

# The "+" suffixes are Compel prompt-weighting syntax (see the Compel docs).
prompt = "portrait of beautiful japanese woman with smile, 35yo+++, 8k, (best quality)+, masterpiece, photo-realistic, focus, professional lighting"
negative_prompt = "(worst quality)++, (low quality)++"

conditioning, pooled = base_compel(prompt)
negative_conditioning, negative_pooled = base_compel(negative_prompt)

# Output folder is the checkpoint file name without its extension.
save_folder, _extension = os.path.splitext(model_id)
os.makedirs(save_folder, exist_ok=True)

seed = 10000
generator = torch.manual_seed(seed)

result = pipe(
    prompt_embeds=conditioning,
    pooled_prompt_embeds=pooled,
    negative_prompt_embeds=negative_conditioning,
    negative_pooled_prompt_embeds=negative_pooled,
    generator=generator,
    num_inference_steps=30,
)
image = result.images[0]

output_path = os.path.join(save_folder, f"seed{seed}.png")
image.save(output_path)