環境
Windows 11 CUDA 11.7 Python 3.11.5
Python環境構築
# Install PyTorch and torchvision from the CUDA 11.7 (cu117) wheel index.
pip install torch==2.0.1+cu117 torchvision==0.15.2+cu117 --index-url https://download.pytorch.org/whl/cu117
# Install MMCV using the OpenMMLab wheel listing for cu117 / torch2.0.0.
pip install mmcv==2.0.1 -f https://download.openmmlab.com/mmcv/dist/cu117/torch2.0.0/index.html
# Each remaining package is pinned and installed by its own command.
pip install openmim==0.3.9
pip install mmagic==1.1.0
pip install accelerate==0.23.0
pip install albumentations==1.3.1
pip install xformers==0.0.21
これが失敗するようなら以下を実行してみて下さい。
# Fallback step: upgrade setuptools and wheel, to be tried when the package installation fails.
pip install -U setuptools wheel
モデルのダウンロード
以下のようなファイル構造になるようにします。
models
├─DreamBooth_LoRA
│      toonyou_beta6.safetensors
│
├─Motion_Module
│      mm_sd_v15_v2.ckpt
│
└─StableDiffusion
    └─stable-diffusion-v1-5
Configファイルの準備
# MMagic model config for AnimateDiff; save this file as "animatediff_config.py".

# Local directory of the Stable Diffusion v1.5 weights. Despite the "_url"
# suffix this is a filesystem path; the vae / unet / text_encoder entries
# below each read from a subfolder of it.
stable_diffusion_v15_url = "./models/StableDiffusion/stable-diffusion-v1-5"

# Root directory of the downloaded models. Paths below are built by plain
# string concatenation, so the trailing slash is required.
models_path = './models/'

# Scheduler settings; the same dict object is used for both `scheduler` and
# `test_scheduler` in the model config.
diffusion_scheduler = dict(
    type='DDIMScheduler',
    beta_end=0.012,
    beta_schedule='linear',
    beta_start=0.00085,
    num_train_timesteps=1000,
    prediction_type='epsilon',
    set_alpha_to_one=True,
    clip_sample=False,
    thresholding=False,
    steps_offset=1
)

model = dict(
    type='AnimateDiff',
    vae=dict(
        type='AutoencoderKL',
        from_pretrained=stable_diffusion_v15_url,
        subfolder='vae'),
    unet=dict(
        type='UNet3DConditionMotionModel',
        unet_use_cross_frame_attention=False,
        unet_use_temporal_attention=False,
        use_motion_module=True,
        motion_module_resolutions=[1, 2, 4, 8],
        motion_module_mid_block=True,
        motion_module_decoder_only=False,
        motion_module_type='Vanilla',
        motion_module_kwargs=dict(
            num_attention_heads=8,
            num_transformer_block=1,
            attention_block_types=['Temporal_Self', 'Temporal_Self'],
            temporal_position_encoding=True,
            temporal_position_encoding_max_len=32,
            temporal_attention_dim_div=1
        ),
        subfolder='unet',
        from_pretrained=stable_diffusion_v15_url
    ),
    text_encoder=dict(
        type='ClipWrapper',
        clip_type='huggingface',
        pretrained_model_name_or_path=stable_diffusion_v15_url,
        subfolder='text_encoder'
    ),
    tokenizer=stable_diffusion_v15_url,
    scheduler=diffusion_scheduler,
    test_scheduler=diffusion_scheduler,
    data_preprocessor=dict(type='DataPreprocessor'),
    # Motion module checkpoint, expected under models/Motion_Module/.
    motion_module_cfg=dict(path=models_path + 'Motion_Module/mm_sd_v15_v2.ckpt'),
    # DreamBooth/LoRA weights; the inference script also reads `type` to name
    # its output directory.
    dream_booth_lora_cfg=dict(
        type='toonyou',
        path=models_path + 'DreamBooth_LoRA/toonyou_beta6.safetensors'
    )
)
実行
"""Generate AnimateDiff sample GIFs with MMagic, one GIF per random seed.

The model is built from "animatediff_config.py" and moved to the GPU. Each
result is written to ``samples/<dream_booth_lora type>-<timestamp>/<index>.gif``.
"""
import datetime
import os

from mmengine import Config
from mmagic.models.editors.animatediff import save_videos_grid
from mmagic.registry import MODELS
from mmagic.utils import register_all_modules

PROMPT = "a girl, dancing, blue denim, white plain t-shirt, best quality, extremely detailed"
NEGATIVE_PROMPT = "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"
RANDOM_SEEDS = [1000000000, 1500000000, 2000000000, 2500000000, 3000000000]


def main():
    """Build the AnimateDiff model and render one GIF for every seed."""
    # The same prompt pair is reused for every seed; deriving the list lengths
    # from RANDOM_SEEDS keeps the three sequences in step.
    prompts = [PROMPT] * len(RANDOM_SEEDS)
    negative_prompts = [NEGATIVE_PROMPT] * len(RANDOM_SEEDS)

    register_all_modules()

    cfg = Config.fromfile("animatediff_config.py")
    animatediff = MODELS.build(cfg.model).cuda()

    # Timestamped output directory so repeated runs do not overwrite results.
    time_str = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
    savedir = f"samples/{cfg.model['dream_booth_lora_cfg']['type']}-{time_str}"
    os.makedirs(savedir, exist_ok=True)

    # strict=True raises instead of silently dropping items if the sequences
    # ever get out of sync. The loop variables use distinct names so they do
    # not shadow the lists being iterated.
    runs = zip(prompts, negative_prompts, RANDOM_SEEDS, strict=True)
    for prompt_idx, (prompt, negative_prompt, random_seed) in enumerate(runs):
        output_dict = animatediff.infer(
            prompt=prompt,
            negative_prompt=negative_prompt,
            video_length=16,
            height=512,
            width=512,
            seed=random_seed,
            num_inference_steps=40,
            guidance_scale=8.5
        )
        sample = output_dict['samples']
        save_videos_grid(sample, f"{savedir}/{prompt_idx}.gif")


if __name__ == "__main__":
    main()
結果
結果はGoogle Bloggerに載せています。support-touchsp.blogspot.com
関連記事
MMagicを使わずにAnimateDiffを使う場合も記事にしています。touch-sp.hatenablog.com