はじめに
前回はテキストからの3D生成を行いました。
touch-sp.hatenablog.com
今回は1枚の画像から3D生成を行います。
環境
Ubuntu 22.04 on WSL2
CUDA 11.8
Python 3.10
導入
# PyTorch 2.0.1 / torchvision 0.15.2 wheels built for CUDA 11.8,
# fetched from the PyTorch cu118 wheel index.
pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118

# Additional Python packages.
pip install pyyaml
pip install ipywidgets

# PyTorch3D, installed directly from its GitHub repository.
pip install git+https://github.com/facebookresearch/pytorch3d.git

# Shap-E, cloned and installed in editable mode from the repository root.
git clone https://github.com/openai/shap-e
cd shap-e
pip install -e .
上記インストールで失敗する場合には最初に以下を実行すれば解決すると思います。
# Upgrade pip itself, then the packaging tools, before retrying the install.
python -m pip install --upgrade pip
pip install -U setuptools wheel
Pythonスクリプト
# Generate 3D latents from a single input image with Shap-E and save a
# rendered GIF per sample (result_0.gif, result_1.gif, ...).
import torch

from shap_e.diffusion.sample import sample_latents
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
from shap_e.util.image_util import load_image

# Use the GPU when one is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# `xm` is used below to decode sampled latents into rendered images;
# `model` is the network that `sample_latents` samples from.
xm = load_model('transmitter', device=device)
model = load_model('image300M', device=device)
diffusion = diffusion_from_config(load_config('diffusion'))

# Number of samples to generate from the same input image.
batch_size = 2
guidance_scale = 3.0

# To get the best result, you should remove the background and show only
# the object of interest to the model.
# NOTE: this relative path assumes the script is run from the root of the
# cloned shap-e repository.
image = load_image("shap_e/examples/example_data/corgi.png")

latents = sample_latents(
    batch_size=batch_size,
    model=model,
    diffusion=diffusion,
    guidance_scale=guidance_scale,
    # The same image is repeated once per sample in the batch.
    model_kwargs=dict(images=[image] * batch_size),
    progress=True,
    clip_denoised=True,
    use_fp16=True,
    use_karras=True,
    karras_steps=64,
    sigma_min=1e-3,
    sigma_max=160,
    s_churn=0,
)

render_mode = 'nerf'  # you can change this to 'stf' for mesh rendering
size = 256  # this is the size of the renders; higher values take longer to render.

cameras = create_pan_cameras(size, device)
for i, latent in enumerate(latents):
    images = decode_latent_images(xm, latent, cameras, rendering_mode=render_mode)
    # Write all rendered frames into one GIF: the first frame is the base
    # image and the remaining frames are appended after it.
    # NOTE(review): presumably PIL images, where duration is milliseconds
    # per frame and loop=0 repeats forever -- confirm against Pillow docs.
    images[0].save(
        f"result_{i}.gif",
        format="GIF",
        save_all=True,
        append_images=images[1:],
        duration=100,
        loop=0,
    )