
はじめに
比較的新しいCogView4-6Bという画像生成モデルでText2Imageをやってみました。
使用したPC
プロセッサ Intel(R) Core(TM) i7-12700H 実装 RAM 32.0 GB GPU RTX 3080 Laptop (VRAM 16GB)
結果
冒頭の画像が作成されました。
プロンプト
A vibrant cherry red sports car sits proudly under the gleaming sun, its polished exterior smooth and flawless, casting a mirror-like reflection. The car features a low, aerodynamic body, angular headlights that gaze forward like predatory eyes, and a set of black, high-gloss racing rims that contrast starkly with the red. A subtle hint of chrome embellishes the grille and exhaust, while the tinted windows suggest a luxurious and private interior. The scene conveys a sense of speed and elegance, the car appearing as if it's about to burst into a sprint along a coastal road, with the ocean's azure waves crashing in the background.
Google翻訳で翻訳
鮮やかなチェリーレッドのスポーツカーが、輝く太陽の下に誇らしげに佇んでいます。磨き上げられた外装は滑らかで完璧で、鏡のような反射をしています。車の特徴は、低く空力的なボディ、捕食者の目のように前方を見つめる角張ったヘッドライト、赤と鮮明なコントラストをなす光沢のある黒いレーシングホイールのセットです。グリルと排気管にはクロームのほのかなアクセントがあしらわれ、色付きの窓は豪華でプライベートなインテリアを暗示しています。このシーンはスピード感と優雅さを伝え、背景には海の青い波が打ち寄せ、車は海岸沿いの道路に沿って今にも疾走しそうな様子です。
いろいろな条件で作成した結果がこちらです。
VRAM 16GBを超えていますが何とか生成は可能でした。
Sorted by time taken
enable_model_cpu_offload() vae_enable_slicing() vae_enable_tiling() time: 271.83 sec memory: 16.48 GB
enable_model_cpu_offload() vae_enable_slicing() time: 287.74 sec memory: 16.48 GB
enable_model_cpu_offload() vae_enable_tiling() time: 292.59 sec memory: 16.48 GB
enable_model_cpu_offload() time: 318.51 sec memory: 16.48 GB
Pythonスクリプト
"""Benchmark CogView4-6B text-to-image generation under different memory options.

Every on/off combination of five pipeline options is enumerated; the
combinations accepted by ``main`` are run once each, and the elapsed time and
peak reserved CUDA memory are printed, sorted first by time and then by memory.
"""

import gc
import time
from itertools import product
from typing import Optional, Tuple, TypedDict

import torch
from diffusers import CogView4Pipeline


def reset_memory():
    """Collect garbage and clear the CUDA cache and memory statistics.

    Called before each run so the peak-memory figure recorded for a run is not
    carried over from the previous one.
    """
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.reset_accumulated_memory_stats()
    torch.cuda.reset_peak_memory_stats()


class ResultDict(TypedDict):
    """Measurements recorded for one successful generation run."""

    # Peak CUDA memory reserved during the run, in GiB (rounded to 2 places).
    memory: float
    # Seconds from just before the pipeline call to just after the image save.
    time_required: float
    # Newline-joined labels of the options that were enabled for the run.
    combination: str


def main(
    i: int, combination: Tuple[bool, bool, bool, bool, bool]
) -> Optional[ResultDict]:
    """Generate one image with the given option combination and measure it.

    Args:
        i: Index of the combination; used in the output file name
            ``cogview4_result{i}.png``.
        combination: Flags, in order: move the pipeline to CUDA, enable model
            CPU offload, enable sequential CPU offload, enable VAE slicing,
            enable VAE tiling.

    Returns:
        The measurements for the run, or ``None`` when the combination is
        skipped or the generation raises an exception.
    """
    use_cuda, model_offload, sequential_offload, vae_slicing, vae_tiling = (
        combination
    )

    # Only combinations that use model CPU offload, without a plain
    # ``to("cuda")`` and without sequential offload, are benchmarked.
    # The check runs before loading so skipped combinations cost nothing.
    if use_cuda or sequential_offload or not model_offload:
        return None

    pipe = CogView4Pipeline.from_pretrained(
        "THUDM/CogView4-6B",
        torch_dtype=torch.bfloat16
    )

    combination_list = []
    try:
        # The first and third branches are unreachable with the guard above;
        # they are kept so the guard can be relaxed without further changes.
        if use_cuda:
            pipe.to("cuda")
            combination_list.append("to(\"cuda\")")
        if model_offload:
            pipe.enable_model_cpu_offload()
            combination_list.append("enable_model_cpu_offload()")
        if sequential_offload:
            pipe.enable_sequential_cpu_offload()
            combination_list.append("enable_sequential_cpu_offload()")
        if vae_slicing:
            pipe.vae.enable_slicing()
            combination_list.append("vae_enable_slicing()")
        if vae_tiling:
            pipe.vae.enable_tiling()
            combination_list.append("vae_enable_tiling()")

        prompt = (
            "A vibrant cherry red sports car sits proudly under the gleaming sun, its polished exterior smooth and flawless, casting a mirror-like reflection. "
            "The car features a low, aerodynamic body, angular headlights that gaze forward like predatory eyes, and a set of black, high-gloss racing rims that contrast starkly with the red. "
            "A subtle hint of chrome embellishes the grille and exhaust, while the tinted windows suggest a luxurious and private interior. "
            "The scene conveys a sense of speed and elegance, the car appearing as if it's about to burst into a sprint along a coastal road, with the ocean's azure waves crashing in the background."
        )

        # Fixed seed so every combination generates from the same noise.
        generator = torch.manual_seed(20250305)
        start_time = time.time()
        image = pipe(
            prompt=prompt,
            guidance_scale=3.5,
            num_images_per_prompt=1,
            num_inference_steps=50,
            width=1024,
            height=1024,
            generator=generator
        ).images[0]
        image.save(f"cogview4_result{i}.png")
        # NOTE: the measured time includes writing the PNG to disk.
        end_time = time.time()

        result: ResultDict = {
            "memory": round(torch.cuda.max_memory_reserved() / 1024**3, 2),
            "time_required": round(end_time - start_time, 2),
            "combination": "\n".join(combination_list)
        }
    except Exception as e:
        # Best-effort benchmark: report the failing configuration and move on
        # to the next combination instead of aborting the whole sweep.
        print("\n".join(combination_list))
        print(e)
        return None

    print("succeee!!")
    print("\n".join(combination_list))
    print(f"saved image as cogview4_result{i}.png")
    return result


if __name__ == "__main__":
    result_list = []
    # All 2**5 on/off combinations; ``main`` returns None for skipped ones.
    for i, combination in enumerate(product([True, False], repeat=5)):
        reset_memory()
        result = main(i, combination)
        if result is not None:
            result_list.append(result)

    # Single quotes inside the f-strings keep the script valid before
    # Python 3.12, where reusing the outer quote type is a syntax error.
    print("Sorted by time taken")
    time_sorted_list = sorted(result_list, key=lambda x: x["time_required"])
    for time_sorted in time_sorted_list:
        print(time_sorted["combination"])
        print(f"time: {time_sorted['time_required']} sec")
        print(f"memory: {time_sorted['memory']} GB")
        print()

    print("Sorted by memory used")
    memory_sorted_list = sorted(result_list, key=lambda x: x["memory"])
    for memory_sorted in memory_sorted_list:
        print(memory_sorted["combination"])
        print(f"memory: {memory_sorted['memory']} GB")
        print(f"time: {memory_sorted['time_required']} sec")
        print()