5 changes: 3 additions & 2 deletions README.md
@@ -4,6 +4,7 @@ This is the official code and model release for [Shap-E: Generating Conditional

* See [Usage](#usage) for guidance on how to use this repository.
* See [Samples](#samples) for examples of what our text-conditional model can generate.
+* Try the web demo and API: [![Replicate](https://replicate.com/cjwbw/shap-e/badge)](https://replicate.com/cjwbw/shap-e)

# Samples

@@ -68,6 +69,6 @@ Install with `pip install -e .`.

To get started with examples, see the following notebooks:

-* [sample_text_to_3d.ipynb](shap_e/examples/sample_text_to_3d.ipynb) - sample a 3D model, conditioned on a text prompt
-* [sample_image_to_3d.ipynb](shap_e/examples/sample_image_to_3d.ipynb) - sample a 3D model, conditioned on an synthetic view image.
+* [sample_text_to_3d.ipynb](shap_e/examples/sample_text_to_3d.ipynb) - sample a 3D model, conditioned on a text prompt.
+* [sample_image_to_3d.ipynb](shap_e/examples/sample_image_to_3d.ipynb) - sample a 3D model, conditioned on a synthetic view image. To get the best results, remove the background from the input image.
* [encode_model.ipynb](shap_e/examples/encode_model.ipynb) - loads a 3D model or a trimesh, creates a batch of multiview renders and a point cloud, encodes them into a latent, and renders it back. For this to work, install Blender version 3.3.1 or higher, and set the environment variable `BLENDER_PATH` to the path of the Blender executable.
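
For orientation, the notebooks reduce to a handful of library calls. Below is a minimal text-to-3D sketch assembled from the same API that `predict.py` in this PR uses (the model names and sampler settings are mirrored from that file; the prompt and output filename are purely illustrative, and a CUDA GPU is assumed since `use_fp16=True`):

```python
import torch

from shap_e.diffusion.sample import sample_latents
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images

device = torch.device("cuda")  # fp16 sampling below assumes a GPU

xm = load_model("transmitter", device=device)  # decodes latents into renderable assets
model = load_model("text300M", device=device)  # text-conditional latent diffusion model
diffusion = diffusion_from_config(load_config("diffusion"))

latents = sample_latents(
    batch_size=1,
    model=model,
    diffusion=diffusion,
    guidance_scale=15.0,
    model_kwargs=dict(texts=["a shark"]),
    progress=True,
    clip_denoised=True,
    use_fp16=True,
    use_karras=True,
    karras_steps=64,
    sigma_min=1e-3,
    sigma_max=160,
    s_churn=0,
)

# Render the sampled latent from a ring of cameras and save an animated GIF.
cameras = create_pan_cameras(64, device)
images = decode_latent_images(xm, latents[0], cameras, rendering_mode="nerf")
images[0].save("shark.gif", save_all=True, append_images=images[1:], duration=100, loop=0)
```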
23 changes: 23 additions & 0 deletions cog.yaml
@@ -0,0 +1,23 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
  gpu: true
  cuda: "11.6"
  python_version: "3.10"
  python_packages:
    - "filelock==3.12.0"
    - "Pillow==9.5.0"
    - "torch==2.0.1"
    - "fire==0.5.0"
    - "humanize==4.6.0"
    - "tqdm==4.65.0"
    - "matplotlib==3.7.1"
    - "scikit-image==0.20.0"
    - "scipy==1.10.1"
    - "numpy==1.24.3"
    - "blobfile==2.0.2"
    - "ipywidgets==8.0.6"
    - "clip @ git+https://github.com/openai/CLIP.git"

predict: "predict.py:Predictor"
121 changes: 121 additions & 0 deletions predict.py
@@ -0,0 +1,121 @@
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

import io
from typing import List
from PIL import Image
import torch
from cog import BasePredictor, Input, Path

from shap_e.diffusion.sample import sample_latents
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import (
create_pan_cameras,
decode_latent_images,
decode_latent_mesh,
)
from shap_e.util.image_util import load_image

WEIGHTS_DIR = "model_weights"


class Predictor(BasePredictor):
    def setup(self):
        """Load the models into memory to make running multiple predictions efficient"""
        self.device = "cuda"
        self.xm = load_model("transmitter", cache_dir=WEIGHTS_DIR, device=self.device)
        self.text_model = load_model(
            "text300M", cache_dir=WEIGHTS_DIR, device=self.device
        )
        self.image_model = load_model(
            "image300M", cache_dir=WEIGHTS_DIR, device=self.device
        )
        self.diffusion = diffusion_from_config(load_config("diffusion"))

    def predict(
        self,
        prompt: str = Input(
            description="Text prompt for generating the 3D model, ignored if an image is provided below",
            default=None,
        ),
        image: Path = Input(
            description="A synthetic view image for generating the 3D model. To get the best results, remove the background from the input image",
            default=None,
        ),
        guidance_scale: float = Input(
            description="Scale for classifier-free guidance", default=15.0
        ),
        batch_size: int = Input(description="Number of outputs", default=1),
        render_mode: str = Input(
            description="Choose a render mode", choices=["nerf", "stf"], default="nerf"
        ),
        render_size: int = Input(
            description="Size of the renderer; higher values take longer to render",
            default=128,
        ),
        save_mesh: bool = Input(
            description="Additionally save the latents as .obj meshes",
            default=False,
        ),
    ) -> List[Path]:
"""Run a single prediction on the model"""

assert prompt or image, "Please provide prompt of image"
model = self.image_model if image else self.text_model

if image:
model_kwargs = dict(images=[load_image(str(image))] * batch_size)
else:
model_kwargs = dict(texts=[prompt] * batch_size)

        latents = sample_latents(
            batch_size=batch_size,
            model=model,
            diffusion=self.diffusion,
            guidance_scale=guidance_scale,
            model_kwargs=model_kwargs,
            progress=True,
            clip_denoised=True,
            use_fp16=True,
            use_karras=True,
            karras_steps=64,
            sigma_min=1e-3,
            sigma_max=160,
            s_churn=0,
        )

        cameras = create_pan_cameras(render_size, self.device)
        output = []
        for i, latent in enumerate(latents):
            images = decode_latent_images(
                self.xm, latent, cameras, rendering_mode=render_mode
            )
            # Assemble the rendered views into a looping animated GIF in memory.
            writer = io.BytesIO()
            images[0].save(
                writer,
                format="GIF",
                save_all=True,
                append_images=images[1:],
                duration=100,
                loop=0,
            )

            filename = f"/tmp/out_{i}.gif"
            with open(filename, "wb") as f:
                f.write(writer.getbuffer())
            output.append(Path(filename))

        if save_mesh:
            for i, latent in enumerate(latents):
                filename = f"/tmp/mesh_{i}.obj"
                t = decode_latent_mesh(self.xm, latent).tri_mesh()
                with open(filename, "w") as f:
                    t.write_obj(f)
                output.append(Path(filename))

        return output
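
For completeness, a sketch of exercising this interface directly, outside of Cog (an assumption-laden smoke test: it needs a CUDA GPU, network access so `load_model` can populate `model_weights/`, and every argument passed explicitly, because the `Input(...)` defaults are only resolved when Cog invokes the class):

```python
# Hypothetical local smoke test; in production Cog calls Predictor via `cog predict`.
from predict import Predictor

predictor = Predictor()
predictor.setup()  # loads transmitter, text300M, image300M, and the diffusion config

paths = predictor.predict(
    prompt="a chair that looks like an avocado",
    image=None,          # text-conditional path; pass a Path to use image300M instead
    guidance_scale=15.0,
    batch_size=1,
    render_mode="nerf",
    render_size=128,
    save_mesh=True,
)
print([str(p) for p in paths])  # e.g. ['/tmp/out_0.gif', '/tmp/mesh_0.obj']
```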