5 changes: 3 additions & 2 deletions README.md
@@ -4,6 +4,7 @@ This is the official code and model release for [Shap-E: Generating Conditional

* See [Usage](#usage) for guidance on how to use this repository.
* See [Samples](#samples) for examples of what our text-conditional model can generate.
+* Try the web demo and API: [![Replicate](https://replicate.com/cjwbw/shap-e/badge)](https://replicate.com/cjwbw/shap-e)

# Samples

@@ -68,6 +69,6 @@ Install with `pip install -e .`.

To get started with examples, see the following notebooks:

-* [sample_text_to_3d.ipynb](shap_e/examples/sample_text_to_3d.ipynb) - sample a 3D model, conditioned on a text prompt
-* [sample_image_to_3d.ipynb](shap_e/examples/sample_image_to_3d.ipynb) - sample a 3D model, conditioned on an synthetic view image.
+* [sample_text_to_3d.ipynb](shap_e/examples/sample_text_to_3d.ipynb) - sample a 3D model, conditioned on a text prompt.
+* [sample_image_to_3d.ipynb](shap_e/examples/sample_image_to_3d.ipynb) - sample a 3D model, conditioned on a synthetic view image. To get the best results, remove the background from the input image.
* [encode_model.ipynb](shap_e/examples/encode_model.ipynb) - loads a 3D model or a trimesh, creates a batch of multiview renders and a point cloud, encodes them into a latent, and renders it back. For this to work, install Blender version 3.3.1 or higher, and set the environment variable `BLENDER_PATH` to the path of the Blender executable.
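
For orientation, the notebooks reduce to a handful of library calls. Below is a minimal text-to-3D sketch assembled from the same API that `predict.py` in this PR uses (the model names and sampler settings are mirrored from that file; the prompt and output filename are purely illustrative, and a CUDA GPU is assumed since `use_fp16=True`):

```python
import torch

from shap_e.diffusion.sample import sample_latents
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images

device = torch.device("cuda")  # fp16 sampling below assumes a GPU

xm = load_model("transmitter", device=device)  # decodes latents into renderable assets
model = load_model("text300M", device=device)  # text-conditional latent diffusion model
diffusion = diffusion_from_config(load_config("diffusion"))

latents = sample_latents(
    batch_size=1,
    model=model,
    diffusion=diffusion,
    guidance_scale=15.0,
    model_kwargs=dict(texts=["a shark"]),
    progress=True,
    clip_denoised=True,
    use_fp16=True,
    use_karras=True,
    karras_steps=64,
    sigma_min=1e-3,
    sigma_max=160,
    s_churn=0,
)

# Render the sampled latent from a ring of cameras and save an animated GIF.
cameras = create_pan_cameras(64, device)
images = decode_latent_images(xm, latents[0], cameras, rendering_mode="nerf")
images[0].save("shark.gif", save_all=True, append_images=images[1:], duration=100, loop=0)
```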
23 changes: 23 additions & 0 deletions cog.yaml
@@ -0,0 +1,23 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
  gpu: true
  cuda: "11.6"
  python_version: "3.10"
  python_packages:
    - "filelock==3.12.0"
    - "Pillow==9.5.0"
    - "torch==2.0.1"
    - "fire==0.5.0"
    - "humanize==4.6.0"
    - "tqdm==4.65.0"
    - "matplotlib==3.7.1"
    - "scikit-image==0.20.0"
    - "scipy==1.10.1"
    - "numpy==1.24.3"
    - "blobfile==2.0.2"
    - "ipywidgets==8.0.6"
    - "clip @ git+https://github.com/openai/CLIP.git"

predict: "predict.py:Predictor"
121 changes: 121 additions & 0 deletions predict.py
@@ -0,0 +1,121 @@
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md

import io
from typing import List
from PIL import Image
import torch
from cog import BasePredictor, Input, Path

from shap_e.diffusion.sample import sample_latents
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
from shap_e.models.download import load_model, load_config
from shap_e.util.notebooks import (
create_pan_cameras,
decode_latent_images,
decode_latent_mesh,
)
from shap_e.util.image_util import load_image

WEIGHTS_DIR = "model_weights"


class Predictor(BasePredictor):
    def setup(self):
        """Load the models into memory to make running multiple predictions efficient"""
        self.device = "cuda"
        self.xm = load_model("transmitter", cache_dir=WEIGHTS_DIR, device=self.device)
        self.text_model = load_model(
            "text300M", cache_dir=WEIGHTS_DIR, device=self.device
        )
        self.image_model = load_model(
            "image300M", cache_dir=WEIGHTS_DIR, device=self.device
        )
        self.diffusion = diffusion_from_config(load_config("diffusion"))

    def predict(
        self,
        prompt: str = Input(
            description="Text prompt for generating the 3D model, ignored if an image is provided below",
            default=None,
        ),
        image: Path = Input(
            description="A synthetic view image for generating the 3D model. To get the best results, remove the background from the input image",
            default=None,
        ),
        guidance_scale: float = Input(
            description="Scale for classifier-free guidance", default=15.0
        ),
        batch_size: int = Input(description="Number of outputs", default=1),
        render_mode: str = Input(
            description="Choose a render mode", choices=["nerf", "stf"], default="nerf"
        ),
        render_size: int = Input(
            description="Size of the renderer; higher values take longer to render",
            default=128,
        ),
        save_mesh: bool = Input(
            description="Additionally save the latents as .obj meshes",
            default=False,
        ),
    ) -> List[Path]:
"""Run a single prediction on the model"""

assert prompt or image, "Please provide prompt of image"
model = self.image_model if image else self.text_model

if image:
model_kwargs = dict(images=[load_image(str(image))] * batch_size)
else:
model_kwargs = dict(texts=[prompt] * batch_size)

        latents = sample_latents(
            batch_size=batch_size,
            model=model,
            diffusion=self.diffusion,
            guidance_scale=guidance_scale,
            model_kwargs=model_kwargs,
            progress=True,
            clip_denoised=True,
            use_fp16=True,
            use_karras=True,
            karras_steps=64,
            sigma_min=1e-3,
            sigma_max=160,
            s_churn=0,
        )

        cameras = create_pan_cameras(render_size, self.device)
        output = []
        for i, latent in enumerate(latents):
            images = decode_latent_images(
                self.xm, latent, cameras, rendering_mode=render_mode
            )
            # Assemble the rendered views into a looping animated GIF in memory.
            writer = io.BytesIO()
            images[0].save(
                writer,
                format="GIF",
                save_all=True,
                append_images=images[1:],
                duration=100,
                loop=0,
            )

            filename = f"/tmp/out_{i}.gif"
            with open(filename, "wb") as f:
                f.write(writer.getbuffer())
            output.append(Path(filename))

        if save_mesh:
            for i, latent in enumerate(latents):
                filename = f"/tmp/mesh_{i}.obj"
                t = decode_latent_mesh(self.xm, latent).tri_mesh()
                with open(filename, "w") as f:
                    t.write_obj(f)
                output.append(Path(filename))

        return output
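
For completeness, a sketch of exercising this interface directly, outside of Cog (an assumption-laden smoke test: it needs a CUDA GPU, network access so `load_model` can populate `model_weights/`, and every argument passed explicitly, because the `Input(...)` defaults are only resolved when Cog invokes the class):

```python
# Hypothetical local smoke test; in production Cog calls Predictor via `cog predict`.
from predict import Predictor

predictor = Predictor()
predictor.setup()  # loads transmitter, text300M, image300M, and the diffusion config

paths = predictor.predict(
    prompt="a chair that looks like an avocado",
    image=None,          # text-conditional path; pass a Path to use image300M instead
    guidance_scale=15.0,
    batch_size=1,
    render_mode="nerf",
    render_size=128,
    save_mesh=True,
)
print([str(p) for p in paths])  # e.g. ['/tmp/out_0.gif', '/tmp/mesh_0.obj']
```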