commit c70b3ef9e7dad26f1b4d54812cdad2db005a14b0 Author: ailab Date: Fri Jun 7 18:50:38 2024 +0800 first commit diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..3217fd3 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +figures/output_examples.mp4 filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..c443290 --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ 
+--- +datasets: +- allenai/objaverse +tags: +- 3d +extra_gated_fields: + Name: text + Email: text + Country: text + Organization or Affiliation: text + I ALLOW Stability AI to email me about new model releases: checkbox +license: mit +pipeline_tag: image-to-3d +--- +# TripoSR +![](figures/input800.mp4) +TripoSR is a fast and feed-forward 3D generative model developed in collaboration between Stability AI and Tripo AI. + +## Model Details + +### Model Description + +We closely follow the [LRM](https://arxiv.org/abs/2311.04400) network architecture for the model design, where TripoSR incorporates a series of technical advancements over the LRM model in terms of both data curation as well as model and training improvements. For more technical details and evaluations, please refer to [our tech report](https://arxiv.org/abs/2403.02151). + +* **Developed by**: [Stability AI](https://stability.ai/), [Tripo AI](https://tripo3d.ai/) +* **Model type**: Feed-forward 3D reconstruction from a single image +* **License**: MIT +* **Hardware**: We train `TripoSR` for 5 days on 22 GPU nodes each with 8 A100 40GB GPUs + +### Model Sources + +* **Repository**: https://github.com/VAST-AI-Research/TripoSR +* **Tech report**: https://arxiv.org/abs/2403.02151 +* **Demo**: https://huggingface.co/spaces/stabilityai/TripoSR + +### Training Dataset + +We use renders from the [Objaverse](https://objaverse.allenai.org/objaverse-1.0) dataset, utilizing our enhanced rendering method that more closely replicates the distribution of images found in the real world, significantly improving our model’s ability to generalize. We selected a carefully curated subset of the Objaverse dataset for the training data, which is available under the CC-BY license. 
+ + +## Usage + +* For usage instructions, please refer to our [TripoSR GitHub repository](https://github.com/VAST-AI-Research/TripoSR) + +* You can also try it in [our gradio demo](https://huggingface.co/spaces/stabilityai/TripoSR) + + +### Misuse, Malicious Use, and Out-of-Scope Use + +The model should not be used to intentionally create or disseminate 3D models that people would foreseeably find disturbing, distressing, or offensive; or content that propagates historical or current stereotypes. \ No newline at end of file diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..fa3e59a --- /dev/null +++ b/config.yaml @@ -0,0 +1,38 @@ +cond_image_size: 512 + +image_tokenizer_cls: tsr.models.tokenizers.image.DINOSingleImageTokenizer +image_tokenizer: + pretrained_model_name_or_path: "facebook/dino-vitb16" + +tokenizer_cls: tsr.models.tokenizers.triplane.Triplane1DTokenizer +tokenizer: + plane_size: 32 + num_channels: 1024 + +backbone_cls: tsr.models.transformer.transformer_1d.Transformer1D +backbone: + in_channels: ${tokenizer.num_channels} + num_attention_heads: 16 + attention_head_dim: 64 + num_layers: 16 + cross_attention_dim: 768 + +post_processor_cls: tsr.models.network_utils.TriplaneUpsampleNetwork +post_processor: + in_channels: 1024 + out_channels: 40 + +decoder_cls: tsr.models.network_utils.NeRFMLP +decoder: + in_channels: 120 # 3 * 40 + n_neurons: 64 + n_hidden_layers: 9 + activation: silu + +renderer_cls: tsr.models.nerf_renderer.TriplaneNeRFRenderer +renderer: + radius: 0.87 # slightly larger than 0.5 * sqrt(3) + feature_reduction: concat + density_activation: exp + density_bias: -1.0 + num_samples_per_ray: 128 \ No newline at end of file diff --git a/figures/input800.mp4 b/figures/input800.mp4 new file mode 100644 index 0000000..cb77e64 Binary files /dev/null and b/figures/input800.mp4 differ diff --git a/figures/output_examples.mp4 b/figures/output_examples.mp4 new file mode 100644 index 0000000..16e69f7 --- /dev/null +++ 
b/figures/output_examples.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c63a5fe7afea93549cc412fab612a1c2e5a46844fa75c5ff4eee892b9d3bbc4e +size 2425685 diff --git a/model.ckpt b/model.ckpt new file mode 100644 index 0000000..f783226 --- /dev/null +++ b/model.ckpt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429e2c6b22a0923967459de24d67f05962b235f79cde6b032aa7ed2ffcd970ee +size 1677246742