Experiments (WIP)
If you can't explain it simply, you don't understand it well enough.
- https://keras.io/examples/vision/nerf/
- https://colab.research.google.com/github/keras-team/keras-io/blob/master/examples/vision/ipynb/nerf.ipynb#scrollTo=bPIeu6Dh1lDL
- https://keras.io/api/callbacks/tensorboard/
NeRF proposes a hierarchical structure. The overall network architecture is composed of two networks: the coarse network and the fine network.
The model is a multi-layer perceptron (MLP), with ReLU as its non-linearity.
The MLP first processes the input 3D coordinate x with 8 fully-connected layers (using ReLU activations and 256 channels per layer), and outputs sigma and a 256-dimensional feature vector. This feature vector is then concatenated with the camera ray's viewing direction and passed to one additional fully-connected layer (using a ReLU activation and 128 channels) that outputs the view-dependent RGB color.
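The fine network's sample locations come from the coarse pass: invert the CDF of the per-segment compositing weights and draw new t-values where the weight mass sits. A minimal sketch of that inverse-transform sampling step (the helper name and details are illustrative, written from the published NeRF description rather than copied from a specific repo):

import torch

def sample_fine_points(bin_edges, weights, n_fine):
    # bin_edges: (n_rays, n_coarse + 1) t-values bounding each coarse segment
    # weights:   (n_rays, n_coarse) compositing weights from the coarse network
    pdf = weights + 1e-5                          # avoid division by zero
    pdf = pdf / pdf.sum(dim=-1, keepdim=True)
    cdf = torch.cumsum(pdf, dim=-1)
    cdf = torch.cat([torch.zeros_like(cdf[..., :1]), cdf], dim=-1)   # (n_rays, n_coarse + 1)
    u = torch.rand(weights.shape[0], n_fine, device=weights.device)  # uniform samples in [0, 1)
    # Invert the CDF: find which bin each u falls into, then place the sample inside that bin.
    idx = torch.searchsorted(cdf, u, right=True).clamp(1, cdf.shape[-1] - 1)
    below, above = idx - 1, idx
    cdf_lo = torch.gather(cdf, -1, below)
    cdf_hi = torch.gather(cdf, -1, above)
    t_lo = torch.gather(bin_edges, -1, below)
    t_hi = torch.gather(bin_edges, -1, above)
    frac = (u - cdf_lo) / (cdf_hi - cdf_lo + 1e-8)
    return t_lo + frac * (t_hi - t_lo)            # (n_rays, n_fine) new t-values for the fine network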
import torch
import torch.nn as nn

class NerfModel(nn.Module):  # pytorch
    def __init__(self, embedding_dim_pos=20, embedding_dim_direction=8, hidden_dim=128):
        super(NerfModel, self).__init__()
        # block1: processes the positionally encoded 3D coordinate.
        self.block1 = nn.Sequential(nn.Linear(embedding_dim_pos * 3, hidden_dim), nn.ReLU(),
                                    nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
                                    nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
                                    nn.Linear(hidden_dim, hidden_dim), nn.ReLU())
        # block2: skip connection with the encoded position; the extra output unit is the density sigma.
        self.block2 = nn.Sequential(nn.Linear(embedding_dim_pos * 3 + hidden_dim, hidden_dim), nn.ReLU(),
                                    nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
                                    nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
                                    nn.Linear(hidden_dim, hidden_dim + 1))
        # block3/block4: fuse the encoded viewing direction and predict RGB in [0, 1].
        self.block3 = nn.Sequential(nn.Linear(embedding_dim_direction * 3 + hidden_dim, hidden_dim // 2), nn.ReLU())
        self.block4 = nn.Sequential(nn.Linear(hidden_dim // 2, 3), nn.Sigmoid())
        self.embedding_dim_pos = embedding_dim_pos
        self.embedding_dim_direction = embedding_dim_direction
        self.relu = nn.ReLU()
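The snippet above only defines the layers. A sketch of a matching forward pass, assuming a positional encoding that maps each 3D input to embedding_dim * 3 features (sin/cos over embedding_dim // 2 frequencies, no identity term) so the layer widths line up; positional_encoding and nerf_forward are illustrative names, not from the original code:

def positional_encoding(x, num_freqs):
    # x: (N, 3) -> (N, 2 * num_freqs * 3)
    out = []
    for j in range(num_freqs):
        out.append(torch.sin(2.0 ** j * x))
        out.append(torch.cos(2.0 ** j * x))
    return torch.cat(out, dim=-1)

def nerf_forward(model, o, d):
    # o: sample positions (N, 3), d: unit viewing directions (N, 3)
    emb_x = positional_encoding(o, model.embedding_dim_pos // 2)        # (N, 60)
    emb_d = positional_encoding(d, model.embedding_dim_direction // 2)  # (N, 24)
    h = model.block1(emb_x)
    tmp = model.block2(torch.cat((emb_x, h), dim=-1))       # skip connection with the encoded position
    h, sigma = tmp[:, :-1], model.relu(tmp[:, -1])          # density must be non-negative
    h = model.block3(torch.cat((h, emb_d), dim=-1))         # add view dependence
    color = model.block4(h)                                 # RGB in [0, 1]
    return color, sigma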
from tensorflow import keras
from tensorflow.keras import layers

POS_ENCODE_DIMS = 16  # number of positional-encoding frequencies used in the Keras example

def get_nerf_model(num_layers, num_pos):
    """Generates the NeRF neural network.

    Args:
        num_layers: The number of MLP layers.
        num_pos: The number of dimensions of positional encoding.

    Returns:
        The `tf.keras` model.
    """
    inputs = keras.Input(shape=(num_pos, 2 * 3 * POS_ENCODE_DIMS + 3))
    x = inputs
    for i in range(num_layers):
        x = layers.Dense(units=64, activation="relu")(x)
        if i % 4 == 0 and i > 0:
            # Inject residual connection.
            x = layers.concatenate([x, inputs], axis=-1)
    outputs = layers.Dense(units=4)(x)  # RGB + sigma per sample point
    return keras.Model(inputs=inputs, outputs=outputs)
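Hypothetical usage (the num_pos value here is illustrative; the Keras example flattens all encoded ray samples into this axis):

# Sketch only: 8 layers, rays flattened to num_pos encoded sample points.
nerf_model = get_nerf_model(num_layers=8, num_pos=100 * 100 * 32)
nerf_model.summary()  # input shape per point: 2 * 3 * POS_ENCODE_DIMS + 3 = 99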
import torch.nn.functional as F

class DirectTemporalNeRF(nn.Module):  # TiNeuVox / D-NeRF-style: canonical NeRF plus a time-conditioned deformation network
    def __init__(self, D=8, W=256, input_ch=3, input_ch_views=3, input_ch_time=1, output_ch=4, skips=[4],
                 use_viewdirs=False, memory=[], embed_fn=None, zero_canonical=True):
        super(DirectTemporalNeRF, self).__init__()
        self.D = D
        self.W = W
        self.input_ch = input_ch
        self.input_ch_views = input_ch_views
        self.input_ch_time = input_ch_time
        self.skips = skips
        self.use_viewdirs = use_viewdirs
        self.memory = memory
        self.embed_fn = embed_fn
        self.zero_canonical = zero_canonical
        # Canonical (time = 0) radiance field.
        self._occ = NeRFOriginal(D=D, W=W, input_ch=input_ch, input_ch_views=input_ch_views,
                                 input_ch_time=input_ch_time, output_ch=output_ch, skips=skips,
                                 use_viewdirs=use_viewdirs, memory=memory, embed_fn=embed_fn, output_color_ch=3)
        # Deformation network: maps (encoded position, time) to a displacement dx.
        self._time, self._time_out = self.create_time_net()

    def create_time_net(self):
        layers = [nn.Linear(self.input_ch + self.input_ch_time, self.W)]
        for i in range(self.D - 1):
            if i in self.memory:
                raise NotImplementedError
            else:
                layer = nn.Linear
            in_channels = self.W
            if i in self.skips:
                in_channels += self.input_ch
            layers += [layer(in_channels, self.W)]
        return nn.ModuleList(layers), nn.Linear(self.W, 3)

    def query_time(self, new_pts, t, net, net_final):
        # Missing from the pasted snippet; restored following the D-NeRF reference implementation.
        h = torch.cat([new_pts, t], dim=-1)
        for i, l in enumerate(net):
            h = net[i](h)
            h = F.relu(h)
            if i in self.skips:
                h = torch.cat([new_pts, h], -1)
        return net_final(h)

    def forward(self, x, ts):
        input_pts, input_views = torch.split(x, [self.input_ch, self.input_ch_views], dim=-1)
        t = ts[0]
        assert len(torch.unique(t[:, :1])) == 1, "Only accepts all points from same time"
        cur_time = t[0, 0]
        if cur_time == 0. and self.zero_canonical:
            dx = torch.zeros_like(input_pts[:, :3])
        else:
            dx = self.query_time(input_pts, t, self._time, self._time_out)
        input_pts_orig = input_pts[:, :3]
        input_pts = self.embed_fn(input_pts_orig + dx)
        out, _ = self._occ(torch.cat([input_pts, input_views], dim=-1), t)
        return out, dx
class NeRFOriginal(nn.Module):
    def __init__(self, D=8, W=256, input_ch=3, input_ch_views=3, input_ch_time=1, output_ch=4, skips=[4],
                 use_viewdirs=False, memory=[], embed_fn=None, output_color_ch=3, zero_canonical=True):
        super(NeRFOriginal, self).__init__()
        self.D = D
        self.W = W
        self.input_ch = input_ch
        self.input_ch_views = input_ch_views
        self.skips = skips
        self.use_viewdirs = use_viewdirs
        # self.pts_linears = nn.ModuleList(
        #     [nn.Linear(input_ch, W)] +
        #     [nn.Linear(W, W) if i not in self.skips else nn.Linear(W + input_ch, W) for i in range(D-1)])
        layers = [nn.Linear(input_ch, W)]
        for i in range(D - 1):
            if i in memory:
                raise NotImplementedError
            else:
                layer = nn.Linear
            in_channels = W
            if i in self.skips:
                in_channels += input_ch
            layers += [layer(in_channels, W)]
        self.pts_linears = nn.ModuleList(layers)
        ### Implementation according to the official code release (https://github.com/bmild/nerf/blob/master/run_nerf_helpers.py#L104-L105)
        self.views_linears = nn.ModuleList([nn.Linear(input_ch_views + W, W//2)])
        ### Implementation according to the paper
        # self.views_linears = nn.ModuleList(
        #     [nn.Linear(input_ch_views + W, W//2)] + [nn.Linear(W//2, W//2) for i in range(D//2)])
        if use_viewdirs:
            self.feature_linear = nn.Linear(W, W)
            self.alpha_linear = nn.Linear(W, 1)
            self.rgb_linear = nn.Linear(W//2, output_color_ch)
        else:
            self.output_linear = nn.Linear(W, output_ch)

    def forward(self, x, ts):
        input_pts, input_views = torch.split(x, [self.input_ch, self.input_ch_views], dim=-1)
        h = input_pts
        for i, l in enumerate(self.pts_linears):
            h = self.pts_linears[i](h)
            h = F.relu(h)
            if i in self.skips:
                h = torch.cat([input_pts, h], -1)
        if self.use_viewdirs:
            alpha = self.alpha_linear(h)      # density from position-only features
            feature = self.feature_linear(h)
            h = torch.cat([feature, input_views], -1)
            for i, l in enumerate(self.views_linears):
                h = self.views_linears[i](h)
                h = F.relu(h)
            rgb = self.rgb_linear(h)          # view-dependent color
            outputs = torch.cat([rgb, alpha], -1)
        else:
            outputs = self.output_linear(h)
        return outputs, torch.zeros_like(input_pts[:, :3])
Shoot a ray through each pixel and sample some points along the ray. A ray is commonly parameterized as r(t) = o + td, where t is the parameter, o is the origin, and d is the unit direction vector.
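A hedged sketch of that step plus the standard volume-rendering quadrature (stratified samples per ray, alpha from sigma and segment length, transmittance-weighted color sum); render_rays and the near/far bounds are illustrative, not from any of the repos above:

import torch

def render_rays(model_fn, rays_o, rays_d, near=2.0, far=6.0, n_samples=64):
    # rays_o, rays_d: (n_rays, 3) origins and unit directions; model_fn(points, dirs) -> (rgb, sigma)
    n_rays = rays_o.shape[0]
    # Stratified sampling: jitter one sample inside each of n_samples evenly spaced bins.
    t = torch.linspace(near, far, n_samples + 1, device=rays_o.device)
    lower, upper = t[:-1], t[1:]
    t_vals = lower + (upper - lower) * torch.rand(n_rays, n_samples, device=rays_o.device)
    # r(t) = o + td for every sample.
    points = rays_o[:, None, :] + t_vals[..., None] * rays_d[:, None, :]      # (n_rays, n_samples, 3)
    dirs = rays_d[:, None, :].expand_as(points)
    rgb, sigma = model_fn(points.reshape(-1, 3), dirs.reshape(-1, 3))
    rgb = rgb.reshape(n_rays, n_samples, 3)
    sigma = sigma.reshape(n_rays, n_samples)
    # Volume rendering: alpha per segment, accumulated transmittance, then weighted color sum.
    deltas = t_vals[:, 1:] - t_vals[:, :-1]
    deltas = torch.cat([deltas, 1e10 * torch.ones_like(deltas[:, :1])], dim=-1)
    alpha = 1.0 - torch.exp(-sigma * deltas)
    trans = torch.cumprod(torch.cat([torch.ones_like(alpha[:, :1]), 1.0 - alpha + 1e-10], dim=-1), dim=-1)[:, :-1]
    weights = alpha * trans                                                   # (n_rays, n_samples)
    pixel_rgb = (weights[..., None] * rgb).sum(dim=1)                         # (n_rays, 3)
    return pixel_rgb, weights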
- https://towardsdatascience.com/nerf-representing-scenes-as-neural-radiance-fields-for-view-synthesis-ef1e8cebace4 Figure 1 shows the overall architecture of NeRF, which uses positional encoding rather than the raw input.
- D-NeRF: https://github.com/albertpumarola/D-NeRF
- nerf_pl, a PyTorch Lightning implementation: https://github.com/kwea123/nerf_pl
  The code is largely based on the NeRF implementation (see the master or dev branch); the main differences are the model structure and the rendering process, which can be found in the two files under models/.
- http://assimp.sourceforge.net/lib_html/index.html
- http://www.fullofstars.de/importing-ply-files.html
Local Light Field Fusion (LLFF) [28]: LLFF is designed for producing photorealistic novel views of well-sampled, forward-facing scenes. It uses a trained 3D convolutional network to directly predict a discretized frustum-sampled RGBα grid (multiplane image or MPI [52]) for each input view, then renders novel views by alpha compositing and blending nearby MPIs into the novel viewpoint.
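A hedged sketch of the back-to-front "over" compositing for a single RGBα multiplane image (blending across neighboring MPIs is omitted; array shapes are illustrative):

import numpy as np

def composite_mpi(planes_rgb, planes_alpha):
    # planes_rgb:   (n_planes, H, W, 3), ordered back (far) to front (near)
    # planes_alpha: (n_planes, H, W, 1)
    out = np.zeros_like(planes_rgb[0])
    for rgb, a in zip(planes_rgb, planes_alpha):
        out = rgb * a + out * (1.0 - a)   # standard "over" operator
    return out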
The regularizer coalesces density distributed along each ray into minimally spaced discrete samples where possible. In effect it squeezes diffuse density onto a solid surface, which is what eliminates the floaters.
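The note does not name the regularizer; assuming it refers to a distortion-style loss over the per-ray sample weights (as popularized by Mip-NeRF 360 and used in several NeRF codebases), a minimal sketch looks like this:

import torch

def distortion_loss(t_mid, weights):
    # t_mid:   (n_rays, n_samples) midpoints of the sample intervals along each ray (normalized)
    # weights: (n_rays, n_samples) volume-rendering weights for those samples
    # Pairwise term: pulls weight mass together along the ray, penalizing spread-out density.
    pairwise = torch.abs(t_mid[:, :, None] - t_mid[:, None, :])               # (n_rays, n, n)
    loss = (weights[:, :, None] * weights[:, None, :] * pairwise).sum(dim=(1, 2))
    # (The full Mip-NeRF 360 version adds an intra-interval term ~ (1/3) * sum(w_i**2 * delta_i).)
    return loss.mean()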
- Assimp to Unreal: https://github.com/irajsb/UE4_Assimp/wiki/How-To-Build#step-3--build-assimp
- HashNeRF-pytorch: https://github.com/yashbhalgat/HashNeRF-pytorch (grid types: enum class GridType { Hash, Dense, Tiled };)
- https://github.com/3a1b2c3/torch-ngp
- https://www.unrealengine.com/marketplace/en-US/product/point-cloud-kit (Unreal Engine Point Cloud Kit)
Original NeRF
"We synthesize views by querying 5D coordinates along camera rays and use classic volume rendering techniques to project the output colors and densities into an image. Because volume rendering is naturally differentiable, the only input required to optimize our representation is a set of images with known camera poses."
- https://github.com/bmild/nerf/blob/20a91e764a28816ee2234fcadb73bd59a613a44c/run_nerf.py#L60
HashNeRF
- https://github.com/3a1b2c3/HashNeRF-pytorch/blob/main/run_nerf.py#L76
- https://github.com/NVlabs/instant-ngp/blob/66a965d2816809d0ba645dcf10b2006025543c62/src/testbed_image.cu#L284
TiNeuVox
- https://phog.github.io/snerg/
- https://microsoft.github.io/FastNeRF/
- https://github.com/3a1b2c3/vixel (WebGL voxel renderer)
- https://github.com/NVlabs/instant-ngp/discussions/494 "Short of baking the NeRF into a data structure more amenable to WebGL rendering (such as PlenOctrees), you'd have to re-implement the entire inference pipeline of tiny-cuda-nn in GLSL, express the trained weights of the hash encoding + neural network in terms of textures, and then run that."
- https://geometry.cs.ucl.ac.uk/group_website/projects/2022/relu_fields (ReLU Fields)