```python
# losses.sliced_sm
import torch
from torch import autograd


def sliced_score_estimation(score_net, samples, n_particles=1):
    # Repeat each sample n_particles times so several random projection
    # vectors v can be averaged per sample.
    dup_samples = samples.unsqueeze(0).expand(n_particles, *samples.shape).contiguous().view(-1, *samples.shape[1:])
    dup_samples.requires_grad_(True)
    vectors = torch.randn_like(dup_samples)
    vectors = vectors / torch.norm(vectors, dim=-1, keepdim=True)

    grad1 = score_net(dup_samples)  # h, the estimated score
    gradv = torch.sum(grad1 * vectors)  # scalar surrogate for projecting h onto v
    loss1 = torch.sum(grad1 * vectors, dim=-1) ** 2 * 0.5  # second term of J(\theta): (v^T h)^2 / 2
    grad2 = autograd.grad(gradv, dup_samples, create_graph=True)[0]  # grad of v^T h w.r.t. samples (z)
    loss2 = torch.sum(vectors * grad2, dim=-1)  # first term of J(\theta): v^T (\nabla h) v

    loss1 = loss1.view(n_particles, -1).mean(dim=0)
    loss2 = loss2.view(n_particles, -1).mean(dim=0)

    loss = loss1 + loss2
    return loss.mean(), loss1.mean(), loss2.mean()
```
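For context, a minimal usage sketch of the estimator (the toy network, shapes, and particle count below are my own, not from the repo):

```python
import torch
import torch.nn as nn

# Hypothetical toy score network; any module mapping R^d -> R^d will do.
score_net = nn.Sequential(nn.Linear(2, 64), nn.Tanh(), nn.Linear(64, 2))
samples = torch.randn(128, 2)

loss, loss1, loss2 = sliced_score_estimation(score_net, samples, n_particles=4)
loss.backward()  # gradients flow into score_net's parameters
```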
```python
# losses.vae.elbo_ssm
z = imp_encoder(X)
ssm_loss, *_ = sliced_score_estimation_vr(functools.partial(score, dup_X), z, n_particles=n_particles)
```
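Note that `functools.partial(score, dup_X)` fixes the first argument, so the function handed to the estimator is a function of `z` alone (this snippet calls `sliced_score_estimation_vr`, which I take to be the variance-reduced variant, but the gradient target is the same). A minimal sketch of the pattern, with a placeholder `score` of my own (the repo's real `score` has a different body):

```python
import functools
import torch

# Placeholder two-argument score, only to make the pattern runnable;
# the repo's actual score function is different.
def score(X, z):
    return z - X

dup_X = torch.randn(8, 2)
score_of_z = functools.partial(score, dup_X)  # score_of_z(z) == score(dup_X, z)

# Passed as score_net, the estimator sees samples = z, so
# autograd.grad(gradv, dup_samples, ...) differentiates w.r.t. z.
h = score_of_z(torch.randn(8, 2))
```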
To my understanding, `grad1` is the estimate of the score $h = S_{m}(x;\theta)$, and `loss2` is the first term of $J(\theta)$, which is $v^{T}\nabla_{x}h(x;\theta)v$. But in the code, it seems to be computed as $v^{T}\nabla_{z}h(x;\theta)v$.
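For reference, the sliced score matching objective I am referring to is, in the paper's notation,

$$J(\theta) = \mathbb{E}_{v}\,\mathbb{E}_{x}\left[v^{T}\nabla_{x}h(x;\theta)\,v + \frac{1}{2}\big(v^{T}h(x;\theta)\big)^{2}\right].$$

A quick sanity check (a toy linear score of my own, not from the repo) confirms that `autograd.grad` takes the Jacobian-vector product with respect to whatever tensor is passed in as `samples`, which here is `z`:

```python
import torch

# Toy linear score h(z) = A z (row-wise); then grad of v^T h w.r.t. z is A^T v.
torch.manual_seed(0)
A = torch.randn(3, 3)
z = torch.randn(5, 3, requires_grad=True)
v = torch.randn(5, 3)
v = v / v.norm(dim=-1, keepdim=True)

h = z @ A.T                       # h_i = A z_i
gradv = torch.sum(h * v)          # same scalar surrogate as in the estimator
grad2 = torch.autograd.grad(gradv, z)[0]
print(torch.allclose(grad2, v @ A))  # True: grad2_i = A^T v_i
```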