Skip to contents

Introduction

This document is a reference for probability distributions that are useful in variational inference: it lists their densities, moments, entropies, and the relationships between them.

Gamma and Inverse-Gamma

Gamma

The shape–rate parameterization is used throughout ($\beta$ is the rate).

$$
\begin{aligned}
y &\sim \text{Gamma}(\alpha, \beta) \\
f(y|\alpha,\beta) &= \frac{\beta^\alpha}{\Gamma(\alpha)}e^{-y\beta}y^{\alpha-1},\quad y,\alpha,\beta\in\mathbb R^+ \\
\ln f(y|\alpha,\beta) &= \alpha\ln\beta-\ln\Gamma(\alpha)-y\beta+(\alpha-1)\ln y \\
\mathbb E[y] &= \frac{\alpha}{\beta} \\
\mathbb V[y] &= \frac{\alpha}{\beta^2} \\
\mathbb H[y] &= \alpha-\ln\beta+\ln\Gamma(\alpha)+(1-\alpha)\psi(\alpha) \\
\mathbb E[\ln y] &= \psi(\alpha)-\ln(\beta)
\end{aligned}
$$

Inverse-Gamma

If $y\sim\text{Gamma}(\alpha,\beta)$ then $y^{-1}\sim\text{Inverse-Gamma}(\alpha,\beta)$.

$$
\begin{aligned}
y &\sim \text{Inverse-Gamma}(\alpha,\beta) \\
f(y|\alpha,\beta) &= \frac{\beta^\alpha}{\Gamma(\alpha)}e^{-\beta/y}y^{-\alpha-1},\quad y,\alpha,\beta\in\mathbb R^+ \\
\ln f(y|\alpha,\beta) &= \alpha\ln\beta-\ln\Gamma(\alpha)-\beta/y-(\alpha+1)\ln(y) \\
\mathbb E[y] &= \frac{\beta}{\alpha-1},\quad \alpha>1 \\
\mathbb E[y^{-1}] &= \frac{\alpha}{\beta} \\
\mathbb V[y] &= \frac{\beta^2}{(\alpha-1)^2(\alpha-2)},\quad \alpha>2 \\
\mathbb H[y] &= \alpha+\ln\beta+\ln\Gamma(\alpha)-(1+\alpha)\psi(\alpha) \\
\mathbb E[\ln y] &= \ln\beta-\psi(\alpha)
\end{aligned}
$$

Relations

$$
\begin{aligned}
Y &\sim \text{Gamma}(\alpha,\beta) &\iff Y^{-1} &\sim \text{Inv-Gamma}(\alpha, \beta) \\
Y &\sim \text{Gamma}(\nu/2,1/2) &\iff Y &\sim \text{Chi-square}(\nu) \\
Y &\sim \text{Inv-Gamma}(\alpha,1/2) &\iff Y &\sim \text{Inv-Chi-square}(2\alpha) \\
Y &\sim \text{Inv-Gamma}(\alpha, \beta) &\iff Y &\sim \text{Inv-Wishart}_1(2\alpha,2\beta)
\end{aligned}
$$

$$
\begin{aligned}
Y^2|X &\sim \text{Inv-Gamma}(\nu/2,\nu/X) \\
X &\sim \text{Inv-Gamma}(1/2,1/A^2) \\
\implies Y &\sim \text{Half-}t(\nu,A) \\[1ex]
Y^2|X &\sim \text{Inv-Gamma}(1/2,1/X) \\
X &\sim \text{Inv-Gamma}(1/2,1/A^2) \\
\implies Y &\sim \text{Half-Cauchy}(A)
\end{aligned}
$$

Chi-squared, Inverse-Chi-squared, and Scaled-Inverse-Chi-squared

Chi-squared

$$
\begin{aligned}
y &\sim \text{Chi-squared}(\nu) \\
f(y|\nu) &= \frac{1}{2^{\nu/2}\Gamma(\nu/2)}e^{-y/2}y^{\nu/2-1},\quad y,\nu\in\mathbb R^+ \\
\ln f(y|\nu) &= -(\nu/2)\ln(2)-\ln\Gamma(\nu/2)-y/2+(\nu/2-1)\ln(y) \\
\mathbb E[y] &= \nu \\
\mathbb V[y] &= 2\nu \\
\mathbb H[y] &= \nu/2+\ln\left(2\Gamma(\nu/2)\right)+(1-\nu/2)\psi(\nu/2) \\
\mathbb E[\ln y] &= \psi(\nu/2) + \ln(2)
\end{aligned}
$$

Inverse-Chi-squared

$$
\begin{aligned}
y &\sim \text{Inverse-Chi-squared}(\nu) \\
f(y|\nu) &= \frac{2^{-\nu/2}}{\Gamma(\nu/2)}y^{-\nu/2-1}e^{-1/(2y)},\quad y,\nu\in\mathbb R^{+} \\
\ln f(y|\nu) &= -(\nu/2)\ln(2)-\ln\Gamma(\nu/2)-(\nu/2+1)\ln(y)-1/(2y) \\
\mathbb E[y] &= \frac{1}{\nu-2},\quad \nu>2 \\
\mathbb V[y] &= \frac{2}{(\nu-2)^2(\nu-4)},\quad \nu>4 \\
\mathbb H[y] &= \nu/2+\ln\left(\tfrac{1}{2}\Gamma(\nu/2)\right)-(\nu/2+1)\psi(\nu/2)
\end{aligned}
$$

Scaled-inverse-Chi-squared

$$
\begin{aligned}
y &\sim \text{Scaled-inverse-Chi-squared}(\nu,\tau^2) \\
f(y|\nu,\tau^2) &= \frac{(\tau^2\nu/2)^{\nu/2}}{\Gamma(\nu/2)} \frac{\exp\left(\frac{-\nu\tau^2}{2y}\right)}{y^{1+\nu/2}},\quad y,\nu,\tau^2\in\mathbb R^+ \\
\ln f(y|\nu,\tau^2) &= (\nu/2)\ln(\nu\tau^2/2)-\ln\Gamma(\nu/2)-\frac{\nu\tau^2}{2y}-(1+\nu/2)\ln(y) \\
\mathbb E[y] &= \frac{\nu\tau^2}{\nu-2},\quad \nu>2 \\
\mathbb V[y] &= \frac{2\nu^2\tau^4}{(\nu-2)^2(\nu-4)},\quad \nu>4 \\
\mathbb H[y] &= \nu/2+\ln\left(\frac{\nu\tau^2}{2}\Gamma(\nu/2)\right)-(1+\nu/2)\psi(\nu/2)
\end{aligned}
$$

Relations

$$
\begin{aligned}
\text{Chi-square}(\nu) &\sim \text{Gamma}(\nu/2,1/2) \\
\text{Inverse-Chi-Squared}(\nu) &\sim \text{Scaled-Inverse-Chi-Squared}(\nu,1/\nu) \\
\text{Scaled-Inverse-Chi-Squared}(\nu,\tau^2) &\sim \text{Inverse-Gamma}(\nu/2,\nu\tau^2/2)
\end{aligned}
$$

Wishart, Inverse-Wishart, G-Wishart

Wishart

$$
\begin{aligned}
\Sigma &\sim \text{Wishart}_d(\xi,\Lambda) \\
f(\Sigma|\xi,\Lambda) &= \frac{1}{2^{\xi d/2}|\Lambda|^{\xi/2}\Gamma_d(\xi/2)}|\Sigma|^{(\xi-d-1)/2}e^{-\text{tr}(\Lambda^{-1}\Sigma)/2},\quad \xi>d-1,\ \Lambda>0 \\
\ln f(\Sigma|\xi,\Lambda) &= -\frac{\xi d}{2}\ln(2)-\frac{\xi}{2}\ln|\Lambda|-\ln\Gamma_d(\xi/2)+\frac{\xi-d-1}{2}\ln|\Sigma|-\frac{1}{2}\text{tr}(\Lambda^{-1}\Sigma) \\
\mathbb E[\Sigma] &= \xi\Lambda \\
\mathbb V[\Sigma]_{ij} &= \xi\left(\lambda_{ij}^2+\lambda_{ii}\lambda_{jj}\right) \\
\mathbb H[\Sigma] &= \frac{d+1}{2}\ln|\Lambda|+\frac{d(d+1)}{2}\ln(2)+\ln\Gamma_d(\xi/2)-\frac{\xi-d-1}{2}\psi_d(\xi/2)+\frac{\xi d}{2} \\
\mathbb E[\ln|\Sigma|] &= \psi_d(\xi/2)+d\ln(2)+\ln|\Lambda|
\end{aligned}
$$

Inverse-Wishart

If $\Sigma\sim\text{Wishart}_d(\xi,\Lambda)$ then $\Sigma^{-1}\sim\text{Inverse-Wishart}_d(\xi,\Lambda^{-1})$.

$$
\begin{aligned}
\Sigma &\sim \text{Inverse-Wishart}_d(\xi,\Lambda) \\
f(\Sigma|\xi,\Lambda) &= \frac{|\Lambda|^{\xi/2}}{2^{\xi d/2}\Gamma_d(\xi/2)}|\Sigma|^{-(\xi+d+1)/2}e^{-\text{tr}(\Lambda\Sigma^{-1})/2},\quad \xi>d-1,\ \Lambda>0 \\
\ln f(\Sigma|\xi,\Lambda) &= \frac{\xi}{2}\ln|\Lambda|-\frac{\xi d}{2}\ln(2)-\ln\Gamma_d(\xi/2)-\frac{\xi+d+1}{2}\ln|\Sigma|-\frac{1}{2}\text{tr}(\Lambda\Sigma^{-1}) \\
\mathbb E[\Sigma] &= \frac{\Lambda}{\xi-d-1},\quad \xi>d+1 \\
\mathbb E[\Sigma^{-1}] &= \xi\Lambda^{-1} \\
\mathbb V[\Sigma]_{ij} &= \frac{(\xi-d+1)\lambda_{ij}^2+(\xi-d-1)\lambda_{ii}\lambda_{jj}}{(\xi-d)(\xi-d-1)^2(\xi-d-3)},\quad \xi>d+3 \\
\mathbb H[\Sigma] &= -\frac{\xi}{2}\ln|\Lambda|+\frac{\xi+d+1}{2}\mathbb E[\ln|\Sigma|]+\frac{\xi d}{2}\ln(2)+\ln\Gamma_d(\xi/2)+\frac{\xi d}{2} \\
\mathbb E[\ln|\Sigma|] &= \ln|\tfrac{1}{2}\Lambda|-\psi_d(\xi/2)
\end{aligned}
$$

Relations

$$
\begin{aligned}
\text{Wishart}_1(\xi,\Lambda) &\sim \text{Gamma}\left(\xi/2,1/(2\Lambda)\right) \\
\text{Inv-Wishart}_1(\xi,\Lambda) &\sim \text{Inv-Gamma}(\xi/2,\Lambda/2)
\end{aligned}
$$

$$
\begin{aligned}
\Sigma|X_1,...,X_p &\sim \text{Inv-Wishart}_p(\nu+p-1,2\nu\,\text{diag}(1/X_1,...,1/X_p)) \\
X_j &\overset{\text{ind}}{\sim} \text{Inv-Gamma}(1/2,1/A_j^2) \\
\implies \sigma_{j} &\sim \text{Half-}t(\nu,A_j) \\
p(\rho_{ij}) &\propto (1-\rho_{ij}^2)^{\nu/2-1},\quad -1<\rho_{ij}<1
\end{aligned}
$$
where $\Sigma_{ij} = \rho_{ij}\sigma_i\sigma_j$ (so $\Sigma_{jj}=\sigma_j^2$).

Identities and Definitions

$$
\begin{aligned}
\Gamma_d(x) &= \text{multivariate gamma function}(x) \\
&= \pi^{d(d-1)/4}\prod_{j=1}^d \Gamma\left[x+(1-j)/2\right] \\
\psi_d(x) &= \text{multivariate digamma function}(x) \\
&= \sum_{j=1}^d \psi\left[x+(1-j)/2\right]
\end{aligned}
$$

$$
\left(\text{bdiag}(A_1,...,A_p)\right)^{-1}=\text{bdiag}(A_1^{-1},...,A_p^{-1})
$$

$$
\mathbb E[\text{tr}(AX)] = \text{tr}(A\,\mathbb E[X])
$$