Source code for trilearn.distributions.gaussian_graphical_model

"""
Gaussian graphical model.
"""

import numpy as np

import trilearn.graph.decomposable
from trilearn.distributions import wishart as wish
import trilearn.graph.graph as glib
import trilearn.graph.junction_tree as jtlib


[docs] def log_likelihood(graph, S, n, D, delta, cache={}): """ Args: S (Numpy matrix): sum of squares matrix for the full distribution D (Numpy matrix): location matrix for the full distribution delta (float): scale parameter n (int): number of data samples on which S is built """ tree = trilearn.graph.decomposable.junction_tree(graph) separators = jtlib.separators(tree) cliques = tree.nodes() return log_likelihood_partial(S, n, D, delta, cliques, separators, cache)
[docs] def gaussian_marginal_log_likelihood(S, n, D, delta, cache={}): """ Marginal log-likelihood of the data, x in a normal distribution with zero mean and where the precision matrix is marginalized out. Args: S (Numpy matrix): sum of squares matrix for the full distribution D (Numpy matrix): location matrix for the full distribution delta (float): scale parameter n (int): number of data samples on which S is built """ c1 = wish.log_norm_constant(D + S, delta + n, cache) c2 = wish.log_norm_constant(D, delta, cache) return c1 - c2
[docs] def log_likelihood_partial(S, n, D, delta, cliques, separators, cache={}, idmatrices=None): """ Partial log-likelihood of the given cliques and separators. If every clique and separator is found in a graph, g this is the marginal likelihood of g. Args: S (Numpy matrix): sum of squares matrix for the full distribution D (Numpy matrix): location matrix for the full distribution delta (float): scale parameter n (int): number of data samples on which S is built cliques (list): list of cliques, represented as frozensets separators (dict): dict with separators as keys and list of associated edges as values cache (dict): dict of seps of cliques as kayes and partial ll as values """ cliques_constants = 0.0 for c in cliques: if c not in cache: inds = np.array(list(c), dtype=int).tolist() D_c = D[inds][:, inds] S_c = S[inds][:, inds] cache[c] = gaussian_marginal_log_likelihood(S_c, n, D_c, delta, cache) cliques_constants += cache[c] seps_constants = 0.0 for s in separators: if s == frozenset({}): continue if s not in cache: inds = np.array(list(s), dtype=int).tolist() D_s = D[inds][:, inds] S_s = S[inds][:, inds] cache[s] = gaussian_marginal_log_likelihood(S_s, n, D_s, delta, cache) nu = len(separators[s]) seps_constants += nu * cache[s] return cliques_constants - seps_constants