Source code for trilearn.distributions.gaussian_graphical_model

"""
Gaussian graphical model.
"""

import numpy as np

import trilearn.graph.decomposable
from trilearn.distributions import wishart as wish
import trilearn.graph.graph as glib
import trilearn.graph.junction_tree as jtlib



[docs]
def log_likelihood(graph, S, n, D, delta, cache={}):
    """

    Args:
        S (Numpy matrix): sum of squares matrix for the full distribution
        D (Numpy matrix): location matrix for the full distribution
        delta (float): scale parameter
        n (int): number of data samples on which S is built
    """
    tree = trilearn.graph.decomposable.junction_tree(graph)
    separators = jtlib.separators(tree)
    cliques = tree.nodes()
    return log_likelihood_partial(S, n, D, delta, cliques, separators, cache)




[docs]
def gaussian_marginal_log_likelihood(S, n, D, delta, cache={}):
    """ Marginal log-likelihood of the data, x in a normal distribution
    with zero mean and where the precision matrix is marginalized out.

    Args:
        S (Numpy matrix): sum of squares matrix for the full distribution
        D (Numpy matrix): location matrix for the full distribution
        delta (float): scale parameter
        n (int): number of data samples on which S is built
    """
    c1 = wish.log_norm_constant(D + S, delta + n, cache)
    c2 = wish.log_norm_constant(D, delta, cache)

    return c1 - c2




[docs]
def log_likelihood_partial(S, n, D, delta, cliques, separators, cache={}, idmatrices=None):
    """ Partial log-likelihood of the given cliques and separators.
    If every clique and separator is found in a graph, g this is
    the marginal likelihood of g.

    Args:
        S (Numpy matrix): sum of squares matrix for the full distribution
        D (Numpy matrix): location matrix for the full distribution
        delta (float): scale parameter
        n (int): number of data samples on which S is built
        cliques (list): list of cliques, represented as frozensets
        separators (dict): dict with separators as keys and list of associated edges as values
        cache (dict): dict of seps of cliques as kayes and partial ll as values
    """

    cliques_constants = 0.0
    for c in cliques:
        if c not in cache:
            inds = np.array(list(c), dtype=int).tolist()
            D_c = D[inds][:, inds]
            S_c = S[inds][:, inds]
            cache[c] = gaussian_marginal_log_likelihood(S_c, n, D_c, delta, cache)
        cliques_constants += cache[c]
    seps_constants = 0.0
    for s in separators:
        if s == frozenset({}):
            continue
        if s not in cache:
            inds = np.array(list(s), dtype=int).tolist()
            D_s = D[inds][:, inds]
            S_s = S[inds][:, inds]
            cache[s] = gaussian_marginal_log_likelihood(S_s, n, D_s, delta, cache)
        nu = len(separators[s])
        seps_constants += nu * cache[s]


    return cliques_constants - seps_constants