Source code for trilearn.mh_nodedriven

"""
Metropolis-Hastings sampler for junction tree distributions.
"""
import time

import numpy as np
from tqdm import tqdm

import trilearn.distributions.sequential_junction_tree_distributions as seqdist
import trilearn.graph.graph as glib
import trilearn.graph.junction_tree as jtlib
import trilearn.graph.junction_tree_collapser
import trilearn.graph.junction_tree_expander
import trilearn.graph.trajectory as mcmctraj


[docs]def gen_ggm_trajectory(dataframe, n_samples, D=None, delta=1.0, cache={}, alpha=0.5, beta=0.5, **args): p = dataframe.shape[1] if D is None: D = np.identity(p) sd = seqdist.GGMJTPosterior() sd.init_model(np.asmatrix(dataframe), D, delta, cache) return mh(alpha, beta, n_samples, sd)
[docs]def mh(alpha, beta, traj_length, seq_dist, jt_traj=None, debug=False): """ A Metropolis-Hastings implementation for approximating distributions over junction trees. Args: traj_length (int): Number of Gibbs iterations (samples) alpha (float): sparsity parameter for the Christmas tree algorithm beta (float): sparsity parameter for the Christmas tree algorithm seq_dist (SequentialJTDistributions): the distribution to be sampled from Returns: Trajectory: Markov chain of teh underlying graphs of the junction trees sampled by M-H. """ graph_traj = mcmctraj.Trajectory() graph_traj.set_sequential_distribution(seq_dist) prev_tree = None for i in tqdm(range(traj_length), desc="Metropolis-Hastings samples"): tree = None start_time = time.time() if i == 0: tree = jtlib.sample(seq_dist.p, alpha, beta) else: # Sample backwards trajectories tree = trans_sample(prev_tree, alpha, beta, seq_dist) # Sample T from T_1..p end_time = time.time() graph_traj.add_sample(jtlib.graph(tree), end_time - start_time) prev_tree = tree return graph_traj
[docs]def trans_sample(from_tree, alpha, beta, seq_dist, **args): prop_tree, reduced_tree, moved_node = proposal_sample(from_tree, alpha, beta, seq_dist.p) acc_prob = accept_proposal_prob(from_tree, reduced_tree, prop_tree, moved_node, alpha, beta, seq_dist) if np.random.binomial(1, acc_prob): return prop_tree else: return from_tree
[docs]def proposal_sample(from_tree, alpha, beta, n_nodes, **args): node = np.random.randint(n_nodes) reduced_tree = trilearn.graph.junction_tree_collapser.sample(from_tree, node) new_tree, K_st, old_cliques, old_separators, new_cliques, new_separators = trilearn.graph.junction_tree_expander.sample( reduced_tree, node, alpha, beta, only_tree=False) return new_tree, reduced_tree, node
[docs]def log_prop_pdf(from_tree, reduced_tree, to_tree, moved_node, alpha, beta): # Sum over R(to_tree, tree)K(tree, from_tree) for tree in Supp(R(to_tree, .)) = Supp(K(., to_tree)) #log_prob = 0 #for origin in trilearn.graph.junction_tree_collapser.possible_origins(from_tree, moved_node): # log_prob += -np.log(sum(origin)) log_prob = trilearn.graph.junction_tree_collapser.log_pdf(from_tree, reduced_tree, node=moved_node) log_prob += np.log(trilearn.graph.junction_tree_expander.pdf(reduced_tree, to_tree, alpha, beta, moved_node)) return log_prob
[docs]def log_prop_ratio(from_tree, reduced_tree, to_tree, moved_node, alpha, beta): # print "from tree nodes: " + str(from_tree.nodes()) # print "proposed tree nodes: " + str(to_tree.nodes()) # print "moved node: " + str(moved_node) # print "reduced tree nodes: " + str(reduced_tree.nodes()) ret = log_prop_pdf(to_tree, reduced_tree, from_tree, moved_node, alpha, beta) ret -= log_prop_pdf(from_tree, reduced_tree, to_tree, moved_node, alpha, beta) return ret
[docs]def log_post_ratio(from_tree, to_tree, seqdist): from_tree_seps = jtlib.separators(from_tree) log_post1 = seqdist.log_likelihood_partial(from_tree.nodes(), from_tree_seps) log_post1 -= jtlib.log_n_junction_trees(from_tree, from_tree_seps) to_tree_seps = jtlib.separators(to_tree) log_post2 = seqdist.log_likelihood_partial(to_tree.nodes(), to_tree_seps) log_post2 -= jtlib.log_n_junction_trees(to_tree, to_tree_seps) return log_post2 - log_post1
[docs]def accept_proposal_prob(from_tree, reduced_tree, to_tree, moved_node, alpha, beta, seq_dist): log_acc_prob = log_prop_ratio(from_tree, reduced_tree, to_tree, moved_node, alpha, beta) log_acc_prob += log_post_ratio(from_tree, to_tree, seq_dist) return min(1.0, np.exp(log_acc_prob))