Source code for trilearn.graph.junction_tree_expander

from fractions import Fraction

import networkx as nx
import numpy as np

import trilearn
import trilearn.graph.subtree_sampler as ss
from trilearn import auxiliary_functions as aux


[docs]def sample(tree, i, alpha=0.5, beta=0.5, only_tree=True): """ Expands the junciton tree tree with the internal node i. Args: tree (NetworkX graph): a junction tree i (int): new node to be added to the underlying graph of tree alpha (float): parameter for the subtree kernel beta (float): parameter for the subtree kernel only_tree (bool): Return only the new tree if true. Returns: (JunctionTree, float, dict, dict, dict, dict): A tuple having the following entries: * tree_new - A junction tree based on tree with the extra internal node i. * K_st - Transition probability from tree to tree_new * old_cliques - Dict with the cliques in the subtree used in the transition. * old_separators - Dict with the cliques in the subtree used in the transition. * new_cliques, * new_separators If only_tree is True, only tree_new is returned. Example: >>> import numpy as np >>> np.random.seed(1) >>> t = jtlib.sample(5) >>> t2 = jtelib.sample(t, 5) >>> t2.nodes NodeView((frozenset([1, 2]), frozenset([4]), frozenset([5]), frozenset([0, 2]), frozenset([3]))) >>> t2.edges EdgeView([(frozenset([1, 2]), frozenset([4])), (frozenset([1, 2]), frozenset([0, 2])), (frozenset([4]), frozenset([3])), (frozenset([5]), frozenset([3]))]) >>> (tree_new, K_st, old_cliques, old_separators, new_cliques, new_separators) = jtelib.sample(t, 5, only_tree=False) >>> tree_new <trilearn.graph.junction_tree.JunctionTree object at 0x7fc479893b50> >>> K_st 0.01 >>> old_cliques set([]) >>> old_separators {} >>> new_cliques set([frozenset([5])]) >>> new_separators {frozenset([]): [(frozenset([5]), frozenset([1, 2]))]} """ # for n in tree.nodes(): # lab = tuple(n) # if len(n) == 1: # lab = "(" + str(list(n)[0]) + ")" # tree.node[n] = {"color": "black", "label": lab} # print tree.nodes() if only_tree is True: tree_new = tree # Alter the input tree else: #tree_new = tree.subgraph(tree.nodes()) # nx < 2.0 tree_new = tree.copy() # nx < 2.0 #print(nocopy) #old_G = trilearn.graph.junction_tree.get_graph(tree) #(subtree, old_separators, probtree) = glib.random_subtree(tree, alpha, beta) # plotGraph(subtree, directory+"subtree_"+str(i)+".eps") # for n in subtree.nodes(): # tree_old.node[n] = {"color": "blue", "label": tuple(n)} # if n in tree.nodes(): # tree.node[n] = {"color": "blue", "label": tuple(n)} # plotGraph(tree_old.subgraph(tree_old.nodes()), # directory + "tree(" + str(i-1) + ")p.eps") (_, subtree_nodes, subtree_edges, subtree_adjlist, old_separators, prob_subtree) = ss.random_subtree(tree, alpha, beta, i) (old_cliques, new_cliques, new_separators, P, neig) = sample_cond_on_subtree_nodes(i, tree_new, subtree_nodes, subtree_edges, subtree_adjlist) if only_tree is True: return tree_new #conn_nodes = set() #for clique in new_cliques: # conn_nodes |= clique # for n in tree.nodes(): # lab = tuple(n) # if len(n) == 1: # lab = "("+str(list(n)[0])+")" # if n in new_cliques: # tree.node[n] = {"color": "red", "label": lab} # plotGraph(tree.subgraph(tree.nodes()), directory+"tree("+str(i)+").eps") #G = trilearn.graph.junction_tree.get_graph(tree) # G.node[i] = {"color": "red"} # for n in old_G: # if n in conn_nodes: # old_G.node[n] = {"color": "blue"} # G.node[n] = {"color": "blue"} # plotGraph(G, directory+"G"+str(i)+".eps") # plotGraph(old_G, directory+"G"+str(i-1)+"p.eps") # Proposal kernel K_st = None if len(subtree_nodes) == 1: # There might be two possible subtrees so # we calculate the probabilities for these explicitly K_st = pdf(tree, tree_new, alpha, beta, i) else: K_st = prob_subtree for c in P: K_st *= P[c] * neig[c] return tree_new, K_st, old_cliques, old_separators, new_cliques, new_separators
[docs]def subtree_cond_pdf(tree1, tree2, tree2_subtree_nodes, new): """ Returns the probability of generating tree2 from tree1 where the subtree is defined by tree2_subtree_nodes. Args: tree1 (NetworkX graph): A junction tree tree2 (NetworkX graph): A junction tree expanded from tree1 tree2_subtree_nodes: Contains the cliques in the subgraph of tree2 with the corresponding cliques in tree1. Returns: float: Probability of generating tree2 from tree1 where the subtree is defined by tree2_subtree_nodes. """ # if new is isolated in the underlying graph #if len(tree2_subtree_nodes) == 1 and tree2_subtree_nodes.values()[0] is None: if len(tree2_subtree_nodes) == 1 and list(tree2_subtree_nodes.values())[0] is None: sep = frozenset([]) c = frozenset([new]) lognu = trilearn.graph.junction_tree.log_nu(tree2, sep) return ({c: np.exp(-lognu)}, {c: 1.0}) # Get the subtree induced by the nodes #tree1_subtree = tree1.subgraph([c_t1 for c_t2, c_t1 in tree2_subtree_nodes.iteritems()]) tree1_subtree = tree1.subgraph([c_t1 for c_t2, c_t1 in tree2_subtree_nodes.items()]) # Get the separating sets # S = sepsets_in_subgraph(tree2_subtree_nodes, tree1_subtree) # S = {c: set() for c_t2, c in tree2_subtree_nodes.iteritems()} S = {c: set() for c_t2, c in tree2_subtree_nodes.items()} #for c_tree2, c in tree2_subtree_nodes.iteritems(): for c_tree2, c in tree2_subtree_nodes.items(): for neig in tree1_subtree.neighbors(c): S[c] = S[c] | (c & neig) # P, N get_subset_probabilities(tree2_) # Get the chosen internal nodes M = {} # for c_tree2, c in tree2_subtree_nodes.iteritems(): for c_tree2, c in tree2_subtree_nodes.items(): M[c] = c_tree2 - {new} - S[c] # Calculate probabilities corresponding to each clique P = {} N = {} # for c_tree2, c in tree2_subtree_nodes.iteritems(): for c_tree2, c in tree2_subtree_nodes.items(): neigs = {neig for neig in tree1.neighbors(c) if neig & c <= c_tree2 and neig not in tree1_subtree.nodes()} RM = c - S[c] gamma = tree1_subtree.order() # sepCondition = len({neig for neig in nx.neighbors(tree1_subtree, c) if # S[c] == neig & c}) > 0 or gamma == 1 sepCondition = len({neig for neig in tree1_subtree.neighbors(c) if S[c] == neig & c}) > 0 or gamma == 1 N[c] = 1.0 #N[c] = Fraction(1,1) if sepCondition is False: # Every internal node in c belongs to a separator P[c] = np.power(2.0, - len(RM)) #P[c] = Fraction(1, 2 ** len(RM)) if not len(c) + 1 == len(c_tree2): N[c] = np.power(2.0, -len(neigs)) #N[c] = Fraction(1, 2**len(neigs)) else: P[c] = 1.0 if len(RM) > 1: P[c] = (1.0 / len(RM)) * np.power(2.0, -(len(RM) - 1.0)) * len(M[c]) #P[c] = Fraction(1, len(RM)) * Fraction(len(M[c]), 2 ** (len(RM) - 1.0)) if not len(c) + 1 == len(c_tree2): N[c] = np.power(2.0, -len(neigs)) #N[c] = Fraction(1, 2**len(neigs)) return (P, N)
[docs]def sample_cond_on_subtree_nodes(new, tree, subtree_nodes, subtree_edges, subtree_adjlist): """ Returns a random CT from tree given subtree. Args: n (int): Node to be added to the graph of tree. tree (NetworkX graph): A junction tree. subtree (NetworkX graph): A subtree of tree. Returns: (NetworkX graph): a junction tree expanded from tree where n has been added to subtree. """ new_separators = {} new_cliques = set() old_cliques = set() subtree_order = len(subtree_nodes) #print("subtree nodes:" + str(subtree_nodes)) if subtree_order == 0: # If the tree, tree is empty (n isolated node), # add random neighbor. c = frozenset([new]) new_cliques.add(c) #c2 = tree.nodes()[0] # nx 1.9 c2 = list(tree.nodes())[0] # GraphTool #c2 = list(tree.nodes)[0] # nx 2.1 tree.add_node(c, label=tuple([new]), color="red") tree.add_edge(c, c2, label=tuple([])) sep = frozenset() #tree.fix_graph() trilearn.graph.junction_tree.randomize_at_sep(tree, sep) new_separators[sep] = [(c, c2)] # tree TODO: the actual value for the key is not needed. P = {c: np.exp(-tree.log_nu(sep))} return (old_cliques, new_cliques, new_separators, P, {c: 1.0}) S = {c: set() for c in subtree_nodes} M = {c: set() for c in subtree_nodes} for c in S: for neig in subtree_adjlist[c]: #S[c] = S[c] | (c & neig) S[c] |= (c & neig) RM = {c: c - S[c] for c in S} C = {c: set() for c in subtree_nodes} P = {} N_S = {c: set() for c in subtree_nodes} sepCondition = {} for c in RM: sepCondition[c] = len({neig for neig in subtree_adjlist[c] if S[c] == neig & c}) > 0 or len(subtree_adjlist) == 1 if sepCondition[c] is True: tmp = np.array(list(RM[c])) first_node = [] if len(tmp) > 0: # Connect to one node first_ind = np.random.randint(len(tmp)) first_node = tmp[[first_ind]] tmp = np.delete(tmp, first_ind) rest = set() if len(tmp) > 0: # Connect to the rest of the nodes if there are any left rest = aux.random_subset(tmp) M[c] = frozenset(rest | set(first_node)) else: M[c] = frozenset(aux.random_subset(RM[c])) # Create the new cliques for clique in M: C[clique] = frozenset(M[clique] | S[clique] | {new}) new_cliques.add(C[clique]) # Get the neighbor set of each c which can be moved to C[c] for clique in subtree_nodes: N_S[clique] = {neig for neig in tree.neighbors(clique) if neig & clique <= C[clique] and neig not in subtree_nodes} # Add the new cliques #for c in subtree_nodes: # tree.add_node(C[c], label=str(tuple(C[c])), color="red") tree.add_nodes_from([C[c] for c in subtree_nodes]) # Construct and add the new edges between the new cliques, # replicating the subtree new_subtree_edges = [] for e in subtree_edges: sep = C[e[0]] & C[e[1]] if not sep in new_separators: new_separators[sep] = [] new_separators[sep].append((C[e[0]], C[e[1]])) #new_subtree_edges.append((C[e[0]], C[e[1]])) tree.add_edge(C[e[0]], C[e[1]]) # tree.add_edges_from(new_subtree_edges) # Connect cliques in the subtree to the new cliques for c in subtree_nodes: # Move the neighbors of a swallowed node to the swallowing node # Remove the swallowed node if C[c] - {new} == c: # If connecting to all nodes in a clique (the node should be replaces instead) for neig in tree.neighbors(c): if neig not in subtree_nodes: tree.add_edge(C[c], neig)#, label=lab) tree.remove_node(c) old_cliques.add(c) else: # If not connecting to every node in a clique sep = C[c] & c if not sep in new_separators: new_separators[sep] = [] new_separators[sep].append((C[c], c)) #print "adding edge: " + str((C[c], c)) tree.add_edge(C[c], c) # Pick random subset of neighbors intersecting with subset of S U M N = aux.random_subset(N_S[c]) #for neig in N: # tree.add_edge(C[c], neig) tree.add_edges_from([(C[c], neig) for neig in N]) tree.remove_edges_from([(c, neig) for neig in N]) # Compute probabilities N = {} for c in subtree_nodes: if sepCondition[c] is False: # Every internal node in c belongs to a separator P[c] = np.power(2.0, - len(RM[c])) #P[c] = Fraction(1, 2 ** len(RM[c])) if not len(c) + 1 == len(C[c]): N[c] = np.power(2.0, - len(N_S[c])) #N[c] = Fraction(1, 2 ** len(N_S[c])) else: N[c] = 1.0 #N[c] = Fraction(1,1) else: P[c] = 1.0 #P[c] = Fraction(1, 1) N[c] = 1.0 #N[c] = Fraction(1, 1) if len(RM[c]) > 1: P[c] = 1.0 / len(RM[c]) #P[c] = Fraction(1, len(RM[c])) P[c] *= np.power(2.0, - (len(RM[c]) - 1.0)) * len(M[c]) #P[c] *= Fraction(len(M[c]), 2 ** (len(RM[c]) - 1.0)) if not len(c) + 1 == len(C[c]): # c not swallowed by C[c] #N[c] = np.power(2.0, - len(N_S[c])) N[c] = Fraction(1, 2 ** len(N_S[c])) # Remove the edges in tree tree.remove_edges_from(subtree_edges) # Todo: This will introduce a bug if we instead replace a node. return (old_cliques, new_cliques, new_separators, P, N)
[docs]def pdf(tree1, tree2, alpha, beta, new): """ CT kernel probability K(tree1, tree2) Args: tree1 (NetworkX graph): A junction tree tree2 (NetworkX graph): A junction tree alpha (float): Parameter for the subtree kernel beta (float): Parameter for the subtree kernel Returns: float: probability of generating tree2 from tree1 """ # TODO: Refactor so it can be used in M-H. prob = 0.0 tree2_tree1_subtree_nodes = get_subtree_nodes(tree1, tree2, new) for tree2_subtree_nodes in tree2_tree1_subtree_nodes: # tree1_subtree = tree1.subgraph([c_t1 for c_t2, c_t1 in tree2_subtree_nodes.iteritems() if c_t1 is not None]) tree1_subtree = tree1.subgraph([c_t1 for c_t2, c_t1 in tree2_subtree_nodes.items() if c_t1 is not None]) tree1_subtree_prob = ss.pdf(tree1_subtree, tree1, alpha, beta) (P, N) = subtree_cond_pdf(tree1, tree2, tree2_subtree_nodes, new) christtree_prob = np.prod([P[c] * N[c] for c in P]) prob += tree1_subtree_prob * christtree_prob return prob
[docs]def get_subtree_nodes(T1, T2, new): """ If the junction tree T1 is expanded to T2 by one internal node new, then the subtree chosen in T1 is (almost) unique. Also, the subtree of T2 containing new is unique. This returns a dictionary of the cliques in the induced subtree of T2 as keys and the emerging cliques in T1 as values. Args: T1 (NetworkX graph): a junction tree T2 (NetworkX graph): a junction tree new (int): the new node Returns: dict: a dictionary of the cliques in the induced subtree of T2 as keys and the emerging cliques in T1 as values. """ # Get subtree of T2 induced by the new node T2_ind = trilearn.graph.junction_tree.subtree_induced_by_subset(T2, {new}) T2_subtree_nodes = None # Find the subtree(2) in T1 if T2_ind.has_node(frozenset([new])): # Isolated node. Unique empty subtree T2_subtree_nodes = [{frozenset([new]): None}] elif T2_ind.order() == 1: # Look which is its neighbor #c = T2_ind.nodes()[0] # nx < 2.x c = list(T2_ind.nodes())[0] # nx > 2.x if T1.has_node(c - {new}): # if it was connected to everything in a clique T2_subtree_nodes = [{c: c - {new}}] else: # c always has at lest one neighbor and the separator c\new # for all of them. # We have to decide which of them was the emerging clique. # 3 cases: # 1) 1 neighbor: Trivial. # 2) 2 neighbors: Then it could be any of these. # 3) >2 neighbors: The emerging clique is the one that has the # others as a subset of its neighbors in T1 #neigs = T2.neighbors(c) # nx < 2.x neigs = list(T2.neighbors(c)) # nx > 2.x possible_origins = [c1 for c1 in neigs if c1 & c == c - {new}] g = len(possible_origins) if g == 1: # This neighbor has to be the one it came from T2_subtree_nodes = [{c: possible_origins[0]}] elif g == 2 and len(neigs) == 2: # If there are 2 possible neighbors with the same separator T2_subtree_nodes = [{c: possible_origins[0]}, {c: possible_origins[1]}] else: for neig in possible_origins: if set(neigs) - {neig} <= set(T1.neighbors(neig)): T2_subtree_nodes = [{c: neig}] break else: tmp = {} # In this case the subtree nodes are uniquely defined by T1 and T2 # Loop through all edges in T2 in order to extract the correct # subtree of T1. Note that, by construction we know that it has the same structure as # the induced subtree of T2. for e in T2_ind.edges(): # Non-swallowed cliques, get all potential "emerging" # (cliques from where the new cliques could have have emerged) cliques. Ncp1 = [c for c in T2.neighbors(e[0]) if c & e[0] == e[0] - {new}] Ncp2 = [c for c in T2.neighbors(e[1]) if c & e[1] == e[1] - {new}] # If the clique was swallowed in the new clique, # there will be no neighbors, so the clique itself # (except from the new node) is the unique emerging clique. if Ncp1 == []: Ncp1 = [e[0] - {new}] if Ncp2 == []: Ncp2 = [e[1] - {new}] # Replicate the structure in T2 for neig1 in Ncp1: for neig2 in Ncp2: if T1.has_edge(neig1, neig2): # Know that this edge is unique tmp[e[0]] = neig1 tmp[e[1]] = neig2 T2_subtree_nodes = [tmp] return T2_subtree_nodes