Source code for gneiss.balances
"""
Balances (:mod:`gneiss.balances`)
=================================
.. currentmodule:: gneiss.balances
This module contains modules for calculating balances and creating ETE
objects to visualize these balances on a tree.
Functions
---------
.. autosummary::
:toctree: generated/
balance_basis
balanceplot
"""
# ----------------------------------------------------------------------------
# Copyright (c) 2016--, gneiss development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
from __future__ import division
import numpy as np
import pandas as pd
from skbio.stats.composition import clr_inv
from collections import OrderedDict
try:
import ete3
from gneiss.layouts import default_layout
except:
pass
def _balance_basis(tree_node):
""" Helper method for calculating balance basis
"""
counts, n_tips = _count_matrix(tree_node)
counts = OrderedDict([(x, counts[x])
for x in counts.keys() if not x.is_tip()])
nds = counts.keys()
r = np.array([counts[n]['r'] for n in nds])
s = np.array([counts[n]['l'] for n in nds])
k = np.array([counts[n]['k'] for n in nds])
t = np.array([counts[n]['t'] for n in nds])
a = np.sqrt(s / (r*(r+s)))
b = -1*np.sqrt(r / (s*(r+s)))
basis = np.zeros((n_tips-1, n_tips))
for i in range(len(nds)):
basis[i, :] = np.array([0]*k[i] + [a[i]]*r[i] + [b[i]]*s[i] + [0]*t[i])
# Make sure that the basis is in level order
basis = basis[:, ::-1]
nds = list(nds)
return basis, nds
[docs]def balance_basis(tree_node):
"""
Determines the basis based on bifurcating tree.
This is commonly referred to as sequential binary partition [1]_.
Given a binary tree relating a list of features, this module can
be used to calculate an orthonormal basis, which is used to
calculate the ilr transform.
Parameters
----------
treenode : skbio.TreeNode
Input bifurcating tree. Must be strictly bifurcating
(i.e. every internal node needs to have exactly 2 children).
Returns
-------
basis : np.array
Returns a set of orthonormal bases in the Aitchison simplex
corresponding to the tree. The order of the
basis is index by the level order of the internal nodes.
nodes : list, skbio.TreeNode
List of tree nodes indicating the ordering in the basis.
Raises
------
ValueError
The tree doesn't contain two branches.
Examples
--------
>>> from gneiss.balances import balance_basis
>>> from skbio import TreeNode
>>> tree = u"((b,c)a, d)root;"
>>> t = TreeNode.read([tree])
>>> basis, nodes = balance_basis(t)
>>> basis
array([[ 0.18507216, 0.18507216, 0.62985567],
[ 0.14002925, 0.57597535, 0.28399541]])
Notes
-----
The tree must be strictly bifurcating, meaning that
every internal node has exactly 2 children.
See Also
--------
skbio.stats.composition.ilr
References
----------
.. [1] J.J. Egozcue and V. Pawlowsky-Glahn "Exploring Compositional Data
with the CoDa-Dendrogram" (2011)
"""
basis, nodes = _balance_basis(tree_node)
basis = clr_inv(basis)
return basis, nodes
def _count_matrix(treenode):
n_tips = 0
nodes = list(treenode.levelorder(include_self=True))
# fill in the Ordered dictionary. Note that the
# elements of this Ordered dictionary are
# dictionaries.
counts = OrderedDict()
columns = ['k', 'r', 'l', 't', 'tips']
for n in nodes:
if n not in counts:
counts[n] = {}
for c in columns:
counts[n][c] = 0
# fill in r and l. This is done in reverse level order.
for n in nodes[::-1]:
if n.is_tip():
counts[n]['tips'] = 1
n_tips += 1
elif len(n.children) == 2:
lchild = n.children[0]
rchild = n.children[1]
counts[n]['r'] = counts[rchild]['tips']
counts[n]['l'] = counts[lchild]['tips']
counts[n]['tips'] = counts[n]['r'] + counts[n]['l']
else:
raise ValueError("Not a strictly bifurcating tree!")
# fill in k and t
for n in nodes:
if n.parent is None:
counts[n]['k'] = 0
counts[n]['t'] = 0
continue
elif n.is_tip():
continue
# left or right child
# left = 0, right = 1
child_idx = 'l' if n.parent.children[0] != n else 'r'
if child_idx == 'l':
counts[n]['t'] = counts[n.parent]['t'] + counts[n.parent]['l']
counts[n]['k'] = counts[n.parent]['k']
else:
counts[n]['k'] = counts[n.parent]['k'] + counts[n.parent]['r']
counts[n]['t'] = counts[n.parent]['t']
return counts, n_tips
def _attach_balances(balances, tree):
""" Appends the balances to each of the internal nodes
in the ete tree.
Parameters
----------
balances : array_like, pd.Series
Vector of balances to plot on internal nodes of the tree.
If the balances is not in a `pd.Series`, it is assumed
to be stored in level order.
tree : skbio.TreeNode
Bifurcating tree to plot balances on.
Return
------
ete.Tree
The ETE representation of the tree with balances encoded
as node weights.
"""
nodes = [n for n in tree.traverse(include_self=True)]
n_tips = sum([n.is_tip() for n in nodes])
n_nontips = len(nodes) - n_tips
if len(balances) != n_nontips:
raise IndexError('The number of balances (%d) is not '
'equal to the number of internal nodes '
'in the tree (%d)' % (len(balances), n_nontips))
ete_tree = ete3.Tree.from_skbio(tree)
# Some random features in all nodes
i = 0
for n in ete_tree.traverse():
if not n.is_leaf():
if not isinstance(balances, pd.Series):
n.add_features(weight=balances[i])
else:
n.add_features(weight=balances.loc[n.name])
i += 1
return ete_tree
[docs]def balanceplot(balances, tree,
layout=None,
mode='c'):
""" Plots balances on tree.
Parameters
----------
balances : np.array
A vector of internal nodes and their associated real-valued balances.
The order of the balances will be assumed to be in level order.
tree : skbio.TreeNode
A strictly bifurcating tree defining a hierarchical relationship
between all of the features within `table`.
layout : function, optional
A layout for formatting the tree visualization. Must take a
`ete.tree` as a parameter.
mode : str
Type of display to show the tree. ('c': circular, 'r': rectangular).
Notes
-----
The `tree` is assumed to strictly bifurcating and whose tips match
`balances`. It is not recommended to attempt to plot trees with a
ton of leaves (i.e. more than 4000 leaves).
Examples
--------
>>> from gneiss.balances import balanceplot
>>> from skbio import TreeNode
>>> tree = u"((b,c)a, d)root;"
>>> t = TreeNode.read([tree])
>>> balances = [10, -10]
>>> tr, ts = balanceplot(balances, t)
>>> print(tr.get_ascii())
<BLANKLINE>
/-b
/a|
-root \-c
|
\-d
See Also
--------
skbio.TreeNode.levelorder
"""
ete_tree = _attach_balances(balances, tree)
# Create an empty TreeStyle
ts = ete3.TreeStyle()
# Set our custom layout function
if layout is None:
ts.layout_fn = default_layout
else:
ts.layout_fn = layout
# Draw a tree
ts.mode = mode
# We will add node names manually
ts.show_leaf_name = False
# Show branch data
ts.show_branch_length = True
ts.show_branch_support = True
return ete_tree, ts