Source code for gneiss.plot._decompose

# ----------------------------------------------------------------------------
# Copyright (c) 2016--, gneiss development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from gneiss.util import NUMERATOR, DENOMINATOR


def balance_boxplot(balance_name, data, num_color='#FFFFFF',
                    denom_color='#FFFFFF',
                    xlabel="", ylabel="", linewidth=1,
                    ax=None, **kwargs):
    """ Plots a boxplot for a given balance and the associated metadata.

    Parameters
    ----------
    balance_name : str
        Name of balance to plot.  This is the column of `data` that is
        drawn along the x-axis.
    data : pd.DataFrame
        Merged dataframe of balances and metadata.
    num_color : str
        Hex color of the background span drawn from the left edge of the
        plot up to zero.
    denom_color : str
        Hex color of the background span drawn from zero to the right
        edge of the plot.
    xlabel : str
        x-axis label.
    ylabel : str
        y-axis label.
    linewidth : float
        Width of the minor grid lines separating the boxes.
    ax : matplotlib axes object
        Axes object to render boxplots in.  A new figure is created when
        this is None.
    **kwargs : dict
        Values to pass in to customize seaborn boxplot (for instance
        ``y`` and ``hue``).

    Returns
    -------
    a : matplotlib axes object
        Matplotlib axes object with rendered boxplots.

    Notes
    -----
    NOTE(review): earlier documentation described `num_color` as the
    color for values above zero, but the spans are painted as described
    above (`num_color` below zero, `denom_color` above zero).  The
    painting behavior is left unchanged here; confirm which is intended.

    See Also
    --------
    seaborn.boxplot
    """
    import seaborn as sns
    from matplotlib.ticker import AutoMinorLocator

    if ax is None:
        f, ax = plt.subplots()

    lowest = data[balance_name].min()
    highest = data[balance_name].max()

    # the number 20 is pretty arbitrary - we are just
    # resizing to make sure that there is separation between the
    # edges of the plot, and the boxplot
    pad = (highest - lowest) / 20

    # zorder=0 keeps the colored background behind the boxes
    ax.axvspan(lowest - pad, 0, facecolor=num_color, zorder=0)
    ax.axvspan(0, highest + pad, facecolor=denom_color, zorder=0)

    # One minor-tick interval per hue level, so the dotted grid lines
    # fall between neighbouring boxes.
    hue = kwargs.get('hue')
    if hue is not None:
        num_groups = len(data[hue].value_counts())
    else:
        num_groups = 1

    a = sns.boxplot(ax=ax, x=balance_name, data=data, **kwargs)
    a.minorticks_on()
    a.get_yaxis().set_minor_locator(AutoMinorLocator(num_groups))
    # Honor the caller-supplied line width (previously hard-coded to 1).
    a.grid(axis='y', which='minor', color='k', linestyle=':',
           linewidth=linewidth)
    a.set_xlim([lowest - pad, highest + pad])
    a.set_xlabel(xlabel)
    a.set_ylabel(ylabel)
    return a

def balance_barplots(tree, balance_name, header, feature_metadata,
                     ndim=5, num_color="#0000FF", denom_color="#0000FF",
                     xlabel="", ylabel="",
                     axes=(None, None)):
    """ Plots barplots of counts of features found in the balance.

    Parameters
    ----------
    tree : skbio.TreeNode
        Reference tree for balances.
    balance_name : str
        Name of balance to plot.  The node with this name is looked up
        in `tree`.
    header : str
        Header name for the feature metadata column to summarize
    feature_metadata : pd.DataFrame
        Contains information about the features.  It is indexed by the
        tip names of `tree`.
    ndim : int
        Number of bars to display at a given time (default=5)
    num_color : str
        Hex color of the bars in the numerator barplot.
    denom_color : str
        Hex color of the bars in the denominator barplot.
    xlabel : str
        x-axis label.
    ylabel : str
        y-axis label.
    axes : tuple of matplotlib axes objects
        Specifies where the barplots should be rendered, as
        ``(numerator axes, denominator axes)``.  A new figure with two
        stacked axes is created if either entry is None.

    Returns
    -------
    ax_num : matplotlib axes object
        Barplot of the features in the numerator of the balance.
    ax_denom : matplotlib axes object
        Barplot of the features in the denominator of the balance.
    """
    import seaborn as sns

    if axes[0] is None or axes[1] is None:
        f, (ax_num, ax_denom) = plt.subplots(2)
    else:
        ax_num, ax_denom = axes[0], axes[1]

    st = tree.find(balance_name)
    num_ = _top_feature_counts(st.children[NUMERATOR], header,
                               feature_metadata, ndim)
    denom_ = _top_feature_counts(st.children[DENOMINATOR], header,
                                 feature_metadata, ndim)

    # Both panels share one x-range so that bar lengths are comparable.
    # Read the count column by name rather than by position in
    # DataFrame.max(), which also had to take a max over the labels.
    xlim = [0, max([num_[header].max(), denom_[header].max()])]

    ax_denom = sns.barplot(y='index', x=header, data=denom_, ax=ax_denom,
                           color=denom_color)
    ax_denom.set_ylabel(ylabel)
    ax_denom.set_xlabel(xlabel)
    ax_denom.set_xlim(xlim)

    ax_num = sns.barplot(y='index', x=header, data=num_, ax=ax_num,
                         color=num_color)
    ax_num.set_ylabel(ylabel)
    ax_num.set_xlabel(xlabel)
    ax_num.set_xlim(xlim)
    return ax_num, ax_denom


def _top_feature_counts(clade, header, feature_metadata, ndim):
    """ Tabulates the `ndim` most common `header` values within a clade.

    Returns a dataframe with the category labels in a column named
    'index' and their counts in a column named `header`.
    """
    if clade.is_tip():
        # A single tip contributes exactly one feature.
        return pd.DataFrame(
            [[feature_metadata.loc[clade.name, header], 1]],
            columns=['index', header], index=[header])

    members = feature_metadata.loc[list(clade.subset())]
    counts = members[header].value_counts().head(ndim)
    # Build the table explicitly: the column names produced by
    # value_counts().reset_index() differ between pandas versions
    # (['index', header] before 2.0, [header, 'count'] from 2.0 on).
    return pd.DataFrame({'index': counts.index.values,
                         header: counts.values})