# Source code for b2plot.histogram

 # -*- coding: utf-8 -*-
"""
This file contains all the histogram-related functions.

"""


from .helpers import get_optimal_bin_size, TheManager
from .colors import b2cm
import pandas as pd
import numpy as np
from matplotlib.colors import hex2color

import matplotlib.pyplot as plt


def _hist_init(data, bins=None, xrange=None):
    """ Return the bin edges to use for ``data``.

    The edges held by ``TheManager`` are reused when they exist and neither
    ``bins`` nor ``xrange`` is given; otherwise new edges are computed with
    ``np.histogram``. This function does not store the edges itself.

    Args:
        data: Values to be binned.
        bins: Forwarded to ``np.histogram`` as ``bins``. When None,
            ``get_optimal_bin_size(len(data))`` is used.
        xrange: Forwarded to ``np.histogram`` as ``range``. The string
            'auto' is replaced by ``minmax(data)``.

    Returns:
        The bin edges.
    """
    xaxis = TheManager.Instance().get_x_axis()

    if xaxis is None or bins is not None or xrange is not None:
        if bins is None:
            bins = get_optimal_bin_size(len(data))
        # Only strings are compared against 'auto': comparing an array-valued
        # range with ``==`` is element-wise and has no single truth value.
        if isinstance(xrange, str) and xrange == 'auto':
            from .analysis import minmax
            xrange = minmax(data)
        _, xaxis = np.histogram(data, bins, xrange)

    return xaxis


def set_xaxis(bins, flat=False):
    """ Store ``bins`` as the shared x axis in ``TheManager``.

    Args:
        bins: Value handed to ``TheManager.Instance().set_x_axis``.
        flat: Accepted but not used.
    """
    manager = TheManager.Instance()
    manager.set_x_axis(bins)
def get_xaxis():
    """ Return the x axis currently held by ``TheManager``. """
    manager = TheManager.Instance()
    return manager.get_x_axis()
def flat_x(x, nbins=25):
    """ Store percentile-based bin edges of ``x`` as the shared x axis.

    The edges are the percentiles of ``x`` at ``nbins`` evenly spaced levels
    between 0 and 100, i.e. ``nbins`` edges are produced.

    Args:
        x: Sample the percentiles are taken from.
        nbins: Number of percentile levels (edges) to compute.
    """
    levels = np.linspace(0, 100, nbins)
    edges = np.percentile(x, levels)
    set_xaxis(edges)
# This needs to be changed
# Face colour per style index (not referenced by the functions visible in
# this module -- presumably kept for callers; verify before removing).
STYLES_facecolor = [None, 'none', 'none', 'none', 'none', 'none']
# Hatch pattern per style index; hist() looks up STYLES_hatches[style].
STYLES_hatches = [None, '///', r"\\\ ", 'xxx', '--', '++', 'o', ".+", 'xx', '//', '*', 'O', '.']
def hist(data, bins=None, fill=False, range=None, lw=1., ax=None, style=None, color=None, scale=None, weights=None,
         label=None, edgecolor=None, fillalpha=0.5, *args, **kwargs):
    """ Plot a histogram of ``data`` as a step (or filled step) outline.

    The binning comes from ``_hist_init``; the edges returned by ``ax.hist``
    are stored in ``TheManager`` afterwards.

    Args:
        data: Values to histogram; a pandas Series is reduced to its values.
        bins: Binning forwarded to ``_hist_init``.
        fill: Draw a filled histogram ('stepfilled') instead of an outline.
        range: Range forwarded to ``_hist_init`` and to ``ax.hist``.
        lw: Line width.
        ax: Axes to draw on; defaults to ``plt.gca()``.
        style: Index into ``STYLES_hatches``. Any value other than None turns
            ``fill`` on; 0 gives a semi-transparent fill, other values a
            hatch without face colour.
        color: Colour. An int selects ``b2cm[color % len(b2cm)]``; None takes
            the next colour of the axes' colour cycle.
        scale: int or float every weight is multiplied with (bools are
            ignored, other types only print a message).
        weights: Per-entry weights; defaults to ones. Not modified in place.
        label: Legend label.
        edgecolor: Edge colour. Defaults to 'black' when ``style`` is passed
            as 0, otherwise to ``color``.
        fillalpha: Alpha of the face colour used for style 0.
        *args: Forwarded to ``ax.hist``.
        **kwargs: Forwarded to ``ax.hist``.

    Returns:
        Tuple ``(y, xaxis, patches)`` as returned by ``ax.hist``.
    """
    if ax is None:
        ax = plt.gca()

    xaxis = _hist_init(data, bins, xrange=range)

    if isinstance(data, pd.Series):
        data = data.values

    if isinstance(color, int):
        color = b2cm[color % len(b2cm)]

    if color is None:
        # NOTE(review): private matplotlib API; ``prop_cycler`` is not
        # available on newer releases, ``get_next_color`` is.
        color = ax._get_lines.get_next_color()

    # Convert colour specs to an RGB tuple; lists and tuples are taken as-is.
    if not isinstance(color, (list, tuple)):
        color = hex2color(color)

    if style is not None:
        fill = True
        if style == 0 and edgecolor is None:
            edgecolor = 'black'
    else:
        style = 0

    if weights is None:
        weights = np.ones(len(data))

    if scale is not None:
        if isinstance(scale, (int, float)):
            if not isinstance(scale, bool):
                # Build a new array: an in-place ``*=`` would modify the
                # caller's weights, fail for integer arrays with a float
                # scale and repeat (not scale) a plain list.
                weights = np.asarray(weights) * scale
        else:
            print("Please provide int or float with scale")

    edgecolor = color if edgecolor is None else edgecolor

    if fill:
        # Only the RGB part is used so that an RGBA colour does not end up
        # as a five-component tuple.
        fc = (*color[:3], fillalpha) if style == 0 else 'none'
        # The line width is passed once, under its canonical name.
        y, xaxis, patches = ax.hist(data, xaxis, *args, range=range, histtype='stepfilled',
                                    hatch=STYLES_hatches[style], edgecolor=edgecolor, facecolor=fc,
                                    linewidth=lw, weights=weights, label=label, color=color, **kwargs)
    else:
        y, xaxis, patches = ax.hist(data, xaxis, *args, range=range, histtype='step', lw=lw, color=color,
                                    weights=weights, label=label, **kwargs)

    TheManager.Instance().set_x_axis(xaxis)

    return y, xaxis, patches
def _notransform(x):
    """ Identity transform: hand ``x`` back unchanged. """
    return x
def to_stack(df, col, by, transform=None, get_cats=False):
    """ Split one column of a dataframe into per-group arrays.

    The frame is grouped by ``by``; for every group the values of ``col``
    are passed through ``transform``. The resulting samples are ordered by
    increasing length.

    Args:
        df: DataFrame to split.
        col: Column whose values are collected.
        by: Grouping key handed to ``df.groupby``.
        transform: Callable applied to each group's values; identity if None.
        get_cats: Also return the group keys, in the same order as the data.

    Returns:
        List of samples, or ``(samples, categories)`` when ``get_cats`` is set.
    """
    grouped = df.groupby(by)
    apply = transform if transform is not None else _notransform

    keys = list(grouped.groups)
    samples = [apply(grouped.get_group(key)[col].values) for key in keys]

    # order the samples from shortest to longest
    order = np.array([len(sample) for sample in samples]).argsort()
    ordered = [samples[i] for i in order]

    if not get_cats:
        return ordered
    return ordered, np.array(keys)[order]
def stacked(df, col=None, by=None, bins=None, color=None, range=None, lw=.5, ax=None, edgecolor='black', weights=None,
            scale=None, label=None, transform=None, *args, **kwargs):
    """ Create stacked histogram

    Args:
        df (DataFrame or list of arrays): Either a DataFrame that is split
            with ``to_stack(df, col, by, transform)`` or a ready-made list of
            samples.
        col: Column to histogram (required for a DataFrame).
        by: Column to group by (required for a DataFrame).
        bins: Binning forwarded to ``_hist_init``.
        color: Colours of the stacks; when None and there are fewer than 20
            stacks, ``b2helix(n_stacks)`` is used.
        range: Range forwarded to ``_hist_init``.
        lw: Line width.
        ax: Axes to draw on; defaults to ``plt.gca()``.
        edgecolor: Edge colour of the stacks.
        weights: List of weight arrays, one per sample. When given, ``scale``
            is not applied.
        scale: int/float applied to all samples, or a dict mapping a category
            to its factor (DataFrame input only). Used only if ``weights`` is
            None.
        label: Legend labels; defaults to the categories for a DataFrame.
        transform: Forwarded to ``to_stack``.
        *args: Forwarded to ``ax.hist``.
        **kwargs: Forwarded to ``ax.hist``.

    Returns:
        Tuple ``(y, xaxis, patches)`` where ``y`` holds the bin contents of
        the topmost stack.

    Raises:
        ValueError: If ``scale`` is a dict but ``df`` is a plain list.
    """
    if isinstance(df, pd.DataFrame):
        assert col is not None, "Please provide column"
        assert by is not None, "Please provide by"
        data, cats = to_stack(df, col, by, transform, get_cats=True)
        if label is None:
            label = cats
    else:
        assert isinstance(df, list), "Please provide DataFrame or List"
        # A plain list carries no category names.
        data, cats = df, None

    if ax is None:
        ax = plt.gca()

    if color is None:
        from .colors import b2helix
        n_stacks = len(data)
        if n_stacks < 20:
            color = b2helix(n_stacks)

    if weights is None:
        if isinstance(scale, dict) and cats is None:
            # Without categories there is nothing to look the factors up by.
            raise ValueError("A dict scale requires a DataFrame input with categories")
        weights = []
        for i, d in enumerate(data):
            wei = np.ones(len(d))
            if scale is not None:
                if isinstance(scale, (int, float)):
                    if not isinstance(scale, bool):
                        wei *= scale
                elif isinstance(scale, dict):
                    assert cats[i] in scale, "Scale list must have same lenght as data"
                    wei *= scale[cats[i]]
                else:
                    print("Please provide int or float with scale")
            weights.append(wei)

    # NOTE(review): the binning is derived from the first sample only (the
    # shortest one for DataFrame input) -- confirm that this is intended.
    xaxis = _hist_init(data[0], bins, xrange=range)

    y, xaxis, stuff = ax.hist(data, xaxis, *args, histtype='stepfilled', lw=lw, color=color, edgecolor=edgecolor,
                              stacked=True, weights=weights, label=label, **kwargs)

    TheManager.Instance().set_x_axis(xaxis)

    # With a single sample ``ax.hist`` returns a 1D array; promote it so that
    # ``[-1]`` always selects the topmost stack and never a single bin.
    return np.atleast_2d(y)[-1], xaxis, stuff
def errorhist(data, bins=None, color=None, normed=False, density=False, fmt='.', range=None, scale=None, x_err=False,
              box=False, ax=None, weights=None, plot_zero=True, label=None, *args, **kwargs):
    """ Histogram as error bar

    The data is binned with ``np.histogram`` on the edges from ``_hist_init``
    and drawn through ``errorbar``. The edges are stored in ``TheManager``
    afterwards.

    Args:
        data: Values to histogram; a pandas Series is reduced to its values.
        bins: Binning forwarded to ``_hist_init``.
        color: Colour. An int selects ``b2cm[color % len(b2cm)]``; None takes
            the next colour of the axes' colour cycle.
        normed: Deprecated alias of ``density``.
        density: Normalise the histogram (``np.histogram(density=True)``).
        fmt: Marker format forwarded to ``errorbar``.
        range: Range forwarded to ``_hist_init``.
        scale: int or float every weight is multiplied with (bools are
            ignored, other types only print a message).
        x_err: Anything other than False draws horizontal bars of half the
            bin width.
        box: Draw the uncertainty as boxes (implies x errors).
        ax: Axes to draw on; defaults to ``plt.gca()``.
        weights: Per-entry weights; defaults to ones. Not modified in place.
        plot_zero: Forwarded to ``errorbar``.
        label: Legend label.
        *args: Forwarded to ``errorbar``.
        **kwargs: Forwarded to ``errorbar``.

    Returns:
        Tuple ``(y, bin_centers, err)`` with ``err = (lower, upper)``.
    """
    xaxis = _hist_init(data, bins, xrange=range)

    if ax is None:
        ax = plt.gca()

    if isinstance(data, pd.Series):
        data = data.values

    if weights is None:
        weights = np.ones(len(data))

    if scale is None:
        scale = 1
    elif isinstance(scale, (int, float)):
        if not isinstance(scale, bool):
            # Build a new array: an in-place ``*=`` would modify the caller's
            # weights, fail for integer arrays with a float scale and repeat
            # (not scale) a plain list.
            weights = np.asarray(weights) * scale
    else:
        print("Please provide int or float with scale")

    if normed:
        print('normed is deprecated and changed by density. Your call has been changed to density=True automatically.')
        density = True

    y, x = np.histogram(data, xaxis, density=density, weights=weights)
    # https://www-cdf.fnal.gov/physics/statistics
    root = np.sqrt(np.array(y * scale + 0.25))
    err = (-0.5 + root, +0.5 + root)
    bin_centers = (x[1:] + x[:-1]) / 2.0

    if isinstance(color, int):
        color = b2cm[color % len(b2cm)]

    if color is None:
        # NOTE(review): private matplotlib API; ``prop_cycler`` is not
        # available on newer releases, ``get_next_color`` is.
        color = ax._get_lines.get_next_color()

    if density:
        yom, x = np.histogram(data, xaxis, weights=weights)
        # Normalisation factor per bin; empty bins get 0 instead of 0/0.
        ratio = np.divide(y, yom, out=np.zeros_like(y, dtype=float), where=yom != 0)
        sym = np.sqrt(np.array(yom)) * ratio
        err = (sym, sym.copy())

    if x_err is not False or box:
        # Half width of every bin, so non-uniform binning is drawn correctly
        # and the array can be masked together with the other inputs.
        x_err = np.diff(x) / 2.0
    else:
        x_err = None

    errorbar(bin_centers, y, err, x_err, box, plot_zero, fmt, color, ax, *args, label=label, **kwargs)

    TheManager.Instance().set_x_axis(xaxis)

    return y, bin_centers, err
def errorbar(bin_centers, y, y_err, x_err=None, box=False, plot_zero=True, fmt='.', color=None, ax=None, label=None,
             alpha=0.4, hatch=None, *args, **kwargs):
    """ Error graph plotting x-y points with errorbars

    Args:
        bin_centers: x positions of the points.
        y: y values of the points.
        y_err: Either a pair ``(lower, upper)`` or a single sequence that is
            used for both sides. Anything whose length is not 2 is treated as
            a single sequence.
        x_err: Horizontal errors (scalar or one value per point); required
            when ``box`` is set.
        box: Draw the y uncertainty as bars spanning ``y - lower`` to
            ``y + upper`` instead of error bars.
        plot_zero: When passed as False, points with ``y == 0`` are dropped.
        fmt: Marker format for ``ax.errorbar`` (not used with ``box``).
        color: Colour; None takes the next colour of the axes' colour cycle.
        ax: Axes to draw on; defaults to ``plt.gca()``.
        label: Legend label.
        alpha: Opacity of the boxes.
        hatch: Hatch pattern of the boxes.
        *args: Forwarded to ``ax.bar`` (box) or ``ax.errorbar``.
        **kwargs: Forwarded to ``ax.bar`` (box) or ``ax.errorbar``.
    """
    if ax is None:
        ax = plt.gca()

    if len(y_err) != 2:
        y_err = y_err, y_err

    if color is None:
        # NOTE(review): private matplotlib API; ``prop_cycler`` is not
        # available on newer releases, ``get_next_color`` is.
        color = ax._get_lines.get_next_color()

    bin_centers = np.asarray(bin_centers)
    y = np.asarray(y)

    if plot_zero is False:
        keep = y != 0
        y_err = (np.asarray(y_err[0])[keep], np.asarray(y_err[1])[keep])
        # A scalar x error applies to every point and cannot be masked.
        if x_err is not None and np.ndim(x_err) > 0:
            x_err = np.asarray(x_err)[keep]
        bin_centers = bin_centers[keep]
        y = y[keep]

    if box:
        assert x_err is not None, "Please provide x-err"
        height = y_err[0] + y_err[1]
        bottom = y - y_err[0]
        ax.errorbar(bin_centers, y, color=color, xerr=x_err, fmt=' ')
        # bin_centers is already masked above; masking it again would fail
        # as soon as a point has been dropped.
        ax.bar(bin_centers, height, *args, bottom=bottom, align='center', color=color, alpha=alpha,
               width=2 * x_err, label=label, edgecolor=color, hatch=hatch, **kwargs)
    else:
        ax.errorbar(bin_centers, y, *args, yerr=y_err, xerr=x_err, fmt=fmt, color=color, label=label, **kwargs)
def bar(y, binedges, ax=None, *args, **kwargs):
    """ Bar plot

    Draws already binned contents by histogramming the bin centers with the
    contents as weights.

    Args:
        y: Bin contents, one value per bin.
        binedges: Bin edges (any sequence; one more entry than ``y``).
        ax: Axes to draw on; defaults to ``plt.gca()``.
        *args: Forwarded to ``ax.hist``.
        **kwargs: Forwarded to ``ax.hist``.

    Returns:
        Whatever ``ax.hist`` returns.
    """
    if ax is None:
        ax = plt.gca()
    # Convert first: with plain lists ``+`` would concatenate the edges.
    binedges = np.asarray(binedges)
    centers = (binedges[1:] + binedges[:-1]) / 2.0
    return ax.hist(centers, *args, bins=binedges, weights=y, **kwargs)
def profile(x, y, bins=None, range=None, fmt='.', *args, **kwargs):
    """ Profile plot of x vs y; the mean of y in bins of x, drawn as errorbar
    with the standard error of the mean (``scipy.stats.sem``) as uncertainty.

    Args:
        x: Values that define the binning.
        y: Values that are averaged within each bin of ``x``.
        bins: Binning forwarded to ``_hist_init``.
        range: Range forwarded to ``_hist_init``.
        fmt: Marker format for ``plt.errorbar``.
        *args: Forwarded to ``plt.errorbar``.
        **kwargs: Forwarded to ``plt.errorbar``.

    Returns:
        Whatever ``plt.errorbar`` returns.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is loaded.
    from scipy import stats

    xaxis = _hist_init(x, bins, xrange=range)

    means = stats.binned_statistic(x, y, bins=xaxis, statistic='mean').statistic
    sems = stats.binned_statistic(x, y, bins=xaxis, statistic=stats.sem).statistic

    bin_centers = (xaxis[:-1] + xaxis[1:]) / 2.
    return plt.errorbar(x=bin_centers, y=means, yerr=sems, linestyle='none', fmt=fmt, *args, **kwargs)