In [40]:
# Globys, Inc. Data Science Team
# Matt Danielson
# Luca Cazzanti
# Olly Downs
# Julie Penzotti
# Carl Sutherland
# Garrett Tenold
# Courosh Mehanian

# Personalized Customer Usage and Behavior Longitudinal View
In [30]:
import matplotlib
import matplotlib.pyplot as plt

import pandas as pd
import datetime
import re
%matplotlib inline
In [31]:
# Styling constants shared by the plotting helpers in this notebook.
XAXIS_FONTSIZE = 14                    # font size applied to x tick labels
YAXIS_FONTSIZE = XAXIS_FONTSIZE        # y tick labels use the same size
YLABEL_FONTSIZE = XAXIS_FONTSIZE + 2   # axis-label size, two steps larger
LEGEND_FONTSIZE = 12                   # passed as the legend's prop size
BAR_WIDTH = 0.6                        # passed as ``width`` to ax.bar
LEGEND_BBOX_TO_ANCHOR = (1.15, 1)      # passed as the legend's bbox_to_anchor
In [32]:
def setup_timeseries_on_x(ax, **kwargs):
    """
    Style ``ax`` as one panel of the longitudinal view: month ticks on the x
    axis, two relabelled ticks on the y axis, hidden spines and tick marks,
    and a grey horizontal line at y=0.

    Unrecognised keyword arguments are ignored, so callers can forward their
    own ``**kwargs`` unchanged.

    :param ax: The matplotlib axis object to style
    :param xmin: optional left x limit, passed to ``ax.set_xlim(left=...)``
    :param xmax: optional right x limit, passed to ``ax.set_xlim(right=...)``
    :param xticks_location: "top" or "bottom" (default "bottom"); side on which
        the x tick labels and x axis label are placed
    :param draw_xaxis: when equal to ``False`` the x tick labels are removed
    """
    if "xmin" in kwargs:
        ax.set_xlim(left=kwargs["xmin"])
    if "xmax" in kwargs:
        ax.set_xlim(right=kwargs["xmax"])

    # Major tick (with a "Mon 'YYYY" label) every second month, minor tick
    # every month.
    all_months = matplotlib.dates.MonthLocator()
    months = matplotlib.dates.MonthLocator(range(1, 13), bymonthday=1, interval=2)
    monthsFmt = matplotlib.dates.DateFormatter("%b '%Y")

    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(monthsFmt)
    ax.xaxis.set_minor_locator(all_months)

    xticks_location = kwargs.get("xticks_location", "bottom")
    ax.xaxis.set_label_position(xticks_location)
    ax.xaxis.set_ticks_position(xticks_location)

    for label in ax.xaxis.get_ticklabels():
        label.set_fontsize(XAXIS_FONTSIZE)

    ax.xaxis.set_tick_params(which='minor', length=8, width=2)
    if kwargs.get("draw_xaxis", True) == False:
        ax.xaxis.set_tick_params(size=0)
        ax.xaxis.set_ticklabels([])

    for side in ("right", "top", "left", "bottom"):
        ax.spines[side].set_visible(False)

    # Two major y ticks at the ends of the current y range.  The first label is
    # shown as an absolute value so a negative lower bound reads as a magnitude.
    ax.yaxis.set_major_locator(matplotlib.ticker.LinearLocator(2))
    ax.yaxis.set_minor_locator(matplotlib.ticker.LinearLocator(5))
    positions = ax.get_yticks()
    labels = [abs(int(positions[0])), int(positions[1])]
    ax.set_yticklabels(labels)

    # Size the tick labels of *this* axes.  plt.yticks() would act on whatever
    # axes pyplot currently treats as active, which need not be ``ax``.
    for label in ax.yaxis.get_ticklabels():
        label.set_fontsize(YAXIS_FONTSIZE)

    # Hide the tick marks themselves on both axes; labels stay in place.
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('none')
    ax.axhline(0, color='grey')
In [33]:
def draw_box(xstart, xend, ystart, yend, color):
    """
    Build a filled rectangle patch (line width 0) spanning two dates on x.

    The patch is only created here; the caller is responsible for adding it
    to an axes.

    :param xstart: left edge, a date value accepted by matplotlib.dates.date2num
    :param xend: right edge, same kind of value as ``xstart``
    :param ystart: bottom edge, in y data units
    :param yend: top edge, in y data units
    :param color: face color of the patch
    :return: a matplotlib.patches.PathPatch describing the rectangle
    """
    left = matplotlib.dates.date2num(xstart)
    right = matplotlib.dates.date2num(xend)
    Path = matplotlib.path.Path

    coords = [
        (left, ystart),   # left, bottom
        (left, yend),     # left, top
        (right, yend),    # right, top
        (right, ystart),  # right, bottom
        (left, ystart),   # back to left, bottom (CLOSEPOLY ignores this vertex)
    ]
    codes = [Path.MOVETO, Path.LINETO, Path.LINETO, Path.LINETO, Path.CLOSEPOLY]

    return matplotlib.patches.PathPatch(Path(coords, codes), facecolor=color, lw=0)
In [34]:
def plot_new_balance(ax, recharges, balance, filled_offers, unfilled_offers, **kwargs):
    """
    Draw the account-balance panel: the balance line, optional recharge bars,
    and shaded boxes for every offer.

    Each offer box sits at the offer's ``rc_amount`` on the y axis and is
    ``offerAmount`` tall.  A filled offer gets three patches (initial offer,
    offer accepted, and a 12-hour-wide fulfillment marker); an unfilled offer
    gets a single box from the offer timestamp to its expiry date.  Rows are
    visited with ``iterrows()``, so offers that share a timestamp are each drawn.

    :param ax: The matplotlib axis object to use
    :param recharges: frame indexed by timestamp with a ``voucher_value``
        column; only read when ``plot_recharges`` is passed as ``True``
    :param balance: frame indexed by timestamp with a ``balance`` column
    :param filled_offers: frame indexed by offer timestamp with columns
        ``expiryDate``, ``rc_amount``, ``offerUnits``, ``offerAmount``,
        ``qualifying_rc_date`` and ``adjustment_date``
    :param unfilled_offers: frame indexed by offer timestamp with columns
        ``expiryDate``, ``rc_amount``, ``offerUnits`` and ``offerAmount``
    :param kwargs: ``plot_recharges`` enables the recharge bars; all keyword
        arguments are also forwarded to ``setup_timeseries_on_x``
    :raises KeyError: if an ``offerUnits`` value is not "credit", "sms" or "data"
    """
    y_locations = set()

    offer_colors = {
        "credit": "#c6dbef",
        "sms": "#CCEACA",
        "data": "#FEE0C2",
    }
    offer_accepted_colors = {
        "credit": "#9ecae1",
        "sms": "#99D594",
        "data": "#2171b5",
    }
    offer_tags = {
        "credit": "Credit Initial Offer",
        "sms": "SMS Initial Offer",
        "data": "Data Initial Offer",
    }
    fullfillment_colors = {
        "credit": "#4292c6",
        "sms": "#67A362",
        "data": "#987350",
    }
    fullfillment_tags = {
        "credit": "Credit Fullfillment",
        "sms": "SMS Fullfillment",
        "data": "Data Fullfillment",
    }

    # Legend entries are collected by hand so each kind of patch appears once.
    names_for_legend = []
    lines_for_legend = []

    balance_lines, = ax.plot(
        balance.index,
        balance.balance,
        color="#08519c",
        label="balance",
        linewidth=2,
    )
    lines_for_legend.append(balance_lines)
    names_for_legend.append("Account Balance")

    if kwargs.get("plot_recharges") == True:
        recharge_lines = ax.bar(
            recharges.index,
            recharges.voucher_value,
            color="black",
            label="Recharge",
            width=.3,
        )
        lines_for_legend.append(recharge_lines)
        names_for_legend.append("Recharge")

    one_day = datetime.timedelta(days=1)

    for start, offer in filled_offers.iterrows():
        startd = start.date()
        # The accepted box runs through the end of the expiry day.
        endd = (offer["expiryDate"] + one_day).date()

        recharge_amount = offer["rc_amount"]
        y_locations.add(recharge_amount)

        units = offer["offerUnits"]
        top = recharge_amount + offer["offerAmount"]

        accepted_date = offer["qualifying_rc_date"]
        # Accepted on the start day: push the boundary out one day so the
        # "initial offer" box still has some width.
        # NOTE(review): ``startd`` is a datetime.date; if this column holds
        # pandas Timestamps, recent pandas treats Timestamp == date as always
        # unequal, so this branch may never fire -- confirm the intent.
        if accepted_date == startd:
            accepted_date += one_day

        box1 = draw_box(startd, accepted_date, recharge_amount, top, offer_colors[units])
        ax.add_patch(box1)

        box2 = draw_box(accepted_date, endd, recharge_amount, top, offer_accepted_colors[units])
        ax.add_patch(box2)

        offer_name = offer_tags[units]
        if offer_name not in names_for_legend:
            names_for_legend.append(offer_name)
            lines_for_legend.append(box1)
            names_for_legend.append(re.sub("Initial Offer", "Offer Accepted", offer_name))
            lines_for_legend.append(box2)

        # Fulfillment marker: a box starting at midnight of the adjustment
        # day and 12 hours wide.
        filled_date = offer["adjustment_date"]
        filled_date = datetime.datetime(filled_date.year, filled_date.month, filled_date.day)
        marker = draw_box(
            filled_date,
            filled_date + datetime.timedelta(hours=12),
            recharge_amount,
            top,
            fullfillment_colors[units],
        )
        ax.add_patch(marker)

        fullfillment_name = fullfillment_tags[units]
        if fullfillment_name not in names_for_legend:
            names_for_legend.append(fullfillment_name)
            lines_for_legend.append(marker)

    for start, offer in unfilled_offers.iterrows():
        startd = start.date()
        endd = offer["expiryDate"].date()

        recharge_amount = offer["rc_amount"]
        y_locations.add(recharge_amount)

        units = offer["offerUnits"]
        box = draw_box(
            startd,
            endd,
            recharge_amount,
            recharge_amount + offer["offerAmount"],
            offer_colors[units],
        )
        ax.add_patch(box)

        offer_name = offer_tags[units]
        if offer_name not in names_for_legend:
            names_for_legend.append(offer_name)
            lines_for_legend.append(box)

    legend = ax.legend(
        lines_for_legend,
        names_for_legend,
        prop={'size': LEGEND_FONTSIZE},
        bbox_to_anchor=LEGEND_BBOX_TO_ANCHOR,
    )
    if legend is not None:
        legend.get_frame().set_facecolor("None")
        legend.draw_frame(False)

    setup_timeseries_on_x(ax, xticks_location="top", **kwargs)

    # Replace the generic y ticks with one tick per offer recharge amount,
    # plus the top tick chosen by setup_timeseries_on_x.
    positions = ax.get_yticks()
    y_locations.add(max(positions))
    y_locations = sorted(y_locations)
    ax.yaxis.set_major_locator(matplotlib.ticker.FixedLocator(y_locations))
    # "\\$" is a backslash followed by a dollar sign, written with an escaped
    # backslash because "\$" is not a valid Python escape sequence.
    ax.set_yticklabels(["\\$" + str(int(p)) for p in y_locations])
    ax.set_ylim(-.1, max(y_locations))

    ax.grid(axis='y', color='grey', linestyle='--', which="major")
In [35]:
def plot_inbound_outbound_bars_with_network_distinction(ax, inbound_usage, outbound_usage, color1=None, color2=None, **kwargs):
    """
    Create a stacked bar chart of an inbound/outbound metric.  Inbound is shown
    on the positive y axis, outbound is shown on the negative y axis.  Within
    each direction the in-network bars sit against the x axis and the
    out-of-network bars are stacked on top of them.

    The input series are only read; outbound values are negated on a copy for
    drawing, so calling this function repeatedly on the same data is safe.

    :param ax: The matplotlib axis object to use
    :param inbound_usage: mapping with keys "in_network" and "out_network",
        each a pandas Series of some inbound metric
    :param outbound_usage: mapping with the same two keys, each a pandas
        Series of some outbound metric (given as positive magnitudes)
    :param color1: bar color for in-network values
    :param color2: bar color for out-of-network values
    :param kwargs: ``draw_legend`` (default True) toggles the legend; all
        keyword arguments are also forwarded to ``setup_timeseries_on_x``
    """
    draw_legend = kwargs.get("draw_legend", True)

    label1 = "In Network"
    label2 = "Out of Network"

    def _draw_stack(usage, negate):
        """
        Draw one direction's stacked bars.

        Returns (peak, in_network_bars, out_network_bars) where peak is the
        integer height of the tallest stack (0 when there is no data) and a
        bar entry is None when that series was not drawn.
        """
        base = usage["in_network"]
        top = usage["out_network"]
        base_heights = -base.values if negate else base.values
        top_heights = -top.values if negate else top.values

        peak = 0
        base_bars = None
        top_bars = None
        if len(base) > 0:
            peak = int(base.max())
            base_bars = ax.bar(base.index, base_heights, color=color1, edgecolor="None", width=BAR_WIDTH)

        if len(top) > 0:
            if base_bars is None:
                # Nothing to stack on: out-of-network bars start at the x axis.
                peak = int(top.max())
                top_bars = ax.bar(top.index, top_heights, color=color2, edgecolor="None", width=BAR_WIDTH)
            else:
                peak = int((base + top).max())
                top_bars = ax.bar(top.index, top_heights, bottom=base_heights, edgecolor="None", color=color2, width=BAR_WIDTH)

        return peak, base_bars, top_bars

    inbound_peak, in_network_in, out_network_in = _draw_stack(inbound_usage, negate=False)
    outbound_peak, in_network_out, out_network_out = _draw_stack(outbound_usage, negate=True)

    # One legend entry per network type, preferring the inbound bars as handle.
    lines_for_legend = []
    names_for_legend = []
    for label, candidates in (
        (label1, (in_network_in, in_network_out)),
        (label2, (out_network_in, out_network_out)),
    ):
        handle = next((bars for bars in candidates if bars is not None), None)
        if handle is not None:
            lines_for_legend.append(handle)
            names_for_legend.append(label)

    # Symmetric y range large enough for the taller of the two directions.
    max_val = max(inbound_peak, outbound_peak)

    # set fudge factor on ymin so that ax.grid shows our lines
    ax.set_ylim(bottom=-1.0 * max_val - .1, top=max_val)

    if draw_legend:
        legend = ax.legend(lines_for_legend, names_for_legend, prop={'size': LEGEND_FONTSIZE}, bbox_to_anchor=LEGEND_BBOX_TO_ANCHOR)

        if legend is not None:
            legend.get_frame().set_facecolor("None")
            legend.draw_frame(False)

    # xticks are slightly more painful...
    setup_timeseries_on_x(ax, **kwargs)

    # Tick positions are fixed here, after setup_timeseries_on_x has installed
    # its own locators.
    ax.yaxis.set_major_locator(matplotlib.ticker.FixedLocator([-max_val, max_val]))
    ax.text(-.045, .55, "Inbound", transform=ax.transAxes, fontsize=YAXIS_FONTSIZE-4)
    ax.text(-.05, .4, "Outbound", transform=ax.transAxes, fontsize=YAXIS_FONTSIZE-4)
    ax.grid(axis='y', color='grey', linestyle='--', which="major")
In [38]:
def read_and_plot():
    """
    Read the five input CSV files from the working directory, draw the
    three-row longitudinal figure (balance and offers, SMS usage, voice usage)
    and save it as ``scipy_plotting_contest.pdf``.
    """
    def _load(filename, date_columns):
        # Every input file is indexed by its "timestamp" column.
        return pd.read_csv(filename, parse_dates=date_columns).set_index("timestamp")

    recharges_frame = _load("recharges.csv", [0])
    balance_frame = _load("balance.csv", [0])
    filled_frame = _load("filled.csv", [0, 4, 5, 7])
    not_filled_frame = _load("not_filled.csv", [0, 5])
    sms_voice_frame = _load("usage.csv", [0])

    fig = plt.figure(figsize=(20, 15))
    fig.suptitle("Globys Longitudinal Customer View", fontsize=20)

    gs = matplotlib.gridspec.GridSpec(3, 1, left=.1, right=.85, bottom=.05, hspace=0.3)

    # Keyword arguments shared by all three panels.
    all_args = {"xmin": datetime.date(2013, 5, 1), "bars_same_y_ticks": True}
    panel_title_size = YAXIS_FONTSIZE + 6

    # Row 0: account balance and offers.
    ax = fig.add_subplot(gs[0, 0])
    plot_new_balance(
        ax,
        recharges_frame,
        balance_frame,
        filled_frame,
        not_filled_frame,
        draw_xaxis=True,
        **all_args
    )
    plt.figtext(.05, .93, "Account Balance and Offers", size=panel_title_size)
    ax.axhline(.24, color='grey')

    # Rows 1 and 2: inbound/outbound usage, split by network.
    usage_panels = [
        {
            "row": 1,
            "title_y": .62,
            "title": "SMS Messages/Day",
            "inbound": ("in_network_inbound_sms_count", "out_of_network_inbound_sms_count"),
            "outbound": ("in_network_outbound_sms_count", "out_of_network_outbound_sms_count"),
            "colors": ("#31a354", "#a1d99b"),
            "extra": {"draw_xaxis": False},
        },
        {
            "row": 2,
            "title_y": .31,
            "title": "Voice Minutes/Day",
            "inbound": ("in_network_inbound_vc_minutes", "out_of_network_inbound_vc_minutes"),
            "outbound": ("in_network_outbound_vc_minutes", "out_of_network_outbound_vc_minutes"),
            "colors": ("#de2d26", "#fc9272"),
            "extra": {},
        },
    ]

    for panel in usage_panels:
        ax = fig.add_subplot(gs[panel["row"], 0])
        plt.figtext(.05, panel["title_y"], panel["title"], size=panel_title_size)

        in_col, out_col = panel["inbound"]
        inbound_usage = {
            "in_network": sms_voice_frame[in_col],
            "out_network": sms_voice_frame[out_col],
        }
        in_col, out_col = panel["outbound"]
        outbound_usage = {
            "in_network": sms_voice_frame[in_col],
            "out_network": sms_voice_frame[out_col],
        }

        call_kwargs = dict(all_args)
        call_kwargs.update(panel["extra"])
        first_color, second_color = panel["colors"]
        plot_inbound_outbound_bars_with_network_distinction(
            ax,
            inbound_usage,
            outbound_usage,
            color1=first_color,
            color2=second_color,
            **call_kwargs
        )

    fig.savefig("scipy_plotting_contest.pdf")
In [39]:
# Build the figure from the CSV files and write scipy_plotting_contest.pdf.
read_and_plot()
In [37]:
 
In [37]: