# Globys, Inc. Data Science Team
# Matt Danielson
# Luca Cazzanti
# Olly Downs
# Julie Penzotti
# Carl Sutherland
# Garrett Tenold
# Courosh Mehanian
# Personalized Customer Usage and Behavior Longitudinal View
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import re
%matplotlib inline
# --- Shared plot styling -----------------------------------------------------
# Font size applied to the x-axis tick labels.
XAXIS_FONTSIZE = 14
# The y-axis tick labels follow the x-axis size so both axes read alike.
YAXIS_FONTSIZE = XAXIS_FONTSIZE
# Two points larger than the tick labels.
YLABEL_FONTSIZE = XAXIS_FONTSIZE + 2
# Font size of legend entries.
LEGEND_FONTSIZE = 12
# Width handed to ax.bar() for the usage bars.
BAR_WIDTH = 0.6
# bbox_to_anchor handed to ax.legend(); an x value above 1 lies outside the axes.
LEGEND_BBOX_TO_ANCHOR = (1.15, 1)
def setup_timeseries_on_x(ax, **kwargs):
    """
    Apply the shared time-series styling to ``ax``.

    The x axis gets a labelled major tick on the first day of every second
    month and an unlabelled minor tick every month. All four spines are
    hidden, the y axis is reduced to two labelled ticks (the first one shown
    as an absolute value), tick marks are suppressed on both axes, and a grey
    horizontal line is drawn at y=0.

    :param ax: The matplotlib axis object to style
    :param kwargs: optional settings; unrecognised keys are ignored
        - ``xmin`` / ``xmax``: left / right x-axis limits
        - ``xticks_location``: side for the x ticks and label
          (default ``"bottom"``)
        - ``draw_xaxis``: pass a false value to remove the x tick labels
          (default: labels are kept)
    """
    if "xmin" in kwargs:
        ax.set_xlim(left=kwargs["xmin"])
    if "xmax" in kwargs:
        ax.set_xlim(right=kwargs["xmax"])

    # Major ticks: first day of every second month; minor ticks: every month.
    all_months = matplotlib.dates.MonthLocator()
    months = matplotlib.dates.MonthLocator(range(1, 13), bymonthday=1, interval=2)
    months_fmt = matplotlib.dates.DateFormatter("%b '%Y")
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(months_fmt)
    ax.xaxis.set_minor_locator(all_months)

    xticks_location = kwargs.get("xticks_location", "bottom")
    ax.xaxis.set_label_position(xticks_location)
    ax.xaxis.set_ticks_position(xticks_location)
    for label in ax.xaxis.get_ticklabels():
        label.set_fontsize(XAXIS_FONTSIZE)
    ax.xaxis.set_tick_params(which='minor', length=8, width=2)

    if not kwargs.get("draw_xaxis", True):
        ax.xaxis.set_tick_params(size=0)
        ax.xaxis.set_ticklabels([])

    for side in ("right", "top", "left", "bottom"):
        ax.spines[side].set_visible(False)

    # Two labelled y ticks, with five minor positions in between.
    ax.yaxis.set_major_locator(matplotlib.ticker.LinearLocator(2))
    ax.yaxis.set_minor_locator(matplotlib.ticker.LinearLocator(5))
    positions = ax.get_yticks()
    # The first tick is labelled with its absolute value so that a mirrored
    # (negative) lower half still shows a positive number.
    labels = [abs(int(positions[0])), int(positions[1])]
    ax.set_yticklabels(labels)
    # Size the labels on *this* axes; pyplot's yticks() would act on whichever
    # axes happens to be current, which need not be ``ax``.
    for label in ax.yaxis.get_ticklabels():
        label.set_fontsize(YAXIS_FONTSIZE)

    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('none')
    ax.axhline(0, color='grey')
def draw_box(xstart, xend, ystart, yend, color):
    """
    Build a filled rectangle patch spanning a date range on the x axis.

    The patch is only created, not added to any axes; the caller is expected
    to pass it to ``ax.add_patch``.

    :param xstart: date/datetime of the left edge (converted with date2num)
    :param xend: date/datetime of the right edge (converted with date2num)
    :param ystart: y value of the bottom edge
    :param yend: y value of the top edge
    :param color: face colour of the rectangle
    :return: a matplotlib PathPatch with zero line width
    """
    path_cls = matplotlib.path.Path
    # Convert each edge once instead of once per vertex.
    left = matplotlib.dates.date2num(xstart)
    right = matplotlib.dates.date2num(xend)
    coords = [
        (left, ystart),   # left, bottom
        (left, yend),     # left, top
        (right, yend),    # right, top
        (right, ystart),  # right, bottom
        (left, ystart),   # back to left, bottom (vertex is ignored by CLOSEPOLY)
    ]
    codes = [
        path_cls.MOVETO,
        path_cls.LINETO,
        path_cls.LINETO,
        path_cls.LINETO,
        path_cls.CLOSEPOLY,
    ]
    return matplotlib.patches.PathPatch(path_cls(coords, codes), facecolor=color, lw=0)
def plot_new_balance(ax, recharges, balance, filled_offers, unfilled_offers, **kwargs):
    """
    Plot the account balance as a line and overlay offer boxes on the same axes.

    Every offer is drawn as a box whose bottom edge sits at the offer's
    ``rc_amount`` and whose height is its ``offerAmount``. Fulfilled offers
    get a box from the offer start to the qualifying recharge date, a second
    box from there to one day past ``expiryDate``, and a 12-hour wide marker
    at ``adjustment_date``. Unfulfilled offers get a single box from start to
    ``expiryDate``.

    :param ax: The matplotlib axis object to use
    :param recharges: frame indexed by time with a ``voucher_value`` column;
        only read when ``plot_recharges`` is set
    :param balance: frame indexed by time with a ``balance`` column
    :param filled_offers: frame indexed by offer start time with columns
        ``expiryDate``, ``rc_amount``, ``offerUnits``, ``offerAmount``,
        ``qualifying_rc_date`` and ``adjustment_date``
    :param unfilled_offers: frame indexed by offer start time with columns
        ``expiryDate``, ``rc_amount``, ``offerUnits`` and ``offerAmount``
    :param kwargs: ``plot_recharges`` (truthy to draw recharge bars); all
        keyword arguments are also forwarded to ``setup_timeseries_on_x``
    :raises KeyError: if an ``offerUnits`` value is not "credit", "sms" or
        "data"
    """
    y_locations = set()
    offer_colors = {
        "credit": "#c6dbef",
        "sms": "#CCEACA",
        "data": "#FEE0C2",
    }
    offer_accepted_colors = {
        "credit": "#9ecae1",
        "sms": "#99D594",
        "data": "#2171b5",
    }
    offer_tags = {
        "credit": "Credit Initial Offer",
        "sms": "SMS Initial Offer",
        "data": "Data Initial Offer",
    }
    fulfillment_colors = {
        "credit": "#4292c6",
        "sms": "#67A362",
        "data": "#987350",
    }
    fulfillment_tags = {
        "credit": "Credit Fullfillment",
        "sms": "SMS Fullfillment",
        "data": "Data Fullfillment",
    }

    # Legend entries are collected by hand so each kind of box appears once.
    names_for_legend = []
    lines_for_legend = []

    balance_color = "#08519c"
    balance_lines, = ax.plot(
        balance.index,
        balance.balance,
        color=balance_color,
        label="balance",
        linewidth=2,
    )
    lines_for_legend.append(balance_lines)
    names_for_legend.append("Account Balance")

    if kwargs.get("plot_recharges"):
        recharge_lines = ax.bar(
            recharges.index,
            recharges.voucher_value,
            color="black",
            label="Recharge",
            width=.3,
        )
        lines_for_legend.append(recharge_lines)
        names_for_legend.append("Recharge")

    # iterrows() replaces the removed DataFrame.ix row lookups and visits each
    # row exactly once, even if two offers share a start timestamp.
    for start, offer in filled_offers.iterrows():
        startd = start.date()
        end = offer["expiryDate"] + datetime.timedelta(days=1)
        endd = end.date()
        recharge_amount = offer["rc_amount"]
        y_locations.add(recharge_amount)
        units = offer["offerUnits"]
        width = offer["offerAmount"]
        name = offer_tags[units]

        accepted_date = offer["qualifying_rc_date"]
        # An offer accepted on its start day would give a zero-width "initial
        # offer" box, so push the acceptance out by one day. The value is
        # compared against midnight of the start day as well as the bare date:
        # recent pandas never treats a Timestamp as equal to a datetime.date.
        # NOTE(review): assumes qualifying_rc_date carries no time of day -- confirm.
        start_midnight = datetime.datetime.combine(startd, datetime.time())
        if accepted_date == start_midnight or accepted_date == startd:
            accepted_date += datetime.timedelta(days=1)

        top = recharge_amount + width
        box1 = draw_box(startd, accepted_date, recharge_amount, top, offer_colors[units])
        ax.add_patch(box1)
        box2 = draw_box(accepted_date, endd, recharge_amount, top, offer_accepted_colors[units])
        ax.add_patch(box2)
        if name not in names_for_legend:
            names_for_legend.append(name)
            lines_for_legend.append(box1)
            names_for_legend.append(name.replace("Initial Offer", "Offer Accepted"))
            lines_for_legend.append(box2)

        # Fulfilment marker: a 12-hour wide box starting at midnight of the
        # adjustment day.
        filled_date = offer["adjustment_date"]
        filled_date = datetime.datetime(filled_date.year, filled_date.month, filled_date.day)
        line = draw_box(
            filled_date,
            filled_date + datetime.timedelta(hours=12),
            recharge_amount,
            top,
            fulfillment_colors[units],
        )
        ax.add_patch(line)
        name = fulfillment_tags[units]
        if name not in names_for_legend:
            names_for_legend.append(name)
            lines_for_legend.append(line)

    for start, offer in unfilled_offers.iterrows():
        startd = start.date()
        endd = offer["expiryDate"].date()
        recharge_amount = offer["rc_amount"]
        y_locations.add(recharge_amount)
        units = offer["offerUnits"]
        width = offer["offerAmount"]
        name = offer_tags[units]
        box1 = draw_box(startd, endd, recharge_amount, recharge_amount + width, offer_colors[units])
        ax.add_patch(box1)
        if name not in names_for_legend:
            names_for_legend.append(name)
            lines_for_legend.append(box1)

    legend = ax.legend(
        lines_for_legend,
        names_for_legend,
        prop={'size': LEGEND_FONTSIZE},
        bbox_to_anchor=LEGEND_BBOX_TO_ANCHOR,
    )
    if legend is not None:
        legend.get_frame().set_facecolor("None")
        legend.draw_frame(False)

    setup_timeseries_on_x(ax, xticks_location="top", **kwargs)

    # Label the y axis at every offer's recharge amount plus the topmost tick
    # left behind by setup_timeseries_on_x.
    positions = ax.get_yticks()
    y_locations.add(max(positions))
    y_locations = sorted(y_locations)
    ax.yaxis.set_major_locator(matplotlib.ticker.FixedLocator(y_locations))
    # The dollar sign is backslash-escaped in the label text.
    ax.set_yticklabels([r"\$" + str(int(p)) for p in y_locations])
    ax.set_ylim(-.1, max(y_locations))
    ax.grid(axis='y', color='grey', linestyle='--', which="major")
def plot_inbound_outbound_bars_with_network_distinction(ax, inbound_usage, outbound_usage, color1=None, color2=None, **kwargs):
    """
    Create a stacked bar chart of an inbound/outbound metric. Inbound is shown
    on the positive y axis, outbound is shown on the negative y axis. Within
    each direction the in-network bars sit against the x axis and the
    out-of-network bars are stacked on top of them.

    :param ax: The matplotlib axis object to use
    :param inbound_usage: mapping with "in_network" and "out_network" entries,
        each a pandas Series of the inbound metric
    :param outbound_usage: mapping with "in_network" and "out_network" entries,
        each a pandas Series of the outbound metric. The values are negated on
        a copy for drawing; the caller's data is not modified.
    :param color1: bar colour for in-network usage
    :param color2: bar colour for out-of-network usage
    :param kwargs: ``draw_legend`` (default True); all keyword arguments are
        also forwarded to ``setup_timeseries_on_x``
    """
    draw_legend = kwargs.get("draw_legend", True)

    # labels
    label1 = "In Network"
    label2 = "Out of Network"
    lines_for_legend = []
    names_for_legend = []

    # Outbound bars point downwards. Multiplying yields new Series, so the
    # caller's series are left untouched.
    outbound_in_network = outbound_usage["in_network"] * -1
    outbound_out_network = outbound_usage["out_network"] * -1

    def _extent(series):
        """Largest magnitude in ``series`` as an int; 0 if it has no valid value."""
        peak = abs(series).max()
        # NaN is the only value that differs from itself.
        return 0 if peak != peak else int(peak)

    def _plot(series1, color1, series2, color2):
        """
        plot stacked bars, series1 is against x axis, with series2 on top.
        Returns (largest stacked magnitude, series1 bars, series2 bars).
        """
        in_network_bars = None
        out_network_bars = None
        extent = 0
        if len(series1) > 0:
            extent = _extent(series1)
            in_network_bars = ax.bar(series1.keys(), series1.values, color=color1, edgecolor="None", width=BAR_WIDTH)
        if len(series2) > 0:
            if in_network_bars is None:
                # Nothing to stack on: draw series2 directly against the axis.
                extent = _extent(series2)
                out_network_bars = ax.bar(series2.keys(), series2.values, color=color2, edgecolor="None", width=BAR_WIDTH)
            else:
                extent = _extent(series1 + series2)
                out_network_bars = ax.bar(series2.keys(), series2.values, bottom=series1.values, edgecolor="None", color=color2, width=BAR_WIDTH)
        return extent, in_network_bars, out_network_bars

    y_max, in_network_in, out_network_in = _plot(inbound_usage["in_network"], color1, inbound_usage["out_network"], color2)
    outbound_extent, in_network_out, out_network_out = _plot(outbound_in_network, color1, outbound_out_network, color2)

    # One legend entry per network type, preferring the inbound bars as handle.
    for label, candidates in (
        (label1, (in_network_in, in_network_out)),
        (label2, (out_network_in, out_network_out)),
    ):
        handle = next((bars for bars in candidates if bars is not None), None)
        if handle is not None:
            lines_for_legend.append(handle)
            names_for_legend.append(label)

    # Use the same extent above and below the axis so both halves share a scale.
    max_val = max(y_max, outbound_extent)
    y_min = -outbound_extent

    # Set yticks manually or dynamically ?
    # this handles the case where matplotlib tries to sneak in .5 increments on the yticks
    if y_min > -10 and y_max < 10:
        y_ticks = list(range(y_min - 1, y_max + 2))  # range(m, n) -> [m, ..., n-1]
        ax.set_yticks(y_ticks)

    # set fudge factor on ymin so that ax.grid shows our lines
    ax.set_ylim(bottom=-1.0 * max_val - .1, top=max_val)

    if draw_legend:
        legend = ax.legend(lines_for_legend, names_for_legend, prop={'size': LEGEND_FONTSIZE}, bbox_to_anchor=LEGEND_BBOX_TO_ANCHOR)
        if legend is not None:
            legend.get_frame().set_facecolor("None")
            legend.draw_frame(False)

    # xticks are slightly more painful...
    setup_timeseries_on_x(ax, **kwargs)
    ax.yaxis.set_major_locator(matplotlib.ticker.FixedLocator([-max_val, max_val]))
    ax.text(-.045, .55, "Inbound", transform=ax.transAxes, fontsize=YAXIS_FONTSIZE-4)
    ax.text(-.05, .4, "Outbound", transform=ax.transAxes, fontsize=YAXIS_FONTSIZE-4)
    ax.grid(axis='y', color='grey', linestyle='--', which="major")
def read_and_plot():
    """
    Load the five input CSV files from the working directory, draw the
    three-panel figure (balance and offers, SMS usage, voice usage) and save
    it as "scipy_plotting_contest.pdf".
    """
    def _load(csv_name, date_columns):
        # Every input file is indexed by its "timestamp" column.
        return pd.read_csv(csv_name, parse_dates=date_columns).set_index("timestamp")

    recharges_frame = _load("recharges.csv", [0])
    balance_frame = _load("balance.csv", [0])
    filled_frame = _load("filled.csv", [0, 4, 5, 7])
    not_filled_frame = _load("not_filled.csv", [0, 5])
    sms_voice_frame = _load("usage.csv", [0])

    def _usage(in_network_column, out_network_column):
        # Pair up the in-network / out-of-network columns of one metric.
        return {
            "in_network": sms_voice_frame[in_network_column],
            "out_network": sms_voice_frame[out_network_column],
        }

    fig = plt.figure(figsize=(20, 15))
    fig.suptitle("Globys Longitudinal Customer View", fontsize=20)
    gs = matplotlib.gridspec.GridSpec(3, 1, left=.1, right=.85, bottom=.05, hspace=0.3)
    title_size = YAXIS_FONTSIZE + 6
    all_args = {"xmin": datetime.date(2013, 5, 1), "bars_same_y_ticks": True}

    # Top panel: account balance with offers.
    ax = fig.add_subplot(gs[0, 0])
    plot_new_balance(
        ax,
        recharges_frame,
        balance_frame,
        filled_frame,
        not_filled_frame,
        draw_xaxis=True,
        **all_args
    )
    plt.figtext(.05, .93, "Account Balance and Offers", size=title_size)
    ax.axhline(.24, color='grey')

    # Middle panel: SMS counts; x tick labels are switched off here.
    ax = fig.add_subplot(gs[1, 0])
    sms_inbound = _usage("in_network_inbound_sms_count", "out_of_network_inbound_sms_count")
    sms_outbound = _usage("in_network_outbound_sms_count", "out_of_network_outbound_sms_count")
    plt.figtext(.05, .62, "SMS Messages/Day", size=title_size)
    plot_inbound_outbound_bars_with_network_distinction(
        ax,
        sms_inbound,
        sms_outbound,
        draw_xaxis=False,
        color1="#31a354",
        color2="#a1d99b",
        **all_args
    )

    # Bottom panel: voice minutes.
    ax = fig.add_subplot(gs[2, 0])
    plt.figtext(.05, .31, "Voice Minutes/Day", size=title_size)
    voice_inbound = _usage("in_network_inbound_vc_minutes", "out_of_network_inbound_vc_minutes")
    voice_outbound = _usage("in_network_outbound_vc_minutes", "out_of_network_outbound_vc_minutes")
    plot_inbound_outbound_bars_with_network_distinction(
        ax,
        voice_inbound,
        voice_outbound,
        color1="#de2d26",
        color2="#fc9272",
        **all_args
    )

    fig.savefig("scipy_plotting_contest.pdf")
# Build the figure and write the PDF as soon as this cell/script is executed.
read_and_plot()