output.py
import os
import pandas
import argparse
from bokeh.plotting import figure, ColumnDataSource
from bokeh.io import curdoc, output_file, save, export_svgs
from bokeh.resources import CDN
from bokeh.embed import file_html, autoload_static, components
from bokeh.layouts import row, column
from bokeh.models import HoverTool, CustomJS, Slider, Legend, CDSView, GroupFilter
from goodreads_shared import (
add_shared_args, process_args, iter_options,
html_path, data_path, comp_path
)
def parse_args():
    """Parse the command line and return the processed argument namespace.

    The shared project options are registered through ``add_shared_args``
    and the parsed namespace is passed through ``process_args``.  When
    ``--compare`` is supplied, ``html_file`` is replaced by a name built
    from the data file and the comparison file, each with a trailing
    ``.csv`` removed.  The value of ``data_file`` is echoed to stdout at
    each stage.
    """
    def without_csv(name):
        # Drop a trailing '.csv' extension, if there is one.
        if name.endswith('.csv'):
            return name[:-4]
        return name

    cli = add_shared_args(argparse.ArgumentParser())

    # How this script behaves depends on how it is launched:
    #   * run by bokeh: a server-backed visualization is produced;
    #   * run stand-alone: a block of html meant for inclusion in another
    #     site is produced, which cannot be loaded on its own;
    #   * run stand-alone **with --static**: a fully independent html
    #     page that *can* be loaded on its own is produced.
    cli.add_argument(
        '-s', '--static',
        action='store_true',
        default=False,
        help='Save an html file instead of an include file',
    )
    cli.add_argument(
        '-i', '--image',
        action='store_true',
        default=False,
        help='Save an image in SVG format instead of an include file',
    )
    cli.add_argument(
        '--compare',
        type=str,
        help='Compare results to data from a second file.',
    )

    parsed = cli.parse_args()
    print(parsed.data_file)
    parsed = process_args(parsed)
    print(parsed.data_file)

    if args_compare_requested(parsed):
        data_stem = without_csv(parsed.data_file)
        compare_stem = without_csv(parsed.compare)
        parsed.html_file = ''.join(
            [data_stem, '_compare_', compare_stem, '.html']
        )

    print(parsed.data_file)
    return parsed


def args_compare_requested(parsed):
    """Return True when a non-empty ``--compare`` value was supplied."""
    return bool(parsed.compare)
def ranges(x_vals, y_vals, padright=False):
    """Return padded ``[min, max]`` axis bounds for the given coordinates.

    Each axis is widened by 15% of its span on both sides.  When
    ``padright`` is true the upper x bound is widened by 55% of the x
    span instead.

    Returns a tuple ``([x_min, x_max], [y_min, y_max])``.
    """
    def padded_bounds(values, upper_fraction):
        # Lower padding is always 15% of the span; upper padding varies.
        lowest = min(values)
        highest = max(values)
        extent = highest - lowest
        return [lowest - extent * 0.15, highest + extent * upper_fraction]

    x_bounds = padded_bounds(x_vals, 0.55 if padright else 0.15)
    y_bounds = padded_bounds(y_vals, 0.15)
    return x_bounds, y_bounds
def get_plot(args):
    """Build the plot selected by the parsed arguments.

    Delegates to ``comp_plot`` when ``args.compare`` is set, passing the
    comparison value along, and to ``simple_plot`` otherwise.
    """
    if not args.compare:
        return simple_plot(args)
    return comp_plot(args, args.compare)
# Marker name, color and size used to draw each genre.  The marker names
# are looked up as methods on the figure by the plotting code.
_GENRE_GLYPH_SETTINGS = {
    "Childrens": {
        "glyph": "square_cross", "color": "magenta", "size": 4,
    },
    "Erotic Fiction": {
        "glyph": "dash", "color": "maroon", "size": 4,
    },
    "Fantasy": {
        "glyph": "x", "color": "mediumblue", "size": 4,
    },
    "Graphic Novels": {
        "glyph": "circle_x", "color": "yellow", "size": 4,
    },
    "Historical Fiction": {
        "glyph": "cross", "color": "orangered", "size": 4,
    },
    "Horror Suspense": {
        "glyph": "circle", "color": "orchid", "size": 4,
    },
    "Literary Fiction": {
        "glyph": "diamond", "color": "firebrick", "size": 4,
    },
    "Mystery Crime Detective": {
        "glyph": "triangle", "color": "slategray", "size": 4,
    },
    "Romance": {
        "glyph": "inverted_triangle", "color": "limegreen", "size": 4,
    },
    "Science Fiction": {
        "glyph": "square", "color": "black", "size": 4,
    },
    "Thriller": {
        "glyph": "diamond_cross", "color": "darkgoldenrod", "size": 4,
    },
    "Womens Fiction": {
        "glyph": "asterisk", "color": "deeppink", "size": 4,
    },
    "YA": {
        "glyph": "square_x", "color": "darksalmon", "size": 4,
    },
}


def append_glyph_settings(user_df):
    """Return ``user_df`` with per-row glyph styling columns added.

    Three columns are appended, in this order, based on each row's
    ``genre`` value: ``colors``, ``glyphs`` and ``sizes``.

    Args:
        user_df: DataFrame with a ``genre`` column.

    Returns:
        The DataFrame produced by ``DataFrame.assign`` with the three
        extra columns.

    Raises:
        KeyError: if a row's genre has no entry in the settings table.
    """
    # Resolve the settings once per row by iterating the genre column
    # directly, rather than indexing with ``iloc`` separately for every
    # row and every output column.
    row_settings = [_GENRE_GLYPH_SETTINGS[genre] for genre in user_df.genre]
    return user_df.assign(
        colors=[entry['color'] for entry in row_settings],
        glyphs=[entry['glyph'] for entry in row_settings],
        sizes=[entry['size'] for entry in row_settings],
    )
def simple_plot(args):
# Load the data and rotate into the familiar shape.
user_dataset = pandas.read_csv(data_path(args), index_col='user_id')
user_dataset = append_glyph_settings(user_dataset)
TOOLS = ','.join(['hover', 'crosshair', 'pan', 'wheel_zoom', 'box_zoom',
'undo', 'redo', 'reset', 'tap', 'save', 'box_select',
'poly_select', 'lasso_select'])
x_range, y_range = ranges(user_dataset.x.values,
user_dataset.y.values)
p = figure(tools=TOOLS, active_scroll='wheel_zoom',
plot_height=600, plot_width=800, toolbar_location='below',
x_range=x_range, y_range=y_range)
p.axis.visible = False
glyph_map = dict(set(zip(user_dataset.genre, user_dataset.glyphs)))
genre_names = sorted(glyph_map.keys())
# Rather than creating separate data sources for each glyph,
# we need to use `CDSView` -- this allows us to modify the
# underlying data all at once using a javascript callback.
# Otherwise, our callback would need to modify the source
# for each glyph separately.
source = ColumnDataSource(user_dataset)
legend_entries = []
for gn in genre_names:
genre_view = CDSView(source=source, filters=[GroupFilter(column_name='genre', group=gn)])
glyph_render = getattr(p, glyph_map[gn])
glyph = glyph_render(x='x', y='y', source=source, view=genre_view, fill_color='colors',
fill_alpha=0.6, line_color='colors', size='sizes', muted_alpha=0.1)
legend_entries.append((gn, [glyph]))
legend = Legend(items=legend_entries)
legend.click_policy = "mute"
p.add_layout(legend, 'right')
genre_columns = [c for c in user_dataset.columns
if c not in ['colors', 'genre', 'glyphs', 'sizes']]
genre_columns = [c for c in genre_columns if
not c.startswith('x') and
not c.startswith('y')]
tooltip_template = (
'
{}: '
'@{{{}}}
'
)
tooltips_list = [tooltip_template.format('User ID', 'user_id')]
tooltips_list.extend(tooltip_template.format(c, c) for c in genre_columns)
tooltips_list.append(tooltip_template.format('Primary Genre',...