import pandas import argparse from bokeh.plotting import figure, ColumnDataSource from bokeh.io import curdoc, output_file, save, export_svgs from bokeh.resources import CDN from bokeh.embed import...

1 answer below »
I need to find and plot the geometric mean, arithmetic mean, and median of user rankings, using Bokeh, for the attached CSV file. The .py file for the visualization, goodreads_tsne_serve.py, is attached as well, along with another .py file that it builds upon and that should help - please refer to the parts involving neighbor ranking for a reference on how to do this for rankings.


import pandas import argparse from bokeh.plotting import figure, ColumnDataSource from bokeh.io import curdoc, output_file, save, export_svgs from bokeh.resources import CDN from bokeh.embed import file_html, autoload_static, components from bokeh.layouts import row, column from bokeh.models import HoverTool, CustomJS, Slider, Legend, CDSView, GroupFilter from goodreads_shared import ( add_shared_args, process_args, iter_options, html_path, data_path, comp_path ) def parse_args(): parser = argparse.ArgumentParser() parser = add_shared_args(parser) # Note: the behavior of this script depends on whether it's executed # as a stand-alone script or run by bokeh. If run by bokeh, it # produces a server-backed visualization. If run as a stand-alone # script, it produces a block of html suitable for inclusion in # another site, but that cannot be loaded on its own. If run # as a stand-alone script **with this option**, it generates # a fully independent html page that *can* be loaded on its own. 
parser.add_argument('-s', '--static', action='store_true', default=False, help='Save an html file instead of an include file') parser.add_argument('-i', '--image', action='store_true', default=False, help='Save an image in SVG format instead of an include file') parser.add_argument('--compare', type=str, help='Compare results to data from a second file.') args = parser.parse_args() print(args.data_file) args = process_args(args) print(args.data_file) if args.compare: df = args.data_file df = df[:-4] if df.endswith('.csv') else df cf = args.compare cf = cf[:-4] if cf.endswith('.csv') else cf args.html_file = df + '_compare_' + cf + '.html' print(args.data_file) return args def ranges(x_vals, y_vals, padright=False): x_min = min(x_vals) x_max = max(x_vals) x_span = x_max - x_min x_min -= x_span * 0.15 x_max += x_span * (0.55 if padright else 0.15) y_min = min(y_vals) y_max = max(y_vals) y_span = y_max - y_min y_min -= y_span * 0.15 y_max += y_span * 0.15 return [x_min, x_max], [y_min, y_max] def get_plot(args): if args.compare: return comp_plot(args, args.compare) else: return simple_plot(args) def append_glyph_settings(user_df): glyphs = { "Childrens": { "glyph": "square_cross", "color": "magenta", "size": 4 }, "Erotic Fiction": { "glyph": "dash", "color": "maroon", "size": 4 }, "Fantasy": { "glyph": "x", "color": "mediumblue", "size": 4 }, "Graphic Novels": { "glyph": "circle_x", "color": "yellow", "size": 4 }, "Historical Fiction": { "glyph": "cross", "color": "orangered", "size": 4 }, "Horror Suspense": { "glyph": "circle", "color": "orchid", "size": 4 }, "Literary Fiction": { "glyph": "diamond", "color": "firebrick", "size": 4 }, "Mystery Crime Detective": { "glyph": "triangle", "color": "slategray", "size": 4 }, "Romance": { "glyph": "inverted_triangle", "color": "limegreen", "size": 4 }, "Science Fiction": { "glyph": "square", "color": "black", "size": 4 }, "Thriller": { "glyph": "diamond_cross", "color": "darkgoldenrod", "size": 4 }, "Womens Fiction": { 
"glyph": "asterisk", "color": "deeppink", "size": 4 }, "YA": { "glyph": "square_x", "color": "darksalmon", "size": 4 } } user_df = user_df.assign(colors=[ glyphs[user_df.genre.iloc[i]]['color'] for i in range(0, len(user_df)) ]) user_df = user_df.assign(glyphs=[ glyphs[user_df.genre.iloc[i]]['glyph'] for i in range(0, len(user_df)) ]) user_df = user_df.assign(sizes=[ glyphs[user_df.genre.iloc[i]]['size'] for i in range(0, len(user_df)) ]) return user_df def simple_plot(args): # Load the data and rotate into the familiar shape. user_dataset = pandas.read_csv(data_path(args), index_col='user_id') user_dataset = append_glyph_settings(user_dataset) TOOLS = ','.join(['hover', 'crosshair', 'pan', 'wheel_zoom', 'box_zoom', 'undo', 'redo', 'reset', 'tap', 'save', 'box_select', 'poly_select', 'lasso_select']) x_range, y_range = ranges(user_dataset.x.values, user_dataset.y.values) p = figure(tools=TOOLS, active_scroll='wheel_zoom', plot_height=600, plot_width=800, toolbar_location='below', x_range=x_range, y_range=y_range) p.axis.visible = False glyph_map = dict(set(zip(user_dataset.genre, user_dataset.glyphs))) genre_names = sorted(glyph_map.keys()) # Rather than creating separate data sources for each glyph, # we need to use `CDSView` -- this allows us to modify the # underyling data all at once using a javascript callback. # Otherwise, our callback would need to modify the source # for each glyph separately. 
source = ColumnDataSource(user_dataset) legend_entries = [] for gn in genre_names: genre_view = CDSView(source=source, filters=[GroupFilter(column_name='genre', group=gn)]) glyph_render = getattr(p, glyph_map[gn]) glyph = glyph_render(x='x', y='y', source=source, view=genre_view, fill_color='colors', fill_alpha=0.6, line_color='colors', size='sizes', muted_alpha=0.1) legend_entries.append((gn, [glyph])) legend = Legend(items=legend_entries) legend.click_policy = "mute" p.add_layout(legend, 'right') genre_columns = [c for c in user_dataset.columns if c not in ['colors', 'genre', 'glyphs', 'sizes']] genre_columns = [c for c in genre_columns if
Answered Same Day Jan 20, 2021

Answer To: import pandas import argparse from bokeh.plotting import figure, ColumnDataSource from bokeh.io...

Kshitij answered on Jan 22 2021
150 Votes
output.py
import os
import pandas
import argparse
from bokeh.plotting import figure, ColumnDataSource
from bokeh.io import curdoc, output_file, save, export_svgs
from bokeh.resources import CDN
from bokeh.embed import file_html, autoload_static, components
from bokeh.layouts import row, column
from bokeh.models import HoverTool, CustomJS, Slider, Legend, CDSView, GroupFilter
from goodreads_shared import (
add_shared_args, process_args, iter_options,
html_path, data_path, comp_path
)
def parse_args():
    """Parse the command-line options for this visualization script.

    The parser is first populated by ``add_shared_args`` and then extended
    with the options specific to this script (``--static``, ``--image`` and
    ``--compare``). The parsed namespace is passed through ``process_args``
    before being returned.

    When ``--compare`` is given, ``args.html_file`` is overwritten with
    ``<data_file>_compare_<compare>.html``, where a trailing ``.csv`` is
    dropped from both names first.

    Returns:
        The namespace returned by ``process_args``, possibly with
        ``html_file`` replaced as described above.
    """
    parser = argparse.ArgumentParser()
    parser = add_shared_args(parser)

    # Note: the behavior of this script depends on whether it's executed
    # as a stand-alone script or run by bokeh. If run by bokeh, it
    # produces a server-backed visualization. If run as a stand-alone
    # script, it produces a block of html suitable for inclusion in
    # another site, but that cannot be loaded on its own. If run
    # as a stand-alone script **with this option**, it generates
    # a fully independent html page that *can* be loaded on its own.
    parser.add_argument('-s', '--static', action='store_true',
                        default=False,
                        help='Save an html file instead of an include file')
    parser.add_argument('-i', '--image', action='store_true',
                        default=False,
                        help='Save an image in SVG format instead of an include file')
    parser.add_argument('--compare', type=str,
                        help='Compare results to data from a second file.')

    args = parser.parse_args()
    # The data file name is echoed before and after `process_args` so any
    # rewriting done there is visible on stdout.
    print(args.data_file)
    args = process_args(args)
    print(args.data_file)

    if args.compare:
        # Name the output after both inputs, without their `.csv` suffix.
        data_stem = args.data_file
        if data_stem.endswith('.csv'):
            data_stem = data_stem[:-4]
        comp_stem = args.compare
        if comp_stem.endswith('.csv'):
            comp_stem = comp_stem[:-4]
        args.html_file = data_stem + '_compare_' + comp_stem + '.html'

    print(args.data_file)
    return args
def ranges(x_vals, y_vals, padright=False, pad=0.15, right_pad=0.55):
    """Return padded ``[min, max]`` bounds for the x and y values.

    Each bound is pushed outwards by a fraction of the data span so that
    points do not sit directly on the edge of the plotted area.

    Args:
        x_vals: Non-empty iterable of x coordinates.
        y_vals: Non-empty iterable of y coordinates.
        padright: When true, the upper x bound is padded by ``right_pad``
            instead of ``pad``, leaving extra room on the right-hand side.
        pad: Fraction of the span added on every side (default 0.15).
        right_pad: Fraction of the x span added to the upper x bound when
            ``padright`` is true (default 0.55).

    Returns:
        A tuple ``([x_min, x_max], [y_min, y_max])``.

    Raises:
        ValueError: If either input is empty (raised by ``min``).
    """
    x_min, x_max = min(x_vals), max(x_vals)
    x_span = x_max - x_min
    x_min -= x_span * pad
    x_max += x_span * (right_pad if padright else pad)

    y_min, y_max = min(y_vals), max(y_vals)
    y_span = y_max - y_min
    y_min -= y_span * pad
    y_max += y_span * pad

    return [x_min, x_max], [y_min, y_max]
def get_plot(args):
    """Build the plot selected by the parsed command-line options.

    Args:
        args: Parsed options; only ``args.compare`` is inspected here.

    Returns:
        The result of ``comp_plot(args, args.compare)`` when a comparison
        file was given, otherwise the result of ``simple_plot(args)``.
    """
    # NOTE(review): `comp_plot` must be defined elsewhere in this module;
    # it is only looked up when `--compare` is actually used.
    if args.compare:
        return comp_plot(args, args.compare)
    return simple_plot(args)
# Marker glyph, color and size used to draw each genre. The "glyph" values
# are looked up by name as methods on the bokeh figure when plotting.
_GENRE_GLYPHS = {
    "Childrens": {
        "glyph": "square_cross", "color": "magenta", "size": 4,
    },
    "Erotic Fiction": {
        "glyph": "dash", "color": "maroon", "size": 4,
    },
    "Fantasy": {
        "glyph": "x", "color": "mediumblue", "size": 4,
    },
    "Graphic Novels": {
        "glyph": "circle_x", "color": "yellow", "size": 4,
    },
    "Historical Fiction": {
        "glyph": "cross", "color": "orangered", "size": 4,
    },
    "Horror Suspense": {
        "glyph": "circle", "color": "orchid", "size": 4,
    },
    "Literary Fiction": {
        "glyph": "diamond", "color": "firebrick", "size": 4,
    },
    "Mystery Crime Detective": {
        "glyph": "triangle", "color": "slategray", "size": 4,
    },
    "Romance": {
        "glyph": "inverted_triangle", "color": "limegreen", "size": 4,
    },
    "Science Fiction": {
        "glyph": "square", "color": "black", "size": 4,
    },
    "Thriller": {
        "glyph": "diamond_cross", "color": "darkgoldenrod", "size": 4,
    },
    "Womens Fiction": {
        "glyph": "asterisk", "color": "deeppink", "size": 4,
    },
    "YA": {
        "glyph": "square_x", "color": "darksalmon", "size": 4,
    },
}


def append_glyph_settings(user_df):
    """Return a copy of ``user_df`` with per-row drawing settings added.

    Three columns are appended, each derived from the row's ``genre``
    value: ``colors``, ``glyphs`` and ``sizes``.

    Args:
        user_df: DataFrame with a ``genre`` column whose values are all
            keys of the genre table above.

    Returns:
        A new DataFrame; the input frame is not modified.

    Raises:
        KeyError: If a row's genre is not one of the known genres.
    """
    # Resolve each row's settings once instead of once per output column.
    settings = [_GENRE_GLYPHS[genre] for genre in user_df.genre]
    return user_df.assign(
        colors=[entry['color'] for entry in settings],
        glyphs=[entry['glyph'] for entry in settings],
        sizes=[entry['size'] for entry in settings],
    )
def simple_plot(args):
# Load the data and rotate into the familiar shape.
user_dataset = pandas.read_csv(data_path(args), index_col='user_id')
user_dataset = append_glyph_settings(user_dataset)
TOOLS = ','.join(['hover', 'crosshair', 'pan', 'wheel_zoom', 'box_zoom',
'undo', 'redo', 'reset', 'tap', 'save', 'box_select',
'poly_select', 'lasso_select'])
x_range, y_range = ranges(user_dataset.x.values,
user_dataset.y.values)
p = figure(tools=TOOLS, active_scroll='wheel_zoom',
plot_height=600, plot_width=800, toolbar_location='below',
x_range=x_range, y_range=y_range)
p.axis.visible = False
glyph_map = dict(set(zip(user_dataset.genre, user_dataset.glyphs)))
genre_names = sorted(glyph_map.keys())
# Rather than creating separate data sources for each glyph,
# we need to use `CDSView` -- this allows us to modify the
# underlying data all at once using a javascript callback.
# Otherwise, our callback would need to modify the source
# for each glyph separately.
source = ColumnDataSource(user_dataset)
legend_entries = []
for gn in genre_names:
genre_view = CDSView(source=source, filters=[GroupFilter(column_name='genre', group=gn)])
glyph_render = getattr(p, glyph_map[gn])
glyph = glyph_render(x='x', y='y', source=source, view=genre_view, fill_color='colors',
fill_alpha=0.6, line_color='colors', size='sizes', muted_alpha=0.1)
legend_entries.append((gn, [glyph]))
legend = Legend(items=legend_entries)
legend.click_policy = "mute"
p.add_layout(legend, 'right')
genre_columns = [c for c in user_dataset.columns
if c not in ['colors', 'genre', 'glyphs', 'sizes']]
genre_columns = [c for c in genre_columns if
not c.startswith('x') and
not c.startswith('y')]
tooltip_template = (
'
{}: '
'@{{{}}}
'
)
tooltips_list = [tooltip_template.format('User ID', 'user_id')]
tooltips_list.extend(tooltip_template.format(c, c) for c in genre_columns)
tooltips_list.append(tooltip_template.format('Primary Genre',...
SOLUTION.PDF

Answer To This Question Is Available To Download

Related Questions & Answers

More Questions »

Submit New Assignment

Copy and Paste Your Assignment Here