Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reorganize py-shiny page, remove outdated apps #102

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
/venv/
**/.venv/
**/.DS_Store
/node_modules/
/packages/*.whl
/dist/
50 changes: 12 additions & 38 deletions examples/index.json
Original file line number Diff line number Diff line change
@@ -10,12 +10,9 @@
"category": "Featured",
"apps": [
"cpuinfo",
"orbit",
"regularization",
"wordle",
"plotly",
"ipyleaflet",
"camera"
"ipyleaflet"
]
},
{
@@ -27,45 +24,11 @@
"file_download",
"insert_ui",
"input_update",
"modules",
"extra_packages",
"static_content",
"fetch",
"ipywidgets"
]
},
{
"category": "Inputs",
"apps": [
"input_text",
"input_numeric",
"input_slider",
"input_checkbox",
"input_switch",
"input_checkbox_group",
"input_select",
"input_radio",
"input_text_area",
"input_date",
"input_date_range",
"input_password"
]
},
{
"category": "Outputs",
"apps": [
"output_text",
"output_text_verbatim",
"output_ui",
"output_plot",
"output_table",
"output_data_frame_grid"
]
},
{
"category": "Layout",
"apps": ["shinyswatch", "layout_sidebar", "layout_two_column"]
},
{
"category": "Reactivity",
"apps": [
@@ -75,6 +38,17 @@
"reactive_value"
]
},
{
"category": "Shiny Core",
"apps": [
"modules",
"plot_interact_basic",
"plot_interact_exclude",
"orbit",
"wordle",
"static_content"
]
},
{
"category": "Interactive plots",
"apps": [
30 changes: 9 additions & 21 deletions examples/python/app_with_plot/app.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,14 @@
import matplotlib.pyplot as plt
import numpy as np
from shiny import App, render, ui
from shiny import render
from shiny.express import ui, input

app_ui = ui.page_fluid(
ui.layout_sidebar(
ui.panel_sidebar(
ui.input_slider("n", "N", 0, 100, 20),
),
ui.panel_main(
ui.output_plot("histogram"),
),
),
)
with ui.sidebar():
ui.input_slider("n", "N", 0, 100, 20)


def server(input, output, session):
@output
@render.plot(alt="A histogram")
def histogram():
np.random.seed(19680801)
x = 100 + 15 * np.random.randn(437)
plt.hist(x, input.n(), density=True)


app = App(app_ui, server, debug=True)
@render.plot(alt="A histogram")
def histogram():
np.random.seed(19680801)
x = 100 + 15 * np.random.randn(437)
plt.hist(x, input.n(), density=True)
18 changes: 6 additions & 12 deletions examples/python/basic_app/app.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
from shiny import App, render, ui
from shiny import render
from shiny.express import ui, input

app_ui = ui.page_fluid(
ui.input_slider("n", "N", 0, 100, 20),
ui.output_text_verbatim("txt"),
)

ui.input_slider("n", "N", 0, 100, 20),

def server(input, output, session):
@output
@render.text
def txt():
return f"n*2 is {input.n() * 2}"


app = App(app_ui, server)
@render.text
def txt():
return f"n*2 is {input.n() * 2}"
298 changes: 94 additions & 204 deletions examples/python/cpuinfo/app.py
Original file line number Diff line number Diff line change
@@ -3,20 +3,16 @@
if "pyodide" in sys.modules:
# psutil doesn't work on pyodide--use fake data instead
from fakepsutil import cpu_count, cpu_percent

shinylive_message = "Note: the CPU data is simulated when running in Shinylive."
else:
from psutil import cpu_count, cpu_percent

shinylive_message = ""

from math import ceil

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from shiny import App, reactive, render, ui
from helpers import plot_cpu

from shiny import reactive, render
from shiny.express import input, ui, output

# The agg matplotlib backend seems to be a little more efficient than the default when
# running on macOS, and also gives more consistent results across operating systems
@@ -27,83 +23,9 @@
# secs between samples
SAMPLE_PERIOD = 1


ncpu = cpu_count(logical=True)

app_ui = ui.page_fluid(
ui.tags.style(
"""
/* Don't apply fade effect, it's constantly recalculating */
.recalculating {
opacity: 1;
}
tbody > tr:last-child {
/*border: 3px solid var(--bs-dark);*/
box-shadow:
0 0 2px 1px #fff, /* inner white */
0 0 4px 2px #0ff, /* middle cyan */
0 0 5px 3px #00f; /* outer blue */
}
#table table {
table-layout: fixed;
width: %s;
font-size: 0.8em;
}
th, td {
text-align: center;
}
"""
% f"{ncpu*4}em"
),
ui.h3("CPU Usage %", class_="mt-2"),
ui.layout_sidebar(
ui.panel_sidebar(
ui.input_select(
"cmap",
"Colormap",
{
"inferno": "inferno",
"viridis": "viridis",
"copper": "copper",
"prism": "prism (not recommended)",
},
),
ui.p(ui.input_action_button("reset", "Clear history", class_="btn-sm")),
ui.input_switch("hold", "Freeze output", value=False),
shinylive_message,
class_="mb-3",
),
ui.panel_main(
ui.div(
{"class": "card mb-3"},
ui.div(
{"class": "card-body"},
ui.h5({"class": "card-title mt-0"}, "Graphs"),
ui.output_plot("plot", height=f"{ncpu * 40}px"),
),
ui.div(
{"class": "card-footer"},
ui.input_numeric("sample_count", "Number of samples per graph", 50),
),
),
ui.div(
{"class": "card"},
ui.div(
{"class": "card-body"},
ui.h5({"class": "card-title m-0"}, "Heatmap"),
),
ui.div(
{"class": "card-body overflow-auto pt-0"},
ui.output_table("table"),
),
ui.div(
{"class": "card-footer"},
ui.input_numeric("table_rows", "Rows to display", 5),
),
),
),
),
)
ui.page_opts(fillable=True)


@reactive.Calc
@@ -112,127 +34,95 @@ def cpu_current():
return cpu_percent(percpu=True)


def server(input, output, session):
cpu_history = reactive.Value(None)
cpu_history = reactive.Value(None)


@reactive.Calc
def cpu_history_with_hold():
# If "hold" is on, grab an isolated snapshot of cpu_history; if not, then do a
# regular read
if not input.hold():
@reactive.Calc
def cpu_history_with_hold():
# If "hold" is on, grab an isolated snapshot of cpu_history; if not, then do a
# regular read
if not input.hold():
return cpu_history()
else:
# Even if frozen, we still want to respond to input.reset()
input.reset()
with reactive.isolate():
return cpu_history()


@reactive.Effect
def collect_cpu_samples():
"""cpu_percent() reports just the current CPU usage sample; this Effect gathers
them up and stores them in the cpu_history reactive value, in a numpy 2D array
(rows are CPUs, columns are time)."""

new_data = np.vstack(cpu_current())
with reactive.isolate():
if cpu_history() is None:
cpu_history.set(new_data)
else:
# Even if frozen, we still want to respond to input.reset()
input.reset()
with reactive.isolate():
return cpu_history()

@reactive.Effect
def collect_cpu_samples():
"""cpu_percent() reports just the current CPU usage sample; this Effect gathers
them up and stores them in the cpu_history reactive value, in a numpy 2D array
(rows are CPUs, columns are time)."""

new_data = np.vstack(cpu_current())
with reactive.isolate():
if cpu_history() is None:
cpu_history.set(new_data)
else:
combined_data = np.hstack([cpu_history(), new_data])
# Throw away extra data so we don't consume unbounded amounts of memory
if combined_data.shape[1] > MAX_SAMPLES:
combined_data = combined_data[:, -MAX_SAMPLES:]
cpu_history.set(combined_data)

@reactive.Effect(priority=100)
@reactive.event(input.reset)
def reset_history():
cpu_history.set(None)

@output
@render.plot
def plot():
history = cpu_history_with_hold()

if history is None:
history = np.array([])
history.shape = (ncpu, 0)

nsamples = input.sample_count()

# Throw away samples too old to fit on the plot
if history.shape[1] > nsamples:
history = history[:, -nsamples:]

ncols = 2
nrows = int(ceil(ncpu / ncols))
fig, axeses = plt.subplots(
nrows=nrows,
ncols=ncols,
squeeze=False,
)
for i in range(0, ncols * nrows):
row = i // ncols
col = i % ncols
axes = axeses[row, col]
if i >= len(history):
axes.set_visible(False)
continue
data = history[i]
axes.yaxis.set_label_position("right")
axes.yaxis.tick_right()
axes.set_xlim(-(nsamples - 1), 0)
axes.set_ylim(0, 100)

assert len(data) <= nsamples

# Set up an array of x-values that will right-align the data relative to the
# plotting area
x = np.arange(0, len(data))
x = np.flip(-x)

# Color bars by cmap
color = plt.get_cmap(input.cmap())(data / 100)
axes.bar(x, data, color=color, linewidth=0, width=1.0)

axes.set_yticks([25, 50, 75])
for ytl in axes.get_yticklabels():
if col == ncols - 1 or i == ncpu - 1 or True:
ytl.set_fontsize(7)
else:
ytl.set_visible(False)
hide_ticks(axes.yaxis)
for xtl in axes.get_xticklabels():
xtl.set_visible(False)
hide_ticks(axes.xaxis)
axes.grid(True, linewidth=0.25)

return fig

@output
@render.table
def table():
history = cpu_history_with_hold()
latest = pd.DataFrame(history).transpose().tail(input.table_rows())
if latest.shape[0] == 0:
return latest
return (
latest.style.format(precision=0)
.hide(axis="index")
.set_table_attributes(
'class="dataframe shiny-table table table-borderless font-monospace"'
)
.background_gradient(cmap=input.cmap(), vmin=0, vmax=100)
)


def hide_ticks(axis):
for ticks in [axis.get_major_ticks(), axis.get_minor_ticks()]:
for tick in ticks:
tick.tick1line.set_visible(False)
tick.tick2line.set_visible(False)
tick.label1.set_visible(False)
tick.label2.set_visible(False)


app = App(app_ui, server)
combined_data = np.hstack([cpu_history(), new_data])
# Throw away extra data so we don't consume unbounded amounts of memory
if combined_data.shape[1] > MAX_SAMPLES:
combined_data = combined_data[:, -MAX_SAMPLES:]
cpu_history.set(combined_data)


@reactive.Effect(priority=100)
@reactive.event(input.reset)
def reset_history():
cpu_history.set(None)


ui.tags.style(
"""
/* Don't apply fade effect, it's constantly recalculating */
.recalculating {
opacity: 1;
}
"""
)

with ui.sidebar():
ui.input_select(
"cmap",
"Colormap",
{
"inferno": "inferno",
"viridis": "viridis",
"copper": "copper",
"prism": "prism (not recommended)",
},
),
ui.input_action_button("reset", "Clear history", class_="btn-sm")
ui.input_switch("hold", "Freeze output", value=False)

with ui.card():
with ui.navset_bar(title="CPU %"):
with ui.nav_panel(title="Graphs"):
ui.input_numeric("sample_count", "Number of samples per graph", 50)

@render.plot
def plot():
return plot_cpu(
cpu_history_with_hold(), input.sample_count(), ncpu, input.cmap()
)

with ui.nav_panel(title="Heatmap"):
ui.input_numeric("table_rows", "Rows to display", 15)

@output(suspend_when_hidden=False)
@render.table
def table():
history = cpu_history_with_hold()
latest = pd.DataFrame(history).transpose().tail(input.table_rows())
if latest.shape[0] == 0:
return latest
return (
latest.style.format(precision=0)
.hide(axis="index")
.set_table_attributes(
'class="dataframe shiny-table table table-borderless font-monospace"'
)
.background_gradient(cmap=input.cmap(), vmin=0, vmax=100)
)
69 changes: 69 additions & 0 deletions examples/python/cpuinfo/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from math import ceil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def hide_ticks(axis):
    """Make every tick mark and tick label on ``axis`` invisible.

    Both the major and the minor ticks returned by ``axis`` are processed. For
    each tick, its two tick lines and its two labels are hidden.
    """
    every_tick = [*axis.get_major_ticks(), *axis.get_minor_ticks()]
    for tick in every_tick:
        # Each tick carries a pair of lines and a pair of labels; hide all four.
        for artist in (tick.tick1line, tick.tick2line, tick.label1, tick.label2):
            artist.set_visible(False)


def plot_cpu(history, nsamples, ncpu, cmap):
    """Draw one small bar chart of recent CPU usage per CPU.

    Args:
        history: 2D array of usage samples indexed as ``history[cpu, sample]``,
            or ``None`` when nothing has been collected yet.
        nsamples: Number of most recent samples to show in each chart.
        ncpu: Number of CPUs; determines how many grid rows are created.
        cmap: Name of the Matplotlib colormap used to colour the bars.

    Returns:
        The Matplotlib figure holding a two-column grid of bar charts.
    """
    if history is None:
        # No samples yet: use an empty (ncpu, 0) array so the code below still works.
        history = np.empty((ncpu, 0))

    # Throw away samples too old to fit on the plot. The start index is computed
    # explicitly because ``history[:, -nsamples:]`` keeps *everything* when
    # nsamples is 0 (``-0`` is just ``0``).
    if history.shape[1] > nsamples:
        history = history[:, history.shape[1] - nsamples:]

    ncols = 2
    nrows = ceil(ncpu / ncols)
    fig, axeses = plt.subplots(
        nrows=nrows,
        ncols=ncols,
        squeeze=False,
    )
    for i in range(ncols * nrows):
        row, col = divmod(i, ncols)
        axes = axeses[row, col]
        if i >= len(history):
            # Grid cell without a matching CPU (odd CPU count): leave it blank.
            axes.set_visible(False)
            continue
        data = history[i]
        axes.yaxis.set_label_position("right")
        axes.yaxis.tick_right()
        axes.set_xlim(-(nsamples - 1), 0)
        axes.set_ylim(0, 100)

        # x-values that right-align the data relative to the plotting area:
        # the newest sample sits at 0, older ones at -1, -2, ...
        x = np.arange(-(len(data) - 1), 1)

        # Color bars by cmap
        color = plt.get_cmap(cmap)(data / 100)
        axes.bar(x, data, color=color, linewidth=0, width=1.0)

        axes.set_yticks([25, 50, 75])
        # The previous condition here ended in ``or True``, so the branch that
        # hid the y tick labels could never run; labels are always shown small.
        for ytl in axes.get_yticklabels():
            ytl.set_fontsize(7)
        for xtl in axes.get_xticklabels():
            xtl.set_visible(False)
        hide_ticks(axes.xaxis)
        axes.grid(True, linewidth=0.25)

    return fig
116 changes: 48 additions & 68 deletions examples/python/file_download/app.py
Original file line number Diff line number Diff line change
@@ -5,71 +5,51 @@

import matplotlib.pyplot as plt
import numpy as np
from shiny import App, ui


# A card component wrapper.
def ui_card(title, *args):
return (
ui.div(
{"class": "card mb-4"},
ui.div(title, class_="card-header"),
ui.div({"class": "card-body"}, *args),
),
)


app_ui = ui.page_fluid(
ui_card(
"Download a pre-existing file, using its existing name on disk.",
ui.download_button("download1", "Download CSV"),
),
ui_card(
"Download a PNG that is generated dynamically.",
ui.input_text("title", "Plot title", "Random scatter plot"),
ui.input_slider("num_points", "Number of data points", 1, 100, 50),
ui.download_button("download2", "Download PNG"),
),
ui_card(
"Download a file with name that is generated dynamically.",
ui.download_button("download3", "Download CSV"),
),
)


def server(input, output, session):
@session.download()
def download1():
# This is the simplest case. The implementation simply returns the path to a
# file on disk.
path = Path(__file__).parent / "mtcars.csv"
return str(path)

@session.download(filename="image.png")
def download2():
# Another way to implement a file download is by yielding bytes; either all at
# once, like in this case, or by yielding multiple times. When using this
# approach, you should pass a filename argument to @session.download, which
# determines what the browser will name the downloaded file.
x = np.random.uniform(size=input.num_points())
y = np.random.uniform(size=input.num_points())
plt.figure()
plt.scatter(x, y)
plt.title(input.title())
with io.BytesIO() as buf:
plt.savefig(buf, format="png")
yield buf.getvalue()

@session.download(
filename=lambda: f"data-{date.today().isoformat()}-{np.random.randint(100,999)}.csv"
)
async def download3():
# This version uses a function to generate the filename. It also yields data
# multiple times.
await asyncio.sleep(0.25)
yield "one,two,three\n"
yield "新,1,2\n"
yield "型,4,5\n"


app = App(app_ui, server)
from shiny.express import ui, input
from shiny import render

with ui.layout_columns():
with ui.card():
ui.card_header("Download a pre-existing file, using its existing name on disk.")

@render.download(label="Download CSV", filename="mtcars.csv")
def download1():
# This is the simplest case. The implementation simply returns the path to a
# file on disk.
path = Path(__file__).parent / "mtcars.csv"
return str(path)

with ui.card():
ui.card_header("Download a PNG that is generated dynamically.")
ui.input_text("title", "Plot title", "Random scatter plot")
ui.input_slider("num_points", "Number of data points", 1, 100, 50)

@render.download(label="Download PNG", filename="image.png")
def download2():
# Another way to implement a file download is by yielding bytes; either all at
# once, like in this case, or by yielding multiple times. When using this
# approach, you should pass a filename argument to @render.download, which
# determines what the browser will name the downloaded file.
x = np.random.uniform(size=input.num_points())
y = np.random.uniform(size=input.num_points())
plt.figure()
plt.scatter(x, y)
plt.title(input.title())
with io.BytesIO() as buf:
plt.savefig(buf, format="png")
yield buf.getvalue()

with ui.card():
ui.card_header("Download a file with name that is generated dynamically.")

@render.download(
label="Dynamic file name",
filename=lambda: f"data-{date.today().isoformat()}-{np.random.randint(100,999)}.csv",
)
async def download3():
# This version uses a function to generate the filename. It also yields data
# multiple times.
await asyncio.sleep(0.25)
yield "one,two,three\n"
yield "新,1,2\n"
yield "型,4,5\n"
93 changes: 43 additions & 50 deletions examples/python/file_upload/app.py
Original file line number Diff line number Diff line change
@@ -2,57 +2,13 @@
from math import ceil
from typing import List

from shiny import App, render, ui
from shiny import render
from shiny.express import ui, input

app_ui = ui.page_fluid(
ui.input_file("file1", "Choose a file to upload:", multiple=True),
ui.input_radio_buttons("type", "Type:", ["Binary", "Text"]),
ui.output_text_verbatim("file_content"),
)


def server(input, output, session):
MAX_SIZE = 50000

@output
@render.text
def file_content():
file_infos = input.file1()
if not file_infos:
return

# file_infos is a list of dicts; each dict represents one file. Example:
# [
# {
# 'name': 'data.csv',
# 'size': 2601,
# 'type': 'text/csv',
# 'datapath': '/tmp/fileupload-1wnx_7c2/tmpga4x9mps/0.csv'
# }
# ]
out_str = ""
for file_info in file_infos:
out_str += (
"=" * 47
+ "\n"
+ file_info["name"]
+ "\nMIME type: "
+ str(mimetypes.guess_type(file_info["name"])[0])
)
if file_info["size"] > MAX_SIZE:
out_str += f"\nTruncating at {MAX_SIZE} bytes."

out_str += "\n" + "=" * 47 + "\n"

if input.type() == "Text":
with open(file_info["datapath"], "r") as f:
out_str += f.read(MAX_SIZE)
else:
with open(file_info["datapath"], "rb") as f:
data = f.read(MAX_SIZE)
out_str += format_hexdump(data)

return out_str
MAX_SIZE = 50000
ui.input_file("file1", "Choose a file to upload:", multiple=True),
ui.input_radio_buttons("type", "Type:", ["Text", "Binary"]),


def format_hexdump(data: bytes) -> str:
@@ -73,4 +29,41 @@ def group_into_blocks(x: List[str], blocksize: int):
]


app = App(app_ui, server)
@render.text
def file_content():
    """Render a preview of every uploaded file, as text or as a hex dump.

    Each file gets a header with its name and guessed MIME type, followed by at
    most ``MAX_SIZE`` units of its content. Nothing is returned until a file has
    been uploaded.
    """
    uploads = input.file1()
    if not uploads:
        return

    # uploads is a list of dicts; each dict represents one file. Example:
    # [
    #   {
    #     'name': 'data.csv',
    #     'size': 2601,
    #     'type': 'text/csv',
    #     'datapath': '/tmp/fileupload-1wnx_7c2/tmpga4x9mps/0.csv'
    #   }
    # ]
    rule = "=" * 47
    pieces = []
    for upload in uploads:
        guessed_mime = mimetypes.guess_type(upload["name"])[0]
        pieces.extend(
            [rule, "\n", upload["name"], "\nMIME type: ", str(guessed_mime)]
        )
        if upload["size"] > MAX_SIZE:
            pieces.append(f"\nTruncating at {MAX_SIZE} bytes.")

        pieces.append("\n" + rule + "\n")

        # Text mode shows the content verbatim; anything else is hex-dumped.
        as_text = input.type() == "Text"
        with open(upload["datapath"], "r" if as_text else "rb") as fh:
            chunk = fh.read(MAX_SIZE)
        pieces.append(chunk if as_text else format_hexdump(chunk))

    return "".join(pieces)
19 changes: 6 additions & 13 deletions examples/python/multiple_source_files/app.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,11 @@
from shiny import App, render, ui
from shiny.express import ui, input
from utils import square

app_ui = ui.page_fluid(
ui.input_slider("n", "N", 0, 100, 20),
ui.output_text_verbatim("txt"),
)
ui.input_slider("n", "N", 0, 100, 20),


def server(input, output, session):
@output
@render.text
def txt():
val = square(input.n())
return f"{input.n()} squared is {val}"


app = App(app_ui, server, debug=True)
@render.text
def txt():
val = square(input.n())
return f"{input.n()} squared is {val}"
23 changes: 7 additions & 16 deletions examples/python/read_local_csv_file/app.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,13 @@
from pathlib import Path

import pandas
from shiny import App, render, ui
from shiny import render

app_ui = ui.page_fluid(
ui.output_table("table"),
)
# We need to import something from express to activate express mode
import shiny.express


def server(input, output, session):
@output
@render.table
def table():
infile = Path(__file__).parent / "mtcars.csv"
df = pandas.read_csv(infile)
# Use the DataFrame's to_html() function to convert it to an HTML table, and
# then wrap with ui.HTML() so Shiny knows to treat it as raw HTML.
return df


app = App(app_ui, server)
@render.table
def data_frame():
infile = Path(__file__).parent / "mtcars.csv"
return pandas.read_csv(infile)
323 changes: 126 additions & 197 deletions examples/python/regularization/app.py
Original file line number Diff line number Diff line change
@@ -8,202 +8,49 @@

# Import custom Python Functions from local file
from compare import compare, sim_data
from shiny import App, reactive, render, ui
from shiny import reactive, render
from shiny.express import ui, input

# data
nsims = 100
sim = [sim_data(n=1000) for i in range(0, nsims)]


# app
app_ui = ui.page_fixed(
# add head that allows LaTeX to be displayed via MathJax
ui.head_content(
ui.tags.script(
src="https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
),
ui.tags.script(
"if (window.MathJax) MathJax.Hub.Queue(['Typeset', MathJax.Hub]);"
),
),
ui.column(
10,
{"class": "col-md-10 col-lg-8 py-5 mx-auto text-lg-center text-left"},
# Title
ui.h1("How Does Regularization Strength Affect Coefficient Estimates?"),
# input slider
),
ui.column(
10,
{"class": "col-md-78 col-lg-5 py-4 mx-auto"},
# Title
ui.input_slider(
"a",
"Select a Regularization Strength:",
0.000000001,
1,
0.1,
step=0.01,
width="100%",
),
ui.p(
{"class": "pt-4 small"},
"(Each time you change the slider input, the simulation will take some time to run.)",
),
),
ui.column(
12,
{"class": "col-lg-11 py-5 mx-auto"},
# output plot
ui.output_plot("plot"),
),
# Explanation and Explore text row with two equal-width columns
ui.row(
ui.column(
10,
{"class": "col-lg-6 py-5 mx-auto"},
ui.h4("Explanation"),
ui.p(
"""
When we train Machine Learning models like linear regressions, logistic
regressions, or neural networks, we do so by defining a loss function
and minimizing that loss function. A loss function is a metric for
measuring how your model is performing where lower is better. For
example, Mean Squared Error is a loss function that measures the squared
distance (on average) between a model's guesses and the true values."""
),
# LaTeX
ui.p("$$MSE = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2$$"),
ui.p(
"""
Regularization works by adding a penalty to the loss function in order
to penalize large model parameters. In Linear Regression, the penalty
increases when the size of the coefficients increases. Because the loss
function is made up of two things: the original loss function (the MSE,
here) and the penalty, predictors must 'pull their weight' by reducing
the MSE enough to be 'worth' the penalty. This causes small, unimportant
predictors to have small or zero coefficients."""
),
ui.p(
"""
LASSO (L1) and Ridge (L2) are two common forms of Regularization. LASSO
adds a penalty to the loss function by taking the absolute value of each
parameter/coefficient, and adding them all together. Ridge adds a
penalty to the loss function by taking the square of each
parameter/coefficient, and adding them all together."""
),
# LaTeX
ui.p(
"$$LASSO = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} |\\beta_j|}_\\text{penalty}$$"
),
ui.p(
"$$Ridge = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} \\beta_j^2}_\\text{penalty}$$"
),
ui.p(
"""
When using regularization, we must choose the regularization strength
(see slider above) which is a number that scales how harshly we
penalize. If we multiply the penalty by 0, that's the same as not having
a penalty at all. But if we multiply the penalty by 500, that would
penalize the parameters a lot more."""
),
ui.p("$$\\lambda \\text{ is the regularization strength.}$$"),
),
),
ui.row(
ui.column(
10,
{"class": "col-lg-6 py-5 mx-auto"},
ui.h4("Explore"),
ui.h5("Comparing LASSO, Ridge, and Linear Regression"),
ui.p(
"""
With the slider at 0.1 (the default) look at the boxplot at the top of
the page. This shows the coefficients from 1000 simulated data sets. For
each data set the 'vowels' (A, E, I, O, U, Y, W) do have some
relationship with the outcome (X) that our model is predicting. A has
the largest effect then E, I, O, U, Y and finally W has the smallest
effect on X. The Consonants (B,C,D,G,H,J,K) have absolutely no effect on
X."""
),
ui.p("Look at the Graph and ask yourself these questions:"),
ui.tags.ul(
ui.tags.li(
"""
Which model (Linear, LASSO, Ridge) tends to have the highest
coefficients? What does this tell you about the various
penalties each model has?"""
),
ui.tags.li(
"""
What happens to the LASSO coefficients for the Consonant
predictors (B-K) which have no real effect on X?"""
),
ui.tags.li(
"""
The Linear and Ridge Coefficients look similar for the
Consonants (B-K) but what's slightly different between them?
What does that tell you about what Ridge penalties do?"""
),
ui.tags.li(
"""
Are the larger effects (A-I) affected differently than the
smaller effects (O-W) when you increase the Regularization
Strength?"""
),
),
ui.h5("Comparing Different Regularization Strengths"),
ui.p(
"""
Now, using the slider at the top of the page, change the Regularization
Strength. Try values that are very low, moderate, and very high."""
),
ui.p("Look at the Graph and ask yourself these questions:"),
ui.tags.ul(
ui.tags.li(
"""
What happens to the LASSO and Ridge models when the Regularization
Strength is almost 0?"""
),
ui.tags.li(
"""
What happens to the LASSO model's coefficients when the
Regularization Strength is very high?"""
),
ui.tags.li(
"""
Do the Linear Regression coefficients change when you change
Regularization Strength? (if so, why, if not, why not?)"""
),
),
),
),
# output plots separated by real effects (vowels), and zero-effects (consonants)
ui.column(
12,
{"class": "col-lg-11 py-5 mx-auto text-center"},
ui.h2("Plots Separated by Vowels and Consonants"),
),
ui.column(
12,
{"class": "col-lg-11 mb-5 pb-5 mx-auto"},
ui.output_plot("plotVOWELS"),
ui.output_plot("plotCONSONANTS"),
),
)


def server(input, output, session):
# reactive Calc that runs LASSO, Ridge, and Linear models on generated data
@reactive.Calc
def models():
sim_alpha = [compare(df, alpha=input.a()) for df in sim]
sim_alpha = pd.concat(sim_alpha)

return sim_alpha

# output plot of all simulation coefficients
@output

ui.tags.script(
src="https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
),
ui.tags.script("if (window.MathJax) MathJax.Hub.Queue(['Typeset', MathJax.Hub]);")


@reactive.Calc
def models():
sim_alpha = [compare(df, alpha=input.a()) for df in sim]
sim_alpha = pd.concat(sim_alpha)

return sim_alpha


with ui.div(class_="col-md-10 col-lg-8 py-5 mx-auto text-lg-center text-left"):
ui.h3("How Does Regularization Strength Affect Coefficient Estimates?"),

with ui.div(class_="col-md-78 col-lg-5 py-4 mx-auto"):
ui.input_slider(
"a",
"Select a Regularization Strength:",
min=0.000000001,
max=1,
value=0.1,
step=0.01,
width="100%",
)
ui.p(
{"class": "pt-4 small"},
"(Each time you change the slider input, the simulation will take some time to run.)",
)

with ui.div(class_="col-lg-11 py-5 mx-auto"):

@render.plot()
def plot():
# get data from reactive Calc
@@ -238,8 +85,95 @@ def plot():
ax2.set(xlabel="", ylabel="Coefficient Value", title=tt)
return fig

# output plot of all simulation coefficients (vowels only)
@output

with ui.div(class_="col-lg-6 py-5 mx-auto"):
ui.markdown(
"""
### Explanation
When we train Machine Learning models like linear regressions, logistic
regressions, or neural networks, we do so by defining a loss function
and minimizing that loss function. A loss function is a metric for
measuring how your model is performing where lower is better. For
example, Mean Squared Error is a loss function that measures the squared
distance (on average) between a model's guesses and the true values.
"""
)
# LaTeX
# Every backslash is doubled: "\h" is not a valid escape sequence in a normal
# string literal, so "\hat" must be written as "\\hat" (same runtime text).
ui.p("$$MSE = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \\hat{Y}_i)^2$$")
ui.p(
"""
Regularization works by adding a penalty to the loss function in order
to penalize large model parameters. In Linear Regression, the penalty
increases when the size of the coefficients increases. Because the loss
function is made up of two things: the original loss function (the MSE,
here) and the penalty, predictors must 'pull their weight' by reducing
the MSE enough to be 'worth' the penalty. This causes small, unimportant
predictors to have small or zero coefficients.
LASSO (L1) and Ridge (L2) are two common forms of Regularization. LASSO
adds a penalty to the loss function by taking the absolute value of each
parameter/coefficient, and adding them all together. Ridge adds a
penalty to the loss function by taking the square of each
parameter/coefficient, and adding them all together."""
)
# LaTeX
# Every backslash is doubled: "\h" is not a valid escape sequence in a normal
# string literal, so "\hat" must be written as "\\hat" (same runtime text).
ui.p(
    "$$LASSO = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \\hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} |\\beta_j|}_\\text{penalty}$$"
)
ui.p(
    "$$Ridge = \\frac{1}{n} \\sum_{i=1}^{n} (Y_i - \\hat{Y}_i)^2 + \\lambda \\underbrace{\\sum_{j=1}^{p} \\beta_j^2}_\\text{penalty}$$"
)
ui.p(
"""
When using regularization, we must choose the regularization strength
(see slider above) which is a number that scales how harshly we
penalize. If we multiply the penalty by 0, that's the same as not having
a penalty at all. But if we multiply the penalty by 500, that would
penalize the parameters a lot more."""
)
ui.p("$$\\lambda \\text{ is the regularization strength.}$$")


with ui.div(class_="col-lg-6 py-5 mx-auto"):
ui.markdown(
"""
### Explore
#### Comparing LASSO, Ridge, and Linear Regression
With the slider at 0.1 (the default) look at the boxplot at the top of the page. This shows the
coefficients from 1000 simulated data sets. For each data set the 'vowels' (A, E, I, O, U, Y, W)
do have some relationship with the outcome (X) that our model is predicting. A has the largest
effect then E, I, O, U, Y and finally W has the smallest effect on X. The Consonants (B,C,D,G,H,J,K)
have absolutely no effect on X.
Look at the Graph and ask yourself these questions:
- Which model (Linear, LASSO, Ridge) tends to have the highest coefficients? What does this tell
you about the various penalties each model has?
- What happens to the LASSO coefficients for the Consonant predictors (B-K) which have no real
effect on X?
- The Linear and Ridge Coefficients look similar for the Consonants (B-K) but what's slightly
different between them? What does that tell you about what Ridge penalties do?
- Are the larger effects (A-I) affected differently than the smaller effects (O-W) when you increase
the Regularization Strength?
#### Comparing Different Regularization Strengths
Now, using the slider at the top of the page, change the Regularization Strength. Try values that
are very low, moderate, and very high.
Look at the Graph and ask yourself these questions:
- What happens to the LASSO and Ridge models when the Regularization Strength is almost 0?
- What happens to the LASSO model's coefficients when the Regularization Strength is very high?
- Do the Linear Regression coefficients change when you change Regularization Strength? (if so, why,
if not, why not?)
"""
)

with ui.div(class_="col-lg-11 py-5 mx-auto text-center"):
ui.h2("Plots Separated by Vowels and Consonants")

with ui.div(class_="col-lg-11 mb-5 pb-5 mx-auto"):

@render.plot()
def plotVOWELS():
# get data from reactive Calc
@@ -261,8 +195,6 @@ def plotVOWELS():
ax2.set(xlabel="", ylabel="Coefficient Value", title=tt)
return fig

# output plot of all simulation coefficients (consonants only)
@output
@render.plot()
def plotCONSONANTS():
# get data from reactive Calc
@@ -286,6 +218,3 @@ def plotCONSONANTS():
tt = "CONSONANT Coefficient Estimates when alpha = " + str(input.a())
ax2.set(xlabel="", ylabel="Coefficient Value", title=tt)
return fig


app = App(app_ui, server)