AE 21: Improving LLM outputs
Application exercise
12_plot-image-1
library(readr)
library(ellmer)
library(ggplot2)
# Step 1: Make a scatter plot of the penguins dataset
data("penguins")
ggplot(data = penguins, aes(x = flipper_len, y = bill_len)) +
  geom_point(aes(color = species, shape = species), size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, aes(color = species)) +
  theme_minimal() +
  scale_color_manual(values = c("darkorange", "purple", "cyan4")) +
  labs(
    title = "Flipper and bill length",
    subtitle = "Dimensions for Adelie, Chinstrap and Gentoo Penguins at Palmer Station LTER",
    x = "Flipper length (mm)",
    y = "Bill length (mm)",
    color = "Penguin species",
    shape = "Penguin species"
  )
# Step 2: Ask Claude 4 Sonnet to interpret the plot.
# (Hint: see `content_image_...`)
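# ellmer's image helpers include `content_image_url()`, `content_image_file()`,
# and `content_image_plot()`, which captures the most recent plot.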
chat <- chat("____", echo = "output")
chat$chat(
"Interpret this plot.",
____()
)13_plot-image-2
library(readr)
library(ellmer)
library(ggplot2)
# Step 1: This time, we're going to replace our penguins scatter plot with a plot
# of uniform random noise.
# Lay down an evenly spaced grid of points on the unit square, then jitter each
# point so the result looks like uniform random noise.
m <- 32
n_side <- floor(sqrt(m)) # number of grid points per side
u <- (seq_len(n_side) - 0.5) / n_side # cell midpoints in (0, 1)
grid <- as.matrix(expand.grid(x = u, y = u))
eps <- 1 / (2 * sqrt(m)) # jitter radius, just under half a cell width
jitter <- matrix(runif(length(grid), -eps, eps), ncol = 2)
grid_jitter <- pmin(pmax(grid + jitter, 0), 1) # clamp points to [0, 1]
ggplot() +
  aes(x = grid_jitter[, 1], y = grid_jitter[, 2]) +
  geom_point(color = "steelblue", size = 3, alpha = 0.8) +
  theme_minimal() +
  # Note: we deliberately keep the penguin title and labels to see whether they
  # mislead the model.
  labs(
    title = "Flipper and bill length",
    subtitle = "Dimensions for Adelie, Chinstrap and Gentoo Penguins at Palmer Station LTER",
    x = "Flipper length (mm)",
    y = "Bill length (mm)"
  )
# Step 2: Ask Claude 4 Sonnet to interpret the plot. How does it do this time?
chat <- chat("anthropic/claude-sonnet-4-20250514", echo = "output")
chat$chat(
"Interpret this plot.",
content_image_plot()
)
# Step 3: Work with a partner to improve the prompt to get a better
# interpretation. One possible direction is sketched below.
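# A minimal sketch of one possible improvement (an assumption, not the only
# answer): asking the model to describe the points before interpreting them may
# keep it from leaning on the misleading title and labels.
# chat$chat(
#   paste(
#     "Describe the pattern of the points themselves, ignoring the title and",
#     "axis labels. Then say whether the labels are consistent with the data."
#   ),
#   content_image_plot()
# )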
14_quiz-game-1
library(shiny)
library(bslib)
library(ellmer)
library(shinychat)
# UI ---------------------------------------------------------------------------
ui <- page_fillable(
chat_mod_ui("chat")
)
# Server -----------------------------------------------------------------------
server <- function(input, output, session) {
  client <- chat(
    "anthropic/claude-3-7-sonnet-20250219",
    # Step 1: Edit `prompt.md` to get the model to play the quiz game.
    system_prompt = interpolate_file(
      here::here("_exercises/14_quiz-game-1/prompt.md")
    )
  )

  chat <- chat_mod_server("chat", client)

  observe({
    # Note: this starts the game when the app launches
    chat$update_user_input(
      value = "Let's play the quiz game!",
      submit = TRUE
    )
  })
}
shinyApp(ui, server)
prompt.md
## Quiz Game Host
<!-- Add your game instructions here -->
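<!--
A minimal sketch of possible instructions (one approach among many; adapt
freely):

You are the host of a friendly trivia quiz. Ask one multiple-choice question
at a time and wait for the player's answer. Say whether they were right, keep
a running score, and announce the final score after five questions.
-->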
15_coding-assistant
# Task ------------------------------------------------------------------------
library(ellmer)
# **Step 1:** Run the code below as-is to try the task without any extra
# context. How does the model do? Can you run the function? Does it give you the
# weather? Does it know enough about the {weathR} package to complete the task?
#
# **Step 2:** Now, let's add some context. Head over to the GitHub repo for
# {weathR} (link in `docs.R.md`). Copy the project description from the
# `README.md` and paste it into the `docs.R.md` file.
#
# **Step 3:** Uncomment the extra lines to include these docs in the prompt and
# try again.
chat <- chat("anthropic/claude-3-7-sonnet-20250219", echo = "output")
chat$chat(
  ## Extra context from package docs
  # brio::read_file(here::here("_exercises/15_coding-assistant/docs.R.md")),
  ## Task prompt
  paste(
    "Write a simple function that takes latitude and longitude as inputs",
    "and returns the weather forecast for that location using the {weathR}",
    "package. Keep the function concise and simple and don't include error",
    "handling or data re-formatting. Include documentation in roxygen2 format,",
    "including examples for NYC and Atlanta, GA."
  )
)
16_rag
#+ setup
library(ragnar)
# Step 1: Read, chunk and create embeddings for "R for Data Science" ----------
#' This example is based on https://ragnar.tidyverse.org/#usage.
#'
#' The first step is to crawl the R for Data Science website to find all the
#' pages we'll need to read in.
#'
#' Then, we create a new ragnar document store that will use OpenAI's
#' `text-embedding-3-small` model to create embeddings for each chunk of text.
#'
#' Finally, we read each page as markdown, use `markdown_chunk()` to split that
#' markdown into reasonably sized chunks, and insert each chunk into the
#' vector store. That insertion step automatically sends the chunk text to
#' OpenAI to create the embedding, and ragnar stores the embedding alongside the
#' original text of the chunk.
#+ create-store
base_url <- "https://r4ds.hadley.nz"
pages <- ragnar_find_links(base_url, children_only = TRUE)
store_location <- here::here("_exercises/16_rag/r4ds.ragnar.duckdb")
store <- ragnar_store_create(
  store_location,
  title = "R for Data Science",
  # Need to start over? Set `overwrite = TRUE`.
  # overwrite = TRUE,
  embed = \(x) embed_openai(x, model = "text-embedding-3-small")
)
cli::cli_progress_bar(total = length(pages))
for (page in pages) {
  cli::cli_progress_update(status = page)

  chunks <- page |>
    read_as_markdown() |>
    # The next step breaks the markdown into chunks. This is where you have the
    # most control over what content is grouped together for embedding and later
    # retrieval. Feel free to experiment with settings in `?markdown_chunk()`.
    markdown_chunk()
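  # For example, you could try larger or smaller chunks. (These argument names
  # are assumptions; check `?markdown_chunk()` for the actual interface.)
  # markdown_chunk(target_size = 1600, target_overlap = 0.5)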
  ragnar_store_insert(store, chunks)
}
cli::cli_progress_done()
ragnar_store_build_index(store)
# Step 2: Inspect your document store -----------------------------------------
#' Now that we have the vector store, which chunks are surfaced when we ask a
#' question? To find out, we'll use the ragnar store inspector app and an
#' example question.
#'
#' Here's a question someone might ask an LLM. Copy the task markdown to use in
#' the ragnar store inspector app.
#+ inspect-store
task <- r"--(
Could someone help me filter one data frame by matching values in another?
I’ve got two data frames with a common column `code`. I want to keep rows in
`data1` where `code` exists in `data2$code`. I tried using `filter()` but got
no rows back.
Here’s a minimal example:
```r
library(dplyr)
data1 <- data.frame(
  closed_price = c(49900L, 46900L, 46500L),
  opened_price = c(51000L, 49500L, 47500L),
  adjust_closed_price = c(12951L, 12173L, 12069L),
  stock = as.factor(c("AAA", "AAA", "AAC")),
  date3 = as.factor(c("2010-07-15", "2011-07-19", "2011-07-23")),
  code = as.factor(c("AAA2010", "AAA2011", "AAC2011"))
)
data2 <- data.frame(
  code = as.factor(c("AAA2010", "AAC2011")),
  ticker = as.factor(c("AAA", "AAM"))
)
```
What I tried:
```r
price_code <- data1 %>% filter(code %in% data2)
```
This returns zero rows. What’s the simplest way to do this?
)--"
ragnar_store_inspect(store)
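# You can also retrieve chunks programmatically, following the usage example at
# https://ragnar.tidyverse.org (the `top_k` argument is an assumption; see
# `?ragnar_retrieve`):
# relevant_chunks <- ragnar_retrieve(store, task, top_k = 5)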
# Step 3: Use document store in a chatbot --------------------------------------
#' Finally, ragnar provides a special tool that attaches to an ellmer chat
#' client and lets the model retrieve relevant chunks from the vector store on
#' demand. Run the code below to launch a chatbot backed by all the knowledge in
#' the R for Data Science book. Paste the task markdown from above into the chat
#' and see how the chatbot uses the retrieved chunks to improve its answer, or
#' ask it your own questions about R for Data Science.
#+ chatbot
library(ellmer)
chat <- chat(
name = "openai/gpt-4.1-nano",
system_prompt = r"--(
You are an expert R programmer and mentor. You are concise.
Before responding, retrieve relevant material from the knowledge store. Quote or
paraphrase passages, clearly marking your own words versus the source. Provide a
working link for every source you cite.
)--"
)
# Attach the retrieval tool to the chat client. You can choose how many chunks
# or documents are retrieved each time the model uses the tool.
ragnar_register_tool_retrieve(chat, store, top_k = 10)
live_browser(chat)
Acknowledgments
- Materials derived in part from Programming with LLMs and licensed under a Creative Commons Attribution 4.0 International (CC BY) License.