AE 21: Improving LLM outputs
Application exercise
12_plot-image-1
library(readr)
library(ellmer)
library(ggplot2)
# Step 1: Make a scatter plot of the penguins dataset
data("penguins")
ggplot(data = penguins, aes(x = flipper_len, y = bill_len)) +
  geom_point(aes(color = species, shape = species), size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, aes(color = species)) +
  theme_minimal() +
  scale_color_manual(values = c("darkorange", "purple", "cyan4")) +
  labs(
    title = "Flipper and bill length",
    subtitle = "Dimensions for Adelie, Chinstrap and Gentoo Penguins at Palmer Station LTER",
    x = "Flipper length (mm)",
    y = "Bill length (mm)",
    color = "Penguin species",
    shape = "Penguin species"
  )
# Step 2: Ask Claude 4 Sonnet to interpret the plot.
# (Hint: see `content_image_...`)
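# ellmer's image helpers include `content_image_url()`, `content_image_file()`,
# and `content_image_plot()`, which captures the most recent plot.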
chat <- chat("____", echo = "output")
chat$chat(
"Interpret this plot.",
____()
)13_plot-image-2
library(readr)
library(ellmer)
library(ggplot2)
# Step 1: This time, we're going to replace our penguins scatter plot with a plot
# of uniform random noise.
# Lay down an evenly spaced grid of points on the unit square, then jitter each
# point so the result looks like uniform random noise.
m <- 32
n_side <- floor(sqrt(m)) # number of grid points per side
u <- (seq_len(n_side) - 0.5) / n_side # cell midpoints in (0, 1)
grid <- as.matrix(expand.grid(x = u, y = u))
eps <- 1 / (2 * sqrt(m)) # jitter radius, just under half a cell width
jitter <- matrix(runif(length(grid), -eps, eps), ncol = 2)
grid_jitter <- pmin(pmax(grid + jitter, 0), 1) # clamp points to [0, 1]
ggplot() +
  aes(x = grid_jitter[, 1], y = grid_jitter[, 2]) +
  geom_point(color = "steelblue", size = 3, alpha = 0.8) +
  theme_minimal() +
  # Note: we deliberately keep the penguin title and labels to see whether they
  # mislead the model.
  labs(
    title = "Flipper and bill length",
    subtitle = "Dimensions for Adelie, Chinstrap and Gentoo Penguins at Palmer Station LTER",
    x = "Flipper length (mm)",
    y = "Bill length (mm)"
  )
# Step 2: Ask Claude 4 Sonnet to interpret the plot. How does it do this time?
chat <- chat("anthropic/claude-sonnet-4-20250514", echo = "output")
chat$chat(
"Interpret this plot.",
content_image_plot()
)
# Step 3: Work with a partner to improve the prompt to get a better
# interpretation. One possible direction is sketched below.
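# A minimal sketch of one possible improvement (an assumption, not the only
# answer): asking the model to describe the points before interpreting them may
# keep it from leaning on the misleading title and labels.
# chat$chat(
#   paste(
#     "Describe the pattern of the points themselves, ignoring the title and",
#     "axis labels. Then say whether the labels are consistent with the data."
#   ),
#   content_image_plot()
# )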
14_quiz-game-1
library(shiny)
library(bslib)
library(ellmer)
library(shinychat)
# UI ---------------------------------------------------------------------------
ui <- page_fillable(
chat_mod_ui("chat")
)
# Server -----------------------------------------------------------------------
server <- function(input, output, session) {
  client <- chat(
    "anthropic/claude-3-7-sonnet-20250219",
    # Step 1: Edit `prompt.md` to get the model to play the quiz game.
    system_prompt = interpolate_file(
      here::here("_exercises/14_quiz-game-1/prompt.md")
    )
  )

  chat <- chat_mod_server("chat", client)

  observe({
    # Note: this starts the game when the app launches
    chat$update_user_input(
      value = "Let's play the quiz game!",
      submit = TRUE
    )
  })
}
shinyApp(ui, server)
prompt.md
## Quiz Game Host
<!-- Add your game instructions here -->
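<!--
A minimal sketch of possible instructions (one approach among many; adapt
freely):

You are the host of a friendly trivia quiz. Ask one multiple-choice question
at a time and wait for the player's answer. Say whether they were right, keep
a running score, and announce the final score after five questions.
-->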
15_coding-assistant
# Task ------------------------------------------------------------------------
library(ellmer)
# **Step 1:** Run the code below as-is to try the task without any extra
# context. How does the model do? Can you run the function? Does it give you the
# weather? Does it know enough about the {weathR} package to complete the task?
#
# **Step 2:** Now, let's add some context. Head over to the GitHub repo for
# {weathR} (link in `docs.R.md`). Copy the project description from the
# `README.md` and paste it into the `docs.R.md` file.
#
# **Step 3:** Uncomment the extra lines to include these docs in the prompt and
# try again.
chat <- chat("anthropic/claude-3-7-sonnet-20250219", echo = "output")
chat$chat(
  ## Extra context from package docs
  # brio::read_file(here::here("_exercises/15_coding-assistant/docs.R.md")),
  ## Task prompt
  paste(
    "Write a simple function that takes latitude and longitude as inputs",
    "and returns the weather forecast for that location using the {weathR}",
    "package. Keep the function concise and simple and don't include error",
    "handling or data re-formatting. Include documentation in roxygen2 format,",
    "including examples for NYC and Atlanta, GA."
  )
)
16_rag
#+ setup
library(ragnar)
# Step 1: Read, chunk and create embeddings for "R for Data Science" ----------
#' This example is based on https://ragnar.tidyverse.org/#usage.
#'
#' The first step is to crawl the R for Data Science website to find all the
#' pages we'll need to read in.
#'
#' Then, we create a new ragnar document store that will use OpenAI's
#' `text-embedding-3-small` model to create embeddings for each chunk of text.
#'
#' Finally, we read each page as markdown, use `markdown_chunk()` to split that
#' markdown into reasonably sized chunks, and insert each chunk into the
#' vector store. That insertion step automatically sends the chunk text to
#' OpenAI to create the embedding, and ragnar stores the embedding alongside the
#' original text of the chunk.
#+ create-store
base_url <- "https://r4ds.hadley.nz"
pages <- ragnar_find_links(base_url, children_only = TRUE)
store_location <- here::here("_exercises/16_rag/r4ds.ragnar.duckdb")
store <- ragnar_store_create(
  store_location,
  title = "R for Data Science",
  # Need to start over? Set `overwrite = TRUE`.
  # overwrite = TRUE,
  embed = \(x) embed_openai(x, model = "text-embedding-3-small")
)
cli::cli_progress_bar(total = length(pages))
for (page in pages) {
  cli::cli_progress_update(status = page)

  chunks <- page |>
    read_as_markdown() |>
    # The next step breaks the markdown into chunks. This is where you have the
    # most control over what content is grouped together for embedding and later
    # retrieval. Feel free to experiment with settings in `?markdown_chunk()`.
    markdown_chunk()
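  # For example, you could try larger or smaller chunks. (These argument names
  # are assumptions; check `?markdown_chunk()` for the actual interface.)
  # markdown_chunk(target_size = 1600, target_overlap = 0.5)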
  ragnar_store_insert(store, chunks)
}
cli::cli_progress_done()
ragnar_store_build_index(store)
# Step 2: Inspect your document store -----------------------------------------
#' Now that we have the vector store, which chunks are surfaced when we ask a
#' question? To find out, we'll use the ragnar store inspector app and an
#' example question.
#'
#' Here's a question someone might ask an LLM. Copy the task markdown to use in
#' the ragnar store inspector app.
#+ inspect-store
task <- r"--(
Could someone help me filter one data frame by matching values in another?
I’ve got two data frames with a common column `code`. I want to keep rows in
`data1` where `code` exists in `data2$code`. I tried using `filter()` but got
no rows back.
Here’s a minimal example:
```r
library(dplyr)
data1 <- data.frame(
  closed_price = c(49900L, 46900L, 46500L),
  opened_price = c(51000L, 49500L, 47500L),
  adjust_closed_price = c(12951L, 12173L, 12069L),
  stock = as.factor(c("AAA", "AAA", "AAC")),
  date3 = as.factor(c("2010-07-15", "2011-07-19", "2011-07-23")),
  code = as.factor(c("AAA2010", "AAA2011", "AAC2011"))
)
data2 <- data.frame(
  code = as.factor(c("AAA2010", "AAC2011")),
  ticker = as.factor(c("AAA", "AAM"))
)
```
What I tried:
```r
price_code <- data1 %>% filter(code %in% data2)
```
This returns zero rows. What’s the simplest way to do this?
)--"
ragnar_store_inspect(store)
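# You can also retrieve chunks programmatically, following the usage example at
# https://ragnar.tidyverse.org (the `top_k` argument is an assumption; see
# `?ragnar_retrieve`):
# relevant_chunks <- ragnar_retrieve(store, task, top_k = 5)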
# Step 3: Use document store in a chatbot --------------------------------------
#' Finally, ragnar provides a special tool that attaches to an ellmer chat
#' client and lets the model retrieve relevant chunks from the vector store on
#' demand. Run the code below to launch a chatbot backed by all the knowledge in
#' the R for Data Science book. Paste the task markdown from above into the chat
#' and see how the chatbot uses the retrieved chunks to improve its answer, or
#' ask it your own questions about R for Data Science.
#+ chatbot
library(ellmer)
chat <- chat(
name = "openai/gpt-4.1-nano",
system_prompt = r"--(
You are an expert R programmer and mentor. You are concise.
Before responding, retrieve relevant material from the knowledge store. Quote or
paraphrase passages, clearly marking your own words versus the source. Provide a
working link for every source you cite.
)--"
)
# Attach the retrieval tool to the chat client. You can choose how many chunks
# or documents are retrieved each time the model uses the tool.
ragnar_register_tool_retrieve(chat, store, top_k = 10)
live_browser(chat)
Acknowledgments
- Materials derived in part from Programming with LLMs and licensed under a Creative Commons Attribution 4.0 International (CC BY) License.