AE 20: Programming with LLMs
Application exercise
07-models
library(ellmer)
# Step 1: List available models for OpenAI and Anthropic
# List models using the `models_*()` functions.
# Hint: try using the Positron data viewer by calling `View()` on the results.
models_____
models_____
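# One possible answer (a sketch; the models listed depend on your API access):
models_openai()
models_anthropic()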
prompt <- "Write a recipe for an easy weeknight dinner my kids would like."
# Step 2: Compare responses from different models
# Try sending the same prompt to different models to compare the responses.
chat("openai/____")$chat(prompt)
chat("anthropic/____")$chat(prompt)
# Bonus: Local models?
# If you have local models installed, you can use them too.
models_ollama()
chat("ollama/____")$chat(prompt)
# Bonus: Repeat your OpenAI and Anthropic requests using direct provider
# functions.
chat_____(____)$chat(prompt)
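# One possible answer (a sketch using the provider-specific constructors
# chat_openai() and chat_anthropic()):
chat_openai(model = "gpt-4.1-nano")$chat(prompt)
chat_anthropic(model = "claude-3-haiku-20240307")$chat(prompt)

08-vision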
library(ellmer)
recipe_images <- here::here("data/recipes/images")
img_pancakes <- file.path(recipe_images, "EasyBasicPancakes.jpg")
img_pad_thai <- file.path(recipe_images, "PadThai.jpg")
#' Ask OpenAI's `gpt-4.1-nano` to give a creative recipe title and description
#' for the pancakes image.
chat <- ____
chat$chat(
  "____",
  ____(img_pancakes)
)
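# One possible answer (a sketch; the prompt wording is just an example):
chat <- chat("openai/gpt-4.1-nano")
chat$chat(
  "Give the dish in this image a creative recipe title and description.",
  content_image_file(img_pancakes)
)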
#' In a new chat, ask it to write a recipe for the food it sees in the Pad Thai
#' image. (Don't tell it that it's Pad Thai!)
chat <- ____
chat$chat(
  "Write a recipe to make the food in this image.",
  ____(img_pad_thai)
)
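# One possible answer (a sketch):
chat <- chat("openai/gpt-4.1-nano")
chat$chat(
  "Write a recipe to make the food in this image.",
  content_image_file(img_pad_thai)
)

09-pdf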
library(ellmer)
recipe_pdfs <- here::here("data/recipes/pdf")
pdf_waffles <- file.path(recipe_pdfs, "CinnamonPeachOatWaffles.pdf")
# Ask OpenAI's `gpt-4.1-nano` to turn this messy PDF print-out of a waffle
# recipe into a clean list of ingredients and steps to follow.
chat <- ____
chat$chat(
  "____",
  ____(pdf_waffles)
)
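# One possible answer (a sketch; the prompt wording is just an example):
chat <- chat("openai/gpt-4.1-nano")
chat$chat(
  "Turn this messy print-out into a clean list of ingredients and steps to follow.",
  content_pdf_file(pdf_waffles)
)

10-structured-output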
library(ellmer)
# Read in a recipe from a text file
recipe_txt <- here::here("data/recipes/text")
txt_waffles <- recipe_txt |>
  file.path("CinnamonPeachOatWaffles.md") |>
  brio::read_file() # Like readLines() but all in one string
# Show the first 500 characters of the recipe
txt_waffles |> substring(1, 500) |> cat()
#' Here's an example of the structured output we want to achieve for a single
#' recipe:
#'
#' {
#'   "title": "Spicy Mango Salsa Chicken",
#'   "description": "A flavorful and vibrant chicken dish...",
#'   "ingredients": [
#'     {
#'       "name": "Chicken Breast",
#'       "quantity": "4",
#'       "unit": "medium",
#'       "notes": "Boneless, skinless"
#'     },
#'     {
#'       "name": "Lime Juice",
#'       "quantity": "2",
#'       "unit": "tablespoons",
#'       "notes": "Fresh"
#'     }
#'   ],
#'   "instructions": [
#'     "Preheat grill to medium-high heat.",
#'     "In a bowl, combine ...",
#'     "Season chicken breasts with salt and pepper.",
#'     "Grill chicken breasts for 6-8 minutes per side, or until cooked through.",
#'     "Serve chicken topped with the spicy mango salsa."
#'   ]
#' }
#'
#' Hint: You can use `required = FALSE` in `type_*()` functions to indicate that
#' a field is optional.
type_recipe <- type_____(
  title = ____(),
  description = ____(),
  ingredients = ____(
    type_object(
      name = ____(),
      quantity = ____(),
      unit = ____(),
      notes = ____()
    )
  ),
  instructions = type_array(____())
)
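# One possible answer: the completed definition at the top of 11_batch below
# fills these blanks with type_object(), type_string(), type_array(), and
# type_number().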
chat <- chat("openai/gpt-4.1-nano")
chat$chat_structured(txt_waffles, type = type_recipe)

11_batch
library(ellmer)
# Read in the recipes from text files (this time all of the recipes)
recipe_files <- fs::dir_ls(here::here("data/recipes/text"))
recipes <- purrr::map(recipe_files, brio::read_file)
# Use the `type_recipe` we defined in `10-structured-output`, or replace the
# definition below with your own version.
type_recipe <- type_object(
  title = type_string(),
  description = type_string(),
  ingredients = type_array(
    type_object(
      name = type_string(),
      quantity = type_number(),
      unit = type_string(required = FALSE),
      notes = type_string(required = FALSE)
    )
  ),
  instructions = type_array(type_string())
)
# Parallel structured extraction (fast, may be pricey) -------------------------
# First, we'll send a request for each recipe, running several requests in
# parallel. This is fast and straightforward for our 8 recipes, but could get
# expensive for a larger dataset.
recipes_data <- ____(
  chat("openai/gpt-4.1-nano"),
  prompts = ____,
  type = ____
)
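# One possible answer (a sketch; parallel_chat_structured() runs the requests
# concurrently):
recipes_data <- parallel_chat_structured(
  chat("openai/gpt-4.1-nano"),
  prompts = recipes,
  type = type_recipe
)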
# Hey, it's a table of recipes!
recipes_tbl <- dplyr::as_tibble(recipes_data)
recipes_tbl
# Batch API (slower, but cheaper) ----------------------------------------------
# That was pretty easy! But what if we had 10,000 recipes to process? That would
# take a long time, and be pretty expensive. We can save money by using the
# Batch API, which allows us to send multiple requests in a single API call.
#
# With the Batch API, requests are processed asynchronously: results usually
# arrive within a few minutes, and at most within 24 hours. Because batching
# lets providers schedule requests more efficiently, it also costs less per
# token than the standard API.
res <- ____(
  chat("anthropic/claude-3-haiku-20240307"),
  prompts = ____,
  type = ____,
  path = here::here("data/recipes/batch_results_r_claude.json")
)
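# One possible answer (a sketch; batch_chat_structured() submits the prompts
# as a single batch and waits for the results):
res <- batch_chat_structured(
  chat("anthropic/claude-3-haiku-20240307"),
  prompts = recipes,
  type = type_recipe,
  path = here::here("data/recipes/batch_results_r_claude.json")
)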
# Save the results -------------------------------------------------------------
# Now, save the results to a JSON file in `data/recipes/recipes.json`. Once
# you've done that, you can open up `11_recipe-app.py` and run the app to see
# your new recipe collection!
jsonlite::write_json(
  res,
  here::here("data/recipes/recipes.json"),
  auto_unbox = TRUE,
  pretty = TRUE
)

Acknowledgments
- Materials derived in part from Programming with LLMs and licensed under a Creative Commons Attribution 4.0 International (CC BY) License.