AE 20: Programming with LLMs

Suggested answers

Application exercise
Answers
Modified

November 11, 2025

07-models

library(ellmer)

# List available models using the `models_*()` functions; each returns a data
# frame of model metadata you can browse.
# Hint: try using the Positron data viewer by calling `View()` on the results.
models_openai() # e.g. openai/gpt-5-nano
models_anthropic() # e.g. anthropic/claude-3-5-haiku-20241022
models_ollama() # lists locally installed models, e.g. ollama/gemma3:4b

# One prompt, reused across providers so the responses are comparable.
prompt <- "Write a recipe for an easy weeknight dinner my kids would like."

# Try sending the same prompt to different models to compare the responses.
# `chat("provider/model")` creates a chat object; `$chat()` sends the prompt
# and prints the streamed reply.
chat("openai/gpt-5")$chat(prompt)
chat("anthropic/claude-3-7-sonnet-20250219")$chat(prompt)

# If you have local models installed (via Ollama), you can use them too.
chat("ollama/gemma3:4b")$chat(prompt)

# Instead of `chat()`, you can also use direct provider functions
# (these create chat objects; call `$chat(prompt)` on them to send a message):
chat_openai(model = "gpt-5")
chat_anthropic(model = "claude-3-7-sonnet-20250219")
chat_ollama(model = "gemma3:4b")

08-vision

library(ellmer)

# Paths to the sample food images used in this exercise.
recipe_images <- here::here("data/recipes/images")
img_pancakes <- file.path(recipe_images, "EasyBasicPancakes.jpg")
img_pad_thai <- file.path(recipe_images, "PadThai.jpg")

# Attach an image to the prompt with `content_image_file()`.
# NOTE(review): this local `chat` object shadows `ellmer::chat()`; fine in a
# short script, but rename it if you need the `chat()` function again later.
chat <- chat_openai(model = "gpt-4.1-nano")
chat$chat(
  "Give the food in this image a creative recipe title and description.",
  content_image_file(img_pancakes)
)

# Start a fresh chat so the pad thai request isn't influenced by the
# pancake conversation above.
chat <- chat_openai(model = "gpt-4.1-nano")
chat$chat(
  "Write a recipe to make the food in this image.",
  content_image_file(img_pad_thai)
)

09-pdf

library(ellmer)

# Path to the sample recipe PDF.
recipe_pdfs <- here::here("data/recipes/pdf")
pdf_waffles <- file.path(recipe_pdfs, "CinnamonPeachOatWaffles.pdf")

# Attach the PDF alongside the text prompt; `content_pdf_file()` packages the
# document so the model can read it.
chat <- chat_openai(model = "gpt-4.1-nano")
chat$chat(
  "Summarize the recipe in this PDF into a list of ingredients and the steps to follow to make the recipe.",
  content_pdf_file(pdf_waffles)
)

10-structured-output

library(ellmer)

# Read in a recipe from a text file. Note that `readLines()` returns a
# character vector with one element per line.
recipe_txt <- here::here("data/recipes/text")
txt_waffles <- recipe_txt |>
  file.path("CinnamonPeachOatWaffles.md") |>
  readLines()

# Show the first 500 characters of the first recipe.
# Collapse the lines into one string first -- `substring()` is vectorized, so
# applying it directly to `txt_waffles` would truncate *each line* to 500
# characters instead of showing the first 500 characters of the recipe.
txt_waffles |> paste(collapse = "\n") |> substring(1, 500) |> cat()

#' Here's an example of the structured output we want to achieve for a single
#' recipe:
#'
#' {
#'   "title": "Spicy Mango Salsa Chicken",
#'   "description": "A flavorful and vibrant chicken dish...",
#'   "ingredients": [
#'     {
#'       "name": "Chicken Breast",
#'       "quantity": 4,
#'       "unit": "medium",
#'       "notes": "Boneless, skinless"
#'     },
#'     {
#'       "name": "Lime Juice",
#'       "quantity": 2,
#'       "unit": "tablespoons",
#'       "notes": "Fresh"
#'     }
#'   ],
#'   "instructions": [
#'     "Preheat grill to medium-high heat.",
#'     "In a bowl, combine ...",
#'     "Season chicken breasts with salt and pepper.",
#'     "Grill chicken breasts for 6-8 minutes per side, or until cooked through.",
#'     "Serve chicken topped with the spicy mango salsa."
#'   ]
#' }

# Define the target schema for structured extraction. Optional ingredient
# fields (`unit`, `notes`) are marked `required = FALSE`.
# (Bug fix: the original had a trailing comma after the `type_object(...)`
# argument inside `type_array()`, which passes an empty argument and errors
# at call time in R; the duplicate definition in `11_batch` omits it.)
type_recipe <- type_object(
  title = type_string(),
  description = type_string(),
  ingredients = type_array(
    type_object(
      name = type_string(),
      quantity = type_number(),
      unit = type_string(required = FALSE),
      notes = type_string(required = FALSE)
    )
  ),
  instructions = type_array(type_string())
)

chat <- chat("openai/gpt-4.1-nano")

# `chat_structured()` returns an R object (list) matching `type_recipe`.
chat$chat_structured(txt_waffles, type = type_recipe)

11_batch

library(ellmer)

# Read in the recipes from text files (this time all of the recipes).
# `brio::read_file()` returns each file as a single string, so `recipes` is a
# named list with one string per recipe.
recipe_files <- fs::dir_ls(here::here("data/recipes/text"))
recipes <- purrr::map(recipe_files, brio::read_file)

# Use the same type_recipe we defined in `10-structured-output`
type_recipe <- type_object(
  title = type_string(),
  description = type_string(),
  ingredients = type_array(
    type_object(
      name = type_string(),
      quantity = type_number(),
      # `unit` and `notes` are optional ingredient fields
      unit = type_string(required = FALSE),
      notes = type_string(required = FALSE)
    )
  ),
  instructions = type_array(type_string())
)

# Parallel structured extraction (fast, may be pricey) -------------------------
# `parallel_chat_structured()` sends the structured-output request for every
# prompt concurrently, so all 8 recipes are processed at once. This is fast,
# but each request is billed at the standard (non-batch) API rate, which can
# get expensive for a larger dataset.
recipes_data <- parallel_chat_structured(
  chat("openai/gpt-4.1-nano"),
  prompts = recipes,
  type = type_recipe
)

# Hey, it's a table of recipes! One row per recipe; list-columns hold the
# nested ingredients and instructions.
recipes_tbl <- dplyr::as_tibble(recipes_data)
recipes_tbl

# Batch API (slower, but cheaper) ----------------------------------------------
# That was pretty easy! But what if we had 10,000 recipes to process? That would
# take a long time, and be pretty expensive. We can save money by using the
# Batch API, which allows us to send multiple requests in a single API call.
#
# With the Batch API, results are processed asynchronously and are completed at
# some point, usually within a few minutes but at most within the next 24 hours.
# Because batching lets providers schedule requests more efficiently, it also
# costs less per token than the standard API.

# `path` stores the batch job state on disk so the job can be resumed across
# R sessions.
# NOTE(review): with `wait = FALSE` this returns immediately; if the batch
# hasn't finished, `res` will not yet contain the parsed results. Re-run this
# call (same `path`) later, or use `wait = TRUE`, before saving below.
res <- batch_chat_structured(
  chat("anthropic/claude-3-haiku-20240307"),
  prompts = recipes,
  type = type_recipe,
  path = here::here("data/recipes/batch_results_r_claude.json"),
  wait = FALSE
)

# Save the results -------------------------------------------------------------
# Now, save the results to a JSON file in `data/recipes/recipes.json`. Once
# you've done that, you can open up `11_recipe-app.py` and run the app to see
# your new recipe collection!
# NOTE(review): if the batch above was submitted with `wait = FALSE` and has
# not completed, `res` may be NULL/empty here -- confirm before writing.
jsonlite::write_json(
  res,
  here::here("data/recipes/recipes.json"),
  auto_unbox = TRUE, # write length-1 vectors as JSON scalars, not arrays
  pretty = TRUE
)

Acknowledgments