From 634068dd4d033ce93e83c49a153b8969b6518b24 Mon Sep 17 00:00:00 2001
From: Genevieve Warren <24882762+gewarren@users.noreply.github.com>
Date: Mon, 28 Apr 2025 13:44:20 -0700
Subject: [PATCH 01/12] meai overview revamp

---
 docs/ai/ai-extensions.md                        |  2 +-
 .../conceptual/microsoft-extensions-ai.md}      |  1 -
 docs/ai/dotnet-ai-ecosystem.md                  |  6 +++---
 docs/ai/{get-started => }/dotnet-ai-overview.md | 16 ++++++++--------
 docs/ai/index.yml                               | 10 +++++-----
 docs/ai/toc.yml                                 |  2 +-
 docs/fundamentals/toc.yml                       |  2 +-
 7 files changed, 19 insertions(+), 20 deletions(-)
 rename docs/{core/extensions/artificial-intelligence.md => ai/conceptual/microsoft-extensions-ai.md} (99%)
 rename docs/ai/{get-started => }/dotnet-ai-overview.md (81%)

diff --git a/docs/ai/ai-extensions.md b/docs/ai/ai-extensions.md
index f17860b02e845..9bd0f85681fc1 100644
--- a/docs/ai/ai-extensions.md
+++ b/docs/ai/ai-extensions.md
@@ -10,7 +10,7 @@ ms.author: alexwolf

 # Unified AI building blocks for .NET using Microsoft.Extensions.AI

-The .NET ecosystem provides abstractions for integrating AI services into .NET applications and libraries using the libraries. The .NET team has also enhanced the core `Microsoft.Extensions` libraries with these abstractions for use in generative AI .NET applications and libraries. In the sections ahead, you learn:
+The .NET ecosystem provides abstractions for integrating AI services into .NET applications and libraries using the `Microsoft.Extensions.AI` libraries. In this article, you learn:

 - Core concepts and capabilities of the `Microsoft.Extensions.AI` libraries.
 - How to work with AI abstractions in your apps and the benefits they offer.
diff --git a/docs/core/extensions/artificial-intelligence.md b/docs/ai/conceptual/microsoft-extensions-ai.md
similarity index 99%
rename from docs/core/extensions/artificial-intelligence.md
rename to docs/ai/conceptual/microsoft-extensions-ai.md
index b97c27c2b6fdd..cf680faeb0e51 100644
--- a/docs/core/extensions/artificial-intelligence.md
+++ b/docs/ai/conceptual/microsoft-extensions-ai.md
@@ -4,7 +4,6 @@ description: Learn how to use the Microsoft.Extensions.AI libraries to integrate
 author: IEvangelist
 ms.author: dapine
 ms.date: 01/06/2025
-ms.collection: ce-skilling-ai-copilot
 ---

 # Artificial intelligence in .NET (Preview)
diff --git a/docs/ai/dotnet-ai-ecosystem.md b/docs/ai/dotnet-ai-ecosystem.md
index 4e7b6db204328..102cb89cc99e6 100644
--- a/docs/ai/dotnet-ai-ecosystem.md
+++ b/docs/ai/dotnet-ai-ecosystem.md
@@ -1,17 +1,17 @@
 ---
-title: Overview of the .NET + AI ecosystem
+title: .NET + AI ecosystem tools and SDKs
 description: This article provides an overview of the ecosystem of SDKs and tools available to .NET developers integrating AI into their applications.
 ms.date: 11/24/2024
 ms.topic: overview
 ms.custom: devx-track-dotnet, devx-track-dotnet-ai
 ---

-# Overview of the .NET + AI ecosystem
+# .NET + AI ecosystem tools and SDKs

 The .NET ecosystem provides many powerful tools, libraries, and services to develop AI applications. .NET supports both cloud and local AI model connections, many different SDKs for various AI and vector database services, and other tools to help you build intelligent apps of varying scope and complexity.

 > [!IMPORTANT]
-> Not all of the SDKs and services presented in this doc are maintained by Microsoft. When considering an SDK, make sure to evaluate its quality, licensing, support, and compatibility to ensure they meet your requirements.
+> Not all of the SDKs and services presented in this article are maintained by Microsoft. 
When considering an SDK, make sure to evaluate its quality, licensing, support, and compatibility to ensure they meet your requirements. ## Microsoft.Extensions.AI libraries diff --git a/docs/ai/get-started/dotnet-ai-overview.md b/docs/ai/dotnet-ai-overview.md similarity index 81% rename from docs/ai/get-started/dotnet-ai-overview.md rename to docs/ai/dotnet-ai-overview.md index 497c40cec1d43..852d3d4ccf82e 100644 --- a/docs/ai/get-started/dotnet-ai-overview.md +++ b/docs/ai/dotnet-ai-overview.md @@ -1,30 +1,30 @@ --- -title: Develop .NET applications with AI features +title: Develop .NET apps with AI features description: Learn how you can build .NET applications that include AI features. ms.date: 04/09/2025 ms.topic: overview ms.custom: devx-track-dotnet, devx-track-dotnet-ai --- -# Develop .NET applications with AI features +# Develop .NET apps with AI features With .NET, you can use artificial intelligence (AI) to automate and accomplish complex tasks in your applications using the tools, platforms, and services that are familiar to you. -## Why choose .NET to build AI applications? +## Why choose .NET to build AI apps? Millions of developers use .NET to create applications that run on the web, on mobile and desktop devices, or in the cloud. By using .NET to integrate AI into your applications, you can take advantage of all that .NET has to offer: * A unified story for building web UIs, APIs, and applications. -* Develop on Windows, macOS, and Linux. -* Open-source and community-focused. +* Supported on Windows, macOS, and Linux. +* Is open-source and community-focused. * Runs on top of the most popular web servers and cloud platforms. -* Powerful tooling to edit, debug, test, and deploy. +* Provides powerful tooling to edit, debug, test, and deploy. ## What can you build with AI and .NET? The opportunities with AI are near endless. Here are a few examples of solutions you can build using AI in your .NET applications: -* Language processing: Create virtual agents / chatbots to talk with your data and generate content and images. +* Language processing: Create virtual agents or chatbots to talk with your data and generate content and images. * Computer vision: Identify objects in an object or video. * Audio generation: Use synthesized voices to interact with customers. * Classification: Label the severity of a customer-reported issue. 
@@ -38,7 +38,7 @@ We recommend the following sequence of tutorials and articles for an introductio |----------|----------| | Create a chat application | [Build an Azure AI chat app with .NET](../quickstarts/build-chat-app.md)| | Summarize text | [Summarize text using Azure AI chat app with .NET](../quickstarts/prompt-model.md) | -| Chat with your data | [Get insight about your data from an .NET Azure AI chat app](../quickstarts/build-vector-search-app.md) | +| Chat with your data | [Get insight about your data from an .NET Azure AI chat app](../quickstarts/build-vector-search-app.md) | | Call .NET functions with AI | [Extend Azure AI using tools and execute a local function with .NET](../quickstarts/use-function-calling.md) | | Generate images | [Generate images using Azure AI with .NET](../quickstarts/generate-images.md) | | Train your own model |[ML.NET tutorial](https://dotnet.microsoft.com/learn/ml-dotnet/get-started-tutorial/intro) | diff --git a/docs/ai/index.yml b/docs/ai/index.yml index 0c476af7dbd5a..4391ec5e7d6b7 100644 --- a/docs/ai/index.yml +++ b/docs/ai/index.yml @@ -24,14 +24,14 @@ landingContent: - linkListType: get-started links: - text: Develop .NET applications - url: get-started/dotnet-ai-overview.md - - text: Learning resources and samples - url: azure-ai-for-dotnet-developers.md - - text: Build an Azure AI chat app with .NET + url: dotnet-ai-overview.md + - text: Connect to and prompt an AI model + url: quickstarts/prompt-model.md + - text: Build an Azure AI chat app url: quickstarts/get-started-openai.md - text: Summarize text using an Azure OpenAI chat app url: quickstarts/quickstart-openai-summarize-text.md - - text: Generate images using Azure AI with .NET + - text: Generate images using Azure AI url: quickstarts/quickstart-openai-generate-images.md # Card diff --git a/docs/ai/toc.yml b/docs/ai/toc.yml index 2cabaa1a073ea..4258976c898c7 100644 --- a/docs/ai/toc.yml +++ b/docs/ai/toc.yml @@ -2,7 +2,7 @@ items: - name: AI for .NET developers href: index.yml - name: Overview - href: get-started/dotnet-ai-overview.md + href: dotnet-ai-overview.md - name: "Quickstart: Connect to and prompt an AI model" href: quickstarts/prompt-model.md - name: AI frameworks and SDKs diff --git a/docs/fundamentals/toc.yml b/docs/fundamentals/toc.yml index 8ca869da34675..dddd9c82b27b1 100644 --- a/docs/fundamentals/toc.yml +++ b/docs/fundamentals/toc.yml @@ -1039,7 +1039,7 @@ items: href: runtime-libraries/system-random.md - name: Artificial intelligence (AI) displayName: microsoft.extensions.ai,ollama,ai,openai,azure inference,ichatclient - href: ../core/extensions/artificial-intelligence.md + href: ../ai/microsoft-extensions-ai.md?toc=/dotnet/fundamentals/toc.json&bc=/dotnet/breadcrumb/toc.json - name: Dependency injection items: - name: Overview From 36540d2a90097172282e58949f40e3cc92452a86 Mon Sep 17 00:00:00 2001 From: Genevieve Warren <24882762+gewarren@users.noreply.github.com> Date: Tue, 29 Apr 2025 19:25:08 -0700 Subject: [PATCH 02/12] move/combine meai overviews --- .openpublishing.redirection.ai.json | 4 + .openpublishing.redirection.core.json | 4 + docs/ai/ai-extensions.md | 103 ------- docs/ai/conceptual/evaluation-libraries.md | 2 +- docs/ai/conceptual/microsoft-extensions-ai.md | 279 ------------------ docs/ai/dotnet-ai-ecosystem.md | 2 +- docs/ai/dotnet-ai-overview.md | 18 +- docs/ai/how-to/app-service-aoai-auth.md | 2 +- docs/ai/index.yml | 2 + docs/ai/microsoft-extensions-ai.md | 268 +++++++++++++++++ .../AI.Shared/AI.Shared.csproj | 0 
.../AI.Shared/RateLimitingChatClient.cs | 34 +++ ...ngChatClientExtensions.OptionalOverload.cs | 13 + .../RateLimitingChatClientExtensions.cs | 3 +- .../RateLimitingEmbeddingGenerator.cs | 0 .../AI.Shared/SampleChatClient.cs | 0 .../AI.Shared/SampleEmbeddingGenerator.cs | 27 +- .../ConsoleAI.AddMessages.csproj} | 2 +- .../ConsoleAI.AddMessages/Program.cs | 34 +++ .../ConsoleAI.CacheResponses.csproj | 0 .../ConsoleAI.CacheResponses/Program.cs | 4 +- .../ConsoleAI.ConsumeClientMiddleware.csproj | 0 .../Program.cs | 19 +- ...soleAI.ConsumeRateLimitingEmbedding.csproj | 0 .../Program.cs | 0 .../ConsoleAI.CreateEmbeddings.csproj | 0 .../ConsoleAI.CreateEmbeddings/Program.cs | 8 +- .../ConsoleAI.CustomClientMiddle.csproj | 0 .../ConsoleAI.CustomClientMiddle/Program.cs | 8 + .../ConsoleAI.CustomEmbeddingsMiddle.csproj | 0 .../Program.cs | 4 +- .../ConsoleAI.DependencyInjection.csproj | 0 .../ConsoleAI.DependencyInjection/Program.cs | 14 + .../ConsoleAI.FunctionalityPipelines.csproj | 0 .../Program.cs | 40 +++ .../ConsoleAI.GetResponseAsyncArgs.csproj | 0 .../ConsoleAI.GetResponseAsyncArgs/Program.cs | 0 ...ConsoleAI.GetStreamingResponseAsync.csproj | 0 .../Program.cs | 2 + .../ConsoleAI.ProvideOptions.csproj | 0 .../ConsoleAI.ProvideOptions/Program.cs | 11 + .../ConsoleAI.StatelessStateful.csproj | 18 ++ .../ConsoleAI.StatelessStateful/Program.cs | 66 +++++ .../ConsoleAI.ToolCalling.csproj | 0 .../ConsoleAI.ToolCalling/Program.cs | 16 + .../ConsoleAI.UseExample.csproj | 18 ++ .../ConsoleAI.UseExample/Program.cs | 14 +- .../ConsoleAI.UseTelemetry.csproj | 0 .../ConsoleAI.UseTelemetry/Program.cs | 4 +- .../ConsoleAI/ConsoleAI.csproj | 0 .../ConsoleAI/Program.cs | 4 +- docs/ai/toc.yml | 2 +- .../ai/AI.Shared/RateLimitingChatClient.cs | 55 ---- ...ngChatClientExtensions.OptionalOverload.cs | 17 -- .../ConsoleAI.CustomClientMiddle/Program.cs | 12 - .../ConsoleAI.DependencyInjection/Program.cs | 20 -- .../Program.cs | 46 --- .../ai/ConsoleAI.ProvideOptions/Program.cs | 13 - .../ai/ConsoleAI.ToolCalling/Program.cs | 21 -- 59 files changed, 604 insertions(+), 629 deletions(-) delete mode 100644 docs/ai/ai-extensions.md delete mode 100644 docs/ai/conceptual/microsoft-extensions-ai.md create mode 100644 docs/ai/microsoft-extensions-ai.md rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/AI.Shared/AI.Shared.csproj (100%) create mode 100644 docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs create mode 100644 docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/AI.Shared/RateLimitingChatClientExtensions.cs (68%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/AI.Shared/RateLimitingEmbeddingGenerator.cs (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/AI.Shared/SampleChatClient.cs (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/AI.Shared/SampleEmbeddingGenerator.cs (50%) rename docs/{core/extensions/snippets/ai/ConsoleAI.UseExample/ConsoleAI.UseExample.csproj => ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/ConsoleAI.AddMessages.csproj} (89%) create mode 100644 docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/Program.cs rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.CacheResponses/ConsoleAI.CacheResponses.csproj (100%) rename 
docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.CacheResponses/Program.cs (85%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.ConsumeClientMiddleware/ConsoleAI.ConsumeClientMiddleware.csproj (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.ConsumeClientMiddleware/Program.cs (50%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.ConsumeRateLimitingEmbedding/ConsoleAI.ConsumeRateLimitingEmbedding.csproj (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.CreateEmbeddings/ConsoleAI.CreateEmbeddings.csproj (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.CreateEmbeddings/Program.cs (65%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.CustomClientMiddle/ConsoleAI.CustomClientMiddle.csproj (100%) create mode 100644 docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/Program.cs rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.CustomEmbeddingsMiddle/ConsoleAI.CustomEmbeddingsMiddle.csproj (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.CustomEmbeddingsMiddle/Program.cs (94%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.DependencyInjection/ConsoleAI.DependencyInjection.csproj (100%) create mode 100644 docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/Program.cs rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.FunctionalityPipelines/ConsoleAI.FunctionalityPipelines.csproj (100%) create mode 100644 docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.FunctionalityPipelines/Program.cs rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.GetResponseAsyncArgs/ConsoleAI.GetResponseAsyncArgs.csproj (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.GetResponseAsyncArgs/Program.cs (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.GetStreamingResponseAsync/ConsoleAI.GetStreamingResponseAsync.csproj (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.GetStreamingResponseAsync/Program.cs (89%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.ProvideOptions/ConsoleAI.ProvideOptions.csproj (100%) create mode 100644 docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ProvideOptions/Program.cs create mode 100644 docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/ConsoleAI.StatelessStateful.csproj create mode 100644 docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/Program.cs rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.ToolCalling/ConsoleAI.ToolCalling.csproj (100%) create mode 100644 docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ToolCalling/Program.cs create mode 100644 docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseExample/ConsoleAI.UseExample.csproj rename docs/{core/extensions/snippets/ai => 
ai/snippets/microsoft-extensions-ai}/ConsoleAI.UseExample/Program.cs (58%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.UseTelemetry/ConsoleAI.UseTelemetry.csproj (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI.UseTelemetry/Program.cs (85%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI/ConsoleAI.csproj (100%) rename docs/{core/extensions/snippets/ai => ai/snippets/microsoft-extensions-ai}/ConsoleAI/Program.cs (55%) delete mode 100644 docs/core/extensions/snippets/ai/AI.Shared/RateLimitingChatClient.cs delete mode 100644 docs/core/extensions/snippets/ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs delete mode 100644 docs/core/extensions/snippets/ai/ConsoleAI.CustomClientMiddle/Program.cs delete mode 100644 docs/core/extensions/snippets/ai/ConsoleAI.DependencyInjection/Program.cs delete mode 100644 docs/core/extensions/snippets/ai/ConsoleAI.FunctionalityPipelines/Program.cs delete mode 100644 docs/core/extensions/snippets/ai/ConsoleAI.ProvideOptions/Program.cs delete mode 100644 docs/core/extensions/snippets/ai/ConsoleAI.ToolCalling/Program.cs diff --git a/.openpublishing.redirection.ai.json b/.openpublishing.redirection.ai.json index c22ca5170dd6b..a34adfd937675 100644 --- a/.openpublishing.redirection.ai.json +++ b/.openpublishing.redirection.ai.json @@ -1,5 +1,9 @@ { "redirections": [ + { + "source_path_from_root": "/docs/ai/ai-extensions.md", + "redirect_url": "/dotnet/ai/microsoft-extensions-ai" + }, { "source_path_from_root": "/docs/ai/conceptual/agents.md", "redirect_url": "/dotnet/ai" diff --git a/.openpublishing.redirection.core.json b/.openpublishing.redirection.core.json index 2542c8aef65af..5f3df9aae20b7 100644 --- a/.openpublishing.redirection.core.json +++ b/.openpublishing.redirection.core.json @@ -692,6 +692,10 @@ "source_path_from_root": "/docs/core/dotnet-five.md", "redirect_url": "/dotnet/core/whats-new/dotnet-5" }, + { + "source_path_from_root": "/docs/core/extensions/artificial-intelligence.md", + "redirect_url": "/dotnet/ai/microsoft-extensions-ai" + }, { "source_path_from_root": "/docs/core/extensions/culture-insensitive-string-operations.md", "redirect_url": "/dotnet/core/extensions/performing-culture-insensitive-string-operations" diff --git a/docs/ai/ai-extensions.md b/docs/ai/ai-extensions.md deleted file mode 100644 index 9bd0f85681fc1..0000000000000 --- a/docs/ai/ai-extensions.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: Unified AI building blocks for .NET -description: Learn how to develop with unified AI building blocks for .NET using Microsoft.Extensions.AI and Microsoft.Extensions.AI.Abstractions libraries -ms.date: 12/16/2024 -ms.topic: quickstart -ms.custom: devx-track-dotnet, devx-track-dotnet-ai -author: alexwolfmsft -ms.author: alexwolf ---- - -# Unified AI building blocks for .NET using Microsoft.Extensions.AI - -The .NET ecosystem provides abstractions for integrating AI services into .NET applications and libraries using the libraries. In this article, you learn: - -- Core concepts and capabilities of the `Microsoft.Extensions.AI` libraries. -- How to work with AI abstractions in your apps and the benefits they offer. -- Essential AI middleware concepts. - -For more information, see [Introduction to Microsoft.Extensions.AI](../core/extensions/artificial-intelligence.md). - -## What are the Microsoft.Extensions.AI libraries? 
- -The `Microsoft.Extensions.AI` libraries provides core exchange types and abstractions for interacting with AI services, such as small and large language models (SLMs and LLMs). They also provide the ability to register services like logging and caching in your dependency injection (DI) container. - -:::image type="content" source="media/ai-extensions/meai-architecture-diagram.png" lightbox="media/ai-extensions/meai-architecture-diagram.png" alt-text="An architectural diagram of the AI extensions libraries."::: - -The `Microsoft.Extensions.AI` namespaces provide abstractions that can be implemented by various services, all adhering to the same core concepts. This library is not intended to provide APIs tailored to any specific provider's services. The goal of `Microsoft.Extensions.AI` is to act as a unifying layer within the .NET ecosystem, enabling developers to choose their preferred frameworks and libraries while ensuring seamless integration and collaboration across the ecosystem. - -## Work with abstractions for common AI services - -AI capabilities are rapidly evolving, with patterns emerging for common functionality: - -- Chat features to conversationally prompt an AI for information or data analysis. -- Embedding generation to integrate with vector search capabilities. -- Tool calling to integrate with other services, platforms, or code. - -The `Microsoft.Extensions.AI.Abstractions` package provides abstractions for these types of tasks, so developers can focus on coding against conceptual AI capabilities rather than specific platforms or provider implementations. Unified abstractions are crucial for developers to work effectively across different sources. - -For example, the interface allows consumption of language models from various providers, such as an Azure OpenAI service or a local Ollama installation. Any .NET package that provides an AI client can implement the `IChatClient` interface to enable seamless integration with consuming .NET code: - -```csharp -IChatClient client = -    environment.IsDevelopment ? -    new OllamaChatClient(...) : -    new AzureAIInferenceChatClient(...); -``` - -Then, regardless of the provider you're using, you can send requests by calling , as follows: - -```csharp -var response = await chatClient.GetResponseAsync( -      "Translate the following text into Pig Latin: I love .NET and AI"); - -Console.WriteLine(response.Message); -``` - -These abstractions allow for idiomatic C# code for various scenarios with minimal code changes. They make it easy to use different services for development and production, addressing hybrid scenarios, or exploring other service providers. - -Library authors who implement these abstractions make their clients interoperable with the broader `Microsoft.Extensions.AI` ecosystem. Service-specific APIs remain accessible if needed, allowing consumers to code against the standard abstractions and pass through to proprietary APIs only when required. - -`Microsoft.Extensions.AI` provides implementations for the following services through additional packages: - -- [OpenAI](https://aka.ms/meai-openai-nuget) -- [Azure OpenAI](https://aka.ms/meai-openai-nuget) -- [Azure AI Inference](https://aka.ms/meai-azaiinference-nuget) -- [Ollama](https://aka.ms/meai-ollama-nuget) - -In the future, implementations of these `Microsoft.Extensions.AI` abstractions will be part of the respective client libraries rather than requiring installation of additional packages. 
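-The same `IChatClient` abstraction also supports streaming responses. The following is a minimal sketch that reuses the `chatClient` from the earlier completion example (the prompt text is illustrative):
-
-```csharp
-await foreach (var update in
-    chatClient.GetStreamingResponseAsync("What is AI?"))
-{
-    // Each update carries a fragment of the response text.
-    Console.Write(update);
-}
-```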
- -## Middleware implementations for AI services - -Connecting to and using AI services is just one aspect of building robust applications. Production-ready applications require additional features like telemetry, logging, caching, and tool-calling capabilities. The `Microsoft.Extensions.AI` packages provides APIs that enable you to easily integrate these components into your applications using familiar dependency injection and middleware patterns. - -The following sample demonstrates how to register an OpenAI `IChatClient`. You can attach capabilities in a consistent way across various providers by calling methods such as on a . - -```csharp -app.Services.AddChatClient(builder => builder -    .UseLogging() - .UseFunctionInvocation() - .UseDistributedCache()    - .UseOpenTelemetry() -    .Use(new OpenAIClient(...)).AsChatClient(...)); -``` - -The capabilities demonstrated in this snippet are included in the `Microsoft.Extensions.AI` library, but they're only a small subset of the capabilities that can be layered in with this approach. .NET developers are able to expose many types of middleware to create powerful AI functionality. - -## Build with Microsoft.Extensions.AI - -You can start building with `Microsoft.Extensions.AI` in the following ways: - -- **Library developers**: If you own libraries that provide clients for AI services, consider implementing the interfaces in your libraries. This allows users to easily integrate your NuGet package via the abstractions. -- **Service consumers**: If you're developing libraries that consume AI services, use the abstractions instead of hardcoding to a specific AI service. This approach gives your consumers the flexibility to choose their preferred service. -- **Application developers**: Use the abstractions to simplify integration into your apps. This enables portability across models and services, facilitates testing and mocking, leverages middleware provided by the ecosystem, and maintains a consistent API throughout your app, even if you use different services in different parts of your application. -- **Ecosystem contributors**: If you're interested in contributing to the ecosystem, consider writing custom middleware components. - -To get started, see the samples in the [dotnet/ai-samples](https://aka.ms/meai-samples) GitHub repository. - -For an end-to-end sample using `Microsoft.Extensions.AI`, see [eShopSupport](https://github.com/dotnet/eShopSupport). - -## Next steps - -- [Build an AI chat app with .NET](quickstarts/build-chat-app.md) -- [Quickstart - Summarize text using Azure AI chat app with .NET](quickstarts/prompt-model.md) diff --git a/docs/ai/conceptual/evaluation-libraries.md b/docs/ai/conceptual/evaluation-libraries.md index 2d94f60468c31..c15f91fadb946 100644 --- a/docs/ai/conceptual/evaluation-libraries.md +++ b/docs/ai/conceptual/evaluation-libraries.md @@ -8,7 +8,7 @@ ms.date: 03/18/2025 The Microsoft.Extensions.AI.Evaluation libraries (currently in preview) simplify the process of evaluating the quality and accuracy of responses generated by AI models in .NET intelligent apps. Various metrics measure aspects like relevance, truthfulness, coherence, and completeness of the responses. Evaluations are crucial in testing, because they help ensure that the AI model performs as expected and provides reliable and accurate results. 
-The evaluation libraries, which are built on top of the [Microsoft.Extensions.AI abstractions](../ai-extensions.md), are composed of the following NuGet packages: +The evaluation libraries, which are built on top of the [Microsoft.Extensions.AI abstractions](../microsoft-extensions-ai.md), are composed of the following NuGet packages: - [📦 Microsoft.Extensions.AI.Evaluation](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation) – Defines the core abstractions and types for supporting evaluation. - [📦 Microsoft.Extensions.AI.Evaluation.Quality](https://www.nuget.org/packages/Microsoft.Extensions.AI.Evaluation.Quality) – Contains evaluators that assess the quality of LLM responses in an app according to metrics such as relevance, fluency, coherence, and truthfulness. diff --git a/docs/ai/conceptual/microsoft-extensions-ai.md b/docs/ai/conceptual/microsoft-extensions-ai.md deleted file mode 100644 index cf680faeb0e51..0000000000000 --- a/docs/ai/conceptual/microsoft-extensions-ai.md +++ /dev/null @@ -1,279 +0,0 @@ ---- -title: Artificial Intelligence in .NET (Preview) -description: Learn how to use the Microsoft.Extensions.AI libraries to integrate and interact with various AI services in your .NET applications. -author: IEvangelist -ms.author: dapine -ms.date: 01/06/2025 ---- - -# Artificial intelligence in .NET (Preview) - -With a growing variety of artificial intelligence (AI) services available, developers need a way to integrate and interact with these services in their .NET applications. The `Microsoft.Extensions.AI` libraries provide a unified approach for representing generative AI components, which enables seamless integration and interoperability with various AI services. This article introduces the libraries and provides installation instructions and usage examples to help you get started. - -The [📦 Microsoft.Extensions.AI.Abstractions](https://www.nuget.org/packages/Microsoft.Extensions.AI.Abstractions) package provides the core exchange types: and . Any .NET library that provides an AI client can implement the `IChatClient` interface to enable seamless integration with consuming code. - -The [📦 Microsoft.Extensions.AI](https://www.nuget.org/packages/Microsoft.Extensions.AI) package has an implicit dependency on the `Microsoft.Extensions.AI.Abstractions` package. This package enables you to easily integrate components such as telemetry and caching into your applications using familiar dependency injection and middleware patterns. For example, it provides the extension method, which adds OpenTelemetry support to the chat client pipeline. - -## Install the package - -To install the [📦 Microsoft.Extensions.AI](https://www.nuget.org/packages/Microsoft.Extensions.AI) and [📦 Microsoft.Extensions.AI.Abstractions](https://www.nuget.org/packages/Microsoft.Extensions.AI.Abstractions) NuGet packages, use the .NET CLI or add package references directly to your C# project file: - -### [.NET CLI](#tab/dotnet-cli) - -```dotnetcli -dotnet add package Microsoft.Extensions.AI --prerelease -``` - -Or, if you're using .NET 10+ SDK: - -```dotnetcli -dotnet package add Microsoft.Extensions.AI --prerelease -``` - -### [PackageReference](#tab/package-reference) - -```xml - -``` - ---- - -For more information, see [dotnet package add](../tools/dotnet-package-add.md) or [Manage package dependencies in .NET applications](../tools/dependencies.md). 
- -## The `IChatClient` interface - -The interface defines a client abstraction responsible for interacting with AI services that provide chat capabilities. It includes methods for sending and receiving messages with multi-modal content (such as text, images, and audio), either as a complete set or streamed incrementally. Additionally, it provides metadata information about the client and allows retrieving strongly typed services. - -> [!IMPORTANT] -> For more usage examples and real-world scenarios, see [AI for .NET developers](../../ai/index.yml). - -The following sample implements `IChatClient` to show the general structure. - -:::code language="csharp" source="snippets/ai/AI.Shared/SampleChatClient.cs"::: - -You can find other concrete implementations of `IChatClient` in the following NuGet packages: - -- [📦 Microsoft.Extensions.AI.AzureAIInference](https://www.nuget.org/packages/Microsoft.Extensions.AI.AzureAIInference): Implementation backed by [Azure AI Model Inference API](/azure/ai-studio/reference/reference-model-inference-api). -- [📦 Microsoft.Extensions.AI.Ollama](https://www.nuget.org/packages/Microsoft.Extensions.AI.Ollama): Implementation backed by [Ollama](https://ollama.com/). -- [📦 Microsoft.Extensions.AI.OpenAI](https://www.nuget.org/packages/Microsoft.Extensions.AI.OpenAI): Implementation backed by either [OpenAI](https://openai.com/) or OpenAI-compatible endpoints (such as [Azure OpenAI](https://azure.microsoft.com/products/ai-services/openai-service)). - -The following subsections show specific `IChatClient` usage examples: - -- [Request chat completion](#request-chat-completion) -- [Request chat completion with streaming](#request-chat-completion-with-streaming) -- [Tool calling](#tool-calling) -- [Cache responses](#cache-responses) -- [Use telemetry](#use-telemetry) -- [Provide options](#provide-options) -- [Functionality pipelines](#functionality-pipelines) -- [Custom `IChatClient` middleware](#custom-ichatclient-middleware) -- [Dependency injection](#dependency-injection) - -### Request chat completion - -To request a completion, call the method. The request is composed of one or more messages, each of which is composed of one or more pieces of content. Accelerator methods exist to simplify common cases, such as constructing a request for a single piece of text content. - -:::code language="csharp" source="snippets/ai/ConsoleAI/Program.cs"::: - -The core `IChatClient.GetResponseAsync` method accepts a list of messages. This list represents the history of all messages that are part of the conversation. - -:::code language="csharp" source="snippets/ai/ConsoleAI.GetResponseAsyncArgs/Program.cs"::: - -Each message in the history is represented by a object. The `ChatMessage` class provides a property that indicates the role of the message. By default, the is used. The following roles are available: - -- : Instructs or sets the behavior of the assistant. -- : Provides responses to system-instructed, user-prompted input. -- : Provides additional information and references for chat completions. -- : Provides input for chat completions. - -Each chat message is instantiated, assigning to its property a new . There are various [types of content](xref:Microsoft.Extensions.AI.AIContent) that can be represented, such as a simple string or a more complex object that represents a multi-modal message with text, images, and audio: - -- -- -- -- -- - -### Request chat completion with streaming - -The inputs to are identical to those of `GetResponseAsync`. 
However, rather than returning the complete response as part of a object, the method returns an where `T` is , providing a stream of updates that collectively form the single response. - -:::code language="csharp" source="snippets/ai/ConsoleAI.GetStreamingResponseAsync/Program.cs"::: - -> [!TIP] -> Streaming APIs are nearly synonymous with AI user experiences. C# enables compelling scenarios with its `IAsyncEnumerable` support, allowing for a natural and efficient way to stream data. - -### Tool calling - -Some models and services support _tool calling_, where requests can include tools for the model to invoke functions to gather additional information. Instead of sending a final response, the model requests a function invocation with specific arguments. The client then invokes the function and sends the results back to the model along with the conversation history. The `Microsoft.Extensions.AI` library includes abstractions for various message content types, including function call requests and results. While consumers can interact with this content directly, `Microsoft.Extensions.AI` automates these interactions and provides: - -- : Represents a function that can be described to an AI service and invoked. -- : Provides factory methods for creating commonly used implementations of `AIFunction`. -- : Wraps an `IChatClient` to add automatic function invocation capabilities. - -Consider the following example that demonstrates a random function invocation: - -:::code language="csharp" source="snippets/ai/ConsoleAI.ToolCalling/Program.cs"::: - -The preceding example depends on the [📦 Microsoft.Extensions.AI.Ollama](https://www.nuget.org/packages/Microsoft.Extensions.AI.Ollama) NuGet package. - -The preceding code: - -- Defines a function named `GetCurrentWeather` that returns a random weather forecast. - - This function is decorated with a , which is used to provide a description of the function to the AI service. -- Instantiates a with an and configures it to use function invocation. -- Calls `GetStreamingResponseAsync` on the client, passing a prompt and a list of tools that includes a function created with . -- Iterates over the response, printing each update to the console. - -### Cache responses - -If you're familiar with [Caching in .NET](caching.md), it's good to know that provides other such delegating `IChatClient` implementations. The is an `IChatClient` that layers caching around another arbitrary `IChatClient` instance. When a unique chat history is submitted to the `DistributedCachingChatClient`, it forwards it to the underlying client and then caches the response before sending it back to the consumer. The next time the same prompt is submitted, such that a cached response can be found in the cache, the `DistributedCachingChatClient` returns the cached response rather than needing to forward the request along the pipeline. - -:::code language="csharp" source="snippets/ai/ConsoleAI.CacheResponses/Program.cs"::: - -The preceding example depends on the [📦 Microsoft.Extensions.Caching.Memory](https://www.nuget.org/packages/Microsoft.Extensions.Caching.Memory) NuGet package. For more information, see [Caching in .NET](caching.md). - -### Use telemetry - -Another example of a delegating chat client is the . This implementation adheres to the [OpenTelemetry Semantic Conventions for Generative AI systems](https://opentelemetry.io/docs/specs/semconv/gen-ai/). 
Similar to other `IChatClient` delegators, it layers metrics and spans around any underlying `IChatClient` implementation, providing enhanced observability. - -:::code language="csharp" source="snippets/ai/ConsoleAI.UseTelemetry/Program.cs"::: - -The preceding example depends on the [📦 OpenTelemetry.Exporter.Console](https://www.nuget.org/packages/OpenTelemetry.Exporter.Console) NuGet package. - -### Provide options - -Every call to or can optionally supply a instance containing additional parameters for the operation. The most common parameters among AI models and services show up as strongly typed properties on the type, such as . Other parameters can be supplied by name in a weakly typed manner via the dictionary. - -You can also specify options when building an `IChatClient` with the fluent API and chaining a call to the `ConfigureOptions` extension method. This delegating client wraps another client and invokes the supplied delegate to populate a `ChatOptions` instance for every call. For example, to ensure that the property defaults to a particular model name, you can use code like the following: - -:::code language="csharp" source="snippets/ai/ConsoleAI.ProvideOptions/Program.cs"::: - -The preceding example depends on the [📦 Microsoft.Extensions.AI.Ollama](https://www.nuget.org/packages/Microsoft.Extensions.AI.Ollama) NuGet package. - -### Functionality pipelines - -`IChatClient` instances can be layered to create a pipeline of components, each adding specific functionality. These components can come from `Microsoft.Extensions.AI`, other NuGet packages, or custom implementations. This approach allows you to augment the behavior of the `IChatClient` in various ways to meet your specific needs. Consider the following example code that layers a distributed cache, function invocation, and OpenTelemetry tracing around a sample chat client: - -:::code language="csharp" source="snippets/ai/ConsoleAI.FunctionalityPipelines/Program.cs"::: - -The preceding example depends on the following NuGet packages: - -- [📦 Microsoft.Extensions.Caching.Memory](https://www.nuget.org/packages/Microsoft.Extensions.Caching.Memory) -- [📦 Microsoft.Extensions.AI.Ollama](https://www.nuget.org/packages/Microsoft.Extensions.AI.Ollama) -- [📦 OpenTelemetry.Exporter.Console](https://www.nuget.org/packages/OpenTelemetry.Exporter.Console) - -### Custom `IChatClient` middleware - -To add additional functionality, you can implement `IChatClient` directly or use the class. This class serves as a base for creating chat clients that delegate operations to another `IChatClient` instance. It simplifies chaining multiple clients, allowing calls to pass through to an underlying client. - -The `DelegatingChatClient` class provides default implementations for methods like `GetResponseAsync`, `GetStreamingResponseAsync`, and `Dispose`, which forward calls to the inner client. You can derive from this class and override only the methods you need to enhance behavior, while delegating other calls to the base implementation. This approach helps create flexible and modular chat clients that are easy to extend and compose. - -The following is an example class derived from `DelegatingChatClient` to provide rate limiting functionality, utilizing the : - -:::code language="csharp" source="snippets/ai/AI.Shared/RateLimitingChatClient.cs"::: - -The preceding example depends on the [📦 System.Threading.RateLimiting](https://www.nuget.org/packages/System.Threading.RateLimiting) NuGet package. 
Composition of the `RateLimitingChatClient` with another client is straightforward: - -:::code language="csharp" source="snippets/ai/ConsoleAI.CustomClientMiddle/Program.cs"::: - -To simplify the composition of such components with others, component authors should create a `Use*` extension method for registering the component into a pipeline. For example, consider the following extension method: - -:::code language="csharp" source="snippets/ai/AI.Shared/RateLimitingChatClientExtensions.cs" id="one"::: - -Such extensions can also query for relevant services from the DI container; the used by the pipeline is passed in as an optional parameter: - -:::code language="csharp" source="snippets/ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs" id="two"::: - -The consumer can then easily use this in their pipeline, for example: - -:::code language="csharp" source="snippets/ai/ConsoleAI.ConsumeClientMiddleware/Program.cs" id="program"::: - -This example demonstrates [hosted scenario](generic-host.md), where the consumer relies on [dependency injection](dependency-injection.md) to provide the `RateLimiter` instance. The preceding extension methods demonstrate using a `Use` method on . The `ChatClientBuilder` also provides overloads that make it easier to write such delegating handlers. - -For example, in the earlier `RateLimitingChatClient` example, the overrides of `GetResponseAsync` and `GetStreamingResponseAsync` only need to do work before and after delegating to the next client in the pipeline. To achieve the same thing without writing a custom class, you can use an overload of `Use` that accepts a delegate that's used for both `GetResponseAsync` and `GetStreamingResponseAsync`, reducing the boilerplate required: - -:::code language="csharp" source="snippets/ai/ConsoleAI.UseExample/Program.cs"::: - -The preceding overload internally uses an `AnonymousDelegatingChatClient`, which enables more complicated patterns with only a little additional code. - -For scenarios where you'd like to specify delegating implementations of `GetResponseAsync` and `GetStreamingResponseAsync` inline, and where it's important to be able to write a different implementation for each in order to handle their unique return types specially, you can use the overload that accepts a delegate for each. - -### Dependency injection - - implementations will typically be provided to an application via [dependency injection (DI)](dependency-injection.md). In this example, an is added into the DI container, as is an `IChatClient`. The registration for the `IChatClient` employs a builder that creates a pipeline containing a caching client (which will then use an `IDistributedCache` retrieved from DI) and the sample client. The injected `IChatClient` can be retrieved and used elsewhere in the app. - -:::code language="csharp" source="snippets/ai/ConsoleAI.DependencyInjection/Program.cs"::: - -The preceding example depends on the following NuGet packages: - -- [📦 Microsoft.Extensions.Hosting](https://www.nuget.org/packages/Microsoft.Extensions.Hosting) -- [📦 Microsoft.Extensions.Caching.Memory](https://www.nuget.org/packages/Microsoft.Extensions.Caching.Memory) - -What instance and configuration is injected can differ based on the current needs of the application, and multiple pipelines can be injected with different keys. - -## The `IEmbeddingGenerator` interface - -The interface represents a generic generator of embeddings. 
Here, `TInput` is the type of input values being embedded, and `TEmbedding` is the type of generated embedding, which inherits from the class. - -The `Embedding` class serves as a base class for embeddings generated by an `IEmbeddingGenerator`. It's designed to store and manage the metadata and data associated with embeddings. Derived types like `Embedding` provide the concrete embedding vector data. For instance, an embedding exposes a property to access its embedding data. - -The `IEmbeddingGenerator` interface defines a method to asynchronously generate embeddings for a collection of input values, with optional configuration and cancellation support. It also provides metadata describing the generator and allows for the retrieval of strongly typed services that can be provided by the generator or its underlying services. - -The following sample implementation of `IEmbeddingGenerator` shows the general structure (however, it just generates random embedding vectors). - -:::code language="csharp" source="snippets/ai/AI.Shared/SampleEmbeddingGenerator.cs"::: - -The preceding code: - -- Defines a class named `SampleEmbeddingGenerator` that implements the `IEmbeddingGenerator>` interface. -- Has a primary constructor that accepts an endpoint and model ID, which are used to identify the generator. -- Exposes a `Metadata` property that provides metadata about the generator. -- Implements the `GenerateAsync` method to generate embeddings for a collection of input values: - - Simulates an asynchronous operation by delaying for 100 milliseconds. - - Returns random embeddings for each input value. - -You can find actual concrete implementations in the following packages: - -- [📦 Microsoft.Extensions.AI.OpenAI](https://www.nuget.org/packages/Microsoft.Extensions.AI.OpenAI) -- [📦 Microsoft.Extensions.AI.Ollama](https://www.nuget.org/packages/Microsoft.Extensions.AI.Ollama) - -The following sections show specific `IEmbeddingGenerator` usage examples: - -- [Create embeddings](#create-embeddings) -- [Custom `IEmbeddingGenerator` middleware](#custom-iembeddinggenerator-middleware) - -### Create embeddings - -The primary operation performed with an is embedding generation, which is accomplished with its method. - -:::code language="csharp" source="snippets/ai/ConsoleAI.CreateEmbeddings/Program.cs"::: - -### Custom `IEmbeddingGenerator` middleware - -As with `IChatClient`, `IEmbeddingGenerator` implementations can be layered. Just as `Microsoft.Extensions.AI` provides delegating implementations of `IChatClient` for caching and telemetry, it provides an implementation for `IEmbeddingGenerator` as well. - -:::code language="csharp" source="snippets/ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs"::: - -The `IEmbeddingGenerator` enables building custom middleware that extends the functionality of an `IEmbeddingGenerator`. The class is an implementation of the `IEmbeddingGenerator` interface that serves as a base class for creating embedding generators that delegate their operations to another `IEmbeddingGenerator` instance. It allows for chaining multiple generators in any order, passing calls through to an underlying generator. The class provides default implementations for methods such as and `Dispose`, which forward the calls to the inner generator instance, enabling flexible and modular embedding generation. 
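-For instance, a minimal delegating generator that logs each request before forwarding it to the inner generator might look like the following sketch (the type name and logging behavior are illustrative, not part of the library):
-
-```csharp
-using Microsoft.Extensions.AI;
-
-public sealed class LoggingEmbeddingGenerator(
-    IEmbeddingGenerator<string, Embedding<float>> innerGenerator)
-    : DelegatingEmbeddingGenerator<string, Embedding<float>>(innerGenerator)
-{
-    public override Task<GeneratedEmbeddings<Embedding<float>>> GenerateAsync(
-        IEnumerable<string> values,
-        EmbeddingGenerationOptions? options = null,
-        CancellationToken cancellationToken = default)
-    {
-        // Log the request, then delegate to the inner generator.
-        Console.WriteLine($"Generating embeddings for {values.Count()} values");
-        return base.GenerateAsync(values, options, cancellationToken);
-    }
-}
-```
-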
-The following is an example implementation of such a delegating embedding generator that rate limits embedding generation requests:
-
-:::code language="csharp" source="snippets/ai/AI.Shared/RateLimitingEmbeddingGenerator.cs":::
-
-This can then be layered around an arbitrary `IEmbeddingGenerator>` to rate limit all embedding generation operations performed.
-
-:::code language="csharp" source="snippets/ai/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs":::
-
-In this way, the `RateLimitingEmbeddingGenerator` can be composed with other `IEmbeddingGenerator>` instances to provide rate limiting functionality.
-
-## See also
-
-- [Develop .NET applications with AI features](../../ai/get-started/dotnet-ai-overview.md)
-- [Unified AI building blocks for .NET using Microsoft.Extensions.AI](../../ai/ai-extensions.md)
-- [Build an AI chat app with .NET](../../ai/quickstarts/build-chat-app.md)
-- [.NET dependency injection](dependency-injection.md)
-- [Rate limit an HTTP handler in .NET](http-ratelimiter.md)
-- [.NET Generic Host](generic-host.md)
-- [Caching in .NET](caching.md)
diff --git a/docs/ai/dotnet-ai-ecosystem.md b/docs/ai/dotnet-ai-ecosystem.md
index 102cb89cc99e6..911b75612c410 100644
--- a/docs/ai/dotnet-ai-ecosystem.md
+++ b/docs/ai/dotnet-ai-ecosystem.md
@@ -15,7 +15,7 @@ The .NET ecosystem provides many powerful tools, libraries, and services to deve

 ## Microsoft.Extensions.AI libraries

-[`Microsoft.Extensions.AI`](ai-extensions.md) is a set of core .NET libraries that provide a unified layer of C# abstractions for interacting with AI services, such as small and large language models (SLMs and LLMs), embeddings, and middleware. These APIs were created in collaboration with developers across the .NET ecosystem, including Semantic Kernel. The low-level APIs, such as and , were extracted from Semantic Kernel and moved into the namespace.
+[`Microsoft.Extensions.AI`](microsoft-extensions-ai.md) is a set of core .NET libraries that provide a unified layer of C# abstractions for interacting with AI services, such as small and large language models (SLMs and LLMs), embeddings, and middleware. These APIs were created in collaboration with developers across the .NET ecosystem, including Semantic Kernel. The low-level APIs, such as `IChatClient` and `IEmbeddingGenerator<TInput,TEmbedding>`, were extracted from Semantic Kernel and moved into the `Microsoft.Extensions.AI` namespace.

 `Microsoft.Extensions.AI` provides abstractions that can be implemented by various services, all adhering to the same core concepts. This library is not intended to provide APIs tailored to any specific provider's services. The goal of `Microsoft.Extensions.AI` is to act as a unifying layer within the .NET ecosystem, enabling developers to choose their preferred frameworks and libraries while ensuring seamless integration and collaboration across the ecosystem.

diff --git a/docs/ai/dotnet-ai-overview.md b/docs/ai/dotnet-ai-overview.md
index 852d3d4ccf82e..bf203f5aec4a2 100644
--- a/docs/ai/dotnet-ai-overview.md
+++ b/docs/ai/dotnet-ai-overview.md
@@ -34,18 +34,18 @@ The opportunities with AI are near endless. 
Here are a few examples of solutions
 We recommend the following sequence of tutorials and articles for an introduction to developing applications with AI and .NET:

-| Scenario | Tutorial |
-|----------|----------|
-| Create a chat application | [Build an Azure AI chat app with .NET](../quickstarts/build-chat-app.md)|
-| Summarize text | [Summarize text using Azure AI chat app with .NET](../quickstarts/prompt-model.md) |
-| Chat with your data | [Get insight about your data from an .NET Azure AI chat app](../quickstarts/build-vector-search-app.md) |
+| Scenario | Tutorial |
+|-----------------------------|----------|
+| Create a chat application | [Build an Azure AI chat app with .NET](quickstarts/build-chat-app.md) |
+| Summarize text | [Summarize text using an Azure AI chat app with .NET](quickstarts/prompt-model.md) |
+| Chat with your data | [Get insight about your data from a .NET Azure AI chat app](quickstarts/build-vector-search-app.md) |
-| Call .NET functions with AI | [Extend Azure AI using tools and execute a local function with .NET](../quickstarts/use-function-calling.md) |
+| Call .NET functions with AI | [Extend Azure AI using tools and execute a local function with .NET](quickstarts/use-function-calling.md) |
-| Generate images | [Generate images using Azure AI with .NET](../quickstarts/generate-images.md) |
-| Train your own model |[ML.NET tutorial](https://dotnet.microsoft.com/learn/ml-dotnet/get-started-tutorial/intro) |
+| Generate images | [Generate images using Azure AI with .NET](quickstarts/generate-images.md) |
+| Train your own model | [ML.NET tutorial](https://dotnet.microsoft.com/learn/ml-dotnet/get-started-tutorial/intro) |

-Browse the table of contents to learn more about the core concepts, starting with [How generative AI and LLMs work](../conceptual/how-genai-and-llms-work.md).
+Browse the table of contents to learn more about the core concepts, starting with [How generative AI and LLMs work](conceptual/how-genai-and-llms-work.md).

 ## Next steps

-- [Quickstart: Build an Azure AI chat app with .NET](../quickstarts/build-chat-app.md)
-- [Video series: Machine Learning and AI with .NET](/shows/machine-learning-and-ai-with-dotnet-for-beginners)
+* [Quickstart: Build an Azure AI chat app with .NET](quickstarts/build-chat-app.md)
+* [Video series: Machine Learning and AI with .NET](/shows/machine-learning-and-ai-with-dotnet-for-beginners)
diff --git a/docs/ai/how-to/app-service-aoai-auth.md b/docs/ai/how-to/app-service-aoai-auth.md
index 1774c71503432..7b70bfc763eca 100644
--- a/docs/ai/how-to/app-service-aoai-auth.md
+++ b/docs/ai/how-to/app-service-aoai-auth.md
@@ -11,7 +11,7 @@ zone_pivot_groups: azure-interface

 # Authenticate to Azure OpenAI from an Azure hosted app using Microsoft Entra ID

-This article demonstrates how to use [Microsoft Entra ID managed identities](/azure/app-service/overview-managed-identity) and the [Microsoft.Extensions.AI library](../ai-extensions.md) to authenticate an Azure hosted app to an Azure OpenAI resource.
+This article demonstrates how to use [Microsoft Entra ID managed identities](/azure/app-service/overview-managed-identity) and the [Microsoft.Extensions.AI library](../microsoft-extensions-ai.md) to authenticate an Azure hosted app to an Azure OpenAI resource.

 A managed identity from Microsoft Entra ID allows your app to easily access other Microsoft Entra protected resources such as Azure OpenAI. The identity is managed by the Azure platform and doesn't require you to provision, manage, or rotate any secrets. 
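For example, with the `Azure.AI.OpenAI`, `Azure.Identity`, and `Microsoft.Extensions.AI.OpenAI` packages, the client setup might look like the following sketch (the endpoint and deployment name are placeholders, and the adapter method name can vary across preview versions):

```csharp
using Azure.AI.OpenAI;
using Azure.Identity;
using Microsoft.Extensions.AI;

// DefaultAzureCredential resolves to the app's managed identity
// when the app runs in Azure.
IChatClient chatClient =
    new AzureOpenAIClient(
        new Uri("https://<your-resource>.openai.azure.com"),
        new DefaultAzureCredential())
    .GetChatClient("<your-deployment>")
    .AsChatClient();
```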
diff --git a/docs/ai/index.yml b/docs/ai/index.yml
index 4391ec5e7d6b7..b9f0766b43c43 100644
--- a/docs/ai/index.yml
+++ b/docs/ai/index.yml
@@ -27,6 +27,8 @@ landingContent:
         url: dotnet-ai-overview.md
       - text: Connect to and prompt an AI model
         url: quickstarts/prompt-model.md
+      - text: Microsoft.Extensions.AI libraries
+        url: microsoft-extensions-ai.md
       - text: Build an Azure AI chat app
         url: quickstarts/get-started-openai.md
       - text: Summarize text using an Azure OpenAI chat app
diff --git a/docs/ai/microsoft-extensions-ai.md b/docs/ai/microsoft-extensions-ai.md
new file mode 100644
index 0000000000000..c54976d0d0a54
--- /dev/null
+++ b/docs/ai/microsoft-extensions-ai.md
@@ -0,0 +1,268 @@
+---
+title: Microsoft.Extensions.AI libraries (Preview)
+description: Learn how to use the Microsoft.Extensions.AI libraries to integrate and interact with various AI services in your .NET applications.
+author: IEvangelist
+ms.author: dapine
+ms.date: 04/29/2025
+---
+
+# Microsoft.Extensions.AI libraries (Preview)
+
+.NET developers need a way to integrate and interact with a growing variety of artificial intelligence (AI) services in their apps. The `Microsoft.Extensions.AI` libraries provide a unified approach for representing generative AI components, and enable seamless integration and interoperability with various AI services. This article introduces the libraries and provides in-depth usage examples to help you get started.
+
+## The packages
+
+The [📦 Microsoft.Extensions.AI.Abstractions](https://www.nuget.org/packages/Microsoft.Extensions.AI.Abstractions) package provides the core exchange types: `IChatClient` and `IEmbeddingGenerator<TInput,TEmbedding>`. Any .NET library that provides an AI client can implement the `IChatClient` interface to enable seamless integration with consuming code.
+
+The [📦 Microsoft.Extensions.AI](https://www.nuget.org/packages/Microsoft.Extensions.AI) package has an implicit dependency on the `Microsoft.Extensions.AI.Abstractions` package. This package enables you to easily integrate components such as telemetry and caching into your applications using familiar dependency injection and middleware patterns. For example, it provides the `UseOpenTelemetry` extension method, which adds OpenTelemetry support to the chat client pipeline.
+
+## Which package to reference
+
+Libraries that provide implementations of the abstractions typically reference only `Microsoft.Extensions.AI.Abstractions`.
+
+To also have access to higher-level utilities for working with generative AI components, reference the `Microsoft.Extensions.AI` package instead (which itself references `Microsoft.Extensions.AI.Abstractions`). Most consuming applications and services should reference the `Microsoft.Extensions.AI` package along with one or more libraries that provide concrete implementations of the abstractions.
+
+## Install the package
+
+For information about how to install NuGet packages, see [dotnet package add](../core/tools/dotnet-package-add.md) or [Manage package dependencies in .NET applications](../core/tools/dependencies.md). 
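+For example, to add the `Microsoft.Extensions.AI` package with the .NET CLI (the `--prerelease` flag is required while the libraries are in preview):
+
+```dotnetcli
+dotnet add package Microsoft.Extensions.AI --prerelease
+```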
+## Usage examples
+
+The following subsections show specific [IChatClient](#the-ichatclient-interface) usage examples:
+
+- [Request a chat response](#request-a-chat-response)
+- [Request a streaming chat response](#request-a-streaming-chat-response)
+- [Tool calling](#tool-calling)
+- [Cache responses](#cache-responses)
+- [Use telemetry](#use-telemetry)
+- [Provide options](#provide-options)
+- [Functionality pipelines](#functionality-pipelines)
+- [Custom `IChatClient` middleware](#custom-ichatclient-middleware)
+- [Dependency injection](#dependency-injection)
+- [Stateless vs. stateful clients](#stateless-vs-stateful-clients)
+
+The following sections show specific [IEmbeddingGenerator](#the-iembeddinggenerator-interface) usage examples:
+
+- [Sample implementation](#sample-implementation)
+- [Create embeddings](#create-embeddings)
+- [Pipelines of functionality](#pipelines-of-functionality)
+
+### The `IChatClient` interface
+
+The `IChatClient` interface defines a client abstraction responsible for interacting with AI services that provide chat capabilities. It includes methods for sending and receiving messages with multi-modal content (such as text, images, and audio), either as a complete set or streamed incrementally. Additionally, it allows for retrieving strongly typed services provided by the client or its underlying services.
+
+.NET libraries that provide clients for language models and services can supply an implementation of the `IChatClient` interface. Any consumers of the interface are then able to interoperate seamlessly with these models and services via the abstractions.
+
+#### Request a chat response
+
+With an instance of `IChatClient`, you can call the `GetResponseAsync` method to send a request and get a response. The request is composed of one or more messages, each of which is composed of one or more pieces of content. Accelerator methods exist to simplify common cases, such as constructing a request for a single piece of text content.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI/Program.cs":::
+
+The core `IChatClient.GetResponseAsync` method accepts a list of messages. This list represents the history of all messages that are part of the conversation.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.GetResponseAsyncArgs/Program.cs":::
+
+The `ChatResponse` that's returned from `GetResponseAsync` exposes a list of `ChatMessage` instances that represent one or more messages generated as part of the operation. In common cases, there is only one response message, but in some situations, there can be multiple messages. The message list is ordered, such that the last message in the list represents the final message in response to the request. To provide all of those response messages back to the service in a subsequent request, you can add the messages from the response back into the messages list.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/Program.cs" id="Snippet1":::
+
+#### Request a streaming chat response
+
+The inputs to `GetStreamingResponseAsync` are identical to those of `GetResponseAsync`. However, rather than returning the complete response as part of a `ChatResponse` object, the method returns an `IAsyncEnumerable<T>` where `T` is `ChatResponseUpdate`, providing a stream of updates that collectively form the single response.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.GetStreamingResponseAsync/Program.cs" id="Snippet1":::
+
+> [!TIP]
+> Streaming APIs are nearly synonymous with AI user experiences. 
+
+As with `GetResponseAsync`, you can add the updates from `GetStreamingResponseAsync` back into the messages list. As the updates are individual pieces of a response, you can use helpers like `ToChatResponse` to compose one or more updates back into a single `ChatResponse` instance.
+
+Helpers like `AddMessages` compose a `ChatResponse` and then extract the composed messages from the response and add them to a list.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/Program.cs" id="Snippet2":::
+
+#### Tool calling
+
+Some models and services support _tool calling_. To gather additional information, you can configure the `ChatOptions` with information about tools (usually .NET methods) that the model can request the client to invoke. Instead of sending a final response, the model requests a function invocation with specific arguments. The client then invokes the function and sends the results back to the model with the conversation history. The `Microsoft.Extensions.AI` library includes abstractions for various message content types, including function call requests and results. While `IChatClient` consumers can interact with this content directly, `Microsoft.Extensions.AI` automates these interactions. It provides the following types:
+
+- `AIFunction`: Represents a function that can be described to an AI model and invoked.
+- `AIFunctionFactory`: Provides factory methods for creating `AIFunction` instances that represent .NET methods.
+- `FunctionInvokingChatClient`: Wraps an `IChatClient` to add automatic function-invocation capabilities.
+
+The following example demonstrates function invocation with a randomized weather function (this example depends on the [📦 Microsoft.Extensions.AI.Ollama](https://www.nuget.org/packages/Microsoft.Extensions.AI.Ollama) NuGet package):
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.ToolCalling/Program.cs":::
+
+The preceding code:
+
+- Defines a function named `GetCurrentWeather` that returns a random weather forecast.
+- Instantiates a `ChatClientBuilder` with an `OllamaChatClient` and configures it to use function invocation.
+- Calls `GetStreamingResponseAsync` on the client, passing a prompt and a list of tools that includes a function created with `AIFunctionFactory.Create`.
+- Iterates over the response, printing each update to the console.
+
+#### Cache responses
+
+If you're familiar with [Caching in .NET](caching.md), it's good to know that `Microsoft.Extensions.AI` provides other such delegating `IChatClient` implementations. The `DistributedCachingChatClient` is an `IChatClient` that layers caching around another arbitrary `IChatClient` instance. When a novel chat history is submitted to the `DistributedCachingChatClient`, it forwards it to the underlying client and then caches the response before sending it back to the consumer. The next time the same history is submitted and a cached response can be found in the cache, the `DistributedCachingChatClient` returns the cached response rather than forwarding the request along the pipeline.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/Program.cs":::
+
+This example depends on the [📦 Microsoft.Extensions.Caching.Memory](https://www.nuget.org/packages/Microsoft.Extensions.Caching.Memory) NuGet package. For more information, see [Caching in .NET](caching.md).
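+The same layering works over any existing client. In this sketch, `innerClient` stands in for whatever `IChatClient` you already have:
+
+```csharp
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.Caching.Distributed;
+using Microsoft.Extensions.Caching.Memory;
+using Microsoft.Extensions.Options;
+
+IChatClient cachedClient = innerClient
+    .AsBuilder()
+    .UseDistributedCache(new MemoryDistributedCache(
+        Options.Create(new MemoryDistributedCacheOptions())))
+    .Build();
+
+// The first call reaches the underlying client; the second identical
+// prompt is served from the cache without another model round trip.
+Console.WriteLine(await cachedClient.GetResponseAsync("What is AI?"));
+Console.WriteLine(await cachedClient.GetResponseAsync("What is AI?"));
+```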
+
+#### Use telemetry
+
+Another example of a delegating chat client is the `OpenTelemetryChatClient`. This implementation adheres to the [OpenTelemetry Semantic Conventions for Generative AI systems](https://opentelemetry.io/docs/specs/semconv/gen-ai/). Similar to other `IChatClient` delegators, it layers metrics and spans around any underlying `IChatClient` implementation, providing enhanced observability.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.UseTelemetry/Program.cs":::
+
+(The preceding example depends on the [📦 OpenTelemetry.Exporter.Console](https://www.nuget.org/packages/OpenTelemetry.Exporter.Console) NuGet package.)
+
+Alternatively, the `LoggingChatClient` and corresponding `UseLogging` method provide a simple way to write log entries to an `ILogger` for every request and response.
+
+#### Provide options
+
+Every call to `GetResponseAsync` or `GetStreamingResponseAsync` can optionally supply a `ChatOptions` instance containing additional parameters for the operation. The most common parameters among AI models and services show up as strongly typed properties on the `ChatOptions` type, such as `ChatOptions.Temperature`. Other parameters can be supplied by name in a weakly typed manner via the `ChatOptions.AdditionalProperties` dictionary.
+
+You can also specify options when building an `IChatClient` with the fluent API by chaining a call to the `ConfigureOptions` extension method. This delegating client wraps another client and invokes the supplied delegate to populate a `ChatOptions` instance for every call. For example, to ensure that the `ChatOptions.ModelId` property defaults to a particular model name, you can use code like the following:
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.ProvideOptions/Program.cs":::
+
+#### Functionality pipelines
+
+`IChatClient` instances can be layered to create a pipeline of components that each add additional functionality. These components can come from `Microsoft.Extensions.AI`, other NuGet packages, or custom implementations. This approach allows you to augment the behavior of the `IChatClient` in various ways to meet your specific needs. Consider the following code snippet that layers a distributed cache, function invocation, and OpenTelemetry tracing around a sample chat client:
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.FunctionalityPipelines/Program.cs" id="Snippet1":::
+
+#### Custom `IChatClient` middleware
+
+To add additional functionality, you can implement `IChatClient` directly or use the `DelegatingChatClient` class. This class serves as a base for creating chat clients that delegate operations to another `IChatClient` instance. It simplifies chaining multiple clients, allowing calls to pass through to an underlying client.
+
+The `DelegatingChatClient` class provides default implementations for methods like `GetResponseAsync`, `GetStreamingResponseAsync`, and `Dispose`, which forward calls to the inner client. A derived class can then override only the methods it needs to augment the behavior, while delegating other calls to the base implementation. This approach is useful for creating flexible and modular chat clients that are easy to extend and compose.
+
+The following is an example class derived from `DelegatingChatClient` that uses the [System.Threading.RateLimiting](https://www.nuget.org/packages/System.Threading.RateLimiting) library to provide rate-limiting functionality.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs":::
+
+As with other `IChatClient` implementations, the `RateLimitingChatClient` can be composed:
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/Program.cs":::
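+Rate limiting is just one use of this pattern. As another sketch (a hypothetical class, not one of the snippet projects), a timing middleware only needs to override a single method and can let the base class forward everything else:
+
+```csharp
+using System.Diagnostics;
+using Microsoft.Extensions.AI;
+
+public sealed class TimingChatClient(IChatClient innerClient)
+    : DelegatingChatClient(innerClient)
+{
+    public override async Task<ChatResponse> GetResponseAsync(
+        IEnumerable<ChatMessage> messages,
+        ChatOptions? options = null,
+        CancellationToken cancellationToken = default)
+    {
+        var stopwatch = Stopwatch.StartNew();
+        try
+        {
+            // Delegate the actual work to the inner client.
+            return await base.GetResponseAsync(messages, options, cancellationToken)
+                .ConfigureAwait(false);
+        }
+        finally
+        {
+            Console.WriteLine($"GetResponseAsync took {stopwatch.ElapsedMilliseconds} ms.");
+        }
+    }
+}
+```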
+
+To simplify the composition of such components with others, component authors should create a `Use*` extension method for registering the component into a pipeline. For example, consider the following extension method:
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.cs" id="one":::
+
+Such extensions can also query for relevant services from the DI container; the `IServiceProvider` used by the pipeline is passed in as an optional parameter:
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs" id="two":::
+
+Now it's easy for the consumer to use this in their pipeline, for example:
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/Program.cs" id="SnippetUse":::
+
+The previous extension methods demonstrate using a `Use` method on `ChatClientBuilder`. `ChatClientBuilder` also provides overloads that make it easier to write such delegating handlers.
+
+For example, in the earlier `RateLimitingChatClient` example, the overrides of `GetResponseAsync` and `GetStreamingResponseAsync` only need to do work before and after delegating to the next client in the pipeline. To achieve the same thing without writing a custom class, you can use an overload of `Use` that accepts a delegate that's used for both `GetResponseAsync` and `GetStreamingResponseAsync`, reducing the boilerplate required:
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.UseExample/Program.cs":::
+
+For scenarios where you need a different implementation for `GetResponseAsync` and `GetStreamingResponseAsync` in order to handle their unique return types, you can use the overload that accepts a delegate for each.
+
+#### Dependency injection
+
+`IChatClient` implementations will often be provided to an application via [dependency injection (DI)](dependency-injection.md). In this example, an `IDistributedCache` is added into the DI container, as is an `IChatClient`. The registration for the `IChatClient` uses a builder that creates a pipeline containing a caching client (which then uses an `IDistributedCache` retrieved from DI) and the sample client. The injected `IChatClient` can be retrieved and used elsewhere in the app.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/Program.cs":::
+
+What instance and configuration is injected can differ based on the current needs of the application, and multiple pipelines can be injected with different keys.
+
+#### Stateless vs. stateful clients
+
+_Stateless_ services require all relevant conversation history to be sent back on every request. In contrast, _stateful_ services keep track of the history and require only additional messages to be sent with a request. The `IChatClient` interface is designed to handle both stateless and stateful AI services.
+
+When working with a stateless service, callers maintain a list of all messages. They add in all received response messages and provide the list back on subsequent interactions.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/Program.cs" id="Snippet1":::
+
+For stateful services, you might already know the identifier used for the relevant conversation. You can put that identifier into `ChatOptions.ChatThreadId`. Usage then follows the same pattern, except there's no need to maintain a history manually.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/Program.cs" id="Snippet2":::
+
+Some services might support automatically creating a thread ID for a request that doesn't have one.
In such cases, you can transfer the `ChatResponse.ChatThreadId` over to the `ChatOptions.ChatThreadId` for subsequent requests. For example:
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/Program.cs" id="Snippet3":::
+
+If you don't know ahead of time whether the service is stateless or stateful, you can check the response `ChatThreadId` and act based on its value. If it's set, then that value is propagated to the options and the history is cleared so that the same history isn't resent. If the response `ChatThreadId` isn't set, then the response message is added to the history so that it's sent back to the service on the next turn.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/Program.cs" id="Snippet4":::
+
+### The `IEmbeddingGenerator` interface
+
+The `IEmbeddingGenerator<TInput, TEmbedding>` interface represents a generic generator of embeddings. Here, `TInput` is the type of input values being embedded, and `TEmbedding` is the type of generated embedding, which inherits from the `Embedding` class.
+
+The `Embedding` class serves as a base class for embeddings generated by an `IEmbeddingGenerator`. It's designed to store and manage the metadata and data associated with embeddings. Derived types, like `Embedding<float>`, provide the concrete embedding vector data. For example, an `Embedding<float>` exposes a `ReadOnlyMemory<float> Vector { get; }` property for access to its embedding data.
+
+The `IEmbeddingGenerator` interface defines a method to asynchronously generate embeddings for a collection of input values, with optional configuration and cancellation support. It also provides metadata describing the generator and allows for the retrieval of strongly typed services that can be provided by the generator or its underlying services.
+
+#### Sample implementation
+
+The following sample implementation of `IEmbeddingGenerator` shows the general structure.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/AI.Shared/SampleEmbeddingGenerator.cs":::
+
+The preceding code:
+
+- Defines a class named `SampleEmbeddingGenerator` that implements the `IEmbeddingGenerator<string, Embedding<float>>` interface.
+- Has a primary constructor that accepts an endpoint and model ID, which are used to identify the generator.
+- Implements the `GenerateAsync` method to generate embeddings for a collection of input values.
+
+The sample implementation just generates random embedding vectors. You can find actual concrete implementations in the following packages:
+
+- [📦 Microsoft.Extensions.AI.OpenAI](https://www.nuget.org/packages/Microsoft.Extensions.AI.OpenAI)
+- [📦 Microsoft.Extensions.AI.Ollama](https://www.nuget.org/packages/Microsoft.Extensions.AI.Ollama)
+
+#### Create embeddings
+
+The primary operation performed with an `IEmbeddingGenerator<TInput, TEmbedding>` is embedding generation, which is accomplished with its `GenerateAsync` method.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.CreateEmbeddings/Program.cs" id="Snippet1":::
+
+Accelerator extension methods also exist to simplify common cases, such as generating an embedding vector from a single input.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.CreateEmbeddings/Program.cs" id="Snippet2":::
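+Generated vectors can then be compared for semantic similarity. This sketch reuses the `generator` from the preceding snippets and assumes a reference to the System.Numerics.Tensors NuGet package:
+
+```csharp
+using System.Numerics.Tensors;
+using Microsoft.Extensions.AI;
+
+// Generate a vector for each input, then compare them.
+ReadOnlyMemory<float> first = await generator.GenerateVectorAsync("What is AI?");
+ReadOnlyMemory<float> second = await generator.GenerateVectorAsync("What is artificial intelligence?");
+
+// Cosine similarity near 1 indicates the inputs are semantically similar.
+Console.WriteLine(TensorPrimitives.CosineSimilarity(first.Span, second.Span));
+```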
+
+#### Pipelines of functionality
+
+As with `IChatClient`, `IEmbeddingGenerator` implementations can be layered. `Microsoft.Extensions.AI` provides delegating `IEmbeddingGenerator` implementations for caching and telemetry.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs":::
+
+`IEmbeddingGenerator` also enables building custom middleware that extends the functionality of an `IEmbeddingGenerator`. The `DelegatingEmbeddingGenerator<TInput, TEmbedding>` class is an implementation of the `IEmbeddingGenerator<TInput, TEmbedding>` interface that serves as a base class for creating embedding generators that delegate their operations to another `IEmbeddingGenerator` instance. It allows for chaining multiple generators in any order, passing calls through to an underlying generator. The class provides default implementations for methods such as `GenerateAsync` and `Dispose`, which forward the calls to the inner generator instance, enabling flexible and modular embedding generation.
+
+The following is an example implementation of such a delegating embedding generator that rate-limits embedding generation requests:
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/AI.Shared/RateLimitingEmbeddingGenerator.cs":::
+
+This can then be layered around an arbitrary `IEmbeddingGenerator<string, Embedding<float>>` to rate-limit all embedding generation operations.
+
+:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs":::
+
+In this way, the `RateLimitingEmbeddingGenerator` can be composed with other `IEmbeddingGenerator<string, Embedding<float>>` instances to provide rate-limiting functionality.
+
+## Build with Microsoft.Extensions.AI
+
+You can start building with `Microsoft.Extensions.AI` in the following ways:
+
+- **Library developers**: If you own libraries that provide clients for AI services, consider implementing the interfaces in your libraries (a minimal sketch follows this list). This allows users to easily integrate your NuGet package via the abstractions.
+- **Service consumers**: If you're developing libraries that consume AI services, use the abstractions instead of hardcoding to a specific AI service. This approach gives your consumers the flexibility to choose their preferred service.
+- **Application developers**: Use the abstractions to simplify integration into your apps. This enables portability across models and services, facilitates testing and mocking, leverages middleware provided by the ecosystem, and maintains a consistent API throughout your app, even if you use different services in different parts of your application.
+- **Ecosystem contributors**: If you're interested in contributing to the ecosystem, consider writing custom middleware components.
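+For library developers, the shape of a minimal `IChatClient` implementation looks roughly like the following. This `EchoChatClient` is a hypothetical toy that returns a canned reply; a real client would forward the messages to your service (see the Microsoft.Extensions.AI.OpenAI and Microsoft.Extensions.AI.Ollama packages for production examples):
+
+```csharp
+using System.Runtime.CompilerServices;
+using Microsoft.Extensions.AI;
+
+public sealed class EchoChatClient : IChatClient
+{
+    public Task<ChatResponse> GetResponseAsync(
+        IEnumerable<ChatMessage> messages,
+        ChatOptions? options = null,
+        CancellationToken cancellationToken = default) =>
+        Task.FromResult(new ChatResponse(
+            new ChatMessage(ChatRole.Assistant, "Hello from EchoChatClient!")));
+
+    public async IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(
+        IEnumerable<ChatMessage> messages,
+        ChatOptions? options = null,
+        [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        await Task.Yield(); // Simulate asynchrony; a real client would stream from a service.
+        yield return new ChatResponseUpdate(ChatRole.Assistant, "Hello from EchoChatClient!");
+    }
+
+    public object? GetService(Type serviceType, object? serviceKey = null) =>
+        serviceKey is null && serviceType.IsInstanceOfType(this) ? this : null;
+
+    public void Dispose() { }
+}
+```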
+For more samples, see the [dotnet/ai-samples](https://aka.ms/meai-samples) GitHub repository. For an end-to-end sample, see [eShopSupport](https://github.com/dotnet/eShopSupport).
+
+## See also
+
+- [Build an AI chat app with .NET](../../ai/quickstarts/build-chat-app.md)
+- [.NET dependency injection](dependency-injection.md)
+- [Rate limit an HTTP handler in .NET](http-ratelimiter.md)
+- [.NET generic host](generic-host.md)
+- [Caching in .NET](caching.md)
diff --git a/docs/core/extensions/snippets/ai/AI.Shared/AI.Shared.csproj b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/AI.Shared.csproj
similarity index 100%
rename from docs/core/extensions/snippets/ai/AI.Shared/AI.Shared.csproj
rename to docs/ai/snippets/microsoft-extensions-ai/AI.Shared/AI.Shared.csproj
diff --git a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs
new file mode 100644
index 0000000000000..93d367b006ab8
--- /dev/null
+++ b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs
@@ -0,0 +1,34 @@
+using Microsoft.Extensions.AI;
+using System.Runtime.CompilerServices;
+using System.Threading.RateLimiting;
+
+public sealed class RateLimitingChatClient(IChatClient innerClient, RateLimiter rateLimiter) : DelegatingChatClient(innerClient)
+{
+    public override async Task<ChatResponse> GetResponseAsync(
+        IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellationToken = default)
+    {
+        using var lease = await rateLimiter.AcquireAsync(permitCount: 1, cancellationToken).ConfigureAwait(false);
+        if (!lease.IsAcquired)
+            throw new InvalidOperationException("Unable to acquire lease.");
+
+        return await base.GetResponseAsync(messages, options, cancellationToken).ConfigureAwait(false);
+    }
+
+    public override async IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(
+        IEnumerable<ChatMessage> messages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        using var lease = await rateLimiter.AcquireAsync(permitCount: 1, cancellationToken).ConfigureAwait(false);
+        if (!lease.IsAcquired)
+            throw new InvalidOperationException("Unable to acquire lease.");
+
+        await foreach (var update in base.GetStreamingResponseAsync(messages, options, cancellationToken).ConfigureAwait(false))
+            yield return update;
+    }
+
+    protected override void Dispose(bool disposing)
+    {
+        if (disposing)
+            rateLimiter.Dispose();
+
+        base.Dispose(disposing);
+    }
+}
diff --git a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs
new file mode 100644
index 0000000000000..a56afd5c74a4d
--- /dev/null
+++ b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs
@@ -0,0 +1,13 @@
+namespace Example.Two;
+
+// <two>
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.DependencyInjection;
+using System.Threading.RateLimiting;
+
+public static class RateLimitingChatClientExtensions
+{
+    public static ChatClientBuilder UseRateLimiting(this ChatClientBuilder builder, RateLimiter? rateLimiter = null) =>
+        builder.Use((innerClient, services) =>
+            new RateLimitingChatClient(innerClient, rateLimiter ?? services.GetRequiredService<RateLimiter>()));
+}
+// </two>
diff --git a/docs/core/extensions/snippets/ai/AI.Shared/RateLimitingChatClientExtensions.cs b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.cs
similarity index 68%
rename from docs/core/extensions/snippets/ai/AI.Shared/RateLimitingChatClientExtensions.cs
rename to docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.cs
index 5f0fe5765b193..e9f3a865c2c31 100644
--- a/docs/core/extensions/snippets/ai/AI.Shared/RateLimitingChatClientExtensions.cs
+++ b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.cs
@@ -6,8 +6,7 @@
 public static class RateLimitingChatClientExtensions
 {
-    public static ChatClientBuilder UseRateLimiting(
-        this ChatClientBuilder builder, RateLimiter rateLimiter) =>
+    public static ChatClientBuilder UseRateLimiting(this ChatClientBuilder builder, RateLimiter rateLimiter) =>
         builder.Use(innerClient => new RateLimitingChatClient(innerClient, rateLimiter));
 }
 // </one>
diff --git a/docs/core/extensions/snippets/ai/AI.Shared/RateLimitingEmbeddingGenerator.cs b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingEmbeddingGenerator.cs
similarity index 100%
rename from docs/core/extensions/snippets/ai/AI.Shared/RateLimitingEmbeddingGenerator.cs
rename to docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingEmbeddingGenerator.cs
diff --git a/docs/core/extensions/snippets/ai/AI.Shared/SampleChatClient.cs b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/SampleChatClient.cs
similarity index 100%
rename from docs/core/extensions/snippets/ai/AI.Shared/SampleChatClient.cs
rename to docs/ai/snippets/microsoft-extensions-ai/AI.Shared/SampleChatClient.cs
diff --git a/docs/core/extensions/snippets/ai/AI.Shared/SampleEmbeddingGenerator.cs b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/SampleEmbeddingGenerator.cs
similarity index 50%
rename from docs/core/extensions/snippets/ai/AI.Shared/SampleEmbeddingGenerator.cs
rename to docs/ai/snippets/microsoft-extensions-ai/AI.Shared/SampleEmbeddingGenerator.cs
index 8cf53982d2cb1..420a2de9f52ab 100644
--- a/docs/core/extensions/snippets/ai/AI.Shared/SampleEmbeddingGenerator.cs
+++ b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/SampleEmbeddingGenerator.cs
@@ -4,32 +4,29 @@
 public sealed class SampleEmbeddingGenerator(
     Uri endpoint, string modelId) : IEmbeddingGenerator<string, Embedding<float>>
 {
-    public EmbeddingGeneratorMetadata Metadata { get; } =
-        new(nameof(SampleEmbeddingGenerator), endpoint, modelId);
+    private readonly EmbeddingGeneratorMetadata _metadata =
+        new("SampleEmbeddingGenerator", endpoint, modelId);
 
     public async Task<GeneratedEmbeddings<Embedding<float>>> GenerateAsync(
         IEnumerable<string> values,
         EmbeddingGenerationOptions? options = null,
         CancellationToken cancellationToken = default)
     {
-        // Simulate some async operation
+        // Simulate some async operation.
         await Task.Delay(100, cancellationToken);
 
-        // Create random embeddings
-        return
-        [
-            .. from value in values
+        // Create random embeddings.
+        return new GeneratedEmbeddings<Embedding<float>>(
+            from value in values
             select new Embedding<float>(
-                Enumerable.Range(0, 384)
-                    .Select(_ => Random.Shared.NextSingle())
-                    .ToArray())
-        ];
+                Enumerable.Range(0, 384).Select(_ => Random.Shared.NextSingle()).ToArray()));
     }
 
-    public object? GetService(Type serviceType, object? serviceKey) => this;
-
-    public TService? GetService<TService>(object? key = null)
-        where TService : class => this as TService;
+    public object? GetService(Type serviceType, object? serviceKey) =>
+        serviceKey is not null ? null :
+        serviceType == typeof(EmbeddingGeneratorMetadata) ? _metadata :
+        serviceType?.IsInstanceOfType(this) is true ? this :
+        null;
 
     void IDisposable.Dispose() { }
 }
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.UseExample/ConsoleAI.UseExample.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/ConsoleAI.AddMessages.csproj
similarity index 89%
rename from docs/core/extensions/snippets/ai/ConsoleAI.UseExample/ConsoleAI.UseExample.csproj
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/ConsoleAI.AddMessages.csproj
index b615dd1b868c2..821fdd5c951db 100644
--- a/docs/core/extensions/snippets/ai/ConsoleAI.UseExample/ConsoleAI.UseExample.csproj
+++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/ConsoleAI.AddMessages.csproj
@@ -1,4 +1,4 @@ - + Exe
diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/Program.cs
new file mode 100644
index 0000000000000..dc32df78e9359
--- /dev/null
+++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/Program.cs
@@ -0,0 +1,34 @@
+using Microsoft.Extensions.AI;
+
+IChatClient client = new SampleChatClient(
+    new Uri("http://coolsite.ai"), "target-ai-model");
+
+// <Snippet1>
+List<ChatMessage> history = [];
+while (true)
+{
+    Console.Write("Q: ");
+    history.Add(new(ChatRole.User, Console.ReadLine()));
+
+    var response = await client.GetResponseAsync(history);
+    Console.WriteLine(response);
+
+    history.AddMessages(response);
+}
+// </Snippet1>
+
+// <Snippet2>
+List<ChatMessage> history = [];
+while (true)
+{
+    Console.Write("Q: ");
+    history.Add(new(ChatRole.User, Console.ReadLine()));
+
+    List<ChatResponseUpdate> updates = [];
+    await foreach (var update in client.GetStreamingResponseAsync(history))
+    {
+        Console.Write(update);
+        updates.Add(update);
+    }
+    Console.WriteLine();
+
+    history.AddMessages(updates);
+}
+// </Snippet2>
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.CacheResponses/ConsoleAI.CacheResponses.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/ConsoleAI.CacheResponses.csproj
similarity index 100%
rename from docs/core/extensions/snippets/ai/ConsoleAI.CacheResponses/ConsoleAI.CacheResponses.csproj
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/ConsoleAI.CacheResponses.csproj
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.CacheResponses/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/Program.cs
similarity index 85%
rename from docs/core/extensions/snippets/ai/ConsoleAI.CacheResponses/Program.cs
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/Program.cs
index eac78def1b9e0..6e15bf3da0788 100644
--- a/docs/core/extensions/snippets/ai/ConsoleAI.CacheResponses/Program.cs
+++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/Program.cs
@@ -3,8 +3,7 @@
 using Microsoft.Extensions.Caching.Memory;
 using Microsoft.Extensions.Options;
 
-var sampleChatClient = new SampleChatClient(
-    new Uri("http://coolsite.ai"), "target-ai-model");
+var sampleChatClient = new OllamaChatClient(new Uri("http://localhost:11434"), "llama3.1");
 
 IChatClient client = new ChatClientBuilder(sampleChatClient)
     .UseDistributedCache(new MemoryDistributedCache(
@@ -19,6 +18,5 @@
     {
         Console.Write(update);
     }
-    Console.WriteLine();
 }
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.ConsumeClientMiddleware/ConsoleAI.ConsumeClientMiddleware.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/ConsoleAI.ConsumeClientMiddleware.csproj
similarity index 100%
rename from docs/core/extensions/snippets/ai/ConsoleAI.ConsumeClientMiddleware/ConsoleAI.ConsumeClientMiddleware.csproj
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/ConsoleAI.ConsumeClientMiddleware.csproj
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.ConsumeClientMiddleware/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/Program.cs
similarity index 50%
rename from docs/core/extensions/snippets/ai/ConsoleAI.ConsumeClientMiddleware/Program.cs
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/Program.cs
index 082ee7821cdc2..394738caa9ca5 100644
--- a/docs/core/extensions/snippets/ai/ConsoleAI.ConsumeClientMiddleware/Program.cs
+++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/Program.cs
@@ -5,17 +5,14 @@
 using Microsoft.Extensions.DependencyInjection;
 using Microsoft.Extensions.Hosting;
 
-var builder = Host.CreateApplicationBuilder(args);
-
-builder.Services.AddChatClient(services =>
-    new SampleChatClient(new Uri("http://localhost"), "test")
-        .AsBuilder()
-        .UseDistributedCache()
-        .UseRateLimiting()
-        .UseOpenTelemetry()
-        .Build(services));
-
-using var app = builder.Build();
+// <SnippetUse>
+var client = new OllamaChatClient(new Uri("http://localhost:11434"), "llama3.1")
+    .AsBuilder()
+    .UseDistributedCache()
+    .UseRateLimiting()
+    .UseOpenTelemetry()
+    .Build(services);
+// </SnippetUse>
 
 // Elsewhere in the app
 var chatClient = app.Services.GetRequiredService<IChatClient>();
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.ConsumeRateLimitingEmbedding/ConsoleAI.ConsumeRateLimitingEmbedding.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeRateLimitingEmbedding/ConsoleAI.ConsumeRateLimitingEmbedding.csproj
similarity index 100%
rename from docs/core/extensions/snippets/ai/ConsoleAI.ConsumeRateLimitingEmbedding/ConsoleAI.ConsumeRateLimitingEmbedding.csproj
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeRateLimitingEmbedding/ConsoleAI.ConsumeRateLimitingEmbedding.csproj
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs
similarity index 100%
rename from docs/core/extensions/snippets/ai/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.CreateEmbeddings/ConsoleAI.CreateEmbeddings.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CreateEmbeddings/ConsoleAI.CreateEmbeddings.csproj
similarity index 100%
rename from docs/core/extensions/snippets/ai/ConsoleAI.CreateEmbeddings/ConsoleAI.CreateEmbeddings.csproj
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CreateEmbeddings/ConsoleAI.CreateEmbeddings.csproj
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.CreateEmbeddings/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CreateEmbeddings/Program.cs
similarity index 65%
rename from docs/core/extensions/snippets/ai/ConsoleAI.CreateEmbeddings/Program.cs
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CreateEmbeddings/Program.cs
index c3d8ece9410fb..e9b7539cb43ae 100644
--- a/docs/core/extensions/snippets/ai/ConsoleAI.CreateEmbeddings/Program.cs
+++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CreateEmbeddings/Program.cs
@@ -1,4 +1,5 @@
-using Microsoft.Extensions.AI;
+// <Snippet1>
+using Microsoft.Extensions.AI;
 
 IEmbeddingGenerator<string, Embedding<float>> generator =
     new SampleEmbeddingGenerator(
@@ -8,3 +9,8 @@
 {
     Console.WriteLine(string.Join(", ", embedding.Vector.ToArray()));
 }
+// </Snippet1>
+
+// <Snippet2>
+ReadOnlyMemory<float> vector = await generator.GenerateVectorAsync("What is AI?");
+// </Snippet2>
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.CustomClientMiddle/ConsoleAI.CustomClientMiddle.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/ConsoleAI.CustomClientMiddle.csproj
similarity index 100%
rename from docs/core/extensions/snippets/ai/ConsoleAI.CustomClientMiddle/ConsoleAI.CustomClientMiddle.csproj
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/ConsoleAI.CustomClientMiddle.csproj
diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/Program.cs
new file mode 100644
index 0000000000000..bc71d9bb9e897
--- /dev/null
+++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/Program.cs
@@ -0,0 +1,8 @@
+using Microsoft.Extensions.AI;
+using System.Threading.RateLimiting;
+
+var client = new RateLimitingChatClient(
+    new OllamaChatClient(new Uri("http://localhost:11434"), "llama3.1"),
+    new ConcurrencyLimiter(new() { PermitLimit = 1, QueueLimit = int.MaxValue }));
+
+Console.WriteLine(await client.GetResponseAsync("What color is the sky?"));
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.CustomEmbeddingsMiddle/ConsoleAI.CustomEmbeddingsMiddle.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomEmbeddingsMiddle/ConsoleAI.CustomEmbeddingsMiddle.csproj
similarity index 100%
rename from docs/core/extensions/snippets/ai/ConsoleAI.CustomEmbeddingsMiddle/ConsoleAI.CustomEmbeddingsMiddle.csproj
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomEmbeddingsMiddle/ConsoleAI.CustomEmbeddingsMiddle.csproj
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs
similarity index 94%
rename from docs/core/extensions/snippets/ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs
index d03272c2024ac..1c7437ecaba7b 100644
--- a/docs/core/extensions/snippets/ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs
+++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs
@@ -11,8 +11,8 @@
     .AddConsoleExporter()
     .Build();
 
-// Explore changing the order of the intermediate "Use" calls to see that impact
-// that has on what gets cached, traced, etc.
+// Explore changing the order of the intermediate "Use" calls to see
+// what impact that has on what gets cached and traced.
 IEmbeddingGenerator<string, Embedding<float>> generator =
     new EmbeddingGeneratorBuilder<string, Embedding<float>>(
         new SampleEmbeddingGenerator(new Uri("http://coolsite.ai"), "target-ai-model"))
     .UseDistributedCache(
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.DependencyInjection/ConsoleAI.DependencyInjection.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/ConsoleAI.DependencyInjection.csproj
similarity index 100%
rename from docs/core/extensions/snippets/ai/ConsoleAI.DependencyInjection/ConsoleAI.DependencyInjection.csproj
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/ConsoleAI.DependencyInjection.csproj
diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/Program.cs
new file mode 100644
index 0000000000000..67b58783b56fa
--- /dev/null
+++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/Program.cs
@@ -0,0 +1,14 @@
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Hosting;
+
+// App setup.
+var builder = Host.CreateApplicationBuilder();
+builder.Services.AddDistributedMemoryCache();
+builder.Services.AddChatClient(new OllamaChatClient(new Uri("http://localhost:11434"), "llama3.1"))
+    .UseDistributedCache();
+var host = builder.Build();
+
+// Elsewhere in the app.
+var chatClient = host.Services.GetRequiredService<IChatClient>();
+Console.WriteLine(await chatClient.GetResponseAsync("What is AI?"));
diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.FunctionalityPipelines/ConsoleAI.FunctionalityPipelines.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.FunctionalityPipelines/ConsoleAI.FunctionalityPipelines.csproj
similarity index 100%
rename from docs/core/extensions/snippets/ai/ConsoleAI.FunctionalityPipelines/ConsoleAI.FunctionalityPipelines.csproj
rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.FunctionalityPipelines/ConsoleAI.FunctionalityPipelines.csproj
diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.FunctionalityPipelines/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.FunctionalityPipelines/Program.cs
new file mode 100644
index 0000000000000..9d9c913d0d481
--- /dev/null
+++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.FunctionalityPipelines/Program.cs
@@ -0,0 +1,40 @@
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.Caching.Distributed;
+using Microsoft.Extensions.Caching.Memory;
+using Microsoft.Extensions.Options;
+using OpenTelemetry.Trace;
+
+// Configure OpenTelemetry exporter.
+var sourceName = Guid.NewGuid().ToString();
+var tracerProvider = OpenTelemetry.Sdk.CreateTracerProviderBuilder()
+    .AddSource(sourceName)
+    .AddConsoleExporter()
+    .Build();
+
+// <Snippet1>
+// Explore changing the order of the intermediate "Use" calls.
+IChatClient client = new ChatClientBuilder(new OllamaChatClient(new Uri("http://localhost:11434"), "llama3.1"))
+    .UseDistributedCache(new MemoryDistributedCache(Options.Create(new MemoryDistributedCacheOptions())))
+    .UseFunctionInvocation()
+    .UseOpenTelemetry(sourceName: sourceName, configure: c => c.EnableSensitiveData = true)
+    .Build();
+// </Snippet1>
+
+ChatOptions options = new()
+{
+    Tools = [AIFunctionFactory.Create(
+        () => Random.Shared.NextDouble() > 0.5 ? "It's sunny" : "It's raining",
"It's sunny" : "It's raining", + name: "GetCurrentWeather", + description: "Gets the current weather")] +}; + +for (int i = 0; i < 3; i++) +{ + List history = + [ + new ChatMessage(ChatRole.System, "You are a helpful AI assistant"), + new ChatMessage(ChatRole.User, "Do I need an umbrella?") + ]; + + Console.WriteLine(await client.GetResponseAsync(history, options)); +} diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.GetResponseAsyncArgs/ConsoleAI.GetResponseAsyncArgs.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetResponseAsyncArgs/ConsoleAI.GetResponseAsyncArgs.csproj similarity index 100% rename from docs/core/extensions/snippets/ai/ConsoleAI.GetResponseAsyncArgs/ConsoleAI.GetResponseAsyncArgs.csproj rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetResponseAsyncArgs/ConsoleAI.GetResponseAsyncArgs.csproj diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.GetResponseAsyncArgs/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetResponseAsyncArgs/Program.cs similarity index 100% rename from docs/core/extensions/snippets/ai/ConsoleAI.GetResponseAsyncArgs/Program.cs rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetResponseAsyncArgs/Program.cs diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.GetStreamingResponseAsync/ConsoleAI.GetStreamingResponseAsync.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetStreamingResponseAsync/ConsoleAI.GetStreamingResponseAsync.csproj similarity index 100% rename from docs/core/extensions/snippets/ai/ConsoleAI.GetStreamingResponseAsync/ConsoleAI.GetStreamingResponseAsync.csproj rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetStreamingResponseAsync/ConsoleAI.GetStreamingResponseAsync.csproj diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.GetStreamingResponseAsync/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetStreamingResponseAsync/Program.cs similarity index 89% rename from docs/core/extensions/snippets/ai/ConsoleAI.GetStreamingResponseAsync/Program.cs rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetStreamingResponseAsync/Program.cs index 67cc73d828867..37f80109796ce 100644 --- a/docs/core/extensions/snippets/ai/ConsoleAI.GetStreamingResponseAsync/Program.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetStreamingResponseAsync/Program.cs @@ -3,7 +3,9 @@ IChatClient client = new SampleChatClient( new Uri("http://coolsite.ai"), "target-ai-model"); +// await foreach (ChatResponseUpdate update in client.GetStreamingResponseAsync("What is AI?")) { Console.Write(update); } +// diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.ProvideOptions/ConsoleAI.ProvideOptions.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ProvideOptions/ConsoleAI.ProvideOptions.csproj similarity index 100% rename from docs/core/extensions/snippets/ai/ConsoleAI.ProvideOptions/ConsoleAI.ProvideOptions.csproj rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ProvideOptions/ConsoleAI.ProvideOptions.csproj diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ProvideOptions/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ProvideOptions/Program.cs new file mode 100644 index 0000000000000..bbf1bf5c385e2 --- /dev/null +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ProvideOptions/Program.cs @@ -0,0 +1,11 @@ +using Microsoft.Extensions.AI; + +IChatClient client = new OllamaChatClient(new Uri("http://localhost:11434")) + .AsBuilder() + .ConfigureOptions(options => options.ModelId 
??= "phi3") + .Build(); + +// Will request "phi3". +Console.WriteLine(await client.GetResponseAsync("What is AI?")); +// Will request "llama3.1". +Console.WriteLine(await client.GetResponseAsync("What is AI?", new() { ModelId = "llama3.1" })); diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/ConsoleAI.StatelessStateful.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/ConsoleAI.StatelessStateful.csproj new file mode 100644 index 0000000000000..64aeca66a5074 --- /dev/null +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/ConsoleAI.StatelessStateful.csproj @@ -0,0 +1,18 @@ + + + + Exe + net9.0 + enable + enable + + + + + + + + + + + diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/Program.cs new file mode 100644 index 0000000000000..2e50df5071158 --- /dev/null +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.StatelessStateful/Program.cs @@ -0,0 +1,66 @@ +using Microsoft.Extensions.AI; + +IChatClient client = new SampleChatClient( + new Uri("http://coolsite.ai"), "target-ai-model"); + +// +List history = []; +while (true) +{ + Console.Write("Q: "); + history.Add(new(ChatRole.User, Console.ReadLine())); + + var response = await client.GetResponseAsync(history); + Console.WriteLine(response); + + history.AddMessages(response); +} +// + +// +ChatOptions statefulOptions = new() { ChatThreadId = "my-conversation-id" }; +while (true) +{ + Console.Write("Q: "); + ChatMessage message = new(ChatRole.User, Console.ReadLine()); + + Console.WriteLine(await client.GetResponseAsync(message, statefulOptions)); +} +// + +// +ChatOptions options = new(); +while (true) +{ + Console.Write("Q: "); + ChatMessage message = new(ChatRole.User, Console.ReadLine()); + + ChatResponse response = await client.GetResponseAsync(message, options); + Console.WriteLine(response); + + options.ChatThreadId = response.ChatThreadId; +} +// + +// +List chatHistory = []; +ChatOptions chatOptions = new(); +while (true) +{ + Console.Write("Q: "); + chatHistory.Add(new(ChatRole.User, Console.ReadLine())); + + ChatResponse response = await client.GetResponseAsync(chatHistory); + Console.WriteLine(response); + + chatOptions.ChatThreadId = response.ChatThreadId; + if (response.ChatThreadId is not null) + { + chatHistory.Clear(); + } + else + { + chatHistory.AddMessages(response); + } +} +// diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.ToolCalling/ConsoleAI.ToolCalling.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ToolCalling/ConsoleAI.ToolCalling.csproj similarity index 100% rename from docs/core/extensions/snippets/ai/ConsoleAI.ToolCalling/ConsoleAI.ToolCalling.csproj rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ToolCalling/ConsoleAI.ToolCalling.csproj diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ToolCalling/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ToolCalling/Program.cs new file mode 100644 index 0000000000000..438cd7a4bd7dd --- /dev/null +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ToolCalling/Program.cs @@ -0,0 +1,16 @@ +using Microsoft.Extensions.AI; + +string GetCurrentWeather() => Random.Shared.NextDouble() > 0.5 ? 
"It's sunny" : "It's raining"; + +IChatClient client = new OllamaChatClient(new Uri("http://localhost:11434"), "llama3.1") + .AsBuilder() + .UseFunctionInvocation() + .Build(); + +ChatOptions options = new() { Tools = [AIFunctionFactory.Create(GetCurrentWeather)] }; + +var response = client.GetStreamingResponseAsync("Should I wear a rain coat?", options); +await foreach (var update in response) +{ + Console.Write(update); +} diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseExample/ConsoleAI.UseExample.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseExample/ConsoleAI.UseExample.csproj new file mode 100644 index 0000000000000..deb4106409231 --- /dev/null +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseExample/ConsoleAI.UseExample.csproj @@ -0,0 +1,18 @@ + + + + Exe + net9.0 + enable + enable + + + + + + + + + + + diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.UseExample/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseExample/Program.cs similarity index 58% rename from docs/core/extensions/snippets/ai/ConsoleAI.UseExample/Program.cs rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseExample/Program.cs index 5918576009ffe..333d9290e31f9 100644 --- a/docs/core/extensions/snippets/ai/ConsoleAI.UseExample/Program.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseExample/Program.cs @@ -7,22 +7,16 @@ QueueLimit = int.MaxValue }); -IChatClient client = new SampleChatClient(new Uri("http://localhost"), "test") +IChatClient client = new OllamaChatClient(new Uri("http://localhost:11434"), "llama3.1") .AsBuilder() .UseDistributedCache() - .Use(async (chatMessages, options, nextAsync, cancellationToken) => + .Use(async (messages, options, nextAsync, cancellationToken) => { - using var lease = await rateLimiter.AcquireAsync(permitCount: 1, cancellationToken) - .ConfigureAwait(false); - + using var lease = await rateLimiter.AcquireAsync(permitCount: 1, cancellationToken).ConfigureAwait(false); if (!lease.IsAcquired) - { throw new InvalidOperationException("Unable to acquire lease."); - } - await nextAsync(chatMessages, options, cancellationToken); + await nextAsync(messages, options, cancellationToken); }) .UseOpenTelemetry() .Build(); - -// Use client diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.UseTelemetry/ConsoleAI.UseTelemetry.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseTelemetry/ConsoleAI.UseTelemetry.csproj similarity index 100% rename from docs/core/extensions/snippets/ai/ConsoleAI.UseTelemetry/ConsoleAI.UseTelemetry.csproj rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseTelemetry/ConsoleAI.UseTelemetry.csproj diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.UseTelemetry/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseTelemetry/Program.cs similarity index 85% rename from docs/core/extensions/snippets/ai/ConsoleAI.UseTelemetry/Program.cs rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseTelemetry/Program.cs index fc7e483f8118f..db1c7a2502712 100644 --- a/docs/core/extensions/snippets/ai/ConsoleAI.UseTelemetry/Program.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.UseTelemetry/Program.cs @@ -1,7 +1,7 @@ using Microsoft.Extensions.AI; using OpenTelemetry.Trace; -// Configure OpenTelemetry exporter +// Configure OpenTelemetry exporter. 
string sourceName = Guid.NewGuid().ToString(); TracerProvider tracerProvider = OpenTelemetry.Sdk.CreateTracerProviderBuilder() .AddSource(sourceName) @@ -14,7 +14,7 @@ IChatClient client = new ChatClientBuilder(sampleChatClient) .UseOpenTelemetry( sourceName: sourceName, - configure: static c => c.EnableSensitiveData = true) + configure: c => c.EnableSensitiveData = true) .Build(); Console.WriteLine((await client.GetResponseAsync("What is AI?")).Text); diff --git a/docs/core/extensions/snippets/ai/ConsoleAI/ConsoleAI.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI/ConsoleAI.csproj similarity index 100% rename from docs/core/extensions/snippets/ai/ConsoleAI/ConsoleAI.csproj rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI/ConsoleAI.csproj diff --git a/docs/core/extensions/snippets/ai/ConsoleAI/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI/Program.cs similarity index 55% rename from docs/core/extensions/snippets/ai/ConsoleAI/Program.cs rename to docs/ai/snippets/microsoft-extensions-ai/ConsoleAI/Program.cs index dafbef8ee3ae1..f92735dee5e72 100644 --- a/docs/core/extensions/snippets/ai/ConsoleAI/Program.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI/Program.cs @@ -3,6 +3,4 @@ IChatClient client = new SampleChatClient( new Uri("http://coolsite.ai"), "target-ai-model"); -var response = await client.GetResponseAsync("What is AI?"); - -Console.WriteLine(response.Messages.Single()); +Console.WriteLine(await client.GetResponseAsync("What is AI?")); diff --git a/docs/ai/toc.yml b/docs/ai/toc.yml index 4258976c898c7..41c3a20edf9e1 100644 --- a/docs/ai/toc.yml +++ b/docs/ai/toc.yml @@ -10,7 +10,7 @@ items: - name: Overview href: dotnet-ai-ecosystem.md - name: Microsoft.Extensions.AI - href: ai-extensions.md + href: microsoft-extensions-ai.md - name: Semantic Kernel href: semantic-kernel-dotnet-overview.md - name: Quickstarts diff --git a/docs/core/extensions/snippets/ai/AI.Shared/RateLimitingChatClient.cs b/docs/core/extensions/snippets/ai/AI.Shared/RateLimitingChatClient.cs deleted file mode 100644 index d913e2a20f86a..0000000000000 --- a/docs/core/extensions/snippets/ai/AI.Shared/RateLimitingChatClient.cs +++ /dev/null @@ -1,55 +0,0 @@ -using Microsoft.Extensions.AI; -using System.Runtime.CompilerServices; -using System.Threading.RateLimiting; - -public sealed class RateLimitingChatClient( - IChatClient innerClient, RateLimiter rateLimiter) - : DelegatingChatClient(innerClient) -{ - public override async Task GetResponseAsync( - IEnumerable chatMessages, - ChatOptions? options = null, - CancellationToken cancellationToken = default) - { - using var lease = await rateLimiter.AcquireAsync(permitCount: 1, cancellationToken) - .ConfigureAwait(false); - - if (!lease.IsAcquired) - { - throw new InvalidOperationException("Unable to acquire lease."); - } - - return await base.GetResponseAsync(chatMessages, options, cancellationToken) - .ConfigureAwait(false); - } - - public override async IAsyncEnumerable GetStreamingResponseAsync( - IEnumerable chatMessages, - ChatOptions? 
options = null, - [EnumeratorCancellation] CancellationToken cancellationToken = default) - { - using var lease = await rateLimiter.AcquireAsync(permitCount: 1, cancellationToken) - .ConfigureAwait(false); - - if (!lease.IsAcquired) - { - throw new InvalidOperationException("Unable to acquire lease."); - } - - await foreach (var update in base.GetStreamingResponseAsync(chatMessages, options, cancellationToken) - .ConfigureAwait(false)) - { - yield return update; - } - } - - protected override void Dispose(bool disposing) - { - if (disposing) - { - rateLimiter.Dispose(); - } - - base.Dispose(disposing); - } -} diff --git a/docs/core/extensions/snippets/ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs b/docs/core/extensions/snippets/ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs deleted file mode 100644 index 066cf22f6ee44..0000000000000 --- a/docs/core/extensions/snippets/ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs +++ /dev/null @@ -1,17 +0,0 @@ -namespace Example.Two; - -// -using Microsoft.Extensions.AI; -using Microsoft.Extensions.DependencyInjection; -using System.Threading.RateLimiting; - -public static class RateLimitingChatClientExtensions -{ - public static ChatClientBuilder UseRateLimiting( - this ChatClientBuilder builder, RateLimiter? rateLimiter = null) => - builder.Use((innerClient, services) => - new RateLimitingChatClient( - innerClient, - rateLimiter ?? services.GetRequiredService())); -} -// diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.CustomClientMiddle/Program.cs b/docs/core/extensions/snippets/ai/ConsoleAI.CustomClientMiddle/Program.cs deleted file mode 100644 index 31b73e10c8f53..0000000000000 --- a/docs/core/extensions/snippets/ai/ConsoleAI.CustomClientMiddle/Program.cs +++ /dev/null @@ -1,12 +0,0 @@ -using Microsoft.Extensions.AI; -using System.Threading.RateLimiting; - -var client = new RateLimitingChatClient( - new SampleChatClient(new Uri("http://localhost"), "test"), - new ConcurrencyLimiter(new() - { - PermitLimit = 1, - QueueLimit = int.MaxValue - })); - -await client.GetResponseAsync("What color is the sky?"); diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.DependencyInjection/Program.cs b/docs/core/extensions/snippets/ai/ConsoleAI.DependencyInjection/Program.cs deleted file mode 100644 index 255791a824dda..0000000000000 --- a/docs/core/extensions/snippets/ai/ConsoleAI.DependencyInjection/Program.cs +++ /dev/null @@ -1,20 +0,0 @@ -using Microsoft.Extensions.AI; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Hosting; - -// App setup -HostApplicationBuilder builder = Host.CreateApplicationBuilder(); - -builder.Services.AddDistributedMemoryCache(); -builder.Services.AddChatClient(new SampleChatClient( - new Uri("http://coolsite.ai"), "target-ai-model")) - .UseDistributedCache(); - -using IHost app = builder.Build(); - -// Elsewhere in the app -IChatClient chatClient = app.Services.GetRequiredService(); - -Console.WriteLine(await chatClient.GetResponseAsync("What is AI?")); - -app.Run(); diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.FunctionalityPipelines/Program.cs b/docs/core/extensions/snippets/ai/ConsoleAI.FunctionalityPipelines/Program.cs deleted file mode 100644 index db6660ab5d5a1..0000000000000 --- a/docs/core/extensions/snippets/ai/ConsoleAI.FunctionalityPipelines/Program.cs +++ /dev/null @@ -1,46 +0,0 @@ -using Microsoft.Extensions.AI; -using Microsoft.Extensions.Caching.Distributed; -using Microsoft.Extensions.Caching.Memory; -using 
Microsoft.Extensions.Options; -using OpenTelemetry.Trace; - -// Configure OpenTelemetry exporter -string sourceName = Guid.NewGuid().ToString(); -TracerProvider tracerProvider = OpenTelemetry.Sdk.CreateTracerProviderBuilder() - .AddSource(sourceName) - .AddConsoleExporter() - .Build(); - -// Explore changing the order of the intermediate "Use" calls to see that impact -// that has on what gets cached, traced, etc. -IChatClient client = new ChatClientBuilder( - new OllamaChatClient(new Uri("http://localhost:11434"), "llama3.1")) - .UseDistributedCache(new MemoryDistributedCache( - Options.Create(new MemoryDistributedCacheOptions()))) - .UseFunctionInvocation() - .UseOpenTelemetry( - sourceName: sourceName, - configure: static c => c.EnableSensitiveData = true) - .Build(); - -ChatOptions options = new() -{ - Tools = - [ - AIFunctionFactory.Create( - () => Random.Shared.NextDouble() > 0.5 ? "It's sunny" : "It's raining", - name: "GetCurrentWeather", - description: "Gets the current weather") - ] -}; - -for (int i = 0; i < 3; ++i) -{ - List history = - [ - new ChatMessage(ChatRole.System, "You are a helpful AI assistant"), - new ChatMessage(ChatRole.User, "Do I need an umbrella?") - ]; - - Console.WriteLine(await client.GetResponseAsync(history, options)); -} diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.ProvideOptions/Program.cs b/docs/core/extensions/snippets/ai/ConsoleAI.ProvideOptions/Program.cs deleted file mode 100644 index 77098afb4b8ba..0000000000000 --- a/docs/core/extensions/snippets/ai/ConsoleAI.ProvideOptions/Program.cs +++ /dev/null @@ -1,13 +0,0 @@ -using Microsoft.Extensions.AI; - -IChatClient client = new ChatClientBuilder( - new OllamaChatClient(new Uri("http://localhost:11434"))) - .ConfigureOptions(options => options.ModelId ??= "phi3") - .Build(); - -// will request "phi3" -Console.WriteLine(await client.GetResponseAsync("What is AI?")); - -// will request "llama3.1" -Console.WriteLine(await client.GetResponseAsync( - "What is AI?", new() { ModelId = "llama3.1" })); diff --git a/docs/core/extensions/snippets/ai/ConsoleAI.ToolCalling/Program.cs b/docs/core/extensions/snippets/ai/ConsoleAI.ToolCalling/Program.cs deleted file mode 100644 index 7c2abd8d1e5d6..0000000000000 --- a/docs/core/extensions/snippets/ai/ConsoleAI.ToolCalling/Program.cs +++ /dev/null @@ -1,21 +0,0 @@ -using System.ComponentModel; -using Microsoft.Extensions.AI; - -[Description("Gets the current weather")] -string GetCurrentWeather() => Random.Shared.NextDouble() > 0.5 - ? 
"It's sunny" - : "It's raining"; - -IChatClient client = new ChatClientBuilder( - new OllamaChatClient(new Uri("http://localhost:11434"), "llama3.1")) - .UseFunctionInvocation() - .Build(); - -IAsyncEnumerable response = client.GetStreamingResponseAsync( - "Should I wear a rain coat?", - new() { Tools = [AIFunctionFactory.Create(GetCurrentWeather)] }); - -await foreach (ChatResponseUpdate update in response) -{ - Console.Write(update); -} From 342ce16b8532f480bd8845872ad3009a56bd52f6 Mon Sep 17 00:00:00 2001 From: Genevieve Warren <24882762+gewarren@users.noreply.github.com> Date: Wed, 30 Apr 2025 08:15:25 -0700 Subject: [PATCH 03/12] fix links --- docs/ai/dotnet-ai-overview.md | 18 +++++++++--------- docs/azure/{TOC.yml => TOC1.yml} | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) rename docs/azure/{TOC.yml => TOC1.yml} (98%) diff --git a/docs/ai/dotnet-ai-overview.md b/docs/ai/dotnet-ai-overview.md index bf203f5aec4a2..9f7f6dc118046 100644 --- a/docs/ai/dotnet-ai-overview.md +++ b/docs/ai/dotnet-ai-overview.md @@ -34,18 +34,18 @@ The opportunities with AI are near endless. Here are a few examples of solutions We recommend the following sequence of tutorials and articles for an introduction to developing applications with AI and .NET: -| Scenario | Tutorial | -|-----------------------------|----------| -| Create a chat application | [Build an Azure AI chat app with .NET](../quickstarts/build-chat-app.md) | -| Summarize text | [Summarize text using Azure AI chat app with .NET](../quickstarts/prompt-model.md) | -| Chat with your data | [Get insight about your data from an .NET Azure AI chat app](../quickstarts/build-vector-search-app.md) | -| Call .NET functions with AI | [Extend Azure AI using tools and execute a local function with .NET](../quickstarts/use-function-calling.md) | -| Generate images | [Generate images using Azure AI with .NET](../quickstarts/generate-images.md) | +| Scenario | Tutorial | +|-----------------------------|-------------------------------------------------------------------------| +| Create a chat application | [Build an Azure AI chat app with .NET](./quickstarts/build-chat-app.md) | +| Summarize text | [Summarize text using Azure AI chat app with .NET](./quickstarts/prompt-model.md) | +| Chat with your data | [Get insight about your data from an .NET Azure AI chat app](./quickstarts/build-vector-search-app.md) | +| Call .NET functions with AI | [Extend Azure AI using tools and execute a local function with .NET](./quickstarts/use-function-calling.md) | +| Generate images | [Generate images using Azure AI with .NET](./quickstarts/generate-images.md) | | Train your own model | [ML.NET tutorial](https://dotnet.microsoft.com/learn/ml-dotnet/get-started-tutorial/intro) | -Browse the table of contents to learn more about the core concepts, starting with [How generative AI and LLMs work](../conceptual/how-genai-and-llms-work.md). +Browse the table of contents to learn more about the core concepts, starting with [How generative AI and LLMs work](./conceptual/how-genai-and-llms-work.md). 
## Next steps -* [Quickstart: Build an Azure AI chat app with .NET](../quickstarts/build-chat-app.md) +* [Quickstart: Build an Azure AI chat app with .NET](./quickstarts/build-chat-app.md) * [Video series: Machine Learning and AI with .NET](/shows/machine-learning-and-ai-with-dotnet-for-beginners) diff --git a/docs/azure/TOC.yml b/docs/azure/TOC1.yml similarity index 98% rename from docs/azure/TOC.yml rename to docs/azure/TOC1.yml index 92284500bf699..ee53aad611c9f 100644 --- a/docs/azure/TOC.yml +++ b/docs/azure/TOC1.yml @@ -49,8 +49,8 @@ href: ./migration/vm.md - name: Migrate a SQL Server database to Azure href: ./migration/sql.md -- name: Azure AI for .NET - href: ../ai/get-started/dotnet-ai-overview.md +- name: AI for .NET + href: ../ai/dotnet-ai-overview.md - name: Azure SDK for .NET items: - name: What is the Azure SDK for .NET? From fe5f6566507760f5ea3f5cfe252ea76773d71e57 Mon Sep 17 00:00:00 2001 From: Genevieve Warren <24882762+gewarren@users.noreply.github.com> Date: Wed, 30 Apr 2025 08:31:46 -0700 Subject: [PATCH 04/12] fix more links --- .openpublishing.redirection.ai.json | 4 ++++ docs/ai/microsoft-extensions-ai.md | 17 ++++++++--------- docs/ai/{dotnet-ai-overview.md => overview.md} | 0 docs/azure/index.yml | 6 +++--- docs/azure/{TOC1.yml => toc.yml} | 2 +- 5 files changed, 16 insertions(+), 13 deletions(-) rename docs/ai/{dotnet-ai-overview.md => overview.md} (100%) rename docs/azure/{TOC1.yml => toc.yml} (99%) diff --git a/.openpublishing.redirection.ai.json b/.openpublishing.redirection.ai.json index a34adfd937675..2590e0ea7f3b4 100644 --- a/.openpublishing.redirection.ai.json +++ b/.openpublishing.redirection.ai.json @@ -8,6 +8,10 @@ "source_path_from_root": "/docs/ai/conceptual/agents.md", "redirect_url": "/dotnet/ai" }, + { + "source_path_from_root": "/docs/ai/get-started/dotnet-ai-overview.md", + "redirect_url": "/dotnet/ai/overview" + }, { "source_path_from_root": "/docs/ai/how-to/app-service-db-auth.md", "redirect_url": "/dotnet/ai" diff --git a/docs/ai/microsoft-extensions-ai.md b/docs/ai/microsoft-extensions-ai.md index c54976d0d0a54..fb11342bfd156 100644 --- a/docs/ai/microsoft-extensions-ai.md +++ b/docs/ai/microsoft-extensions-ai.md @@ -24,7 +24,7 @@ To also have access to higher-level utilities for working with generative AI com ## Install the package -For information about how to install NuGet packages, see [dotnet package add](../tools/dotnet-package-add.md) or [Manage package dependencies in .NET applications](../tools/dependencies.md). +For information about how to install NuGet packages, see [dotnet package add](../core/tools/dotnet-package-add.md) or [Manage package dependencies in .NET applications](../core/tools/dependencies.md). ## Usage examples @@ -103,11 +103,11 @@ The preceding code: #### Cache responses -If you're familiar with [Caching in .NET](caching.md), it's good to know that provides other such delegating `IChatClient` implementations. The is an `IChatClient` that layers caching around another arbitrary `IChatClient` instance. When a novel chat history is submitted to the `DistributedCachingChatClient`, it forwards it to the underlying client and then caches the response before sending it back to the consumer. The next time the same history is submitted, such that a cached response can be found in the cache, the `DistributedCachingChatClient` returns the cached response rather than forwarding the request along the pipeline. 
+If you're familiar with [Caching in .NET](../../core/extensions/caching.md), it's good to know that provides other such delegating `IChatClient` implementations. The is an `IChatClient` that layers caching around another arbitrary `IChatClient` instance. When a novel chat history is submitted to the `DistributedCachingChatClient`, it forwards it to the underlying client and then caches the response before sending it back to the consumer. The next time the same history is submitted, such that a cached response can be found in the cache, the `DistributedCachingChatClient` returns the cached response rather than forwarding the request along the pipeline. :::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/Program.cs"::: -This example depends on the [📦 Microsoft.Extensions.Caching.Memory](https://www.nuget.org/packages/Microsoft.Extensions.Caching.Memory) NuGet package. For more information, see [Caching in .NET](caching.md). +This example depends on the [📦 Microsoft.Extensions.Caching.Memory](https://www.nuget.org/packages/Microsoft.Extensions.Caching.Memory) NuGet package. For more information, see [Caching in .NET](c../../core/extensions/caching.md). #### Use telemetry @@ -169,7 +169,7 @@ For scenarios where you need a different implementation for `GetResponseAsync` a #### Dependency injection - implementations will often be provided to an application via [dependency injection (DI)](dependency-injection.md). In this example, an is added into the DI container, as is an `IChatClient`. The registration for the `IChatClient` uses a builder that creates a pipeline containing a caching client (which then uses an `IDistributedCache` retrieved from DI) and the sample client. The injected `IChatClient` can be retrieved and used elsewhere in the app. + implementations will often be provided to an application via [dependency injection (DI)](../../core/extensions/dependency-injection.md). In this example, an is added into the DI container, as is an `IChatClient`. The registration for the `IChatClient` uses a builder that creates a pipeline containing a caching client (which then uses an `IDistributedCache` retrieved from DI) and the sample client. The injected `IChatClient` can be retrieved and used elsewhere in the app. 
:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/Program.cs"::: @@ -261,8 +261,7 @@ For more samples, see the [dotnet/ai-samples](https://aka.ms/meai-samples) GitHu ## See also -- [Build an AI chat app with .NET](../../ai/quickstarts/build-chat-app.md) -- [.NET dependency injection](dependency-injection.md) -- [Rate limit an HTTP handler in .NET](http-ratelimiter.md) -- [.NET generic host](generic-host.md) -- [Caching in .NET](caching.md) +- [Build an AI chat app with .NET](./quickstarts/build-chat-app.md) +- [.NET dependency injection](../../core/extensions/dependency-injection.md) +- [Rate limit an HTTP handler in .NET](../../core/extensions/http-ratelimiter.md) +- [Caching in .NET](../../core/extensions/caching.md) diff --git a/docs/ai/dotnet-ai-overview.md b/docs/ai/overview.md similarity index 100% rename from docs/ai/dotnet-ai-overview.md rename to docs/ai/overview.md diff --git a/docs/azure/index.yml b/docs/azure/index.yml index 470dcb9085236..beb767a046168 100644 --- a/docs/azure/index.yml +++ b/docs/azure/index.yml @@ -12,7 +12,7 @@ metadata: ms.date: 08/15/2024 highlightedContent: -# itemType: architecture | concept | deploy | download | get-started | how-to-guide | learn | overview | quickstart | reference | tutorial | whats-new + # itemType: architecture | concept | deploy | download | get-started | how-to-guide | learn | overview | quickstart | reference | tutorial | whats-new items: - itemType: overview title: Introduction to Azure and .NET @@ -42,7 +42,7 @@ highlightedContent: conceptualContent: title: Featured content summary: Learn to develop .NET apps leveraging a variety of Azure services. -# itemType: architecture | concept | deploy | download | get-started | how-to-guide | learn | overview | quickstart | reference | tutorial | video | whats-new + # itemType: architecture | concept | deploy | download | get-started | how-to-guide | learn | overview | quickstart | reference | tutorial | video | whats-new items: - title: Create web apps links: @@ -96,7 +96,7 @@ conceptualContent: - title: Create intelligent apps with AI links: - itemType: overview - url: ../ai/get-started/dotnet-ai-overview.md + url: ../ai/overview.md text: AI for .NET overview - itemType: quickstart url: ../ai/quickstarts/get-started-openai.md diff --git a/docs/azure/TOC1.yml b/docs/azure/toc.yml similarity index 99% rename from docs/azure/TOC1.yml rename to docs/azure/toc.yml index ee53aad611c9f..66737bb60f65a 100644 --- a/docs/azure/TOC1.yml +++ b/docs/azure/toc.yml @@ -50,7 +50,7 @@ - name: Migrate a SQL Server database to Azure href: ./migration/sql.md - name: AI for .NET - href: ../ai/dotnet-ai-overview.md + href: ../ai/overview.md - name: Azure SDK for .NET items: - name: What is the Azure SDK for .NET? 
From e3da3dba5cacbf7c5b074b51fc25d2eb32902776 Mon Sep 17 00:00:00 2001 From: Genevieve Warren <24882762+gewarren@users.noreply.github.com> Date: Wed, 30 Apr 2025 09:39:57 -0700 Subject: [PATCH 05/12] fix compilation warnings --- docs/ai/microsoft-extensions-ai.md | 2 +- .../AI.Shared/RateLimitingChatClient.cs | 1 + .../ConsoleAI.CacheResponses.csproj | 1 + .../ConsoleAI.ConsumeClientMiddleware.csproj | 1 + .../Program.cs | 20 ++++++++++--------- .../Program.cs | 3 ++- .../ConsoleAI.CreateEmbeddings/Program.cs | 5 +++-- .../ConsoleAI.CustomClientMiddle.csproj | 3 ++- .../Program.cs | 2 +- .../ConsoleAI.DependencyInjection.csproj | 1 + .../ConsoleAI.GetResponseAsyncArgs/Program.cs | 2 ++ 11 files changed, 26 insertions(+), 15 deletions(-) diff --git a/docs/ai/microsoft-extensions-ai.md b/docs/ai/microsoft-extensions-ai.md index fb11342bfd156..cc0711aedc7a2 100644 --- a/docs/ai/microsoft-extensions-ai.md +++ b/docs/ai/microsoft-extensions-ai.md @@ -61,7 +61,7 @@ With an instance of , you can call the The core `IChatClient.GetResponseAsync` method accepts a list of messages. This list represents the history of all messages that are part of the conversation. -:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.GetResponseAsyncArgs/Program.cs"::: +:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.GetResponseAsyncArgs/Program.cs" id="Snippet1"::: The that's returned from `GetResponseAsync` exposes a list of instances that represent one or more messages generated as part of the operation. In common cases, there is only one response message, but in some situations, there can be multiple messages. The message list is ordered, such that the last message in the list represents the final message to the request. To provide all of those response messages back to the service in a subsequent request, you can add the messages from the response back into the messages list. 
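The request/response flow the hunk above describes (submit the message list, then fold the response's messages back into it) amounts to a loop like this sketch. `SampleChatClient` is again the repo's placeholder client, and the URI and model name are assumptions for illustration:

```csharp
using Microsoft.Extensions.AI;

IChatClient client = new SampleChatClient(
    new Uri("http://coolsite.ai"), "target-ai-model");

List<ChatMessage> history =
[
    new(ChatRole.System, "You are a helpful AI assistant"),
    new(ChatRole.User, "What is AI?"),
];

ChatResponse response = await client.GetResponseAsync(history);
Console.WriteLine(response);

// A response can contain more than one message, so add them all back
// into the history so that the next request sees the full conversation.
history.AddMessages(response);
```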
diff --git a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs index 93d367b006ab8..1751e8f805b0a 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs @@ -1,4 +1,5 @@ using Microsoft.Extensions.AI; +using System.Runtime.CompilerServices; using System.Threading.RateLimiting; public sealed class RateLimitingChatClient(IChatClient innerClient, RateLimiter rateLimiter) : DelegatingChatClient(innerClient) diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/ConsoleAI.CacheResponses.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/ConsoleAI.CacheResponses.csproj index 6d87d65be1031..94db5114b14ba 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/ConsoleAI.CacheResponses.csproj +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/ConsoleAI.CacheResponses.csproj @@ -8,6 +8,7 @@ + diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/ConsoleAI.ConsumeClientMiddleware.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/ConsoleAI.ConsumeClientMiddleware.csproj index bec56340237d7..6da26fd2e1c47 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/ConsoleAI.ConsumeClientMiddleware.csproj +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/ConsoleAI.ConsumeClientMiddleware.csproj @@ -8,6 +8,7 @@ + diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/Program.cs index 394738caa9ca5..d59952ee6f485 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/Program.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/Program.cs @@ -1,23 +1,25 @@ using Example.Two; -// using Microsoft.Extensions.AI; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Hosting; // -var client = new OllamaChatClient(new Uri("http://localhost:11434"), "llama3.1") - .AsBuilder() - .UseDistributedCache() - .UseRateLimiting() - .UseOpenTelemetry() - .Build(services); +HostApplicationBuilder builder = Host.CreateApplicationBuilder(args); + +builder.Services.AddChatClient(services => + new SampleChatClient(new Uri("http://localhost"), "test") + .AsBuilder() + .UseDistributedCache() + .UseRateLimiting() + .UseOpenTelemetry() + .Build(services)); // // Elsewhere in the app -var chatClient = app.Services.GetRequiredService(); +using IHost app = builder.Build(); +IChatClient chatClient = app.Services.GetRequiredService(); Console.WriteLine(await chatClient.GetResponseAsync("What is AI?")); app.Run(); -// diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs index d7987319e07ee..b6bb41eb7e5af 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs @@ -10,7 +10,8 @@ QueueLimit = int.MaxValue })); -foreach (var embedding in await generator.GenerateAsync(["What is AI?", "What is .NET?"])) +foreach (Embedding 
embedding in + await generator.GenerateAsync(["What is AI?", "What is .NET?"])) { Console.WriteLine(string.Join(", ", embedding.Vector.ToArray())); } diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CreateEmbeddings/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CreateEmbeddings/Program.cs index e9b7539cb43ae..a9b9c3adacc34 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CreateEmbeddings/Program.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CreateEmbeddings/Program.cs @@ -5,12 +5,13 @@ new SampleEmbeddingGenerator( new Uri("http://coolsite.ai"), "target-ai-model"); -foreach (var embedding in await generator.GenerateAsync(["What is AI?", "What is .NET?"])) +foreach (Embedding embedding in + await generator.GenerateAsync(["What is AI?", "What is .NET?"])) { Console.WriteLine(string.Join(", ", embedding.Vector.ToArray())); } // // -ReadOnlyMemory vector = generator.GenerateVectorAsync("What is AI?"); +ReadOnlyMemory vector = await generator.GenerateEmbeddingVectorAsync("What is AI?"); // diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/ConsoleAI.CustomClientMiddle.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/ConsoleAI.CustomClientMiddle.csproj index b7875a3582d78..5a2a4cd8162cc 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/ConsoleAI.CustomClientMiddle.csproj +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/ConsoleAI.CustomClientMiddle.csproj @@ -8,7 +8,8 @@ - + + diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs index 1c7437ecaba7b..f35794faf6dff 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.CustomEmbeddingsMiddle/Program.cs @@ -28,7 +28,7 @@ "What is AI?" 
]); -foreach (var embedding in embeddings) +foreach (Embedding embedding in embeddings) { Console.WriteLine(string.Join(", ", embedding.Vector.ToArray())); } diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/ConsoleAI.DependencyInjection.csproj b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/ConsoleAI.DependencyInjection.csproj index 4ac04759c2475..621ab0bd78e45 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/ConsoleAI.DependencyInjection.csproj +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/ConsoleAI.DependencyInjection.csproj @@ -8,6 +8,7 @@ + diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetResponseAsyncArgs/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetResponseAsyncArgs/Program.cs index b33fe5f1a3d80..92bb0e9f6891b 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetResponseAsyncArgs/Program.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.GetResponseAsyncArgs/Program.cs @@ -3,8 +3,10 @@ IChatClient client = new SampleChatClient( new Uri("http://coolsite.ai"), "target-ai-model"); +// Console.WriteLine(await client.GetResponseAsync( [ new(ChatRole.System, "You are a helpful AI assistant"), new(ChatRole.User, "What is AI?"), ])); +// From 20ce855252e30c70e4eca16e86f349950bd3f39a Mon Sep 17 00:00:00 2001 From: Genevieve Warren <24882762+gewarren@users.noreply.github.com> Date: Wed, 30 Apr 2025 09:44:10 -0700 Subject: [PATCH 06/12] fix links --- docs/ai/microsoft-extensions-ai.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/ai/microsoft-extensions-ai.md b/docs/ai/microsoft-extensions-ai.md index cc0711aedc7a2..4bb78d2dc4607 100644 --- a/docs/ai/microsoft-extensions-ai.md +++ b/docs/ai/microsoft-extensions-ai.md @@ -103,11 +103,11 @@ The preceding code: #### Cache responses -If you're familiar with [Caching in .NET](../../core/extensions/caching.md), it's good to know that provides other such delegating `IChatClient` implementations. The is an `IChatClient` that layers caching around another arbitrary `IChatClient` instance. When a novel chat history is submitted to the `DistributedCachingChatClient`, it forwards it to the underlying client and then caches the response before sending it back to the consumer. The next time the same history is submitted, such that a cached response can be found in the cache, the `DistributedCachingChatClient` returns the cached response rather than forwarding the request along the pipeline. +If you're familiar with [Caching in .NET](../core/extensions/caching.md), it's good to know that provides other such delegating `IChatClient` implementations. The is an `IChatClient` that layers caching around another arbitrary `IChatClient` instance. When a novel chat history is submitted to the `DistributedCachingChatClient`, it forwards it to the underlying client and then caches the response before sending it back to the consumer. The next time the same history is submitted, such that a cached response can be found in the cache, the `DistributedCachingChatClient` returns the cached response rather than forwarding the request along the pipeline. :::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/Program.cs"::: -This example depends on the [📦 Microsoft.Extensions.Caching.Memory](https://www.nuget.org/packages/Microsoft.Extensions.Caching.Memory) NuGet package. 
For more information, see [Caching in .NET](c../../core/extensions/caching.md). +This example depends on the [📦 Microsoft.Extensions.Caching.Memory](https://www.nuget.org/packages/Microsoft.Extensions.Caching.Memory) NuGet package. For more information, see [Caching in .NET](c../core/extensions/caching.md). #### Use telemetry @@ -169,7 +169,7 @@ For scenarios where you need a different implementation for `GetResponseAsync` a #### Dependency injection - implementations will often be provided to an application via [dependency injection (DI)](../../core/extensions/dependency-injection.md). In this example, an is added into the DI container, as is an `IChatClient`. The registration for the `IChatClient` uses a builder that creates a pipeline containing a caching client (which then uses an `IDistributedCache` retrieved from DI) and the sample client. The injected `IChatClient` can be retrieved and used elsewhere in the app. + implementations will often be provided to an application via [dependency injection (DI)](../core/extensions/dependency-injection.md). In this example, an is added into the DI container, as is an `IChatClient`. The registration for the `IChatClient` uses a builder that creates a pipeline containing a caching client (which then uses an `IDistributedCache` retrieved from DI) and the sample client. The injected `IChatClient` can be retrieved and used elsewhere in the app. :::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/Program.cs"::: @@ -262,6 +262,6 @@ For more samples, see the [dotnet/ai-samples](https://aka.ms/meai-samples) GitHu ## See also - [Build an AI chat app with .NET](./quickstarts/build-chat-app.md) -- [.NET dependency injection](../../core/extensions/dependency-injection.md) -- [Rate limit an HTTP handler in .NET](../../core/extensions/http-ratelimiter.md) -- [Caching in .NET](../../core/extensions/caching.md) +- [.NET dependency injection](../core/extensions/dependency-injection.md) +- [Rate limit an HTTP handler in .NET](../core/extensions/http-ratelimiter.md) +- [Caching in .NET](../core/extensions/caching.md) From 3fa8c68b63eabb4f52b8a77212d86dc65399ca6c Mon Sep 17 00:00:00 2001 From: Genevieve Warren <24882762+gewarren@users.noreply.github.com> Date: Wed, 30 Apr 2025 09:51:06 -0700 Subject: [PATCH 07/12] fix compilation warning --- .../ConsoleAI.AddMessages/Program.cs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/Program.cs b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/Program.cs index dc32df78e9359..9fc42e45199e4 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/Program.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/ConsoleAI.AddMessages/Program.cs @@ -1,3 +1,5 @@ +using Microsoft.Extensions.AI; + IChatClient client = new SampleChatClient( new Uri("http://coolsite.ai"), "target-ai-model"); @@ -8,7 +10,7 @@ Console.Write("Q: "); history.Add(new(ChatRole.User, Console.ReadLine())); - var response = await client.GetResponseAsync(history); + ChatResponse response = await client.GetResponseAsync(history); Console.WriteLine(response); history.AddMessages(response); @@ -16,19 +18,20 @@ // // -List history = []; +List chatHistory = []; while (true) { Console.Write("Q: "); - history.Add(new(ChatRole.User, Console.ReadLine())); + chatHistory.Add(new(ChatRole.User, Console.ReadLine())); List updates = []; - await foreach (var update in client.GetStreamingResponseAsync(history)) 
+    await foreach (ChatResponseUpdate update in
+        client.GetStreamingResponseAsync(chatHistory))
     {
         Console.Write(update);
     }
     Console.WriteLine();
 
-    history.AddMessages(updates);
+    chatHistory.AddMessages(updates);
 }
 //

From 6e2ad704a889c0ef1f949e234c538952468e1ddd Mon Sep 17 00:00:00 2001
From: Genevieve Warren <24882762+gewarren@users.noreply.github.com>
Date: Wed, 30 Apr 2025 09:55:26 -0700
Subject: [PATCH 08/12] make azure toc link contextual

---
 docs/ai/microsoft-extensions-ai.md | 2 +-
 docs/azure/toc.yml                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/ai/microsoft-extensions-ai.md b/docs/ai/microsoft-extensions-ai.md
index 4bb78d2dc4607..2219b6635a3d0 100644
--- a/docs/ai/microsoft-extensions-ai.md
+++ b/docs/ai/microsoft-extensions-ai.md
@@ -107,7 +107,7 @@ If you're familiar with [Caching in .NET](../core/extensions/caching.md), it's g
 
 :::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.CacheResponses/Program.cs":::
 
-This example depends on the [📦 Microsoft.Extensions.Caching.Memory](https://www.nuget.org/packages/Microsoft.Extensions.Caching.Memory) NuGet package. For more information, see [Caching in .NET](c../core/extensions/caching.md).
+This example depends on the [📦 Microsoft.Extensions.Caching.Memory](https://www.nuget.org/packages/Microsoft.Extensions.Caching.Memory) NuGet package. For more information, see [Caching in .NET](../core/extensions/caching.md).
 
 #### Use telemetry
 
diff --git a/docs/azure/toc.yml b/docs/azure/toc.yml
index 66737bb60f65a..fb85b4f900b16 100644
--- a/docs/azure/toc.yml
+++ b/docs/azure/toc.yml
@@ -50,7 +50,7 @@
 - name: Migrate a SQL Server database to Azure
   href: ./migration/sql.md
 - name: AI for .NET
-  href: ../ai/overview.md
+  href: ../ai/overview.md?toc=/dotnet/azure/toc.json&bc=/dotnet/breadcrumb/toc.json
 - name: Azure SDK for .NET
   items:
   - name: What is the Azure SDK for .NET?
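The `ConsoleAI.CacheResponses` snippet referenced in the hunk above layers distributed caching over a chat client. A rough equivalent follows, assuming a local Ollama endpoint as in the repo's other samples and an in-memory `IDistributedCache`; treat it as a sketch rather than the exact snippet:

```csharp
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Caching.Distributed;
using Microsoft.Extensions.Caching.Memory;
using Microsoft.Extensions.Options;

// Layer caching over the underlying client. A repeated prompt is answered
// from the cache instead of invoking the model again.
IChatClient client = new ChatClientBuilder(
        new OllamaChatClient(new Uri("http://localhost:11434"), "llama3.1"))
    .UseDistributedCache(new MemoryDistributedCache(
        Options.Create(new MemoryDistributedCacheOptions())))
    .Build();

string[] prompts = ["What is AI?", "What is .NET?", "What is AI?"];

foreach (string prompt in prompts)
{
    // The third iteration repeats the first prompt and is served from cache.
    Console.WriteLine(await client.GetResponseAsync(prompt));
}
```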
From f370df9325f490bad6c2f12abde4802497c2bd72 Mon Sep 17 00:00:00 2001 From: Genevieve Warren <24882762+gewarren@users.noreply.github.com> Date: Wed, 30 Apr 2025 09:57:58 -0700 Subject: [PATCH 09/12] fix toc link --- docs/ai/toc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ai/toc.yml b/docs/ai/toc.yml index a8a7dc510b7a7..0ebbe18161046 100644 --- a/docs/ai/toc.yml +++ b/docs/ai/toc.yml @@ -2,7 +2,7 @@ items: - name: AI for .NET developers href: index.yml - name: Overview - href: dotnet-ai-overview.md + href: overview.md - name: "Quickstart: Connect to and prompt an AI model" href: quickstarts/prompt-model.md - name: AI frameworks and SDKs From 122ad4bf6b6b979e02836bd627be637762d4c024 Mon Sep 17 00:00:00 2001 From: Genevieve Warren <24882762+gewarren@users.noreply.github.com> Date: Wed, 30 Apr 2025 10:11:13 -0700 Subject: [PATCH 10/12] more touchups --- docs/ai/index.yml | 4 ++-- docs/ai/microsoft-extensions-ai.md | 22 ++++++++++------------ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/docs/ai/index.yml b/docs/ai/index.yml index 4f2ace0e2f3e4..fefbd4ebfdecf 100644 --- a/docs/ai/index.yml +++ b/docs/ai/index.yml @@ -23,8 +23,8 @@ landingContent: linkLists: - linkListType: get-started links: - - text: Develop .NET applications - url: dotnet-ai-overview.md + - text: Develop .NET apps with AI features + url: overview.md - text: Connect to and prompt an AI model url: quickstarts/prompt-model.md - text: Microsoft.Extensions.AI libraries diff --git a/docs/ai/microsoft-extensions-ai.md b/docs/ai/microsoft-extensions-ai.md index 2219b6635a3d0..ca4465bf29d7d 100644 --- a/docs/ai/microsoft-extensions-ai.md +++ b/docs/ai/microsoft-extensions-ai.md @@ -16,17 +16,17 @@ The [📦 Microsoft.Extensions.AI.Abstractions](https://www.nuget.org/packages/M The [📦 Microsoft.Extensions.AI](https://www.nuget.org/packages/Microsoft.Extensions.AI) package has an implicit dependency on the `Microsoft.Extensions.AI.Abstractions` package. This package enables you to easily integrate components such as telemetry and caching into your applications using familiar dependency injection and middleware patterns. For example, it provides the extension method, which adds OpenTelemetry support to the chat client pipeline. -## Which package to reference +### Which package to reference Libraries that provide implementations of the abstractions typically reference only `Microsoft.Extensions.AI.Abstractions`. To also have access to higher-level utilities for working with generative AI components, reference the `Microsoft.Extensions.AI` package instead (which itself references `Microsoft.Extensions.AI.Abstractions`). Most consuming applications and services should reference the `Microsoft.Extensions.AI` package along with one or more libraries that provide concrete implementations of the abstractions. -## Install the package +### Install the packages For information about how to install NuGet packages, see [dotnet package add](../core/tools/dotnet-package-add.md) or [Manage package dependencies in .NET applications](../core/tools/dependencies.md). -## Usage examples +## API usage examples The following subsections show specific [IChatClient](#the-ichatclient-interface) usage examples: @@ -76,7 +76,7 @@ The inputs to [!TIP] > Streaming APIs are nearly synonymous with AI user experiences. C# enables compelling scenarios with its `IAsyncEnumerable` support, allowing for a natural and efficient way to stream data. 
-As with `GetResponseAsync`, you can add the updates from back into the messages list. As the updates are individual pieces of a response, you can use helpers like to compose one or more updates back into a single instance. +As with `GetResponseAsync`, you can add the updates from back into the messages list. Because the updates are individual pieces of a response, you can use helpers like to compose one or more updates back into a single instance. Helpers like compose a and then extract the composed messages from the response and add them to a list. @@ -111,7 +111,7 @@ This example depends on the [📦 Microsoft.Extensions.Caching.Memory](https://w #### Use telemetry -Another example of a delegating chat client is the . This implementation adheres to the [OpenTelemetry Semantic Conventions for Generative AI systems](https://opentelemetry.io/docs/specs/semconv/gen-ai/). Similar to other `IChatClient` delegators, it layers metrics and spans around any underlying `IChatClient` implementation, providing enhanced observability. +Another example of a delegating chat client is the . This implementation adheres to the [OpenTelemetry Semantic Conventions for Generative AI systems](https://opentelemetry.io/docs/specs/semconv/gen-ai/). Similar to other `IChatClient` delegators, it layers metrics and spans around other arbitrary `IChatClient` implementations. :::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.UseTelemetry/Program.cs"::: @@ -121,7 +121,7 @@ Alternatively, the and correspo #### Provide options -Every call to or can optionally supply a instance containing additional parameters for the operation. The most common parameters among AI models and services show up as strongly typed properties on the type, such as . Other parameters can be supplied by name in a weakly typed manner via the dictionary. +Every call to or can optionally supply a instance containing additional parameters for the operation. The most common parameters among AI models and services show up as strongly typed properties on the type, such as . Other parameters can be supplied by name in a weakly typed manner, via the dictionary. You can also specify options when building an `IChatClient` with the fluent API by chaining a call to the extension method. This delegating client wraps another client and invokes the supplied delegate to populate a `ChatOptions` instance for every call. For example, to ensure that the property defaults to a particular model name, you can use code like the following: @@ -147,7 +147,7 @@ As with other `IChatClient` implementations, the `RateLimitingChatClient` can be :::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.CustomClientMiddle/Program.cs"::: -To simplify the composition of such components with others, component authors should create a `Use*` extension method for registering the component into a pipeline. For example, consider the following extension method: +To simplify the composition of such components with others, component authors should create a `Use*` extension method for registering the component into a pipeline. 
For example, consider the following `UseRateLimiting` extension method:
 
 :::code language="csharp" source="snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.cs" id="one":::
 
@@ -159,9 +159,7 @@ Now it's easy for the consumer to use this in their pipeline, for example:
 
 :::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.ConsumeClientMiddleware/Program.cs" id="SnippetUse":::
 
-The previous extension methods demonstrate using a `Use` method on . `ChatClientBuilder` also provides overloads that make it easier to write such delegating handlers.
-
-For example, in the earlier `RateLimitingChatClient` example, the overrides of `GetResponseAsync` and `GetStreamingResponseAsync` only need to do work before and after delegating to the next client in the pipeline. To achieve the same thing without writing a custom class, you can use an overload of `Use` that accepts a delegate that's used for both `GetResponseAsync` and `GetStreamingResponseAsync`, reducing the boilerplate required:
+The previous extension methods demonstrate using a `Use` method on . `ChatClientBuilder` also provides overloads that make it easier to write such delegating handlers. For example, in the earlier `RateLimitingChatClient` example, the overrides of `GetResponseAsync` and `GetStreamingResponseAsync` only need to do work before and after delegating to the next client in the pipeline. To achieve the same thing without writing a custom class, you can use an overload of `Use` that accepts a delegate that's used for both `GetResponseAsync` and `GetStreamingResponseAsync`, reducing the boilerplate required:
 
 :::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.UseExample/Program.cs":::
 
@@ -169,7 +167,7 @@ For scenarios where you need a different implementation for `GetResponseAsync` a
 
 #### Dependency injection
 
- implementations will often be provided to an application via [dependency injection (DI)](../core/extensions/dependency-injection.md). In this example, an is added into the DI container, as is an `IChatClient`. The registration for the `IChatClient` uses a builder that creates a pipeline containing a caching client (which then uses an `IDistributedCache` retrieved from DI) and the sample client. The injected `IChatClient` can be retrieved and used elsewhere in the app.
+ implementations are often provided to an application via [dependency injection (DI)](../core/extensions/dependency-injection.md). In this example, an is added into the DI container, as is an `IChatClient`. The registration for the `IChatClient` uses a builder that creates a pipeline containing a caching client (which then uses an `IDistributedCache` retrieved from DI) and the sample client. The injected `IChatClient` can be retrieved and used elsewhere in the app.
 
 :::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.DependencyInjection/Program.cs":::
 
@@ -242,7 +240,7 @@ The following is an example implementation of such a delegating embedding genera
 
 :::code language="csharp" source="snippets/microsoft-extensions-ai/AI.Shared/RateLimitingEmbeddingGenerator.cs":::
 
-This can then be layered around an arbitrary `IEmbeddingGenerator>` to rate-limit all embedding generation operations.
+This can then be layered around an arbitrary `IEmbeddingGenerator>` to rate limit all embedding generation operations.
 
:::code language="csharp" source="snippets/microsoft-extensions-ai/ConsoleAI.ConsumeRateLimitingEmbedding/Program.cs"::: From 3efb1e0df745432dd14d024b3d41ef53908ae34d Mon Sep 17 00:00:00 2001 From: Genevieve Warren <24882762+gewarren@users.noreply.github.com> Date: Thu, 1 May 2025 10:52:34 -0700 Subject: [PATCH 11/12] Update docs/ai/toc.yml --- docs/ai/toc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ai/toc.yml b/docs/ai/toc.yml index 0ebbe18161046..ba1462cca3da3 100644 --- a/docs/ai/toc.yml +++ b/docs/ai/toc.yml @@ -5,7 +5,7 @@ items: href: overview.md - name: "Quickstart: Connect to and prompt an AI model" href: quickstarts/prompt-model.md -- name: AI frameworks and SDKs +- name: AI tools and SDKs items: - name: Overview href: dotnet-ai-ecosystem.md From 0b946d321ede4f383fc87d0f2d750808493ef891 Mon Sep 17 00:00:00 2001 From: Genevieve Warren <24882762+gewarren@users.noreply.github.com> Date: Fri, 2 May 2025 13:17:22 -0700 Subject: [PATCH 12/12] break up long code lines --- .../AI.Shared/RateLimitingChatClient.cs | 26 ++++++++++++++----- ...ngChatClientExtensions.OptionalOverload.cs | 10 +++++-- .../RateLimitingChatClientExtensions.cs | 8 ++++-- .../AI.Shared/SampleEmbeddingGenerator.cs | 11 +++++--- 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs index 1751e8f805b0a..f4cf2b2cd8df8 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClient.cs @@ -2,27 +2,39 @@ using System.Runtime.CompilerServices; using System.Threading.RateLimiting; -public sealed class RateLimitingChatClient(IChatClient innerClient, RateLimiter rateLimiter) : DelegatingChatClient(innerClient) +public sealed class RateLimitingChatClient( + IChatClient innerClient, RateLimiter rateLimiter) + : DelegatingChatClient(innerClient) { public override async Task GetResponseAsync( - IEnumerable messages, ChatOptions? options = null, CancellationToken cancellationToken = default) + IEnumerable messages, + ChatOptions? options = null, + CancellationToken cancellationToken = default) { - using var lease = await rateLimiter.AcquireAsync(permitCount: 1, cancellationToken).ConfigureAwait(false); + using var lease = await rateLimiter.AcquireAsync(permitCount: 1, cancellationToken) + .ConfigureAwait(false); if (!lease.IsAcquired) throw new InvalidOperationException("Unable to acquire lease."); - return await base.GetResponseAsync(messages, options, cancellationToken).ConfigureAwait(false); + return await base.GetResponseAsync(messages, options, cancellationToken) + .ConfigureAwait(false); } public override async IAsyncEnumerable GetStreamingResponseAsync( - IEnumerable messages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + IEnumerable messages, + ChatOptions? 
options = null, + [EnumeratorCancellation] CancellationToken cancellationToken = default) { - using var lease = await rateLimiter.AcquireAsync(permitCount: 1, cancellationToken).ConfigureAwait(false); + using var lease = await rateLimiter.AcquireAsync(permitCount: 1, cancellationToken) + .ConfigureAwait(false); if (!lease.IsAcquired) throw new InvalidOperationException("Unable to acquire lease."); - await foreach (var update in base.GetStreamingResponseAsync(messages, options, cancellationToken).ConfigureAwait(false)) + await foreach (var update in base.GetStreamingResponseAsync(messages, options, cancellationToken) + .ConfigureAwait(false)) + { yield return update; + } } protected override void Dispose(bool disposing) diff --git a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs index a56afd5c74a4d..8973a3391e482 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.OptionalOverload.cs @@ -7,7 +7,13 @@ public static class RateLimitingChatClientExtensions { - public static ChatClientBuilder UseRateLimiting(this ChatClientBuilder builder, RateLimiter? rateLimiter = null) => - builder.Use((innerClient, services) => new RateLimitingChatClient(innerClient, services.GetRequiredService())); + public static ChatClientBuilder UseRateLimiting( + this ChatClientBuilder builder, + RateLimiter? rateLimiter = null) => + builder.Use((innerClient, services) => + new RateLimitingChatClient( + innerClient, + services.GetRequiredService()) + ); } // diff --git a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.cs b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.cs index e9f3a865c2c31..9ab89f6227e44 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/RateLimitingChatClientExtensions.cs @@ -6,7 +6,11 @@ public static class RateLimitingChatClientExtensions { - public static ChatClientBuilder UseRateLimiting(this ChatClientBuilder builder, RateLimiter rateLimiter) => - builder.Use(innerClient => new RateLimitingChatClient(innerClient, rateLimiter)); + public static ChatClientBuilder UseRateLimiting( + this ChatClientBuilder builder, + RateLimiter rateLimiter) => + builder.Use(innerClient => + new RateLimitingChatClient(innerClient, rateLimiter) + ); } // diff --git a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/SampleEmbeddingGenerator.cs b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/SampleEmbeddingGenerator.cs index 420a2de9f52ab..ddf1e6b53aa28 100644 --- a/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/SampleEmbeddingGenerator.cs +++ b/docs/ai/snippets/microsoft-extensions-ai/AI.Shared/SampleEmbeddingGenerator.cs @@ -23,10 +23,13 @@ from value in values } public object? GetService(Type serviceType, object? serviceKey) => - serviceKey is not null ? null : - serviceType == typeof(EmbeddingGeneratorMetadata) ? _metadata : - serviceType?.IsInstanceOfType(this) is true ? this : - null; + serviceKey is not null + ? null + : serviceType == typeof(EmbeddingGeneratorMetadata) + ? _metadata + : serviceType?.IsInstanceOfType(this) is true + ? this + : null; void IDisposable.Dispose() { } }
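The reshaped extension methods in the patch above are easiest to read alongside a call site. A usage sketch follows, assuming the repo's `SampleChatClient` placeholder and a local test URI; the `ConcurrencyLimiter` settings mirror the values used in the rate-limiting snippets earlier in the series:

```csharp
using Microsoft.Extensions.AI;
using System.Threading.RateLimiting;

// Allow one in-flight request at a time; additional requests queue
// rather than failing immediately.
IChatClient client = new SampleChatClient(new Uri("http://localhost"), "test")
    .AsBuilder()
    .UseRateLimiting(new ConcurrencyLimiter(new()
    {
        PermitLimit = 1,
        QueueLimit = int.MaxValue,
    }))
    .Build();

Console.WriteLine(await client.GetResponseAsync("What is AI?"));
```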