Copyright (c) 2026 MindMesh Academy. All rights reserved. This content is proprietary and may not be reproduced or distributed without permission.
3.3.2. Retrieval and Grounding
The retrieval step finds relevant documents; grounding ensures the model uses that context rather than hallucinating.
RAG Pattern:
# Step 1: embed the user query so it can be compared against indexed documents.
query_vector = client.embeddings.create(
    model="text-embedding-ada-002",
    input=query,
).data[0].embedding

# Step 2: hybrid retrieval — keyword search combined with vector similarity.
hits = search_client.search(
    search_text=query,
    vector_queries=[VectorizedQuery(vector=query_vector, fields="contentVector")],
)

# Step 3: stitch the retrieved documents into a grounding context for the prompt.
grounding_context = "\n".join(doc["content"] for doc in hits)
prompt_messages = [
    {"role": "system", "content": f"Answer based on this context:\n{grounding_context}"},
    {"role": "user", "content": query},
]

# Step 4: ask the chat model to answer using the supplied context.
response = client.chat.completions.create(model="gpt-4o", messages=prompt_messages)
Azure OpenAI "On Your Data" (Built-in RAG):
# Azure OpenAI "On Your Data": the service performs retrieval against the
# configured Azure AI Search index before generating, so no manual RAG
# pipeline is needed in application code.
azure_search_source = {
    "type": "azure_search",
    "parameters": {
        "endpoint": search_endpoint,
        "index_name": index_name,
        "authentication": {"type": "api_key", "key": search_key},
    },
}
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What are our vacation policies?"}],
    extra_body={"data_sources": [azure_search_source]},
)
Error Handling Pattern:
import logging

from azure.core.exceptions import HttpResponseError
from openai import AzureOpenAI, APIError
def rag_query_with_fallback(query: str) -> str:
    """Answer *query* with RAG, degrading gracefully at each stage.

    Pipeline: embed the query -> retrieve context from the search index ->
    generate a grounded chat completion. Each stage has its own fallback:

    - embedding fails  -> fall back to keyword-only search
    - search fails     -> generate without retrieved context
    - generation fails -> return a static apology message

    Args:
        query: The user's natural-language question.

    Returns:
        The model's answer, or an apology string if generation failed.
    """
    embedding = _embed_query(query)
    context = _retrieve_context(query, embedding)
    return _generate_answer(query, context)


def _embed_query(query: str) -> list[float] | None:
    """Return the query embedding, or None if embedding generation fails."""
    try:
        return client.embeddings.create(
            model="text-embedding-ada-002",
            input=query,
        ).data[0].embedding
    except APIError as e:
        logging.error(f"Embedding generation failed: {e}")
        return None  # Signals the caller to fall back to keyword search only.


def _retrieve_context(query: str, embedding: list[float] | None) -> str:
    """Search for supporting documents; return "" if retrieval fails."""
    try:
        if embedding:
            # Hybrid search: keyword plus vector similarity.
            results = search_client.search(
                search_text=query,
                vector_queries=[VectorizedQuery(vector=embedding, fields="contentVector")],
            )
        else:
            # Fallback: keyword search without a vector component.
            results = search_client.search(search_text=query)
        # Join inside the try block: the results iterator is lazy and may
        # raise while being consumed.
        return "\n".join(doc["content"] for doc in results)
    except HttpResponseError as e:
        logging.error(f"Search failed: {e}")
        return ""  # Proceed without context (acknowledge limitation to user)


def _generate_answer(query: str, context: str) -> str:
    """Generate the final answer, grounded in *context* when available."""
    try:
        messages = [
            {"role": "system", "content": f"Answer based on: {context}" if context else "Answer to the best of your ability."},
            {"role": "user", "content": query},
        ]
        response = client.chat.completions.create(model="gpt-4o", messages=messages)
        return response.choices[0].message.content
    except APIError as e:
        logging.error(f"Generation failed: {e}")
        return "I'm sorry, I couldn't process your request. Please try again."
⚠️ Exam Trap: RAG implementations should handle failures at each stage (embedding, search, generation) independently with appropriate fallbacks.
Written by Alvin Varughese
Founder • 15 professional certifications