Azure OpenAI Service を活用した高精度RAGシステム構築ガイド
はじめに
RAG(Retrieval-Augmented Generation)は、大規模言語モデルに外部知識を統合することで、より正確で最新の情報に基づいた回答を生成する技術です。本記事では、Azure OpenAI ServiceとAzure AI Searchを組み合わせた、エンタープライズグレードのRAGシステム構築方法を解説します。
RAGアーキテクチャの設計
1. システム全体構成
graph TB
%% Online query path: user query → embedding → retrieval → context → generation.
A[User Query] --> B[Query Processor]
B --> C[Azure OpenAI Embeddings]
C --> D[Azure AI Search]
D --> E[Relevant Documents]
E --> F[Context Builder]
F --> G[Azure OpenAI GPT-4]
G --> H[Response Generator]
H --> I[User Response]
%% Offline ingestion path: documents are chunked, embedded and indexed into the same search index.
J[Document Sources] --> K[Document Processor]
K --> L[Chunking & Embedding]
L --> D
2. 基本実装
// RAG Service Implementation
using Azure;
using Azure.AI.OpenAI;
using Azure.Search.Documents;
using Azure.Search.Documents.Indexes;
using Azure.Search.Documents.Models;
/// <summary>
/// End-to-end RAG pipeline: embeds the query, retrieves candidates from Azure AI Search
/// (a single hybrid semantic + vector request), packs a token-bounded context, and
/// generates a grounded answer with the chat deployment.
/// NOTE(review): this snippet's using list omits System.Text (StringBuilder) and
/// System.Linq (Select/OrderByDescending) — presumably present in the full file.
/// </summary>
public class RAGService
{
    private readonly OpenAIClient _openAIClient;
    private readonly SearchClient _searchClient;
    // Azure OpenAI *deployment* names (must match the deployments created on the resource).
    private readonly string _embeddingDeployment = "text-embedding-ada-002";
    private readonly string _completionDeployment = "gpt-4";

    /// <summary>Wires up key-authenticated clients for Azure OpenAI and Azure AI Search.</summary>
    /// <param name="openAIEndpoint">Azure OpenAI resource endpoint URL.</param>
    /// <param name="openAIKey">Azure OpenAI API key.</param>
    /// <param name="searchEndpoint">Azure AI Search service endpoint URL.</param>
    /// <param name="searchKey">Azure AI Search admin/query key.</param>
    /// <param name="indexName">Name of the search index to query.</param>
    public RAGService(
        string openAIEndpoint,
        string openAIKey,
        string searchEndpoint,
        string searchKey,
        string indexName)
    {
        _openAIClient = new OpenAIClient(
            new Uri(openAIEndpoint),
            new AzureKeyCredential(openAIKey)
        );
        _searchClient = new SearchClient(
            new Uri(searchEndpoint),
            indexName,
            new AzureKeyCredential(searchKey)
        );
    }

    /// <summary>
    /// Runs the full RAG flow for one query: embed → retrieve → build context → complete.
    /// </summary>
    /// <param name="query">User question in natural language.</param>
    /// <param name="options">Tuning knobs; library defaults are used when null.</param>
    /// <returns>The generated answer plus the retrieved sources and the context used.</returns>
    public async Task<RAGResponse> GenerateResponseAsync(
        string query,
        RAGOptions options = null)
    {
        options ??= new RAGOptions();
        // Step 1: Generate embeddings for the query
        var queryEmbedding = await GenerateEmbeddingAsync(query);
        // Step 2: Search for relevant documents
        var searchResults = await SearchDocumentsAsync(
            queryEmbedding,
            options.TopK,
            options.MinRelevanceScore
        );
        // Step 3: Build context from search results
        var context = BuildContext(searchResults, options.MaxContextTokens);
        // Step 4: Generate response using GPT-4
        var response = await GenerateCompletionAsync(
            query,
            context,
            options
        );
        return new RAGResponse
        {
            Answer = response,
            // Surface the retrieved documents as citations alongside the answer.
            Sources = searchResults.Select(r => new Source
            {
                Title = r.Document.GetString("title"),
                Url = r.Document.GetString("url"),
                Excerpt = r.Document.GetString("content"),
                Score = r.Score ?? 0
            }).ToList(),
            Context = context
        };
    }

    /// <summary>Embeds <paramref name="text"/> with the configured embedding deployment.</summary>
    private async Task<float[]> GenerateEmbeddingAsync(string text)
    {
        var embeddingOptions = new EmbeddingsOptions(
            _embeddingDeployment,
            new[] { text }
        );
        var response = await _openAIClient.GetEmbeddingsAsync(embeddingOptions);
        return response.Value.Data[0].Embedding.ToArray();
    }

    /// <summary>
    /// Hybrid retrieval: one request combining semantic ranking and k-NN vector search
    /// over the "contentVector" field, followed by client-side score filtering.
    /// </summary>
    /// <param name="queryVector">Query embedding.</param>
    /// <param name="topK">Maximum number of results requested.</param>
    /// <param name="minScore">Results scoring below this are discarded.</param>
    private async Task<List<SearchResult<SearchDocument>>> SearchDocumentsAsync(
        float[] queryVector,
        int topK,
        double minScore)
    {
        var searchOptions = new SearchOptions
        {
            Select = { "id", "title", "content", "url", "metadata" },
            Size = topK,
            IncludeTotalCount = true,
            QueryType = SearchQueryType.Semantic,
            SemanticSearch = new()
            {
                // Requires a semantic configuration named "default" on the index.
                SemanticConfigurationName = "default",
                QueryCaption = new(QueryCaptionType.Extractive),
                QueryAnswer = new(QueryAnswerType.Extractive)
            },
            VectorSearch = new()
            {
                Queries = { new VectorizedQuery(queryVector)
                {
                    KNearestNeighborsCount = topK,
                    Fields = { "contentVector" }
                }}
            }
        };
        var searchResponse = await _searchClient.SearchAsync<SearchDocument>(
            searchText: null,
            searchOptions
        );
        var results = new List<SearchResult<SearchDocument>>();
        await foreach (var result in searchResponse.Value.GetResultsAsync())
        {
            // Drop weak matches so they never reach the prompt context.
            if (result.Score >= minScore)
            {
                results.Add(result);
            }
        }
        return results;
    }

    /// <summary>
    /// Greedily packs the highest-scoring documents into a "---"-delimited context string,
    /// stopping before the estimated token budget would be exceeded (no truncation).
    /// </summary>
    private string BuildContext(
        List<SearchResult<SearchDocument>> searchResults,
        int maxTokens)
    {
        var contextBuilder = new StringBuilder();
        var currentTokens = 0;
        foreach (var result in searchResults.OrderByDescending(r => r.Score))
        {
            var content = result.Document.GetString("content");
            var tokens = EstimateTokenCount(content);
            // A document that would overflow the budget ends the packing entirely.
            if (currentTokens + tokens > maxTokens)
            {
                break;
            }
            contextBuilder.AppendLine($"---");
            contextBuilder.AppendLine($"Source: {result.Document.GetString("title")}");
            contextBuilder.AppendLine($"Content: {content}");
            contextBuilder.AppendLine();
            currentTokens += tokens;
        }
        return contextBuilder.ToString();
    }

    /// <summary>Asks the chat deployment to answer using only the packed context.</summary>
    private async Task<string> GenerateCompletionAsync(
        string query,
        string context,
        RAGOptions options)
    {
        // Prompts are runtime strings sent to the model — kept verbatim (Japanese).
        var systemPrompt = @"
あなたは親切で正確な情報を提供するAIアシスタントです。
以下のコンテキスト情報を使用して、ユーザーの質問に答えてください。
重要なルール:
1. コンテキストに基づいて回答してください
2. コンテキストに情報がない場合は、その旨を明確に伝えてください
3. 推測や憶測は避け、事実に基づいた回答を心がけてください
4. 回答の根拠となる情報源を明示してください";
        var userPrompt = $@"
コンテキスト情報:
{context}
質問:{query}
上記のコンテキスト情報を使用して、質問に対する詳細で正確な回答を提供してください。";
        var completionOptions = new ChatCompletionsOptions
        {
            DeploymentName = _completionDeployment,
            Messages =
            {
                new ChatRequestSystemMessage(systemPrompt),
                new ChatRequestUserMessage(userPrompt)
            },
            Temperature = options.Temperature,
            MaxTokens = options.MaxResponseTokens,
            NucleusSamplingFactor = options.TopP,
            FrequencyPenalty = options.FrequencyPenalty,
            PresencePenalty = options.PresencePenalty
        };
        var response = await _openAIClient.GetChatCompletionsAsync(completionOptions);
        return response.Value.Choices[0].Message.Content;
    }

    /// <summary>Cheap token estimate used only for context packing.</summary>
    private int EstimateTokenCount(string text)
    {
        // NOTE(review): chars/4 is an English-text heuristic; Japanese averages closer to
        // one token per character, so this likely under-estimates — confirm with tiktoken.
        return text.Length / 4;
    }
}
// Configuration classes
/// <summary>Tuning knobs for a single RAG request.</summary>
public class RAGOptions
{
    /// <summary>Maximum number of documents to retrieve.</summary>
    public int TopK { get; set; } = 5;
    /// <summary>Search results scoring below this are discarded.</summary>
    public double MinRelevanceScore { get; set; } = 0.7;
    /// <summary>Estimated-token budget for the packed prompt context.</summary>
    public int MaxContextTokens { get; set; } = 2000;
    /// <summary>Sampling temperature for the completion model (low = factual).</summary>
    public float Temperature { get; set; } = 0.3f;
    /// <summary>Maximum tokens the model may generate for the answer.</summary>
    public int MaxResponseTokens { get; set; } = 1000;
    /// <summary>Nucleus (top-p) sampling factor.</summary>
    public float TopP { get; set; } = 0.95f;
    /// <summary>Penalty discouraging verbatim repetition; 0 disables.</summary>
    public float FrequencyPenalty { get; set; } = 0;
    /// <summary>Penalty encouraging new topics; 0 disables.</summary>
    public float PresencePenalty { get; set; } = 0;
}
/// <summary>Result of one RAG request: the answer plus the evidence behind it.</summary>
public class RAGResponse
{
    /// <summary>Model-generated answer text.</summary>
    public string Answer { get; set; }
    /// <summary>Documents retrieved for this query, in search order.</summary>
    public List<Source> Sources { get; set; }
    /// <summary>The exact context string that was sent to the model.</summary>
    public string Context { get; set; }
}
/// <summary>A single retrieved document surfaced as a citation.</summary>
public class Source
{
    public string Title { get; set; }
    public string Url { get; set; }
    /// <summary>Document content as stored in the index ("content" field).</summary>
    public string Excerpt { get; set; }
    /// <summary>Search relevance score (0 when the service returned none).</summary>
    public double Score { get; set; }
}
ドキュメント処理パイプライン
1. インテリジェントなチャンキング
# document_processor.py
import tiktoken
from typing import List, Dict, Tuple
import re
from dataclasses import dataclass
import numpy as np
@dataclass
class Chunk:
    """A contiguous slice of a source document plus retrieval bookkeeping."""
    content: str       # chunk text
    metadata: Dict     # document-level metadata propagated onto every chunk
    start_index: int   # character offset of the chunk start in the original text
    end_index: int     # character offset just past the chunk end
    token_count: int   # token count as measured by the tiktoken encoder
class IntelligentChunker:
    """Token-aware document chunker that tries to keep semantic units
    (headers, paragraphs, sentences) intact and carries token overlap
    between consecutive chunks.
    """

    def __init__(
        self,
        model_name: str = "gpt-4",
        chunk_size: int = 512,
        chunk_overlap: int = 128
    ):
        # Tokenizer matching the target model so chunk sizes line up with its context window.
        self.encoder = tiktoken.encoding_for_model(model_name)
        self.chunk_size = chunk_size        # max tokens per chunk
        self.chunk_overlap = chunk_overlap  # tokens of trailing context repeated in the next chunk

    def chunk_document(
        self,
        text: str,
        metadata: Dict
    ) -> List[Chunk]:
        """
        Intelligently chunk document while preserving semantic boundaries
        """
        # Split into paragraphs first
        paragraphs = self._split_paragraphs(text)
        chunks = []
        current_chunk = []   # paragraphs accumulated for the chunk being built
        current_tokens = 0
        start_index = 0      # running character offset into the original text
        # NOTE(review): the character offsets are approximate — _split_paragraphs
        # strips/merges whitespace and overlap paragraphs are counted again, so
        # start_index - len(chunk_content) need not match the original text exactly;
        # confirm before using offsets for highlighting.
        for i, paragraph in enumerate(paragraphs):
            paragraph_tokens = len(self.encoder.encode(paragraph))
            # If paragraph itself is too large, split it
            if paragraph_tokens > self.chunk_size:
                sub_chunks = self._split_large_paragraph(
                    paragraph,
                    start_index
                )
                chunks.extend(sub_chunks)
                start_index += len(paragraph)
                continue
            # If adding paragraph exceeds chunk size, create new chunk
            if current_tokens + paragraph_tokens > self.chunk_size:
                if current_chunk:
                    chunk_content = '\n\n'.join(current_chunk)
                    chunks.append(Chunk(
                        content=chunk_content,
                        metadata=metadata,
                        start_index=start_index - len(chunk_content),
                        end_index=start_index,
                        token_count=current_tokens
                    ))
                # Start new chunk with overlap
                overlap_chunks = self._get_overlap_chunks(
                    current_chunk,
                    self.chunk_overlap
                )
                current_chunk = overlap_chunks + [paragraph]
                # Re-count tokens: the overlap paragraphs are now part of this chunk too.
                current_tokens = sum(
                    len(self.encoder.encode(c)) for c in current_chunk
                )
            else:
                current_chunk.append(paragraph)
                current_tokens += paragraph_tokens
            start_index += len(paragraph) + 2  # Account for \n\n
        # Add final chunk
        if current_chunk:
            chunk_content = '\n\n'.join(current_chunk)
            chunks.append(Chunk(
                content=chunk_content,
                metadata=metadata,
                start_index=start_index - len(chunk_content),
                end_index=start_index,
                token_count=current_tokens
            ))
        return chunks

    def _split_paragraphs(self, text: str) -> List[str]:
        """Split text into semantic paragraphs"""
        # Preserve headers
        lines = text.split('\n')
        paragraphs = []
        current_paragraph = []
        for line in lines:
            line = line.strip()
            # Check if line is a header
            if self._is_header(line):
                # A header terminates the running paragraph and stands alone.
                if current_paragraph:
                    paragraphs.append('\n'.join(current_paragraph))
                    current_paragraph = []
                paragraphs.append(line)
            elif line:
                current_paragraph.append(line)
            elif current_paragraph:
                # Blank line ends the current paragraph.
                paragraphs.append('\n'.join(current_paragraph))
                current_paragraph = []
        if current_paragraph:
            paragraphs.append('\n'.join(current_paragraph))
        return [p for p in paragraphs if p.strip()]

    def _is_header(self, line: str) -> bool:
        """Detect if line is a header"""
        header_patterns = [
            r'^#{1,6}\s',        # Markdown headers
            r'^[A-Z][^.!?]*:$',  # Title case with colon
            r'^\d+\.\s+[A-Z]',   # Numbered sections
            r'^[A-Z][A-Z\s]+$'   # All caps
        ]
        return any(re.match(pattern, line) for pattern in header_patterns)

    def _split_large_paragraph(
        self,
        paragraph: str,
        start_index: int
    ) -> List[Chunk]:
        """Split large paragraphs at sentence boundaries"""
        # NOTE(review): sub-chunks are created with empty metadata, unlike
        # chunk_document's chunks which carry the document metadata — verify intent.
        sentences = self._split_sentences(paragraph)
        chunks = []
        current_sentences = []
        current_tokens = 0
        for sentence in sentences:
            sentence_tokens = len(self.encoder.encode(sentence))
            if current_tokens + sentence_tokens > self.chunk_size:
                if current_sentences:
                    chunk_content = ' '.join(current_sentences)
                    chunks.append(Chunk(
                        content=chunk_content,
                        metadata={},
                        start_index=start_index,
                        end_index=start_index + len(chunk_content),
                        token_count=current_tokens
                    ))
                    start_index += len(chunk_content) + 1
                current_sentences = [sentence]
                current_tokens = sentence_tokens
            else:
                current_sentences.append(sentence)
                current_tokens += sentence_tokens
        # Flush the trailing sentences as the final sub-chunk.
        if current_sentences:
            chunk_content = ' '.join(current_sentences)
            chunks.append(Chunk(
                content=chunk_content,
                metadata={},
                start_index=start_index,
                end_index=start_index + len(chunk_content),
                token_count=current_tokens
            ))
        return chunks

    def _split_sentences(self, text: str) -> List[str]:
        """Split text into sentences"""
        # Simple sentence splitter - use spaCy for production
        sentences = re.split(r'(?<=[.!?])\s+', text)
        return [s.strip() for s in sentences if s.strip()]

    def _get_overlap_chunks(
        self,
        chunks: List[str],
        overlap_tokens: int
    ) -> List[str]:
        """Get chunks for overlap from the end"""
        # Walk backwards from the last paragraph, taking whole paragraphs
        # until the overlap token budget would be exceeded.
        overlap_chunks = []
        total_tokens = 0
        for chunk in reversed(chunks):
            chunk_tokens = len(self.encoder.encode(chunk))
            if total_tokens + chunk_tokens <= overlap_tokens:
                overlap_chunks.insert(0, chunk)
                total_tokens += chunk_tokens
            else:
                break
        return overlap_chunks
2. メタデータ抽出とエンリッチメント
// Document Enrichment Service
/// <summary>
/// Enriches raw documents with LLM-derived metadata (entities, summary, keywords,
/// classification, synthetic Q&amp;A pairs) to improve downstream retrieval.
/// NOTE(review): ExtractKeywordsAsync and ClassifyDocumentAsync are called below but
/// not defined in this snippet — presumably implemented elsewhere in the file.
/// </summary>
public class DocumentEnrichmentService
{
    private readonly OpenAIClient _openAIClient;
    private readonly ILogger<DocumentEnrichmentService> _logger;

    /// <summary>
    /// Runs each enrichment step that is enabled in <paramref name="options"/> and
    /// mirrors the results into the document's metadata dictionary.
    /// </summary>
    public async Task<EnrichedDocument> EnrichDocumentAsync(
        Document document,
        EnrichmentOptions options)
    {
        var enrichedDoc = new EnrichedDocument
        {
            Id = document.Id,
            OriginalContent = document.Content,
            Metadata = document.Metadata ?? new Dictionary<string, object>()
        };
        // Extract key entities
        if (options.ExtractEntities)
        {
            var entities = await ExtractEntitiesAsync(document.Content);
            enrichedDoc.Entities = entities;
            enrichedDoc.Metadata["entities"] = entities;
        }
        // Generate summary
        if (options.GenerateSummary)
        {
            var summary = await GenerateSummaryAsync(document.Content);
            enrichedDoc.Summary = summary;
            enrichedDoc.Metadata["summary"] = summary;
        }
        // Extract keywords
        if (options.ExtractKeywords)
        {
            var keywords = await ExtractKeywordsAsync(document.Content);
            enrichedDoc.Keywords = keywords;
            enrichedDoc.Metadata["keywords"] = keywords;
        }
        // Classify document
        if (options.ClassifyDocument)
        {
            var classification = await ClassifyDocumentAsync(document.Content);
            enrichedDoc.Classification = classification;
            enrichedDoc.Metadata["category"] = classification.Category;
            enrichedDoc.Metadata["confidence"] = classification.Confidence;
        }
        // Generate Q&A pairs for better retrieval
        if (options.GenerateQAPairs)
        {
            var qaPairs = await GenerateQAPairsAsync(document.Content);
            enrichedDoc.QAPairs = qaPairs;
        }
        return enrichedDoc;
    }

    /// <summary>
    /// Asks the model (JSON mode, low temperature) to extract named entities.
    /// The "{content}" placeholder in the verbatim prompt is substituted via Replace.
    /// </summary>
    private async Task<List<Entity>> ExtractEntitiesAsync(string content)
    {
        var prompt = @"
以下のテキストから重要なエンティティ(人名、組織名、場所、日付、技術用語など)を抽出してください。
JSON形式で出力してください。
テキスト:
{content}
出力形式:
{
""entities"": [
{
""text"": ""エンティティテキスト"",
""type"": ""PERSON|ORGANIZATION|LOCATION|DATE|TECHNOLOGY|OTHER"",
""context"": ""エンティティが現れる文脈""
}
]
}";
        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are an entity extraction expert."),
                    new ChatRequestUserMessage(prompt.Replace("{content}", content))
                },
                Temperature = 0.1f,
                ResponseFormat = new ChatCompletionsResponseFormatJSON()
            }
        );
        var json = response.Value.Choices[0].Message.Content;
        // NOTE(review): System.Text.Json matching is case-sensitive by default, but the
        // prompt requests lower-case keys ("entities", "text", ...) — verify that
        // EntityExtractionResult carries [JsonPropertyName] attributes or that
        // PropertyNameCaseInsensitive options are passed here.
        var result = JsonSerializer.Deserialize<EntityExtractionResult>(json);
        return result.Entities;
    }

    /// <summary>Generates a short bullet-point summary (capped at 300 tokens).</summary>
    private async Task<string> GenerateSummaryAsync(string content)
    {
        var prompt = @"
以下のテキストの要約を作成してください。
重要なポイントを箇条書きで3-5個挙げてください。
テキスト:
{content}
要約:";
        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are a summarization expert."),
                    new ChatRequestUserMessage(prompt.Replace("{content}", content))
                },
                Temperature = 0.3f,
                MaxTokens = 300
            }
        );
        return response.Value.Choices[0].Message.Content;
    }

    /// <summary>
    /// Generates synthetic question/answer pairs for the document so user questions can
    /// match against them during retrieval.
    /// </summary>
    private async Task<List<QAPair>> GenerateQAPairsAsync(string content)
    {
        var prompt = @"
以下のテキストから、ユーザーが尋ねそうな質問と回答のペアを3-5個生成してください。
JSON形式で出力してください。
テキスト:
{content}
出力形式:
{
""qa_pairs"": [
{
""question"": ""質問文"",
""answer"": ""回答文"",
""relevance_score"": 0.9
}
]
}";
        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are a Q&A generation expert."),
                    new ChatRequestUserMessage(prompt.Replace("{content}", content))
                },
                Temperature = 0.5f,
                ResponseFormat = new ChatCompletionsResponseFormatJSON()
            }
        );
        var json = response.Value.Choices[0].Message.Content;
        // NOTE(review): same case-sensitivity concern as ExtractEntitiesAsync — the prompt
        // asks for "qa_pairs"/"relevance_score" (snake_case) keys.
        var result = JsonSerializer.Deserialize<QAGenerationResult>(json);
        return result.QAPairs;
    }
}
// Models
/// <summary>A document plus all LLM-derived enrichments produced by DocumentEnrichmentService.</summary>
public class EnrichedDocument
{
    public string Id { get; set; }
    /// <summary>Unmodified source content.</summary>
    public string OriginalContent { get; set; }
    /// <summary>Bullet-point summary (set when summary generation is enabled).</summary>
    public string Summary { get; set; }
    public List<Entity> Entities { get; set; }
    public List<string> Keywords { get; set; }
    public Classification Classification { get; set; }
    /// <summary>Synthetic Q&amp;A pairs used to improve retrieval matching.</summary>
    public List<QAPair> QAPairs { get; set; }
    /// <summary>Metadata bag; enrichment results are mirrored here as well.</summary>
    public Dictionary<string, object> Metadata { get; set; }
}
/// <summary>
/// A named entity extracted from document text.
/// NOTE(review): deserialized from model JSON with lower-case keys — verify property-name
/// mapping (attributes or case-insensitive options) is configured.
/// </summary>
public class Entity
{
    public string Text { get; set; }
    /// <summary>One of PERSON | ORGANIZATION | LOCATION | DATE | TECHNOLOGY | OTHER.</summary>
    public string Type { get; set; }
    /// <summary>Surrounding text where the entity appears.</summary>
    public string Context { get; set; }
}
/// <summary>
/// A synthetic question/answer pair generated for retrieval matching.
/// NOTE(review): deserialized from model JSON with snake_case keys ("relevance_score") —
/// verify property-name mapping is configured.
/// </summary>
public class QAPair
{
    public string Question { get; set; }
    public string Answer { get; set; }
    public double RelevanceScore { get; set; }
}
高度な検索戦略
1. ハイブリッド検索の実装
// Hybrid Search Implementation
/// <summary>
/// Runs several retrieval strategies (vector / keyword / semantic / optional Q&amp;A
/// matching) in parallel, fuses their weighted scores per document, and optionally
/// re-ranks the top candidates with an LLM "cross-encoder" before returning the final
/// ranked list.
/// NOTE(review): ExecuteSemanticSearchAsync, ExecuteQAMatchingAsync, ExpandQueryAsync and
/// GenerateEmbeddingAsync are referenced but not defined in this snippet.
/// </summary>
public class HybridSearchService
{
    private readonly SearchClient _searchClient;
    private readonly OpenAIClient _openAIClient;
    private readonly IMemoryCache _cache;

    /// <summary>
    /// Executes all enabled search strategies concurrently, then fuses and re-ranks
    /// their results.
    /// </summary>
    /// <param name="query">User query text.</param>
    /// <param name="options">Strategy weights, TopK, and re-ranking switches.</param>
    public async Task<HybridSearchResults> SearchAsync(
        string query,
        HybridSearchOptions options)
    {
        // Parallel execution of different search strategies
        var searchTasks = new List<Task<SearchStrategy>>();
        // 1. Vector search
        searchTasks.Add(ExecuteVectorSearchAsync(query, options));
        // 2. Keyword search
        searchTasks.Add(ExecuteKeywordSearchAsync(query, options));
        // 3. Semantic search
        searchTasks.Add(ExecuteSemanticSearchAsync(query, options));
        // 4. Q&A matching (if enabled)
        if (options.EnableQAMatching)
        {
            searchTasks.Add(ExecuteQAMatchingAsync(query, options));
        }
        var searchResults = await Task.WhenAll(searchTasks);
        // Combine and re-rank results
        var combinedResults = await CombineAndRerankAsync(
            searchResults,
            query,
            options
        );
        return new HybridSearchResults
        {
            Results = combinedResults,
            SearchStrategiesUsed = searchResults.Select(r => r.Name).ToList(),
            TotalMatches = combinedResults.Count,
            Query = query
        };
    }

    /// <summary>k-NN vector search over the "contentVector" field; over-fetches 2x TopK
    /// so the fusion step has more candidates to re-rank.</summary>
    private async Task<SearchStrategy> ExecuteVectorSearchAsync(
        string query,
        HybridSearchOptions options)
    {
        var queryEmbedding = await GenerateEmbeddingAsync(query);
        var searchOptions = new SearchOptions
        {
            Size = options.TopK * 2, // Get more for re-ranking
            VectorSearch = new()
            {
                Queries = { new VectorizedQuery(queryEmbedding)
                {
                    KNearestNeighborsCount = options.TopK * 2,
                    Fields = { "contentVector" }
                }}
            }
        };
        var results = await _searchClient.SearchAsync<SearchDocument>(
            searchText: null,
            searchOptions
        );
        var documents = new List<RankedDocument>();
        await foreach (var result in results.Value.GetResultsAsync())
        {
            documents.Add(new RankedDocument
            {
                Document = result.Document,
                Score = result.Score ?? 0,
                SearchType = "vector"
            });
        }
        return new SearchStrategy
        {
            Name = "VectorSearch",
            Weight = options.VectorSearchWeight,
            Results = documents
        };
    }

    /// <summary>Full-text keyword search with synonym expansion, a boosting scoring
    /// profile, and hit highlighting.</summary>
    private async Task<SearchStrategy> ExecuteKeywordSearchAsync(
        string query,
        HybridSearchOptions options)
    {
        // Expand query with synonyms
        var expandedQuery = await ExpandQueryAsync(query);
        var searchOptions = new SearchOptions
        {
            Size = options.TopK * 2,
            QueryType = SearchQueryType.Full,
            SearchMode = SearchMode.All,
            // Requires a scoring profile named "keywordBoost" on the index.
            ScoringProfile = "keywordBoost",
            HighlightFields = { "content" },
            HighlightPreTag = "<mark>",
            HighlightPostTag = "</mark>"
        };
        var results = await _searchClient.SearchAsync<SearchDocument>(
            expandedQuery,
            searchOptions
        );
        var documents = new List<RankedDocument>();
        await foreach (var result in results.Value.GetResultsAsync())
        {
            documents.Add(new RankedDocument
            {
                Document = result.Document,
                Score = result.Score ?? 0,
                SearchType = "keyword",
                Highlights = result.Highlights
            });
        }
        return new SearchStrategy
        {
            Name = "KeywordSearch",
            Weight = options.KeywordSearchWeight,
            Results = documents
        };
    }

    /// <summary>
    /// Fuses per-strategy scores (weighted sum keyed by document id), optionally applies
    /// LLM cross-encoder re-ranking to the top candidates, and returns the final TopK.
    /// </summary>
    private async Task<List<RankedDocument>> CombineAndRerankAsync(
        SearchStrategy[] strategies,
        string query,
        HybridSearchOptions options)
    {
        // Collect all unique documents, accumulating each strategy's weighted score.
        var documentScores = new Dictionary<string, DocumentScore>();
        foreach (var strategy in strategies)
        {
            foreach (var result in strategy.Results)
            {
                var docId = result.Document.GetString("id");
                if (!documentScores.ContainsKey(docId))
                {
                    documentScores[docId] = new DocumentScore
                    {
                        Document = result.Document,
                        Highlights = result.Highlights
                    };
                }
                // Apply strategy weight
                var weightedScore = result.Score * strategy.Weight;
                documentScores[docId].Scores[strategy.Name] = weightedScore;
                documentScores[docId].TotalScore += weightedScore;
            }
        }
        // Apply cross-encoder re-ranking for top candidates
        var topCandidates = documentScores.Values
            .OrderByDescending(d => d.TotalScore)
            .Take(options.RerankTopK)
            .ToList();
        if (options.EnableCrossEncoderReranking)
        {
            await RerankWithCrossEncoderAsync(topCandidates, query);
        }
        else
        {
            // FIX: FinalScore was never assigned when re-ranking is disabled, so the
            // OrderByDescending(d => d.FinalScore) below sorted every candidate on the
            // default value (0) and returned an effectively arbitrary order. Fall back
            // to the fused weighted score.
            foreach (var candidate in topCandidates)
            {
                candidate.FinalScore = candidate.TotalScore;
            }
        }
        // Final ranking
        return topCandidates
            .OrderByDescending(d => d.FinalScore)
            .Take(options.TopK)
            .Select(d => new RankedDocument
            {
                Document = d.Document,
                Score = d.FinalScore,
                SearchType = "hybrid",
                Highlights = d.Highlights,
                ScoreBreakdown = d.Scores
            })
            .ToList();
    }

    /// <summary>
    /// Scores each candidate against the query with the LLM and blends that relevance
    /// score (70%) with the fused retrieval score (30%) into FinalScore.
    /// </summary>
    private async Task RerankWithCrossEncoderAsync(
        List<DocumentScore> candidates,
        string query)
    {
        var rerankingTasks = candidates.Select(async candidate =>
        {
            var content = candidate.Document.GetString("content");
            var relevanceScore = await CalculateRelevanceScoreAsync(query, content);
            candidate.CrossEncoderScore = relevanceScore;
            candidate.FinalScore = candidate.TotalScore * 0.3 + relevanceScore * 0.7;
        });
        await Task.WhenAll(rerankingTasks);
    }

    /// <summary>
    /// Asks the model for a 0–1 relevance score of a (query, document) pair; only the
    /// first 1000 characters of the document are sent. Returns 0.5 when the model's
    /// output is not parseable as a number.
    /// </summary>
    private async Task<double> CalculateRelevanceScoreAsync(
        string query,
        string document)
    {
        var prompt = $@"
質問とドキュメントの関連性を0から1のスコアで評価してください。
質問: {query}
ドキュメント: {document.Substring(0, Math.Min(1000, document.Length))}
スコア(0-1):";
        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are a relevance scoring expert. Output only a number between 0 and 1."),
                    new ChatRequestUserMessage(prompt)
                },
                Temperature = 0,
                MaxTokens = 10
            }
        );
        if (double.TryParse(response.Value.Choices[0].Message.Content, out var score))
        {
            // Clamp defensively in case the model replies outside [0, 1].
            return Math.Max(0, Math.Min(1, score));
        }
        return 0.5; // Default score
    }
}
2. クエリ理解と拡張
# query_understanding.py
import json
from dataclasses import dataclass
from typing import Any, Dict, List, Tuple

import openai
@dataclass
class QueryIntent:
    """Structured interpretation of a user query."""
    intent_type: str       # factual, analytical, navigational, transactional
    entities: List[Dict]   # extracted entities, each like {"text": ..., "type": ...}
    temporal_context: str  # past, present, future, timeless
    required_depth: str    # shallow, medium, deep
    language: str          # e.g. "ja", "en"


class QueryUnderstandingService:
    """LLM-backed query understanding: intent analysis, query expansion, and
    derivation of structured search filters from the analyzed intent."""

    def __init__(self, openai_client):
        # Async OpenAI-style client providing chat.completions.create.
        self.client = openai_client

    async def analyze_query(self, query: str) -> QueryIntent:
        """
        Analyze user query to understand intent and context
        """
        prompt = f"""
Analyze the following query and extract:
1. Intent type (factual/analytical/navigational/transactional)
2. Key entities and their types
3. Temporal context
4. Required depth of answer
5. Language
Query: {query}
Output in JSON format.
"""
        response = await self.client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a query analysis expert."},
                {"role": "user", "content": prompt}
            ],
            temperature=0,
            response_format={"type": "json_object"}
        )
        analysis = json.loads(response.choices[0].message.content)
        # Fall back to safe defaults for any field the model omitted.
        return QueryIntent(
            intent_type=analysis.get("intent_type", "factual"),
            entities=analysis.get("entities", []),
            temporal_context=analysis.get("temporal_context", "timeless"),
            required_depth=analysis.get("required_depth", "medium"),
            language=analysis.get("language", "ja")
        )

    async def expand_query(
        self,
        query: str,
        intent: QueryIntent
    ) -> List[str]:
        """
        Expand query with variations and related terms
        """
        expansion_prompt = f"""
Given the query: "{query}"
Intent: {intent.intent_type}
Entities: {intent.entities}
Generate 3-5 query variations that capture the same intent but with different phrasings.
Include relevant synonyms and related terms.
Output as JSON array of strings.
"""
        response = await self.client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a query expansion expert."},
                {"role": "user", "content": expansion_prompt}
            ],
            temperature=0.3,
            response_format={"type": "json_object"}
        )
        expansions = json.loads(response.choices[0].message.content)
        # If the model's JSON lacks a "queries" list, fall back to the original query.
        return expansions.get("queries", [query])

    async def generate_filters(
        self,
        query: str,
        intent: QueryIntent
    ) -> Dict[str, Any]:
        """
        Generate search filters based on query understanding
        """
        # FIX: return type was annotated Dict[str, any] — `any` is the builtin
        # function, not a type; the correct annotation is typing.Any.
        filters: Dict[str, Any] = {}
        # Temporal filters
        if intent.temporal_context != "timeless":
            filters["temporal_filter"] = self._get_temporal_filter(
                intent.temporal_context
            )
        # Entity-based filters
        for entity in intent.entities:
            if entity["type"] == "ORGANIZATION":
                filters["organization"] = entity["text"]
            elif entity["type"] == "TECHNOLOGY":
                filters["technology_stack"] = entity["text"]
            elif entity["type"] == "DATE":
                # NOTE(review): _parse_date_entity is not defined in this snippet.
                filters["date_range"] = self._parse_date_entity(entity["text"])
        # Depth-based filters
        if intent.required_depth == "deep":
            filters["min_content_length"] = 1000
            filters["include_technical_details"] = True
        elif intent.required_depth == "shallow":
            filters["max_content_length"] = 500
            filters["summary_only"] = True
        return filters

    def _get_temporal_filter(self, context: str) -> Dict:
        """Generate temporal filter based on context.

        past    -> the window from 1 year ago up to 1 month ago
        present -> the last 30 days
        future  -> from now on, with predictions included
        other   -> no filter ({})
        """
        from datetime import datetime, timedelta
        now = datetime.now()
        if context == "past":
            return {
                "end_date": now - timedelta(days=30),
                "start_date": now - timedelta(days=365)
            }
        elif context == "present":
            return {
                "start_date": now - timedelta(days=30),
                "end_date": now
            }
        elif context == "future":
            return {
                "start_date": now,
                "include_predictions": True
            }
        return {}
回答生成の最適化
1. コンテキスト管理と回答品質向上
// Advanced Response Generation
/// <summary>
/// Generates answers from ranked retrieval results with extra quality machinery:
/// structured token-budgeted context, optional function calling, fact-check-and-retry,
/// inline citations, and follow-up question suggestions.
/// NOTE(review): several helpers used below (CalculateConfidence, ExtractSources,
/// CountTokens, EstimateTokens, TruncateToTokenLimit, GetAvailableFunctions,
/// ExecuteFunctionCallAsync, FactCheckResponseAsync, EnhanceReadabilityAsync,
/// IsContentMatch) are not defined in this snippet.
/// </summary>
public class AdvancedResponseGenerator
{
    private readonly OpenAIClient _openAIClient;
    private readonly IMemoryCache _cache;
    private readonly ILogger<AdvancedResponseGenerator> _logger;

    /// <summary>
    /// Full generation pipeline: build context → generate → validate/enhance →
    /// add citations → attach confidence, sources, follow-ups and metadata.
    /// </summary>
    public async Task<EnhancedResponse> GenerateResponseAsync(
        string query,
        List<RankedDocument> documents,
        ResponseGenerationOptions options)
    {
        // Build structured context
        var context = BuildStructuredContext(documents, options);
        // Generate initial response
        var response = await GenerateInitialResponseAsync(
            query,
            context,
            options
        );
        // Validate and enhance response
        var enhancedResponse = await EnhanceResponseAsync(
            response,
            query,
            documents,
            options
        );
        // Add citations
        var citedResponse = AddCitations(enhancedResponse, documents);
        return new EnhancedResponse
        {
            Answer = citedResponse,
            Confidence = CalculateConfidence(documents),
            Sources = ExtractSources(documents),
            FollowUpQuestions = await GenerateFollowUpQuestionsAsync(
                query,
                citedResponse
            ),
            Metadata = new ResponseMetadata
            {
                TokensUsed = CountTokens(citedResponse),
                ResponseTime = DateTime.UtcNow,
                Model = options.Model
            }
        };
    }

    /// <summary>
    /// Groups documents by their "type" field, orders groups by best score, and greedily
    /// packs document contents (truncating the last one) until the token budget is spent.
    /// </summary>
    private StructuredContext BuildStructuredContext(
        List<RankedDocument> documents,
        ResponseGenerationOptions options)
    {
        var context = new StructuredContext();
        var tokenBudget = options.MaxContextTokens;
        var usedTokens = 0;
        // Group documents by relevance and type
        var groupedDocs = documents
            .GroupBy(d => d.Document.GetString("type") ?? "general")
            .OrderByDescending(g => g.Max(d => d.Score));
        foreach (var group in groupedDocs)
        {
            var section = new ContextSection
            {
                Type = group.Key,
                Documents = new List<ContextDocument>()
            };
            foreach (var doc in group.OrderByDescending(d => d.Score))
            {
                var content = doc.Document.GetString("content");
                var tokens = EstimateTokens(content);
                if (usedTokens + tokens > tokenBudget)
                {
                    // Truncate content to fit
                    content = TruncateToTokenLimit(
                        content,
                        tokenBudget - usedTokens
                    );
                    tokens = tokenBudget - usedTokens;
                }
                section.Documents.Add(new ContextDocument
                {
                    Id = doc.Document.GetString("id"),
                    Title = doc.Document.GetString("title"),
                    Content = content,
                    Score = doc.Score,
                    Metadata = doc.Document.GetObject("metadata")
                });
                usedTokens += tokens;
                if (usedTokens >= tokenBudget)
                    break;
            }
            context.Sections.Add(section);
            if (usedTokens >= tokenBudget)
                break;
        }
        context.TotalTokens = usedTokens;
        return context;
    }

    /// <summary>
    /// First-pass generation. When function calling is enabled and the model requests a
    /// function, the call is executed and its result fed back for a second completion.
    /// </summary>
    private async Task<string> GenerateInitialResponseAsync(
        string query,
        StructuredContext context,
        ResponseGenerationOptions options)
    {
        var systemPrompt = BuildSystemPrompt(options);
        var userPrompt = BuildUserPrompt(query, context);
        var completionOptions = new ChatCompletionsOptions
        {
            DeploymentName = options.Model,
            Messages =
            {
                new ChatRequestSystemMessage(systemPrompt),
                new ChatRequestUserMessage(userPrompt)
            },
            Temperature = options.Temperature,
            MaxTokens = options.MaxResponseTokens,
            Functions = GetAvailableFunctions(options),
            FunctionCall = options.EnableFunctionCalling
                ? FunctionCallOption.Auto
                : FunctionCallOption.None
        };
        var response = await _openAIClient.GetChatCompletionsAsync(completionOptions);
        // Handle function calls if any
        if (response.Value.Choices[0].Message.FunctionCall != null)
        {
            var functionResult = await ExecuteFunctionCallAsync(
                response.Value.Choices[0].Message.FunctionCall
            );
            completionOptions.Messages.Add(
                new ChatRequestFunctionMessage(
                    response.Value.Choices[0].Message.FunctionCall.Name,
                    functionResult
                )
            );
            response = await _openAIClient.GetChatCompletionsAsync(completionOptions);
        }
        return response.Value.Choices[0].Message.Content;
    }

    /// <summary>
    /// Assembles the (Japanese) system prompt; the guideline set varies with the
    /// requested response style and confidence-level options.
    /// </summary>
    private string BuildSystemPrompt(ResponseGenerationOptions options)
    {
        var prompt = new StringBuilder();
        prompt.AppendLine("あなたは正確で信頼性の高い情報を提供するAIアシスタントです。");
        prompt.AppendLine();
        prompt.AppendLine("回答する際の重要なガイドライン:");
        prompt.AppendLine("1. 提供されたコンテキスト情報のみに基づいて回答する");
        prompt.AppendLine("2. 情報源を明確に示す([1], [2]などの参照番号を使用)");
        prompt.AppendLine("3. 確信が持てない場合は、その旨を明記する");
        prompt.AppendLine("4. 段階的で論理的な説明を心がける");
        if (options.ResponseStyle == ResponseStyle.Technical)
        {
            prompt.AppendLine("5. 技術的な詳細を含め、専門用語を適切に使用する");
        }
        else if (options.ResponseStyle == ResponseStyle.Simple)
        {
            prompt.AppendLine("5. 専門用語を避け、分かりやすい説明を心がける");
        }
        if (options.IncludeConfidenceLevel)
        {
            prompt.AppendLine("6. 回答の確信度を明示する(高/中/低)");
        }
        return prompt.ToString();
    }

    /// <summary>
    /// Fact-checks the draft against the source documents; on failure regenerates once
    /// with a stricter (low-temperature) configuration, otherwise optionally runs a
    /// readability pass.
    /// </summary>
    private async Task<string> EnhanceResponseAsync(
        string initialResponse,
        string query,
        List<RankedDocument> documents,
        ResponseGenerationOptions options)
    {
        // Fact-check the response
        var factCheckResult = await FactCheckResponseAsync(
            initialResponse,
            documents
        );
        if (!factCheckResult.IsAccurate)
        {
            _logger.LogWarning(
                "Fact check failed for response. Regenerating..."
            );
            // Regenerate with stricter prompt.
            // NOTE(review): `with` expressions require ResponseGenerationOptions to be
            // declared as a record (or record struct) — verify its declaration.
            var stricterOptions = options with
            {
                Temperature = 0.1f,
                StrictFactChecking = true
            };
            return await GenerateInitialResponseAsync(
                query,
                BuildStructuredContext(documents, stricterOptions),
                stricterOptions
            );
        }
        // Enhance clarity and structure
        if (options.EnhanceReadability)
        {
            return await EnhanceReadabilityAsync(initialResponse);
        }
        return initialResponse;
    }

    /// <summary>
    /// Appends [n] citation markers to sentences that match a source document and a
    /// numbered reference list at the end.
    /// NOTE(review): the response is split on both '.' and '。' but re-joined with '。'
    /// only, so English periods are replaced by '。' and a trailing '。' is appended on
    /// every document iteration even when nothing matched — the text is rewritten once
    /// per document. Verify this delimiter handling before production use.
    /// </summary>
    private string AddCitations(string response, List<RankedDocument> documents)
    {
        var citationMap = new Dictionary<string, int>();
        var citationIndex = 1;
        foreach (var doc in documents)
        {
            var title = doc.Document.GetString("title");
            var content = doc.Document.GetString("content");
            // Find sentences in response that match document content
            var sentences = response.Split(new[] { '.', '。' },
                StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0; i < sentences.Length; i++)
            {
                var sentence = sentences[i].Trim();
                if (IsContentMatch(sentence, content))
                {
                    // First match for this document assigns it the next citation number.
                    if (!citationMap.ContainsKey(title))
                    {
                        citationMap[title] = citationIndex++;
                    }
                    // Add citation to sentence
                    sentences[i] = $"{sentence}[{citationMap[title]}]";
                }
            }
            response = string.Join("。", sentences) + "。";
        }
        // Add citation list at the end
        if (citationMap.Any())
        {
            response += "\n\n参考文献:\n";
            foreach (var citation in citationMap.OrderBy(c => c.Value))
            {
                response += $"[{citation.Value}] {citation.Key}\n";
            }
        }
        return response;
    }

    /// <summary>
    /// Asks the model for three likely follow-up questions in JSON.
    /// NOTE(review): JSON mode returns a JSON *object*; deserializing directly to
    /// List&lt;string&gt; may fail if the model wraps the array in an object — verify.
    /// </summary>
    private async Task<List<string>> GenerateFollowUpQuestionsAsync(
        string query,
        string response)
    {
        var prompt = $@"
元の質問: {query}
回答: {response}
この回答を踏まえて、ユーザーが次に尋ねそうな関連質問を3つ生成してください。
JSON配列形式で出力してください。";
        var completionResponse = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages =
                {
                    new ChatRequestSystemMessage("You are a helpful assistant that generates follow-up questions."),
                    new ChatRequestUserMessage(prompt)
                },
                Temperature = 0.7f,
                ResponseFormat = new ChatCompletionsResponseFormatJSON()
            }
        );
        var json = completionResponse.Value.Choices[0].Message.Content;
        var questions = JsonSerializer.Deserialize<List<string>>(json);
        return questions ?? new List<string>();
    }
}
// Supporting classes
/// <summary>Token-budgeted context assembled from retrieved documents, grouped by type.</summary>
public class StructuredContext
{
    public List<ContextSection> Sections { get; set; } = new();
    /// <summary>Estimated number of tokens actually packed into the sections.</summary>
    public int TotalTokens { get; set; }
}
/// <summary>One type-grouped section of the structured context.</summary>
public class ContextSection
{
    /// <summary>Document "type" field shared by this section's documents.</summary>
    public string Type { get; set; }
    // FIX: initialize the list (consistent with StructuredContext.Sections) so callers
    // that Add before assigning don't hit a NullReferenceException.
    public List<ContextDocument> Documents { get; set; } = new();
}

/// <summary>A single document's contribution to the packed context.</summary>
public class ContextDocument
{
    public string Id { get; set; }
    public string Title { get; set; }
    /// <summary>Document content, possibly truncated to fit the token budget.</summary>
    public string Content { get; set; }
    /// <summary>Retrieval score of the source document.</summary>
    public double Score { get; set; }
    public Dictionary<string, object> Metadata { get; set; }
}
/// <summary>
/// Options controlling answer generation.
/// FIX: declared as a record (was a class) because callers derive variants with a
/// `with` expression (e.g. a stricter low-temperature retry) — `with` is only
/// supported on records/record structs, so the class form did not compile there.
/// </summary>
public record ResponseGenerationOptions
{
    /// <summary>Chat deployment name.</summary>
    public string Model { get; set; } = "gpt-4";
    /// <summary>Estimated-token budget for the packed context.</summary>
    public int MaxContextTokens { get; set; } = 3000;
    /// <summary>Maximum tokens the model may generate.</summary>
    public int MaxResponseTokens { get; set; } = 1000;
    public float Temperature { get; set; } = 0.3f;
    public ResponseStyle ResponseStyle { get; set; } = ResponseStyle.Balanced;
    /// <summary>Ask the model to state 高/中/低 confidence in its answer.</summary>
    public bool IncludeConfidenceLevel { get; set; } = true;
    public bool EnableFunctionCalling { get; set; } = false;
    /// <summary>Run a post-generation readability pass.</summary>
    public bool EnhanceReadability { get; set; } = true;
    /// <summary>Set on the stricter retry after a failed fact check.</summary>
    public bool StrictFactChecking { get; set; } = false;
}

/// <summary>Writing style requested for the generated answer.</summary>
public enum ResponseStyle
{
    Technical,
    Simple,
    Balanced
}
評価とモニタリング
1. RAGシステムの評価メトリクス
# rag_evaluation.py
import numpy as np
from typing import List, Dict, Tuple
from dataclasses import dataclass
import asyncio
from sklearn.metrics.pairwise import cosine_similarity
@dataclass
class EvaluationResult:
    """Aggregated scores from a full RAG evaluation run."""
    retrieval_metrics: Dict[str, float]   # aggregated precision@k / recall@k / MRR / nDCG
    generation_metrics: Dict[str, float]  # aggregated answer-generation metrics
    end_to_end_metrics: Dict[str, float]  # aggregated end-to-end pipeline metrics
    detailed_results: List[Dict]          # per-query breakdown of all three evaluations
class RAGEvaluator:
    """Scores a RAG pipeline along three axes: retrieval, generation, end-to-end.

    The OpenAI client doubles as an LLM judge (faithfulness/relevance); the
    search service supplies the retrieval results being scored.

    Fixes vs. the article listing: restored lost indentation; narrowed the
    bare ``except:`` in ``_evaluate_faithfulness`` to the parse failures it
    actually guards against; made the no-ground-truth precision computation
    robust to list-typed relevance scores; quoted the ``EvaluationResult``
    forward reference so the annotation resolves regardless of definition order.
    """

    def __init__(self, openai_client, search_service, rag_service):
        self.openai_client = openai_client
        self.search_service = search_service
        self.rag_service = rag_service

    async def evaluate_system(
        self,
        test_queries: List[Dict]
    ) -> "EvaluationResult":
        """
        Comprehensive evaluation of RAG system.

        Each test case dict must contain "query"; "relevant_docs" and
        "expected_answer" are optional ground truth. Returns aggregated
        metrics for all three evaluation axes plus a per-query breakdown.
        """
        retrieval_scores = []
        generation_scores = []
        end_to_end_scores = []
        detailed_results = []
        for test_case in test_queries:
            query = test_case["query"]
            expected_docs = test_case.get("relevant_docs", [])
            expected_answer = test_case.get("expected_answer", "")
            # Evaluate retrieval against ground-truth relevant documents.
            retrieval_result = await self._evaluate_retrieval(
                query,
                expected_docs,
            )
            retrieval_scores.append(retrieval_result)
            # Evaluate generation using the documents retrieved above.
            generation_result = await self._evaluate_generation(
                query,
                retrieval_result["retrieved_docs"],
                expected_answer,
            )
            generation_scores.append(generation_result)
            # Evaluate the full pipeline output against the expected answer.
            e2e_result = await self._evaluate_end_to_end(
                query,
                expected_answer,
            )
            end_to_end_scores.append(e2e_result)
            detailed_results.append({
                "query": query,
                "retrieval": retrieval_result,
                "generation": generation_result,
                "end_to_end": e2e_result,
            })
        return EvaluationResult(
            retrieval_metrics=self._aggregate_retrieval_metrics(retrieval_scores),
            generation_metrics=self._aggregate_generation_metrics(generation_scores),
            end_to_end_metrics=self._aggregate_e2e_metrics(end_to_end_scores),
            detailed_results=detailed_results,
        )

    async def _evaluate_retrieval(
        self,
        query: str,
        expected_docs: List[str]
    ) -> Dict:
        """Evaluate retrieval performance (precision@5, recall@5, MRR, NDCG).

        When no ground truth is available, falls back to LLM-judged relevance
        and reports only a thresholded precision estimate.
        """
        # Get retrieved documents
        search_results = await self.search_service.search(query, top_k=10)
        retrieved_docs = [r.document_id for r in search_results]
        # Calculate metrics
        if expected_docs:
            precision_at_k = self._calculate_precision_at_k(
                retrieved_docs,
                expected_docs,
                k=5
            )
            recall_at_k = self._calculate_recall_at_k(
                retrieved_docs,
                expected_docs,
                k=5
            )
            mrr = self._calculate_mrr(retrieved_docs, expected_docs)
            ndcg = self._calculate_ndcg(
                retrieved_docs,
                expected_docs,
                [r.score for r in search_results]
            )
        else:
            # If no ground truth, use relevance scoring
            relevance_scores = await self._score_relevance(
                query,
                search_results
            )
            # np.asarray makes the elementwise ">" valid even when the judge
            # returns a plain Python list rather than an ndarray.
            precision_at_k = np.mean(np.asarray(relevance_scores[:5]) > 0.7)
            recall_at_k = None
            mrr = None
            ndcg = None
        return {
            "retrieved_docs": retrieved_docs[:5],
            "precision_at_5": precision_at_k,
            "recall_at_5": recall_at_k,
            "mrr": mrr,
            "ndcg": ndcg,
            "avg_score": np.mean([r.score for r in search_results[:5]])
        }

    async def _evaluate_generation(
        self,
        query: str,
        retrieved_docs: List[str],
        expected_answer: str
    ) -> Dict:
        """Evaluate generation quality: similarity to reference (when given),
        faithfulness to context, answer relevance, and fluency."""
        # Get generated answer
        context = await self._build_context(retrieved_docs)
        generated_answer = await self._generate_answer(query, context)
        metrics = {}
        # Semantic similarity is only computable when a reference answer exists.
        if expected_answer:
            metrics["semantic_similarity"] = await self._calculate_semantic_similarity(
                generated_answer,
                expected_answer
            )
        # Faithfulness to context
        metrics["faithfulness"] = await self._evaluate_faithfulness(
            generated_answer,
            context
        )
        # Answer relevance
        metrics["relevance"] = await self._evaluate_answer_relevance(
            query,
            generated_answer
        )
        # Fluency and coherence
        metrics["fluency"] = await self._evaluate_fluency(generated_answer)
        return metrics

    async def _evaluate_faithfulness(
        self,
        answer: str,
        context: str
    ) -> float:
        """LLM-as-judge score in [0, 1] for whether the answer sticks to the
        given context. Returns a neutral 0.5 if the judge output is not a number."""
        prompt = f"""
Given the context and answer, evaluate if the answer is faithful to the context.
Score from 0 to 1, where 1 means completely faithful.
Context: {context[:1000]}
Answer: {answer}
Output only a number between 0 and 1.
"""
        response = await self.openai_client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an evaluation expert."},
                {"role": "user", "content": prompt}
            ],
            temperature=0
        )
        try:
            return float(response.choices[0].message.content)
        except (ValueError, TypeError):
            # Judge returned non-numeric text (or None); fall back to neutral.
            # (Was a bare except, which also swallowed KeyboardInterrupt etc.)
            return 0.5

    def _calculate_ndcg(
        self,
        retrieved: List[str],
        relevant: List[str],
        scores: List[float]
    ) -> float:
        """Calculate Normalized Discounted Cumulative Gain with binary relevance.

        NOTE(review): ``scores`` is accepted but currently unused, and the
        ideal DCG is computed over the retrieved set only (not the full
        ``relevant`` set) — confirm whether graded relevance was intended.
        """
        def dcg(relevances, k=None):
            if k is None:
                k = len(relevances)
            # Positions are 1-based in the DCG formula, hence log2(i + 2).
            return sum(
                rel / np.log2(i + 2)
                for i, rel in enumerate(relevances[:k])
            )

        # Binary relevance labels in retrieved order.
        relevances = [1 if doc in relevant else 0 for doc in retrieved]
        # Calculate DCG
        dcg_score = dcg(relevances)
        # Ideal DCG: the same labels sorted best-first.
        ideal_relevances = sorted(relevances, reverse=True)
        idcg_score = dcg(ideal_relevances)
        # NDCG is undefined when nothing relevant was retrieved; report 0.
        if idcg_score == 0:
            return 0
        return dcg_score / idcg_score
2. 継続的改善のためのフィードバックループ
// Feedback and Improvement System
/// <summary>
/// Records user feedback on RAG answers and, when feedback indicates a
/// failure, drives an automated improvement workflow.
/// </summary>
public class RAGFeedbackSystem
{
    private readonly IFeedbackRepository _feedbackRepo;
    private readonly IRAGService _ragService;
    private readonly IMetricsService _metricsService;
    private readonly ILogger<RAGFeedbackSystem> _logger;

    /// <summary>
    /// Persists the feedback, updates satisfaction metrics, and — for a rating
    /// below 3 or an explicitly unhelpful answer — triggers improvement.
    /// </summary>
    public async Task RecordFeedbackAsync(UserFeedback feedback)
    {
        await _feedbackRepo.SaveFeedbackAsync(feedback);

        await _metricsService.UpdateUserSatisfactionMetricsAsync(
            feedback.SessionId,
            feedback.Rating,
            feedback.WasHelpful);

        // A rating below 3, or an explicit "not helpful", counts as a failure.
        if (feedback.Rating < 3 || !feedback.WasHelpful)
        {
            await TriggerImprovementWorkflowAsync(feedback);
        }
    }

    /// <summary>
    /// Classifies why the interaction failed and dispatches the matching
    /// remediation step, then logs the action taken.
    /// </summary>
    private async Task TriggerImprovementWorkflowAsync(UserFeedback feedback)
    {
        var failureAnalysis = await AnalyzeFailureAsync(
            feedback.Query,
            feedback.Response,
            feedback.Sources);

        switch (failureAnalysis.FailureType)
        {
            case FailureType.InsufficientContext:
                await ImproveDocumentCoverageAsync(feedback.Query);
                break;
            case FailureType.PoorRetrieval:
                await OptimizeSearchParametersAsync(feedback.Query);
                break;
            case FailureType.IncorrectGeneration:
                await RefineGenerationPromptsAsync(feedback);
                break;
        }

        _logger.LogInformation(
            "Triggered improvement workflow for query: {Query}, Type: {Type}",
            feedback.Query,
            failureAnalysis.FailureType);
    }

    /// <summary>
    /// Scores retrieval and response quality separately, then maps the pair of
    /// scores to a failure category with remediation recommendations.
    /// </summary>
    private async Task<FailureAnalysis> AnalyzeFailureAsync(
        string query,
        string response,
        List<Source> sources)
    {
        var retrievalScore = await EvaluateRetrievalQualityAsync(query, sources);
        var responseScore = await EvaluateResponseQualityAsync(query, response, sources);
        var failureType = DetermineFailureType(retrievalScore, responseScore);

        return new FailureAnalysis
        {
            FailureType = failureType,
            RetrievalScore = retrievalScore,
            ResponseScore = responseScore,
            Recommendations = GenerateRecommendations(failureType)
        };
    }

    /// <summary>
    /// Fills content gaps for a query: indexes a synthetic document for each
    /// missing topic, then refreshes document metadata for retrieval.
    /// </summary>
    private async Task ImproveDocumentCoverageAsync(string query)
    {
        var missingTopics = await IdentifyMissingTopicsAsync(query);

        foreach (var topic in missingTopics)
        {
            var syntheticDoc = await GenerateSyntheticDocumentAsync(topic);
            await IndexDocumentAsync(syntheticDoc);
        }

        await UpdateDocumentMetadataAsync(query);
    }

    /// <summary>
    /// A/B tests candidate search configurations for the query and applies the
    /// winning configuration to the RAG service.
    /// </summary>
    private async Task OptimizeSearchParametersAsync(string query)
    {
        var candidateConfigs = GenerateSearchConfigurations();
        var winningConfig = await RunSearchOptimizationAsync(query, candidateConfigs);

        await _ragService.UpdateSearchConfigurationAsync(query, winningConfig);
    }
}
// Monitoring Dashboard Data
/// <summary>
/// Aggregates RAG operational metrics for the monitoring dashboard.
/// </summary>
public class RAGMonitoringService
{
    /// <summary>
    /// Builds the dashboard payload for the given date range: performance,
    /// quality, usage, system-health, and cost metrics.
    /// </summary>
    /// <remarks>
    /// Metrics are awaited sequentially in a fixed order. Parallelizing with
    /// <c>Task.WhenAll</c> could cut latency, but only if the underlying
    /// stores tolerate concurrent reads — TODO confirm before changing.
    /// </remarks>
    public async Task<RAGDashboardData> GetDashboardDataAsync(
        DateTime startDate,
        DateTime endDate)
    {
        // Performance
        var avgResponse = await CalculateAverageResponseTimeAsync(startDate, endDate);
        var medianResponse = await CalculateMedianResponseTimeAsync(startDate, endDate);
        var p95Response = await CalculateP95ResponseTimeAsync(startDate, endDate);

        // Quality
        var avgRating = await CalculateAverageRatingAsync(startDate, endDate);
        var accuracy = await CalculateAnswerAccuracyAsync(startDate, endDate);
        var retrievalPrecision = await CalculateRetrievalPrecisionAsync(startDate, endDate);

        // Usage
        var totalQueries = await GetTotalQueriesAsync(startDate, endDate);
        var uniqueUsers = await GetUniqueUsersAsync(startDate, endDate);
        var topQueries = await GetTopQueriesAsync(startDate, endDate, 10);

        // System health
        var errorRate = await CalculateErrorRateAsync(startDate, endDate);
        var cacheHitRate = await CalculateCacheHitRateAsync(startDate, endDate);
        var indexFreshness = await CalculateIndexFreshnessAsync();

        // Cost
        var tokensUsed = await GetTotalTokensUsedAsync(startDate, endDate);
        var estimatedCost = await CalculateEstimatedCostAsync(startDate, endDate);

        return new RAGDashboardData
        {
            AverageResponseTime = avgResponse,
            MedianResponseTime = medianResponse,
            P95ResponseTime = p95Response,
            AverageUserRating = avgRating,
            AnswerAccuracy = accuracy,
            RetrievalPrecision = retrievalPrecision,
            TotalQueries = totalQueries,
            UniqueUsers = uniqueUsers,
            TopQueries = topQueries,
            ErrorRate = errorRate,
            CacheHitRate = cacheHitRate,
            IndexFreshness = indexFreshness,
            TotalTokensUsed = tokensUsed,
            EstimatedCost = estimatedCost
        };
    }
}
パフォーマンス最適化
1. キャッシングとレスポンス最適化
// Intelligent Caching System
/// <summary>
/// Two-tier response cache for RAG queries: an exact-match distributed cache
/// plus an optional semantic layer backed by a vector database of query
/// embeddings.
/// </summary>
public class RAGCacheManager
{
    private readonly IDistributedCache _cache;
    private readonly IVectorDatabase _vectorDb;
    private readonly ILogger<RAGCacheManager> _logger;

    /// <summary>
    /// Looks up a cached response: exact match first, then (when enabled)
    /// semantically similar cached queries. Returns null on a miss.
    /// </summary>
    public async Task<CachedResponse?> GetCachedResponseAsync(
        string query,
        CacheOptions options)
    {
        var exact = await GetExactMatchAsync(query);
        if (exact != null)
        {
            return exact;
        }

        return options.EnableSemanticCache
            ? await GetSemanticMatchAsync(query, options.SimilarityThreshold)
            : null;
    }

    /// <summary>
    /// Finds a fresh cached response whose original query is similar to
    /// <paramref name="query"/> with score at least <paramref name="threshold"/>.
    /// </summary>
    private async Task<CachedResponse?> GetSemanticMatchAsync(
        string query,
        float threshold)
    {
        var embedding = await GenerateEmbeddingAsync(query);

        // Candidates come from the vector index of previously cached queries.
        var candidates = await _vectorDb.SearchSimilarAsync(
            embedding,
            "query_cache",
            topK: 5,
            minScore: threshold);

        foreach (var candidate in candidates)
        {
            var key = $"rag_response:{candidate.Id}";
            var payload = await _cache.GetStringAsync(key);
            if (string.IsNullOrEmpty(payload))
            {
                continue;
            }

            var hit = JsonSerializer.Deserialize<CachedResponse>(payload);
            // Only serve entries cached within the last 24 hours.
            if (IsCacheFresh(hit, TimeSpan.FromHours(24)))
            {
                _logger.LogInformation(
                    "Semantic cache hit for query. Similarity: {Score}",
                    candidate.Score);
                return hit;
            }
        }

        return null;
    }

    /// <summary>
    /// Stores a response in the distributed cache and, when semantic caching
    /// is enabled, registers the query embedding in the vector index.
    /// </summary>
    public async Task CacheResponseAsync(
        string query,
        RAGResponse response,
        CacheOptions options)
    {
        var entry = new CachedResponse
        {
            Query = query,
            Response = response,
            CachedAt = DateTime.UtcNow,
            ExpiresAt = DateTime.UtcNow.Add(options.CacheDuration),
            Metadata = new CacheMetadata
            {
                SourceDocuments = response.Sources.Select(s => s.Id).ToList(),
                ModelVersion = response.ModelVersion,
                QueryEmbedding = await GenerateEmbeddingAsync(query)
            }
        };

        // Exact-match tier: keyed by a deterministic hash of the query text.
        var key = $"rag_response:{GenerateCacheKey(query)}";
        await _cache.SetStringAsync(
            key,
            JsonSerializer.Serialize(entry),
            new DistributedCacheEntryOptions
            {
                AbsoluteExpiration = entry.ExpiresAt,
                SlidingExpiration = TimeSpan.FromHours(1)
            });

        // Semantic tier: same id, so vector hits map back to the cache key above.
        if (options.EnableSemanticCache)
        {
            await _vectorDb.UpsertAsync(
                "query_cache",
                new VectorDocument
                {
                    Id = GenerateCacheKey(query),
                    Vector = entry.Metadata.QueryEmbedding,
                    Metadata = new Dictionary<string, object>
                    {
                        ["query"] = query,
                        ["cached_at"] = entry.CachedAt,
                        ["expires_at"] = entry.ExpiresAt
                    }
                });
        }
    }

    /// <summary>
    /// Invalidates cache entries — everything, entries referencing specific
    /// documents, or entries older than a cutoff, in that precedence order.
    /// </summary>
    public async Task InvalidateCacheAsync(InvalidationOptions options)
    {
        if (options.InvalidateAll)
        {
            // NOTE(review): IDistributedCache.RemoveAsync takes a single key;
            // most providers do not expand "*" wildcards, so this call likely
            // leaves entries to expire naturally — confirm the cache backend
            // actually supports pattern deletion.
            await _cache.RemoveAsync("rag_response:*");
            await _vectorDb.DeleteCollectionAsync("query_cache");
        }
        else if (options.DocumentIds?.Any() == true)
        {
            // Targeted invalidation: drop entries built from the given documents.
            var staleKeys = await FindCacheKeysReferencingDocumentsAsync(
                options.DocumentIds);

            foreach (var staleKey in staleKeys)
            {
                await _cache.RemoveAsync(staleKey);
            }
        }
        else if (options.OlderThan.HasValue)
        {
            // Age-based invalidation.
            await InvalidateOldEntriesAsync(options.OlderThan.Value);
        }
    }
}
まとめ
Azure OpenAI ServiceとAzure AI Searchを活用したRAGシステムは、エンタープライズレベルの知識ベースシステムを構築するための強力なソリューションです。本記事で紹介した実装パターンにより、以下の利点が得られます:
- 高精度な情報検索: ハイブリッド検索とセマンティック理解
- 信頼性の高い回答生成: コンテキストに忠実な回答と引用
- スケーラビリティ: 効率的なキャッシングと最適化
- 継続的改善: フィードバックループと自動最適化
エンハンスド株式会社では、Azure OpenAI Serviceを活用したRAGシステムの設計・構築を支援しています。お客様のビジネスニーズに合わせたカスタマイズも承っております。
#AzureOpenAI #RAG #AzureAISearch #GPT4 #エンタープライズAI #知識管理 #自然言語処理 #ベクトル検索 #AI開発
執筆者: エンハンスド株式会社 技術チーム
公開日: 2024年12月24日