Azure OpenAI Service を活用した高精度RAGシステム構築ガイド

はじめに

RAG(Retrieval-Augmented Generation)は、大規模言語モデルに外部知識を統合することで、より正確で最新の情報に基づいた回答を生成する技術です。本記事では、Azure OpenAI ServiceとAzure AI Searchを組み合わせた、エンタープライズグレードのRAGシステム構築方法を解説します。

RAGアーキテクチャの設計

1. システム全体構成

graph TB
    %% Query-time path: user query -> embedding -> vector/semantic retrieval -> grounded answer
    A[User Query] --> B[Query Processor]
    B --> C[Azure OpenAI Embeddings]
    C --> D[Azure AI Search]
    D --> E[Relevant Documents]
    E --> F[Context Builder]
    F --> G[Azure OpenAI GPT-4]
    G --> H[Response Generator]
    H --> I[User Response]

    %% Ingestion path: raw documents are chunked, embedded, and written into the same index
    J[Document Sources] --> K[Document Processor]
    K --> L[Chunking & Embedding]
    L --> D

2. 基本実装

// RAG Service Implementation
using Azure;
using Azure.AI.OpenAI;
using Azure.Search.Documents;
using Azure.Search.Documents.Indexes;
using Azure.Search.Documents.Models;

/// <summary>
/// End-to-end RAG pipeline: embeds the query, retrieves relevant documents
/// from Azure AI Search (hybrid semantic + vector), assembles a token-budgeted
/// context, and asks an Azure OpenAI chat deployment to answer grounded in it.
/// </summary>
public class RAGService
{
    private readonly OpenAIClient _openAIClient;
    private readonly SearchClient _searchClient;
    // These are Azure OpenAI *deployment* names, which must match the
    // deployments configured in the resource (not necessarily model names).
    private readonly string _embeddingDeployment = "text-embedding-ada-002";
    private readonly string _completionDeployment = "gpt-4";

    /// <summary>
    /// Creates API-key-authenticated clients for Azure OpenAI and Azure AI Search.
    /// </summary>
    /// <param name="openAIEndpoint">Azure OpenAI resource endpoint URI.</param>
    /// <param name="openAIKey">Azure OpenAI API key.</param>
    /// <param name="searchEndpoint">Azure AI Search service endpoint URI.</param>
    /// <param name="searchKey">Azure AI Search key.</param>
    /// <param name="indexName">Search index holding the chunked documents.</param>
    public RAGService(
        string openAIEndpoint,
        string openAIKey,
        string searchEndpoint,
        string searchKey,
        string indexName)
    {
        _openAIClient = new OpenAIClient(
            new Uri(openAIEndpoint),
            new AzureKeyCredential(openAIKey)
        );

        _searchClient = new SearchClient(
            new Uri(searchEndpoint),
            indexName,
            new AzureKeyCredential(searchKey)
        );
    }

    /// <summary>
    /// Answers <paramref name="query"/> using retrieval-augmented generation.
    /// </summary>
    /// <param name="query">The user's natural-language question.</param>
    /// <param name="options">Optional tuning knobs; defaults apply when null.</param>
    /// <returns>The generated answer plus the sources and context used.</returns>
    /// <exception cref="ArgumentException">Query is null, empty, or whitespace.</exception>
    public async Task<RAGResponse> GenerateResponseAsync(
        string query,
        RAGOptions options = null)
    {
        if (string.IsNullOrWhiteSpace(query))
        {
            throw new ArgumentException("Query must not be null or whitespace.", nameof(query));
        }

        options ??= new RAGOptions();

        // Step 1: Generate embeddings for the query
        var queryEmbedding = await GenerateEmbeddingAsync(query);

        // Step 2: Search for relevant documents
        var searchResults = await SearchDocumentsAsync(
            queryEmbedding,
            options.TopK,
            options.MinRelevanceScore
        );

        // Step 3: Build context from search results
        var context = BuildContext(searchResults, options.MaxContextTokens);

        // Step 4: Generate the grounded answer with the chat deployment
        var response = await GenerateCompletionAsync(
            query,
            context,
            options
        );

        return new RAGResponse
        {
            Answer = response,
            Sources = searchResults.Select(r => new Source
            {
                Title = r.Document.GetString("title"),
                Url = r.Document.GetString("url"),
                Excerpt = r.Document.GetString("content"),
                Score = r.Score ?? 0
            }).ToList(),
            Context = context
        };
    }

    /// <summary>Embeds a single text with the configured embedding deployment.</summary>
    private async Task<float[]> GenerateEmbeddingAsync(string text)
    {
        var embeddingOptions = new EmbeddingsOptions(
            _embeddingDeployment,
            new[] { text }
        );

        var response = await _openAIClient.GetEmbeddingsAsync(embeddingOptions);
        return response.Value.Data[0].Embedding.ToArray();
    }

    /// <summary>
    /// Runs a combined semantic + vector query against the index and keeps
    /// only hits whose score reaches <paramref name="minScore"/>.
    /// </summary>
    private async Task<List<SearchResult<SearchDocument>>> SearchDocumentsAsync(
        float[] queryVector,
        int topK,
        double minScore)
    {
        var searchOptions = new SearchOptions
        {
            Select = { "id", "title", "content", "url", "metadata" },
            Size = topK,
            IncludeTotalCount = true,
            QueryType = SearchQueryType.Semantic,
            SemanticSearch = new()
            {
                // "default" must exist as a semantic configuration on the index.
                SemanticConfigurationName = "default",
                QueryCaption = new(QueryCaptionType.Extractive),
                QueryAnswer = new(QueryAnswerType.Extractive)
            },
            VectorSearch = new()
            {
                Queries = { new VectorizedQuery(queryVector)
                {
                    KNearestNeighborsCount = topK,
                    Fields = { "contentVector" }
                }}
            }
        };

        var searchResponse = await _searchClient.SearchAsync<SearchDocument>(
            searchText: null,
            searchOptions
        );

        var results = new List<SearchResult<SearchDocument>>();
        await foreach (var result in searchResponse.Value.GetResultsAsync())
        {
            if (result.Score >= minScore)
            {
                results.Add(result);
            }
        }

        return results;
    }

    /// <summary>
    /// Concatenates retrieved documents (best score first) into a prompt
    /// context, staying within the token budget.
    /// </summary>
    private string BuildContext(
        List<SearchResult<SearchDocument>> searchResults,
        int maxTokens)
    {
        var contextBuilder = new StringBuilder();
        var currentTokens = 0;

        foreach (var result in searchResults.OrderByDescending(r => r.Score))
        {
            var content = result.Document.GetString("content");
            var tokens = EstimateTokenCount(content);

            if (currentTokens + tokens > maxTokens)
            {
                // Skip this document but keep trying lower-ranked (possibly
                // shorter) ones. The previous `break` discarded every
                // remaining candidate whenever the single best hit alone
                // exceeded the budget, yielding an empty context.
                continue;
            }

            contextBuilder.AppendLine($"---");
            contextBuilder.AppendLine($"Source: {result.Document.GetString("title")}");
            contextBuilder.AppendLine($"Content: {content}");
            contextBuilder.AppendLine();

            currentTokens += tokens;
        }

        return contextBuilder.ToString();
    }

    /// <summary>
    /// Asks the chat deployment to answer the query grounded in the context,
    /// with a system prompt enforcing context-only, sourced answers.
    /// </summary>
    private async Task<string> GenerateCompletionAsync(
        string query,
        string context,
        RAGOptions options)
    {
        var systemPrompt = @"
あなたは親切で正確な情報を提供するAIアシスタントです。
以下のコンテキスト情報を使用して、ユーザーの質問に答えてください。

重要なルール:
1. コンテキストに基づいて回答してください
2. コンテキストに情報がない場合は、その旨を明確に伝えてください
3. 推測や憶測は避け、事実に基づいた回答を心がけてください
4. 回答の根拠となる情報源を明示してください";

        var userPrompt = $@"
コンテキスト情報:
{context}

質問:{query}

上記のコンテキスト情報を使用して、質問に対する詳細で正確な回答を提供してください。";

        var completionOptions = new ChatCompletionsOptions
        {
            DeploymentName = _completionDeployment,
            Messages =
            {
                new ChatRequestSystemMessage(systemPrompt),
                new ChatRequestUserMessage(userPrompt)
            },
            Temperature = options.Temperature,
            MaxTokens = options.MaxResponseTokens,
            NucleusSamplingFactor = options.TopP,
            FrequencyPenalty = options.FrequencyPenalty,
            PresencePenalty = options.PresencePenalty
        };

        var response = await _openAIClient.GetChatCompletionsAsync(completionOptions);
        return response.Value.Choices[0].Message.Content;
    }

    /// <summary>
    /// Rough token estimate used only for context budgeting.
    /// </summary>
    private int EstimateTokenCount(string text)
    {
        // The old "~4 characters per token" rule is an English-text heuristic;
        // Japanese typically tokenizes to roughly 1-2 characters per token, so
        // dividing by 4 under-counted by 2-4x and risked overflowing the model
        // context window. chars/2 is still cheap but much safer. For
        // production, measure with a real tokenizer (tiktoken / SharpToken).
        if (string.IsNullOrEmpty(text))
        {
            return 0;
        }

        return text.Length / 2;
    }
}

// Configuration classes
/// <summary>
/// Tuning knobs for a single RAG request (retrieval and generation).
/// </summary>
public class RAGOptions
{
    /// <summary>Maximum number of documents to retrieve from search.</summary>
    public int TopK { get; set; } = 5;
    /// <summary>Hits scoring below this are dropped before context building.</summary>
    public double MinRelevanceScore { get; set; } = 0.7;
    /// <summary>Token budget for the retrieved context placed in the prompt.</summary>
    public int MaxContextTokens { get; set; } = 2000;
    /// <summary>Sampling temperature; kept low for factual, grounded answers.</summary>
    public float Temperature { get; set; } = 0.3f;
    /// <summary>Maximum tokens the model may generate for the answer.</summary>
    public int MaxResponseTokens { get; set; } = 1000;
    /// <summary>Nucleus-sampling probability mass.</summary>
    public float TopP { get; set; } = 0.95f;
    /// <summary>Penalty for repeating tokens already frequent in the output.</summary>
    public float FrequencyPenalty { get; set; } = 0;
    /// <summary>Penalty for reusing tokens already present in the output.</summary>
    public float PresencePenalty { get; set; } = 0;
}

/// <summary>
/// Result of a RAG request: the answer plus the evidence behind it.
/// </summary>
public class RAGResponse
{
    /// <summary>The model-generated answer text.</summary>
    public string Answer { get; set; }
    /// <summary>Documents retrieved for the query, with scores and excerpts.</summary>
    public List<Source> Sources { get; set; }
    /// <summary>The exact context string that was placed in the prompt.</summary>
    public string Context { get; set; }
}

/// <summary>
/// One retrieved document cited as evidence for an answer.
/// </summary>
public class Source
{
    /// <summary>Document title from the search index ("title" field).</summary>
    public string Title { get; set; }
    /// <summary>Link to the original document ("url" field).</summary>
    public string Url { get; set; }
    /// <summary>Document content as stored in the index ("content" field).</summary>
    public string Excerpt { get; set; }
    /// <summary>Search relevance score (0 when the engine returned none).</summary>
    public double Score { get; set; }
}

ドキュメント処理パイプライン

1. インテリジェントなチャンキング

# document_processor.py
import tiktoken
from typing import List, Dict, Tuple
import re
from dataclasses import dataclass
import numpy as np

@dataclass
class Chunk:
    """One contiguous slice of a source document, ready for embedding/indexing."""

    content: str  # The chunk text itself.
    metadata: Dict  # Document-level metadata attached to the chunk.
    start_index: int  # Character offset of the chunk start (approximate; see chunker).
    end_index: int  # Character offset just past the chunk end (approximate).
    token_count: int  # Tokens in `content` as counted by the chunker's encoder.

class IntelligentChunker:
    """Token-aware document chunker that respects paragraph and sentence
    boundaries, with configurable overlap between consecutive chunks.

    Note: the start/end character offsets recorded on chunks are approximate
    (overlap text and join separators make exact offsets ambiguous).
    """

    def __init__(
        self,
        model_name: str = "gpt-4",
        chunk_size: int = 512,
        chunk_overlap: int = 128
    ):
        # tiktoken gives real token counts for the target model, so chunk
        # budgets match what the embedding/completion models will see.
        self.encoder = tiktoken.encoding_for_model(model_name)
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def chunk_document(
        self,
        text: str,
        metadata: Dict
    ) -> List[Chunk]:
        """
        Intelligently chunk document while preserving semantic boundaries.

        Args:
            text: Full document text.
            metadata: Document-level metadata copied onto every chunk.

        Returns:
            List of Chunk objects, each within ``chunk_size`` tokens.
        """
        # Split into paragraphs first
        paragraphs = self._split_paragraphs(text)

        chunks = []
        current_chunk = []
        current_tokens = 0
        start_index = 0

        for i, paragraph in enumerate(paragraphs):
            paragraph_tokens = len(self.encoder.encode(paragraph))

            # If paragraph itself is too large, split it at sentence level.
            # Pass metadata through: previously sub-chunks were created with
            # an empty metadata dict, silently losing document metadata for
            # any document containing an oversized paragraph.
            if paragraph_tokens > self.chunk_size:
                sub_chunks = self._split_large_paragraph(
                    paragraph,
                    start_index,
                    metadata
                )
                chunks.extend(sub_chunks)
                start_index += len(paragraph)
                continue

            # If adding paragraph exceeds chunk size, close the current chunk
            if current_tokens + paragraph_tokens > self.chunk_size:
                if current_chunk:
                    chunk_content = '\n\n'.join(current_chunk)
                    chunks.append(Chunk(
                        content=chunk_content,
                        metadata=metadata,
                        start_index=start_index - len(chunk_content),
                        end_index=start_index,
                        token_count=current_tokens
                    ))

                # Start new chunk, seeded with trailing paragraphs for overlap
                overlap_chunks = self._get_overlap_chunks(
                    current_chunk,
                    self.chunk_overlap
                )
                current_chunk = overlap_chunks + [paragraph]
                current_tokens = sum(
                    len(self.encoder.encode(c)) for c in current_chunk
                )
            else:
                current_chunk.append(paragraph)
                current_tokens += paragraph_tokens

            start_index += len(paragraph) + 2  # Account for the '\n\n' join

        # Flush the final partial chunk
        if current_chunk:
            chunk_content = '\n\n'.join(current_chunk)
            chunks.append(Chunk(
                content=chunk_content,
                metadata=metadata,
                start_index=start_index - len(chunk_content),
                end_index=start_index,
                token_count=current_tokens
            ))

        return chunks

    def _split_paragraphs(self, text: str) -> List[str]:
        """Split text into semantic paragraphs, keeping headers as their own
        paragraph so they never get glued into an unrelated body chunk."""
        lines = text.split('\n')
        paragraphs = []
        current_paragraph = []

        for line in lines:
            line = line.strip()

            # A header terminates the current paragraph and stands alone
            if self._is_header(line):
                if current_paragraph:
                    paragraphs.append('\n'.join(current_paragraph))
                    current_paragraph = []
                paragraphs.append(line)
            elif line:
                current_paragraph.append(line)
            elif current_paragraph:
                # Blank line = paragraph boundary
                paragraphs.append('\n'.join(current_paragraph))
                current_paragraph = []

        if current_paragraph:
            paragraphs.append('\n'.join(current_paragraph))

        return [p for p in paragraphs if p.strip()]

    def _is_header(self, line: str) -> bool:
        """Heuristically detect whether a line is a section header."""
        header_patterns = [
            r'^#{1,6}\s',  # Markdown headers
            r'^[A-Z][^.!?]*:$',  # Title case with colon
            r'^\d+\.\s+[A-Z]',  # Numbered sections
            r'^[A-Z][A-Z\s]+$'  # All caps
        ]

        return any(re.match(pattern, line) for pattern in header_patterns)

    def _split_large_paragraph(
        self,
        paragraph: str,
        start_index: int,
        metadata: Dict = None
    ) -> List[Chunk]:
        """Split a paragraph that alone exceeds chunk_size at sentence
        boundaries.

        Args:
            paragraph: The oversized paragraph text.
            start_index: Character offset of the paragraph in the document.
            metadata: Document metadata to attach to every sub-chunk
                (defaults to an empty dict for backward compatibility).
        """
        metadata = metadata if metadata is not None else {}
        sentences = self._split_sentences(paragraph)
        chunks = []
        current_sentences = []
        current_tokens = 0

        for sentence in sentences:
            sentence_tokens = len(self.encoder.encode(sentence))

            if current_tokens + sentence_tokens > self.chunk_size:
                if current_sentences:
                    chunk_content = ' '.join(current_sentences)
                    chunks.append(Chunk(
                        content=chunk_content,
                        metadata=metadata,
                        start_index=start_index,
                        end_index=start_index + len(chunk_content),
                        token_count=current_tokens
                    ))
                    start_index += len(chunk_content) + 1

                current_sentences = [sentence]
                current_tokens = sentence_tokens
            else:
                current_sentences.append(sentence)
                current_tokens += sentence_tokens

        # Flush the trailing sentences
        if current_sentences:
            chunk_content = ' '.join(current_sentences)
            chunks.append(Chunk(
                content=chunk_content,
                metadata=metadata,
                start_index=start_index,
                end_index=start_index + len(chunk_content),
                token_count=current_tokens
            ))

        return chunks

    def _split_sentences(self, text: str) -> List[str]:
        """Split text into sentences on ./!/? boundaries.

        NOTE(review): this only handles ASCII sentence punctuation — Japanese
        '。' is not matched; use spaCy or a locale-aware splitter in production.
        """
        sentences = re.split(r'(?<=[.!?])\s+', text)
        return [s.strip() for s in sentences if s.strip()]

    def _get_overlap_chunks(
        self,
        chunks: List[str],
        overlap_tokens: int
    ) -> List[str]:
        """Collect trailing paragraphs (in order) whose combined token count
        fits within the overlap budget."""
        overlap_chunks = []
        total_tokens = 0

        for chunk in reversed(chunks):
            chunk_tokens = len(self.encoder.encode(chunk))
            if total_tokens + chunk_tokens <= overlap_tokens:
                overlap_chunks.insert(0, chunk)
                total_tokens += chunk_tokens
            else:
                break

        return overlap_chunks

2. メタデータ抽出とエンリッチメント

// Document Enrichment Service
/// <summary>
/// Enriches raw documents with LLM-derived metadata (entities, summary,
/// keywords, classification, Q&amp;A pairs) to improve downstream retrieval.
/// </summary>
public class DocumentEnrichmentService
{
    private readonly OpenAIClient _openAIClient;
    private readonly ILogger<DocumentEnrichmentService> _logger;

    // The prompts ask the model for lower-case JSON keys while the C# result
    // models use PascalCase properties; without case-insensitive matching
    // every Deserialize call silently produced empty objects.
    // NOTE(review): snake_case keys such as "qa_pairs" still need a
    // [JsonPropertyName] attribute on the result model — case-insensitivity
    // alone does not bridge the underscore. Verify QAGenerationResult.
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        PropertyNameCaseInsensitive = true
    };

    /// <summary>
    /// Wires up dependencies. The original class declared readonly fields but
    /// no constructor, so both fields were always null and every enrichment
    /// call threw NullReferenceException.
    /// </summary>
    /// <param name="openAIClient">Azure OpenAI client used for all prompts.</param>
    /// <param name="logger">Logger; may be null in tests.</param>
    public DocumentEnrichmentService(
        OpenAIClient openAIClient,
        ILogger<DocumentEnrichmentService> logger)
    {
        _openAIClient = openAIClient ?? throw new ArgumentNullException(nameof(openAIClient));
        _logger = logger;
    }

    /// <summary>
    /// Runs the enrichment steps selected in <paramref name="options"/> and
    /// mirrors each result into the document's metadata dictionary.
    /// </summary>
    public async Task<EnrichedDocument> EnrichDocumentAsync(
        Document document,
        EnrichmentOptions options)
    {
        var enrichedDoc = new EnrichedDocument
        {
            Id = document.Id,
            OriginalContent = document.Content,
            Metadata = document.Metadata ?? new Dictionary<string, object>()
        };

        // Extract key entities
        if (options.ExtractEntities)
        {
            var entities = await ExtractEntitiesAsync(document.Content);
            enrichedDoc.Entities = entities;
            enrichedDoc.Metadata["entities"] = entities;
        }

        // Generate summary
        if (options.GenerateSummary)
        {
            var summary = await GenerateSummaryAsync(document.Content);
            enrichedDoc.Summary = summary;
            enrichedDoc.Metadata["summary"] = summary;
        }

        // Extract keywords (ExtractKeywordsAsync is implemented elsewhere;
        // omitted from this listing)
        if (options.ExtractKeywords)
        {
            var keywords = await ExtractKeywordsAsync(document.Content);
            enrichedDoc.Keywords = keywords;
            enrichedDoc.Metadata["keywords"] = keywords;
        }

        // Classify document (ClassifyDocumentAsync is implemented elsewhere;
        // omitted from this listing)
        if (options.ClassifyDocument)
        {
            var classification = await ClassifyDocumentAsync(document.Content);
            enrichedDoc.Classification = classification;
            enrichedDoc.Metadata["category"] = classification.Category;
            enrichedDoc.Metadata["confidence"] = classification.Confidence;
        }

        // Generate Q&A pairs for better retrieval
        if (options.GenerateQAPairs)
        {
            var qaPairs = await GenerateQAPairsAsync(document.Content);
            enrichedDoc.QAPairs = qaPairs;
        }

        return enrichedDoc;
    }

    /// <summary>
    /// Extracts named entities (people, organizations, locations, dates,
    /// technical terms) from the content via a JSON-mode prompt.
    /// </summary>
    private async Task<List<Entity>> ExtractEntitiesAsync(string content)
    {
        var prompt = @"
以下のテキストから重要なエンティティ(人名、組織名、場所、日付、技術用語など)を抽出してください。
JSON形式で出力してください。

テキスト:
{content}

出力形式:
{
  ""entities"": [
    {
      ""text"": ""エンティティテキスト"",
      ""type"": ""PERSON|ORGANIZATION|LOCATION|DATE|TECHNOLOGY|OTHER"",
      ""context"": ""エンティティが現れる文脈""
    }
  ]
}";

        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are an entity extraction expert."),
                    new ChatRequestUserMessage(prompt.Replace("{content}", content))
                },
                // Low temperature: extraction should be deterministic
                Temperature = 0.1f,
                ResponseFormat = new ChatCompletionsResponseFormatJSON()
            }
        );

        var json = response.Value.Choices[0].Message.Content;
        var result = JsonSerializer.Deserialize<EntityExtractionResult>(json, JsonOptions);

        // Never return null to callers that store this into metadata
        return result?.Entities ?? new List<Entity>();
    }

    /// <summary>Generates a short bullet-point summary of the content.</summary>
    private async Task<string> GenerateSummaryAsync(string content)
    {
        var prompt = @"
以下のテキストの要約を作成してください。
重要なポイントを箇条書きで3-5個挙げてください。

テキスト:
{content}

要約:";

        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are a summarization expert."),
                    new ChatRequestUserMessage(prompt.Replace("{content}", content))
                },
                Temperature = 0.3f,
                MaxTokens = 300
            }
        );

        return response.Value.Choices[0].Message.Content;
    }

    /// <summary>
    /// Generates likely question/answer pairs from the content; indexing
    /// these alongside the chunks improves question-style retrieval.
    /// </summary>
    private async Task<List<QAPair>> GenerateQAPairsAsync(string content)
    {
        var prompt = @"
以下のテキストから、ユーザーが尋ねそうな質問と回答のペアを3-5個生成してください。
JSON形式で出力してください。

テキスト:
{content}

出力形式:
{
  ""qa_pairs"": [
    {
      ""question"": ""質問文"",
      ""answer"": ""回答文"",
      ""relevance_score"": 0.9
    }
  ]
}";

        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are a Q&A generation expert."),
                    new ChatRequestUserMessage(prompt.Replace("{content}", content))
                },
                Temperature = 0.5f,
                ResponseFormat = new ChatCompletionsResponseFormatJSON()
            }
        );

        var json = response.Value.Choices[0].Message.Content;
        var result = JsonSerializer.Deserialize<QAGenerationResult>(json, JsonOptions);

        return result?.QAPairs ?? new List<QAPair>();
    }
}

// Models
/// <summary>
/// A document augmented with LLM-derived metadata for better retrieval.
/// </summary>
public class EnrichedDocument
{
    /// <summary>Source document identifier.</summary>
    public string Id { get; set; }
    /// <summary>The unmodified original document content.</summary>
    public string OriginalContent { get; set; }
    /// <summary>LLM-generated bullet-point summary (null if not requested).</summary>
    public string Summary { get; set; }
    /// <summary>Extracted named entities (null if not requested).</summary>
    public List<Entity> Entities { get; set; }
    /// <summary>Extracted keywords (null if not requested).</summary>
    public List<string> Keywords { get; set; }
    /// <summary>Category/confidence classification (null if not requested).</summary>
    public Classification Classification { get; set; }
    /// <summary>Generated question/answer pairs (null if not requested).</summary>
    public List<QAPair> QAPairs { get; set; }
    /// <summary>Metadata bag; enrichment results are mirrored in here too.</summary>
    public Dictionary<string, object> Metadata { get; set; }
}

/// <summary>
/// A named entity extracted from document content.
/// </summary>
public class Entity
{
    /// <summary>The entity surface text as it appears in the document.</summary>
    public string Text { get; set; }
    /// <summary>Entity category, e.g. PERSON, ORGANIZATION, LOCATION, DATE, TECHNOLOGY, OTHER.</summary>
    public string Type { get; set; }
    /// <summary>Surrounding text in which the entity occurs.</summary>
    public string Context { get; set; }
}

/// <summary>
/// A generated question/answer pair indexed to aid question-style retrieval.
/// </summary>
public class QAPair
{
    /// <summary>A question a user might plausibly ask about the document.</summary>
    public string Question { get; set; }
    /// <summary>The answer grounded in the document content.</summary>
    public string Answer { get; set; }
    /// <summary>Model-assigned relevance in [0, 1].</summary>
    public double RelevanceScore { get; set; }
}

高度な検索戦略

1. ハイブリッド検索の実装

// Hybrid Search Implementation
/// <summary>
/// Runs multiple search strategies (vector, keyword, semantic, optional Q&amp;A
/// matching) in parallel, merges their weighted scores per document, and
/// optionally re-ranks the top candidates with an LLM relevance judge.
/// Strategies not shown here (semantic, Q&amp;A matching, embedding, query
/// expansion) are implemented elsewhere in this class's file.
/// </summary>
public class HybridSearchService
{
    private readonly SearchClient _searchClient;
    private readonly OpenAIClient _openAIClient;
    private readonly IMemoryCache _cache;

    /// <summary>
    /// Executes all enabled strategies for <paramref name="query"/> and
    /// returns the merged, re-ranked result set.
    /// </summary>
    public async Task<HybridSearchResults> SearchAsync(
        string query,
        HybridSearchOptions options)
    {
        // Parallel execution of different search strategies
        var searchTasks = new List<Task<SearchStrategy>>();

        // 1. Vector search
        searchTasks.Add(ExecuteVectorSearchAsync(query, options));

        // 2. Keyword search
        searchTasks.Add(ExecuteKeywordSearchAsync(query, options));

        // 3. Semantic search
        searchTasks.Add(ExecuteSemanticSearchAsync(query, options));

        // 4. Q&A matching (if enabled)
        if (options.EnableQAMatching)
        {
            searchTasks.Add(ExecuteQAMatchingAsync(query, options));
        }

        var searchResults = await Task.WhenAll(searchTasks);

        // Combine and re-rank results
        var combinedResults = await CombineAndRerankAsync(
            searchResults,
            query,
            options
        );

        return new HybridSearchResults
        {
            Results = combinedResults,
            SearchStrategiesUsed = searchResults.Select(r => r.Name).ToList(),
            TotalMatches = combinedResults.Count,
            Query = query
        };
    }

    /// <summary>
    /// Pure k-NN vector search over the "contentVector" field. Over-fetches
    /// (TopK * 2) to give the re-ranking stage more candidates.
    /// </summary>
    private async Task<SearchStrategy> ExecuteVectorSearchAsync(
        string query,
        HybridSearchOptions options)
    {
        var queryEmbedding = await GenerateEmbeddingAsync(query);

        var searchOptions = new SearchOptions
        {
            Size = options.TopK * 2, // Get more for re-ranking
            VectorSearch = new()
            {
                Queries = { new VectorizedQuery(queryEmbedding)
                {
                    KNearestNeighborsCount = options.TopK * 2,
                    Fields = { "contentVector" }
                }}
            }
        };

        var results = await _searchClient.SearchAsync<SearchDocument>(
            searchText: null,
            searchOptions
        );

        var documents = new List<RankedDocument>();
        await foreach (var result in results.Value.GetResultsAsync())
        {
            documents.Add(new RankedDocument
            {
                Document = result.Document,
                Score = result.Score ?? 0,
                SearchType = "vector"
            });
        }

        return new SearchStrategy
        {
            Name = "VectorSearch",
            Weight = options.VectorSearchWeight,
            Results = documents
        };
    }

    /// <summary>
    /// Full-text keyword search with synonym-expanded query, a scoring
    /// profile, and hit highlighting.
    /// </summary>
    private async Task<SearchStrategy> ExecuteKeywordSearchAsync(
        string query,
        HybridSearchOptions options)
    {
        // Expand query with synonyms
        var expandedQuery = await ExpandQueryAsync(query);

        var searchOptions = new SearchOptions
        {
            Size = options.TopK * 2,
            QueryType = SearchQueryType.Full,
            SearchMode = SearchMode.All,
            // "keywordBoost" must exist as a scoring profile on the index.
            ScoringProfile = "keywordBoost",
            HighlightFields = { "content" },
            HighlightPreTag = "<mark>",
            HighlightPostTag = "</mark>"
        };

        var results = await _searchClient.SearchAsync<SearchDocument>(
            expandedQuery,
            searchOptions
        );

        var documents = new List<RankedDocument>();
        await foreach (var result in results.Value.GetResultsAsync())
        {
            documents.Add(new RankedDocument
            {
                Document = result.Document,
                Score = result.Score ?? 0,
                SearchType = "keyword",
                Highlights = result.Highlights
            });
        }

        return new SearchStrategy
        {
            Name = "KeywordSearch",
            Weight = options.KeywordSearchWeight,
            Results = documents
        };
    }

    /// <summary>
    /// Merges per-strategy results into one weighted score per document,
    /// optionally applies cross-encoder re-ranking, and returns the TopK.
    /// </summary>
    private async Task<List<RankedDocument>> CombineAndRerankAsync(
        SearchStrategy[] strategies,
        string query,
        HybridSearchOptions options)
    {
        // Collect all unique documents, accumulating weighted scores
        var documentScores = new Dictionary<string, DocumentScore>();

        foreach (var strategy in strategies)
        {
            foreach (var result in strategy.Results)
            {
                var docId = result.Document.GetString("id");

                if (!documentScores.ContainsKey(docId))
                {
                    documentScores[docId] = new DocumentScore
                    {
                        Document = result.Document,
                        Highlights = result.Highlights
                    };
                }

                // Apply strategy weight
                var weightedScore = result.Score * strategy.Weight;
                documentScores[docId].Scores[strategy.Name] = weightedScore;
                documentScores[docId].TotalScore += weightedScore;
            }
        }

        // Apply cross-encoder re-ranking for top candidates
        var topCandidates = documentScores.Values
            .OrderByDescending(d => d.TotalScore)
            .Take(options.RerankTopK)
            .ToList();

        // Seed FinalScore with the weighted total so the ordering below is
        // meaningful even when re-ranking is disabled. Previously FinalScore
        // stayed at its default of 0 in that case, making the final
        // OrderByDescending arbitrary.
        foreach (var candidate in topCandidates)
        {
            candidate.FinalScore = candidate.TotalScore;
        }

        if (options.EnableCrossEncoderReranking)
        {
            await RerankWithCrossEncoderAsync(topCandidates, query);
        }

        // Final ranking
        return topCandidates
            .OrderByDescending(d => d.FinalScore)
            .Take(options.TopK)
            .Select(d => new RankedDocument
            {
                Document = d.Document,
                Score = d.FinalScore,
                SearchType = "hybrid",
                Highlights = d.Highlights,
                ScoreBreakdown = d.Scores
            })
            .ToList();
    }

    /// <summary>
    /// Scores each candidate's relevance with the LLM (in parallel) and
    /// blends it with the retrieval score (30% retrieval, 70% LLM).
    /// NOTE(review): one chat completion per candidate with no concurrency
    /// cap — consider batching or a SemaphoreSlim under load.
    /// </summary>
    private async Task RerankWithCrossEncoderAsync(
        List<DocumentScore> candidates,
        string query)
    {
        var rerankingTasks = candidates.Select(async candidate =>
        {
            var content = candidate.Document.GetString("content");
            var relevanceScore = await CalculateRelevanceScoreAsync(query, content);
            candidate.CrossEncoderScore = relevanceScore;
            candidate.FinalScore = candidate.TotalScore * 0.3 + relevanceScore * 0.7;
        });

        await Task.WhenAll(rerankingTasks);
    }

    /// <summary>
    /// Asks the LLM for a 0-1 relevance score for (query, document);
    /// falls back to 0.5 when the reply is not a parseable number.
    /// </summary>
    private async Task<double> CalculateRelevanceScoreAsync(
        string query,
        string document)
    {
        var prompt = $@"
質問とドキュメントの関連性を0から1のスコアで評価してください。

質問: {query}
ドキュメント: {document.Substring(0, Math.Min(1000, document.Length))}

スコア(0-1):";

        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are a relevance scoring expert. Output only a number between 0 and 1."),
                    new ChatRequestUserMessage(prompt)
                },
                Temperature = 0,
                MaxTokens = 10
            }
        );

        // Parse with the invariant culture: the model emits "0.8"-style
        // decimals, which the culture-sensitive overload rejects on
        // comma-decimal locales (silently degrading every score to 0.5).
        if (double.TryParse(
                response.Value.Choices[0].Message.Content,
                System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture,
                out var score))
        {
            return Math.Max(0, Math.Min(1, score));
        }

        return 0.5; // Default score
    }
}

2. クエリ理解と拡張

# query_understanding.py
from typing import List, Dict, Tuple
import openai
from dataclasses import dataclass
import json

@dataclass
class QueryIntent:
    """Structured interpretation of a user query, as produced by
    QueryUnderstandingService.analyze_query."""

    intent_type: str  # factual, analytical, navigational, transactional
    entities: List[Dict]  # Extracted entities; downstream code reads "text"/"type" keys.
    temporal_context: str  # past, present, future, timeless
    required_depth: str  # shallow, medium, deep
    language: str  # Language code, e.g. "ja"

class QueryUnderstandingService:
    """LLM-backed query understanding: intent analysis, query expansion,
    and derivation of search filters from the analyzed intent."""

    def __init__(self, openai_client):
        # Injected async OpenAI client (mockable in tests).
        self.client = openai_client

    async def analyze_query(self, query: str) -> QueryIntent:
        """
        Analyze user query to understand intent and context.

        Returns a QueryIntent; any field the model omits falls back to a
        safe default.
        """
        prompt = f"""
Analyze the following query and extract:
1. Intent type (factual/analytical/navigational/transactional)
2. Key entities and their types
3. Temporal context
4. Required depth of answer
5. Language

Query: {query}

Output in JSON format.
"""

        response = await self.client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a query analysis expert."},
                {"role": "user", "content": prompt}
            ],
            temperature=0,
            response_format={"type": "json_object"}
        )

        analysis = json.loads(response.choices[0].message.content)

        return QueryIntent(
            intent_type=analysis.get("intent_type", "factual"),
            entities=analysis.get("entities", []),
            temporal_context=analysis.get("temporal_context", "timeless"),
            required_depth=analysis.get("required_depth", "medium"),
            language=analysis.get("language", "ja")
        )

    async def expand_query(
        self,
        query: str,
        intent: QueryIntent
    ) -> List[str]:
        """
        Expand query with variations and related terms.

        Always returns a non-empty list; falls back to [query].
        """
        expansion_prompt = f"""
Given the query: "{query}"
Intent: {intent.intent_type}
Entities: {intent.entities}

Generate 3-5 query variations that capture the same intent but with different phrasings.
Include relevant synonyms and related terms.

Output as JSON array of strings.
"""

        response = await self.client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a query expansion expert."},
                {"role": "user", "content": expansion_prompt}
            ],
            temperature=0.3,
            response_format={"type": "json_object"}
        )

        expansions = json.loads(response.choices[0].message.content)

        # The prompt asks for a JSON array, but response_format "json_object"
        # forces an object wrapper; accept either shape defensively. The old
        # code assumed {"queries": [...]} and raised AttributeError on a list.
        if isinstance(expansions, list):
            return [str(q) for q in expansions] or [query]
        return expansions.get("queries", [query])

    async def generate_filters(
        self,
        query: str,
        intent: QueryIntent
    ) -> Dict[str, object]:
        """
        Generate search filters based on query understanding.

        Note: the return annotation was ``Dict[str, any]`` — ``any`` is the
        builtin function, not a type; ``object`` is used instead.
        """
        filters = {}

        # Temporal filters
        if intent.temporal_context != "timeless":
            filters["temporal_filter"] = self._get_temporal_filter(
                intent.temporal_context
            )

        # Entity-based filters — use .get so a malformed entity dict from the
        # LLM degrades gracefully instead of raising KeyError.
        for entity in intent.entities:
            entity_type = entity.get("type")
            entity_text = entity.get("text", "")
            if entity_type == "ORGANIZATION":
                filters["organization"] = entity_text
            elif entity_type == "TECHNOLOGY":
                filters["technology_stack"] = entity_text
            elif entity_type == "DATE":
                filters["date_range"] = self._parse_date_entity(entity_text)

        # Depth-based filters
        if intent.required_depth == "deep":
            filters["min_content_length"] = 1000
            filters["include_technical_details"] = True
        elif intent.required_depth == "shallow":
            filters["max_content_length"] = 500
            filters["summary_only"] = True

        return filters

    def _parse_date_entity(self, text: str) -> Dict:
        """Minimal passthrough for DATE entities.

        NOTE(review): the original code called this method without ever
        defining it, so any DATE entity raised AttributeError. This stub
        preserves the raw text; replace with a real date parser (e.g.
        dateutil) to produce actual start/end bounds.
        """
        return {"raw": text}

    def _get_temporal_filter(self, context: str) -> Dict:
        """Map a temporal context label to a concrete date-range filter.

        Returns an empty dict for unknown contexts.
        """
        from datetime import datetime, timedelta

        now = datetime.now()

        if context == "past":
            # Older than the last 30 days, up to one year back
            return {
                "end_date": now - timedelta(days=30),
                "start_date": now - timedelta(days=365)
            }
        elif context == "present":
            # The most recent 30 days
            return {
                "start_date": now - timedelta(days=30),
                "end_date": now
            }
        elif context == "future":
            return {
                "start_date": now,
                "include_predictions": True
            }

        return {}

回答生成の最適化

1. コンテキスト管理と回答品質向上

// Advanced Response Generation
/// <summary>
/// Generates grounded answers from ranked retrieval results: packs a
/// token-budgeted context, calls Azure OpenAI, fact-checks the draft,
/// attaches [n]-style citations, and produces follow-up questions.
/// </summary>
public class AdvancedResponseGenerator
{
    private readonly OpenAIClient _openAIClient;
    // NOTE(review): _cache is currently unused in this class — confirm whether
    // response caching was meant to be wired in here.
    private readonly IMemoryCache _cache;
    private readonly ILogger<AdvancedResponseGenerator> _logger;

    /// <summary>
    /// Runs the full generation pipeline for one query.
    /// </summary>
    /// <param name="query">The user's question.</param>
    /// <param name="documents">Retrieved documents, already ranked by relevance.</param>
    /// <param name="options">Generation settings (model, token budgets, style).</param>
    /// <returns>The cited answer plus confidence, sources, follow-ups and metadata.</returns>
    public async Task<EnhancedResponse> GenerateResponseAsync(
        string query,
        List<RankedDocument> documents,
        ResponseGenerationOptions options)
    {
        // Pack retrieved documents into a token-budgeted, structured context.
        var context = BuildStructuredContext(documents, options);

        // First draft from the model.
        var response = await GenerateInitialResponseAsync(
            query,
            context,
            options
        );

        // Fact-check the draft; regenerate with stricter settings if it fails.
        var enhancedResponse = await EnhanceResponseAsync(
            response,
            query,
            documents,
            options
        );

        // Attach citation markers pointing back at the source documents.
        var citedResponse = AddCitations(enhancedResponse, documents);

        return new EnhancedResponse
        {
            Answer = citedResponse,
            Confidence = CalculateConfidence(documents),
            Sources = ExtractSources(documents),
            FollowUpQuestions = await GenerateFollowUpQuestionsAsync(
                query,
                citedResponse
            ),
            Metadata = new ResponseMetadata
            {
                TokensUsed = CountTokens(citedResponse),
                // NOTE(review): stores the completion timestamp, not a
                // duration, despite the property name — confirm semantics.
                ResponseTime = DateTime.UtcNow,
                Model = options.Model
            }
        };
    }

    /// <summary>
    /// Groups documents by their "type" field, orders groups by best score,
    /// and packs document contents into sections until the token budget
    /// (<see cref="ResponseGenerationOptions.MaxContextTokens"/>) is spent.
    /// The last document that does not fit is truncated to the remainder.
    /// </summary>
    private StructuredContext BuildStructuredContext(
        List<RankedDocument> documents,
        ResponseGenerationOptions options)
    {
        var context = new StructuredContext();
        var tokenBudget = options.MaxContextTokens;
        var usedTokens = 0;

        // Best-scoring type groups come first so high-value content survives
        // budget exhaustion.
        var groupedDocs = documents
            .GroupBy(d => d.Document.GetString("type") ?? "general")
            .OrderByDescending(g => g.Max(d => d.Score));

        foreach (var group in groupedDocs)
        {
            var section = new ContextSection
            {
                Type = group.Key,
                Documents = new List<ContextDocument>()
            };

            foreach (var doc in group.OrderByDescending(d => d.Score))
            {
                // Stop before adding zero-length entries once the budget is
                // exhausted (the original could append an empty document).
                var remaining = tokenBudget - usedTokens;
                if (remaining <= 0)
                    break;

                var content = doc.Document.GetString("content");
                var tokens = EstimateTokens(content);

                if (tokens > remaining)
                {
                    // Truncate the final document so the section fits exactly.
                    content = TruncateToTokenLimit(content, remaining);
                    tokens = remaining;
                }

                section.Documents.Add(new ContextDocument
                {
                    Id = doc.Document.GetString("id"),
                    Title = doc.Document.GetString("title"),
                    Content = content,
                    Score = doc.Score,
                    Metadata = doc.Document.GetObject("metadata")
                });

                usedTokens += tokens;
            }

            context.Sections.Add(section);

            if (usedTokens >= tokenBudget)
                break;
        }

        context.TotalTokens = usedTokens;
        return context;
    }

    /// <summary>
    /// Sends the system/user prompts to the chat model and returns the text of
    /// the first choice. If the model requests a function call, executes it
    /// once, feeds the result back, and returns the follow-up completion.
    /// </summary>
    private async Task<string> GenerateInitialResponseAsync(
        string query,
        StructuredContext context,
        ResponseGenerationOptions options)
    {
        var systemPrompt = BuildSystemPrompt(options);
        var userPrompt = BuildUserPrompt(query, context);

        var completionOptions = new ChatCompletionsOptions
        {
            DeploymentName = options.Model,
            Messages =
            {
                new ChatRequestSystemMessage(systemPrompt),
                new ChatRequestUserMessage(userPrompt)
            },
            Temperature = options.Temperature,
            MaxTokens = options.MaxResponseTokens,
            Functions = GetAvailableFunctions(options),
            FunctionCall = options.EnableFunctionCalling
                ? FunctionCallOption.Auto
                : FunctionCallOption.None
        };

        var response = await _openAIClient.GetChatCompletionsAsync(completionOptions);

        // Single round of function calling: execute the requested function,
        // append its result, and ask the model to finish the answer.
        if (response.Value.Choices[0].Message.FunctionCall != null)
        {
            var functionResult = await ExecuteFunctionCallAsync(
                response.Value.Choices[0].Message.FunctionCall
            );

            completionOptions.Messages.Add(
                new ChatRequestFunctionMessage(
                    response.Value.Choices[0].Message.FunctionCall.Name,
                    functionResult
                )
            );

            response = await _openAIClient.GetChatCompletionsAsync(completionOptions);
        }

        return response.Value.Choices[0].Message.Content;
    }

    /// <summary>
    /// Builds the (Japanese) system prompt: core grounding/citation rules,
    /// plus style- and confidence-specific guidelines from the options.
    /// </summary>
    private string BuildSystemPrompt(ResponseGenerationOptions options)
    {
        var prompt = new StringBuilder();

        prompt.AppendLine("あなたは正確で信頼性の高い情報を提供するAIアシスタントです。");
        prompt.AppendLine();
        prompt.AppendLine("回答する際の重要なガイドライン:");
        prompt.AppendLine("1. 提供されたコンテキスト情報のみに基づいて回答する");
        prompt.AppendLine("2. 情報源を明確に示す([1], [2]などの参照番号を使用)");
        prompt.AppendLine("3. 確信が持てない場合は、その旨を明記する");
        prompt.AppendLine("4. 段階的で論理的な説明を心がける");

        if (options.ResponseStyle == ResponseStyle.Technical)
        {
            prompt.AppendLine("5. 技術的な詳細を含め、専門用語を適切に使用する");
        }
        else if (options.ResponseStyle == ResponseStyle.Simple)
        {
            prompt.AppendLine("5. 専門用語を避け、分かりやすい説明を心がける");
        }

        if (options.IncludeConfidenceLevel)
        {
            prompt.AppendLine("6. 回答の確信度を明示する(高/中/低)");
        }

        return prompt.ToString();
    }

    /// <summary>
    /// Validates the draft against the source documents; when the fact check
    /// fails, regenerates with near-deterministic, strictly-grounded settings.
    /// Otherwise optionally runs a readability pass.
    /// </summary>
    private async Task<string> EnhanceResponseAsync(
        string initialResponse,
        string query,
        List<RankedDocument> documents,
        ResponseGenerationOptions options)
    {
        // Fact-check the response against its sources.
        var factCheckResult = await FactCheckResponseAsync(
            initialResponse,
            documents
        );

        if (!factCheckResult.IsAccurate)
        {
            _logger.LogWarning(
                "Fact check failed for response. Regenerating..."
            );

            // BUG FIX: the original used `options with { ... }`, which only
            // compiles when ResponseGenerationOptions is a record type. Copy
            // the settings explicitly so this works for class or record alike.
            var stricterOptions = new ResponseGenerationOptions
            {
                Model = options.Model,
                MaxContextTokens = options.MaxContextTokens,
                MaxResponseTokens = options.MaxResponseTokens,
                Temperature = 0.1f, // near-deterministic retry
                ResponseStyle = options.ResponseStyle,
                IncludeConfidenceLevel = options.IncludeConfidenceLevel,
                EnableFunctionCalling = options.EnableFunctionCalling,
                EnhanceReadability = options.EnhanceReadability,
                StrictFactChecking = true
            };

            return await GenerateInitialResponseAsync(
                query,
                BuildStructuredContext(documents, stricterOptions),
                stricterOptions
            );
        }

        // Enhance clarity and structure when requested.
        if (options.EnhanceReadability)
        {
            return await EnhanceReadabilityAsync(initialResponse);
        }

        return initialResponse;
    }

    /// <summary>
    /// Inserts [n]-style citation markers into sentences whose content matches
    /// a source document, then appends a numbered reference list.
    /// </summary>
    private string AddCitations(string response, List<RankedDocument> documents)
    {
        var citationMap = new Dictionary<string, int>();
        var citationIndex = 1;

        // BUG FIX: the original split on both '.' and '。' but re-joined with
        // '。' only — and did so once per document — corrupting '.'-terminated
        // text and appending spurious terminators. Split once, keep each
        // sentence's own terminator attached (lookbehind), and reassemble
        // losslessly with string.Concat.
        var sentences = System.Text.RegularExpressions.Regex
            .Split(response, "(?<=[.。])");

        foreach (var doc in documents)
        {
            var title = doc.Document.GetString("title");
            var content = doc.Document.GetString("content");

            for (int i = 0; i < sentences.Length; i++)
            {
                var core = sentences[i].TrimEnd('.', '。').Trim();
                if (core.Length == 0 || !IsContentMatch(core, content))
                    continue;

                if (!citationMap.TryGetValue(title, out var number))
                {
                    number = citationIndex++;
                    citationMap[title] = number;
                }

                var marker = $"[{number}]";
                if (sentences[i].Contains(marker))
                    continue; // already cited for this source

                // Insert the marker just before the sentence terminator.
                var sentence = sentences[i];
                var last = sentence.Length > 0 ? sentence[sentence.Length - 1] : '\0';
                sentences[i] = (last == '.' || last == '。')
                    ? sentence.Substring(0, sentence.Length - 1) + marker + last
                    : sentence + marker;
            }
        }

        var result = string.Concat(sentences);

        // Append the numbered reference list.
        if (citationMap.Any())
        {
            var refs = new StringBuilder(result);
            refs.Append("\n\n参考文献:\n");
            foreach (var citation in citationMap.OrderBy(c => c.Value))
            {
                refs.Append($"[{citation.Value}] {citation.Key}\n");
            }
            result = refs.ToString();
        }

        return result;
    }

    /// <summary>
    /// Asks the model for three likely follow-up questions, expected as a JSON
    /// array of strings. Returns an empty list when the output is unparseable.
    /// </summary>
    private async Task<List<string>> GenerateFollowUpQuestionsAsync(
        string query,
        string response)
    {
        var prompt = $@"
元の質問: {query}
回答: {response}

この回答を踏まえて、ユーザーが次に尋ねそうな関連質問を3つ生成してください。
JSON配列形式で出力してください。";

        var completionResponse = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages =
                {
                    new ChatRequestSystemMessage("You are a helpful assistant that generates follow-up questions."),
                    new ChatRequestUserMessage(prompt)
                },
                Temperature = 0.7f,
                ResponseFormat = new ChatCompletionsResponseFormatJSON()
            }
        );

        var json = completionResponse.Value.Choices[0].Message.Content;

        // Robustness: JSON mode does not guarantee a bare top-level array;
        // degrade to an empty list instead of failing the whole response.
        try
        {
            var questions = JsonSerializer.Deserialize<List<string>>(json);
            return questions ?? new List<string>();
        }
        catch (JsonException)
        {
            _logger.LogWarning("Follow-up question output was not a JSON string array.");
            return new List<string>();
        }
    }
}

// Supporting classes
/// <summary>
/// The packed context handed to the model: document sections grouped by type,
/// plus the total number of tokens consumed by the packed content.
/// </summary>
public class StructuredContext
{
    /// <summary>Context sections, one per document type group.</summary>
    public List<ContextSection> Sections { get; set; } = new List<ContextSection>();

    /// <summary>Total estimated tokens used across all sections.</summary>
    public int TotalTokens { get; set; }
}

/// <summary>
/// One group of context documents sharing the same document "type" value.
/// </summary>
public class ContextSection
{
    /// <summary>The shared type key for this group (e.g. "general").</summary>
    public string Type { get; set; }

    /// <summary>Documents in this section; producers add them best-score first.</summary>
    public List<ContextDocument> Documents { get; set; }
}

/// <summary>
/// A single (possibly truncated) retrieved document as packed into the
/// prompt context, with its relevance score and source metadata.
/// </summary>
public class ContextDocument
{
    /// <summary>Source document identifier.</summary>
    public string Id { get; set; }

    /// <summary>Source document title.</summary>
    public string Title { get; set; }

    /// <summary>Document text, possibly truncated to fit the token budget.</summary>
    public string Content { get; set; }

    /// <summary>Relevance score from the retrieval stage.</summary>
    public double Score { get; set; }

    /// <summary>Arbitrary metadata carried over from the search index.</summary>
    public Dictionary<string, object> Metadata { get; set; }
}

/// <summary>
/// Tunable settings for answer generation.
/// BUG FIX: declared as a record (was a class) because existing code derives
/// stricter variants via a `with` expression, which only compiles for records.
/// Property shape and defaults are unchanged, so all existing initializers
/// keep working.
/// </summary>
public record ResponseGenerationOptions
{
    /// <summary>Deployment/model name used for chat completions.</summary>
    public string Model { get; set; } = "gpt-4";

    /// <summary>Token budget for retrieved context packed into the prompt.</summary>
    public int MaxContextTokens { get; set; } = 3000;

    /// <summary>Upper bound on the generated answer length, in tokens.</summary>
    public int MaxResponseTokens { get; set; } = 1000;

    /// <summary>Sampling temperature; low by default for factual answers.</summary>
    public float Temperature { get; set; } = 0.3f;

    /// <summary>Overall tone: technical, simple, or balanced.</summary>
    public ResponseStyle ResponseStyle { get; set; } = ResponseStyle.Balanced;

    /// <summary>Ask the model to state its confidence (high/medium/low).</summary>
    public bool IncludeConfidenceLevel { get; set; } = true;

    /// <summary>Enable OpenAI function calling during generation.</summary>
    public bool EnableFunctionCalling { get; set; } = false;

    /// <summary>Run an extra readability-improvement pass on the answer.</summary>
    public bool EnhanceReadability { get; set; } = true;

    /// <summary>Regenerate with stricter grounding when fact-checking fails.</summary>
    public bool StrictFactChecking { get; set; } = false;
}

/// <summary>Desired tone and verbosity of generated answers.</summary>
public enum ResponseStyle
{
    /// <summary>Include technical detail and domain terminology.</summary>
    Technical,

    /// <summary>Avoid jargon; favor plain-language explanations.</summary>
    Simple,

    /// <summary>Middle ground between Technical and Simple (the default).</summary>
    Balanced
}

評価とモニタリング

1. RAGシステムの評価メトリクス

# rag_evaluation.py
import numpy as np
from typing import List, Dict, Tuple
from dataclasses import dataclass
import asyncio
from sklearn.metrics.pairwise import cosine_similarity

@dataclass
class EvaluationResult:
    """Container for the results of one full RAG evaluation run."""
    # Aggregated retrieval metrics (e.g. precision@5, recall@5, MRR, NDCG).
    retrieval_metrics: Dict[str, float]
    # Aggregated generation metrics (e.g. faithfulness, relevance, fluency).
    generation_metrics: Dict[str, float]
    # Aggregated end-to-end answer-quality metrics.
    end_to_end_metrics: Dict[str, float]
    # Per-test-case breakdown: query plus its retrieval/generation/e2e scores.
    detailed_results: List[Dict]

class RAGEvaluator:
    """Evaluates a RAG system on three axes: retrieval quality,
    generation quality, and end-to-end answer quality.

    Some scoring helpers referenced here (e.g. ``_evaluate_end_to_end``,
    ``_score_relevance``, the ``_aggregate_*`` methods) are defined
    elsewhere / omitted in this excerpt.
    """

    def __init__(self, openai_client, search_service, rag_service):
        self.openai_client = openai_client
        self.search_service = search_service
        self.rag_service = rag_service

    async def evaluate_system(
        self,
        test_queries: List[Dict]
    ) -> "EvaluationResult":
        """
        Comprehensive evaluation of the RAG system.

        Each test case dict must have a "query" key and may supply
        "relevant_docs" (ground-truth document ids) and "expected_answer".
        Returns aggregated retrieval / generation / end-to-end metrics plus
        a per-case breakdown.
        """
        retrieval_scores = []
        generation_scores = []
        end_to_end_scores = []
        detailed_results = []

        for test_case in test_queries:
            query = test_case["query"]
            expected_docs = test_case.get("relevant_docs", [])
            expected_answer = test_case.get("expected_answer", "")

            # Retrieval quality (precision/recall/MRR/NDCG, or LLM-scored
            # relevance when no ground truth is available).
            retrieval_result = await self._evaluate_retrieval(
                query,
                expected_docs
            )
            retrieval_scores.append(retrieval_result)

            # Generation quality, grounded on what was actually retrieved.
            generation_result = await self._evaluate_generation(
                query,
                retrieval_result["retrieved_docs"],
                expected_answer
            )
            generation_scores.append(generation_result)

            # Full-pipeline answer quality.
            e2e_result = await self._evaluate_end_to_end(
                query,
                expected_answer
            )
            end_to_end_scores.append(e2e_result)

            detailed_results.append({
                "query": query,
                "retrieval": retrieval_result,
                "generation": generation_result,
                "end_to_end": e2e_result
            })

        return EvaluationResult(
            retrieval_metrics=self._aggregate_retrieval_metrics(retrieval_scores),
            generation_metrics=self._aggregate_generation_metrics(generation_scores),
            end_to_end_metrics=self._aggregate_e2e_metrics(end_to_end_scores),
            detailed_results=detailed_results
        )

    async def _evaluate_retrieval(
        self,
        query: str,
        expected_docs: List[str]
    ) -> Dict:
        """Evaluate retrieval performance for one query.

        With ground truth, computes precision@5, recall@5, MRR and NDCG.
        Without it, falls back to LLM-scored relevance and reports only an
        estimated precision@5 (the other metrics are None).
        """
        # Get retrieved documents
        search_results = await self.search_service.search(query, top_k=10)
        retrieved_docs = [r.document_id for r in search_results]

        if expected_docs:
            precision_at_k = self._calculate_precision_at_k(
                retrieved_docs,
                expected_docs,
                k=5
            )
            recall_at_k = self._calculate_recall_at_k(
                retrieved_docs,
                expected_docs,
                k=5
            )
            mrr = self._calculate_mrr(retrieved_docs, expected_docs)
            ndcg = self._calculate_ndcg(
                retrieved_docs,
                expected_docs,
                [r.score for r in search_results]
            )
        else:
            # No ground truth: score relevance with the LLM instead.
            relevance_scores = await self._score_relevance(
                query,
                search_results
            )
            # BUG FIX: `_score_relevance` may return a plain list, and
            # `list > float` is not a valid element-wise comparison;
            # coerce to an ndarray first.
            precision_at_k = float(
                np.mean(np.asarray(relevance_scores[:5]) > 0.7)
            )
            recall_at_k = None
            mrr = None
            ndcg = None

        return {
            "retrieved_docs": retrieved_docs[:5],
            "precision_at_5": precision_at_k,
            "recall_at_5": recall_at_k,
            "mrr": mrr,
            "ndcg": ndcg,
            "avg_score": np.mean([r.score for r in search_results[:5]])
        }

    async def _evaluate_generation(
        self,
        query: str,
        retrieved_docs: List[str],
        expected_answer: str
    ) -> Dict:
        """Evaluate generation quality for one query.

        Scores semantic similarity to the expected answer (if provided),
        faithfulness to the retrieved context, answer relevance, and fluency.
        """
        # Regenerate the answer from the retrieved context.
        context = await self._build_context(retrieved_docs)
        generated_answer = await self._generate_answer(query, context)

        metrics = {}

        # Semantic similarity (only meaningful with a reference answer).
        if expected_answer:
            metrics["semantic_similarity"] = await self._calculate_semantic_similarity(
                generated_answer,
                expected_answer
            )

        # Faithfulness to context
        metrics["faithfulness"] = await self._evaluate_faithfulness(
            generated_answer,
            context
        )

        # Answer relevance
        metrics["relevance"] = await self._evaluate_answer_relevance(
            query,
            generated_answer
        )

        # Fluency and coherence
        metrics["fluency"] = await self._evaluate_fluency(generated_answer)

        return metrics

    async def _evaluate_faithfulness(
        self,
        answer: str,
        context: str
    ) -> float:
        """LLM-as-judge faithfulness score in [0, 1].

        The context is truncated to 1000 chars to bound prompt size. Falls
        back to a neutral 0.5 when the judge returns non-numeric output.
        """
        prompt = f"""
Given the context and answer, evaluate if the answer is faithful to the context.
Score from 0 to 1, where 1 means completely faithful.

Context: {context[:1000]}
Answer: {answer}

Output only a number between 0 and 1.
"""

        response = await self.openai_client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an evaluation expert."},
                {"role": "user", "content": prompt}
            ],
            temperature=0
        )

        try:
            return float(response.choices[0].message.content.strip())
        except (ValueError, TypeError, AttributeError):
            # BUG FIX: the bare `except:` also swallowed KeyboardInterrupt /
            # SystemExit. Catch only the parse-related failures and keep the
            # best-effort neutral fallback.
            return 0.5

    def _calculate_ndcg(
        self,
        retrieved: List[str],
        relevant: List[str],
        scores: List[float]
    ) -> float:
        """Calculate Normalized Discounted Cumulative Gain.

        Uses binary relevance (1 if a retrieved doc is in ``relevant``).
        ``scores`` is accepted for interface compatibility but unused —
        graded relevance is not available from the binary ground truth.
        Returns 0.0 when there are no relevant documents in the list.
        """
        def dcg(relevances, k=None):
            if k is None:
                k = len(relevances)
            return sum(
                rel / np.log2(i + 2)
                for i, rel in enumerate(relevances[:k])
            )

        # Binary relevance per retrieved position.
        relevances = [1 if doc in relevant else 0 for doc in retrieved]

        dcg_score = dcg(relevances)

        # Ideal ordering puts every relevant doc first.
        ideal_relevances = sorted(relevances, reverse=True)
        idcg_score = dcg(ideal_relevances)

        if idcg_score == 0:
            return 0.0

        return dcg_score / idcg_score

2. 継続的改善のためのフィードバックループ

// Feedback and Improvement System
/// <summary>
/// Collects user feedback on RAG answers, tracks satisfaction metrics, and
/// kicks off targeted improvement workflows for poorly rated responses.
/// </summary>
public class RAGFeedbackSystem
{
    private readonly IFeedbackRepository _feedbackRepo;
    private readonly IRAGService _ragService;
    private readonly IMetricsService _metricsService;
    private readonly ILogger<RAGFeedbackSystem> _logger;

    /// <summary>
    /// Persists a piece of user feedback, updates satisfaction metrics, and
    /// triggers the improvement workflow for low-rated or unhelpful answers.
    /// </summary>
    /// <param name="feedback">The feedback to record; must not be null.</param>
    public async Task RecordFeedbackAsync(UserFeedback feedback)
    {
        ArgumentNullException.ThrowIfNull(feedback);

        // Store feedback
        await _feedbackRepo.SaveFeedbackAsync(feedback);

        // Update metrics
        await _metricsService.UpdateUserSatisfactionMetricsAsync(
            feedback.SessionId,
            feedback.Rating,
            feedback.WasHelpful
        );

        // Ratings below 3, or an explicit "not helpful", trigger remediation.
        if (feedback.Rating < 3 || !feedback.WasHelpful)
        {
            await TriggerImprovementWorkflowAsync(feedback);
        }
    }

    /// <summary>
    /// Classifies why an answer failed and dispatches the matching remedy
    /// (content coverage, search tuning, or prompt refinement).
    /// </summary>
    private async Task TriggerImprovementWorkflowAsync(UserFeedback feedback)
    {
        // Analyze the problematic query before choosing a remedy.
        var analysis = await AnalyzeFailureAsync(
            feedback.Query,
            feedback.Response,
            feedback.Sources
        );

        switch (analysis.FailureType)
        {
            case FailureType.InsufficientContext:
                await ImproveDocumentCoverageAsync(feedback.Query);
                break;

            case FailureType.PoorRetrieval:
                await OptimizeSearchParametersAsync(feedback.Query);
                break;

            case FailureType.IncorrectGeneration:
                await RefineGenerationPromptsAsync(feedback);
                break;

            default:
                // BUG FIX: new FailureType members previously fell through
                // silently; at least leave a trace so gaps are noticed.
                _logger.LogWarning(
                    "No improvement workflow defined for failure type {Type}",
                    analysis.FailureType
                );
                break;
        }

        // Log improvement action
        _logger.LogInformation(
            "Triggered improvement workflow for query: {Query}, Type: {Type}",
            feedback.Query,
            analysis.FailureType
        );
    }

    /// <summary>
    /// Scores retrieval and response quality for a failed interaction and
    /// derives the dominant failure type plus recommendations.
    /// </summary>
    private async Task<FailureAnalysis> AnalyzeFailureAsync(
        string query,
        string response,
        List<Source> sources)
    {
        // Analyze retrieval quality
        var retrievalScore = await EvaluateRetrievalQualityAsync(query, sources);

        // Analyze response quality
        var responseScore = await EvaluateResponseQualityAsync(
            query,
            response,
            sources
        );

        // Determine failure type
        var failureType = DetermineFailureType(retrievalScore, responseScore);

        return new FailureAnalysis
        {
            FailureType = failureType,
            RetrievalScore = retrievalScore,
            ResponseScore = responseScore,
            Recommendations = GenerateRecommendations(failureType)
        };
    }

    /// <summary>
    /// Fills knowledge-base gaps for a query: finds missing topics, indexes
    /// generated placeholder documents for them, and refreshes metadata.
    /// NOTE(review): indexing synthetic (LLM-generated) documents risks
    /// polluting the knowledge base with unverified content — confirm policy.
    /// </summary>
    private async Task ImproveDocumentCoverageAsync(string query)
    {
        // Identify missing content areas
        var missingTopics = await IdentifyMissingTopicsAsync(query);

        // Generate synthetic documents for missing topics
        foreach (var topic in missingTopics)
        {
            var syntheticDoc = await GenerateSyntheticDocumentAsync(topic);
            await IndexDocumentAsync(syntheticDoc);
        }

        // Update document metadata to improve retrieval
        await UpdateDocumentMetadataAsync(query);
    }

    /// <summary>
    /// A/B tests candidate search configurations for the failing query and
    /// applies the best-performing one to the RAG service.
    /// </summary>
    private async Task OptimizeSearchParametersAsync(string query)
    {
        // A/B test different search configurations
        var configurations = GenerateSearchConfigurations();
        var bestConfig = await RunSearchOptimizationAsync(
            query,
            configurations
        );

        // Update search service with optimized parameters
        await _ragService.UpdateSearchConfigurationAsync(
            query,
            bestConfig
        );
    }
}

// Monitoring Dashboard Data
/// <summary>
/// Aggregates RAG telemetry for a reporting window into one dashboard payload.
/// </summary>
public class RAGMonitoringService
{
    /// <summary>
    /// Collects performance, quality, usage, system-health and cost metrics
    /// for [startDate, endDate] and returns them as a single dashboard object.
    /// </summary>
    public async Task<RAGDashboardData> GetDashboardDataAsync(
        DateTime startDate,
        DateTime endDate)
    {
        // Performance metrics
        var averageResponseTime = await CalculateAverageResponseTimeAsync(startDate, endDate);
        var medianResponseTime = await CalculateMedianResponseTimeAsync(startDate, endDate);
        var p95ResponseTime = await CalculateP95ResponseTimeAsync(startDate, endDate);

        // Quality metrics
        var averageUserRating = await CalculateAverageRatingAsync(startDate, endDate);
        var answerAccuracy = await CalculateAnswerAccuracyAsync(startDate, endDate);
        var retrievalPrecision = await CalculateRetrievalPrecisionAsync(startDate, endDate);

        // Usage metrics
        var totalQueries = await GetTotalQueriesAsync(startDate, endDate);
        var uniqueUsers = await GetUniqueUsersAsync(startDate, endDate);
        var topQueries = await GetTopQueriesAsync(startDate, endDate, 10);

        // System health
        var errorRate = await CalculateErrorRateAsync(startDate, endDate);
        var cacheHitRate = await CalculateCacheHitRateAsync(startDate, endDate);
        var indexFreshness = await CalculateIndexFreshnessAsync();

        // Cost analysis
        var totalTokensUsed = await GetTotalTokensUsedAsync(startDate, endDate);
        var estimatedCost = await CalculateEstimatedCostAsync(startDate, endDate);

        return new RAGDashboardData
        {
            AverageResponseTime = averageResponseTime,
            MedianResponseTime = medianResponseTime,
            P95ResponseTime = p95ResponseTime,

            AverageUserRating = averageUserRating,
            AnswerAccuracy = answerAccuracy,
            RetrievalPrecision = retrievalPrecision,

            TotalQueries = totalQueries,
            UniqueUsers = uniqueUsers,
            TopQueries = topQueries,

            ErrorRate = errorRate,
            CacheHitRate = cacheHitRate,
            IndexFreshness = indexFreshness,

            TotalTokensUsed = totalTokensUsed,
            EstimatedCost = estimatedCost
        };
    }
}

パフォーマンス最適化

1. キャッシングとレスポンス最適化

// Intelligent Caching System
/// <summary>
/// Two-tier response cache for the RAG pipeline: exact-match lookups in a
/// distributed cache, plus embedding-based semantic matches in a vector store.
/// </summary>
public class RAGCacheManager
{
    private readonly IDistributedCache _cache;
    private readonly IVectorDatabase _vectorDb;
    private readonly ILogger<RAGCacheManager> _logger;

    /// <summary>
    /// Returns a cached response for <paramref name="query"/> if one exists:
    /// exact match first, then (optionally) a semantically similar query.
    /// Returns null on a miss.
    /// </summary>
    public async Task<CachedResponse?> GetCachedResponseAsync(
        string query,
        CacheOptions options)
    {
        // Check exact match cache
        var exactMatch = await GetExactMatchAsync(query);
        if (exactMatch != null)
        {
            return exactMatch;
        }

        // Check semantic similarity cache
        if (options.EnableSemanticCache)
        {
            return await GetSemanticMatchAsync(query, options.SimilarityThreshold);
        }

        return null;
    }

    /// <summary>
    /// Looks for cached responses whose original query is semantically close
    /// to <paramref name="query"/> (similarity >= <paramref name="threshold"/>),
    /// returning the first still-fresh hit.
    /// </summary>
    private async Task<CachedResponse?> GetSemanticMatchAsync(
        string query,
        float threshold)
    {
        // Generate query embedding
        var queryEmbedding = await GenerateEmbeddingAsync(query);

        // Search for similar cached queries
        var similarQueries = await _vectorDb.SearchSimilarAsync(
            queryEmbedding,
            "query_cache",
            topK: 5,
            minScore: threshold
        );

        foreach (var match in similarQueries)
        {
            var cacheKey = $"rag_response:{match.Id}";
            var cachedJson = await _cache.GetStringAsync(cacheKey);

            if (!string.IsNullOrEmpty(cachedJson))
            {
                var cached = JsonSerializer.Deserialize<CachedResponse>(cachedJson);

                // BUG FIX: honor the entry's own ExpiresAt instead of a
                // hardcoded 24h window, so per-entry CacheDuration settings
                // configured at write time are actually respected.
                if (cached != null && cached.ExpiresAt > DateTime.UtcNow)
                {
                    _logger.LogInformation(
                        "Semantic cache hit for query. Similarity: {Score}",
                        match.Score
                    );

                    return cached;
                }
            }
        }

        return null;
    }

    /// <summary>
    /// Stores a generated response in the distributed cache (with absolute +
    /// sliding expiry) and, when enabled, registers the query embedding in the
    /// vector store so future similar queries can find it.
    /// </summary>
    public async Task CacheResponseAsync(
        string query,
        RAGResponse response,
        CacheOptions options)
    {
        var cacheEntry = new CachedResponse
        {
            Query = query,
            Response = response,
            CachedAt = DateTime.UtcNow,
            ExpiresAt = DateTime.UtcNow.Add(options.CacheDuration),
            Metadata = new CacheMetadata
            {
                // Track source docs so document-level invalidation can find us.
                SourceDocuments = response.Sources.Select(s => s.Id).ToList(),
                ModelVersion = response.ModelVersion,
                QueryEmbedding = await GenerateEmbeddingAsync(query)
            }
        };

        // Store in distributed cache
        var cacheKey = $"rag_response:{GenerateCacheKey(query)}";
        await _cache.SetStringAsync(
            cacheKey,
            JsonSerializer.Serialize(cacheEntry),
            new DistributedCacheEntryOptions
            {
                AbsoluteExpiration = cacheEntry.ExpiresAt,
                SlidingExpiration = TimeSpan.FromHours(1)
            }
        );

        // Store in vector database for semantic search
        if (options.EnableSemanticCache)
        {
            await _vectorDb.UpsertAsync(
                "query_cache",
                new VectorDocument
                {
                    Id = GenerateCacheKey(query),
                    Vector = cacheEntry.Metadata.QueryEmbedding,
                    Metadata = new Dictionary<string, object>
                    {
                        ["query"] = query,
                        ["cached_at"] = cacheEntry.CachedAt,
                        ["expires_at"] = cacheEntry.ExpiresAt
                    }
                }
            );
        }
    }

    /// <summary>
    /// Invalidates cached responses: everything, entries referencing given
    /// documents, or entries older than a cutoff.
    /// </summary>
    public async Task InvalidateCacheAsync(InvalidationOptions options)
    {
        if (options.InvalidateAll)
        {
            // BUG FIX: IDistributedCache has no pattern/wildcard delete, so
            // the previous RemoveAsync("rag_response:*") only removed the
            // literal key "rag_response:*" — a no-op for real entries.
            // Clearing the semantic index below disables all semantic hits;
            // stale exact-match entries then age out via AbsoluteExpiration.
            // A true bulk purge requires store-specific support (e.g. Redis
            // key scans) or explicit key tracking.
            await _vectorDb.DeleteCollectionAsync("query_cache");
        }
        else if (options.DocumentIds?.Any() == true)
        {
            // Invalidate cache entries that reference specific documents
            var keysToInvalidate = await FindCacheKeysReferencingDocumentsAsync(
                options.DocumentIds
            );

            foreach (var key in keysToInvalidate)
            {
                await _cache.RemoveAsync(key);
            }
        }
        else if (options.OlderThan.HasValue)
        {
            // Invalidate old cache entries
            await InvalidateOldEntriesAsync(options.OlderThan.Value);
        }
    }
}

まとめ

Azure OpenAI ServiceとAzure AI Searchを活用したRAGシステムは、エンタープライズレベルの知識ベースシステムを構築するための強力なソリューションです。本記事で紹介した実装パターンにより、以下の利点が得られます:

  1. 高精度な情報検索: ハイブリッド検索とセマンティック理解
  2. 信頼性の高い回答生成: コンテキストに忠実な回答と引用
  3. スケーラビリティ: 効率的なキャッシングと最適化
  4. 継続的改善: フィードバックループと自動最適化

エンハンスド株式会社では、Azure OpenAI Serviceを活用したRAGシステムの設計・構築を支援しています。お客様のビジネスニーズに合わせたカスタマイズも承っております。


#AzureOpenAI #RAG #AzureAISearch #GPT4 #エンタープライズAI #知識管理 #自然言語処理 #ベクトル検索 #AI開発

執筆者: エンハンスド株式会社 技術チーム
公開日: 2024年12月24日