Azure OpenAI Service を活用した高精度RAGシステム構築ガイド

はじめに

RAG(Retrieval-Augmented Generation)は、大規模言語モデルに外部知識を統合することで、より正確で最新の情報に基づいた回答を生成する技術です。本記事では、Azure OpenAI ServiceとAzure AI Searchを組み合わせた、エンタープライズグレードのRAGシステム構築方法を解説します。

RAGアーキテクチャの設計

1. システム全体構成

graph TB
    %% Query-time path: user query -> embedding -> vector/semantic retrieval -> grounded answer
    A[User Query] --> B[Query Processor]
    B --> C[Azure OpenAI Embeddings]
    C --> D[Azure AI Search]
    D --> E[Relevant Documents]
    E --> F[Context Builder]
    F --> G[Azure OpenAI GPT-4]
    G --> H[Response Generator]
    H --> I[User Response]

    %% Ingestion path: raw documents are chunked, embedded, and written into the same index
    J[Document Sources] --> K[Document Processor]
    K --> L[Chunking & Embedding]
    L --> D

2. 基本実装

// RAG Service Implementation
using Azure;
using Azure.AI.OpenAI;
using Azure.Search.Documents;
using Azure.Search.Documents.Indexes;
using Azure.Search.Documents.Models;

/// <summary>
/// End-to-end RAG pipeline: embeds the query, retrieves relevant documents
/// from Azure AI Search (hybrid semantic + vector), assembles a token-budgeted
/// context, and asks an Azure OpenAI chat deployment to answer grounded in it.
/// </summary>
public class RAGService
{
    private readonly OpenAIClient _openAIClient;
    private readonly SearchClient _searchClient;
    // These are Azure OpenAI *deployment* names, which must match the
    // deployments configured in the resource (not necessarily model names).
    private readonly string _embeddingDeployment = "text-embedding-ada-002";
    private readonly string _completionDeployment = "gpt-4";

    /// <summary>
    /// Creates API-key-authenticated clients for Azure OpenAI and Azure AI Search.
    /// </summary>
    /// <param name="openAIEndpoint">Azure OpenAI resource endpoint URI.</param>
    /// <param name="openAIKey">Azure OpenAI API key.</param>
    /// <param name="searchEndpoint">Azure AI Search service endpoint URI.</param>
    /// <param name="searchKey">Azure AI Search key.</param>
    /// <param name="indexName">Search index holding the chunked documents.</param>
    public RAGService(
        string openAIEndpoint,
        string openAIKey,
        string searchEndpoint,
        string searchKey,
        string indexName)
    {
        _openAIClient = new OpenAIClient(
            new Uri(openAIEndpoint),
            new AzureKeyCredential(openAIKey)
        );

        _searchClient = new SearchClient(
            new Uri(searchEndpoint),
            indexName,
            new AzureKeyCredential(searchKey)
        );
    }

    /// <summary>
    /// Answers <paramref name="query"/> using retrieval-augmented generation.
    /// </summary>
    /// <param name="query">The user's natural-language question.</param>
    /// <param name="options">Optional tuning knobs; defaults apply when null.</param>
    /// <returns>The generated answer plus the sources and context used.</returns>
    /// <exception cref="ArgumentException">Query is null, empty, or whitespace.</exception>
    public async Task<RAGResponse> GenerateResponseAsync(
        string query,
        RAGOptions options = null)
    {
        if (string.IsNullOrWhiteSpace(query))
        {
            throw new ArgumentException("Query must not be null or whitespace.", nameof(query));
        }

        options ??= new RAGOptions();

        // Step 1: Generate embeddings for the query
        var queryEmbedding = await GenerateEmbeddingAsync(query);

        // Step 2: Search for relevant documents
        var searchResults = await SearchDocumentsAsync(
            queryEmbedding,
            options.TopK,
            options.MinRelevanceScore
        );

        // Step 3: Build context from search results
        var context = BuildContext(searchResults, options.MaxContextTokens);

        // Step 4: Generate the grounded answer with the chat deployment
        var response = await GenerateCompletionAsync(
            query,
            context,
            options
        );

        return new RAGResponse
        {
            Answer = response,
            Sources = searchResults.Select(r => new Source
            {
                Title = r.Document.GetString("title"),
                Url = r.Document.GetString("url"),
                Excerpt = r.Document.GetString("content"),
                Score = r.Score ?? 0
            }).ToList(),
            Context = context
        };
    }

    /// <summary>Embeds a single text with the configured embedding deployment.</summary>
    private async Task<float[]> GenerateEmbeddingAsync(string text)
    {
        var embeddingOptions = new EmbeddingsOptions(
            _embeddingDeployment,
            new[] { text }
        );

        var response = await _openAIClient.GetEmbeddingsAsync(embeddingOptions);
        return response.Value.Data[0].Embedding.ToArray();
    }

    /// <summary>
    /// Runs a combined semantic + vector query against the index and keeps
    /// only hits whose score reaches <paramref name="minScore"/>.
    /// </summary>
    private async Task<List<SearchResult<SearchDocument>>> SearchDocumentsAsync(
        float[] queryVector,
        int topK,
        double minScore)
    {
        var searchOptions = new SearchOptions
        {
            Select = { "id", "title", "content", "url", "metadata" },
            Size = topK,
            IncludeTotalCount = true,
            QueryType = SearchQueryType.Semantic,
            SemanticSearch = new()
            {
                // "default" must exist as a semantic configuration on the index.
                SemanticConfigurationName = "default",
                QueryCaption = new(QueryCaptionType.Extractive),
                QueryAnswer = new(QueryAnswerType.Extractive)
            },
            VectorSearch = new()
            {
                Queries = { new VectorizedQuery(queryVector)
                {
                    KNearestNeighborsCount = topK,
                    Fields = { "contentVector" }
                }}
            }
        };

        var searchResponse = await _searchClient.SearchAsync<SearchDocument>(
            searchText: null,
            searchOptions
        );

        var results = new List<SearchResult<SearchDocument>>();
        await foreach (var result in searchResponse.Value.GetResultsAsync())
        {
            if (result.Score >= minScore)
            {
                results.Add(result);
            }
        }

        return results;
    }

    /// <summary>
    /// Concatenates retrieved documents (best score first) into a prompt
    /// context, staying within the token budget.
    /// </summary>
    private string BuildContext(
        List<SearchResult<SearchDocument>> searchResults,
        int maxTokens)
    {
        var contextBuilder = new StringBuilder();
        var currentTokens = 0;

        foreach (var result in searchResults.OrderByDescending(r => r.Score))
        {
            var content = result.Document.GetString("content");
            var tokens = EstimateTokenCount(content);

            if (currentTokens + tokens > maxTokens)
            {
                // Skip this document but keep trying lower-ranked (possibly
                // shorter) ones. The previous `break` discarded every
                // remaining candidate whenever the single best hit alone
                // exceeded the budget, yielding an empty context.
                continue;
            }

            contextBuilder.AppendLine($"---");
            contextBuilder.AppendLine($"Source: {result.Document.GetString("title")}");
            contextBuilder.AppendLine($"Content: {content}");
            contextBuilder.AppendLine();

            currentTokens += tokens;
        }

        return contextBuilder.ToString();
    }

    /// <summary>
    /// Asks the chat deployment to answer the query grounded in the context,
    /// with a system prompt enforcing context-only, sourced answers.
    /// </summary>
    private async Task<string> GenerateCompletionAsync(
        string query,
        string context,
        RAGOptions options)
    {
        var systemPrompt = @"
あなたは親切で正確な情報を提供するAIアシスタントです。
以下のコンテキスト情報を使用して、ユーザーの質問に答えてください。

重要なルール:
1. コンテキストに基づいて回答してください
2. コンテキストに情報がない場合は、その旨を明確に伝えてください
3. 推測や憶測は避け、事実に基づいた回答を心がけてください
4. 回答の根拠となる情報源を明示してください";

        var userPrompt = $@"
コンテキスト情報:
{context}

質問:{query}

上記のコンテキスト情報を使用して、質問に対する詳細で正確な回答を提供してください。";

        var completionOptions = new ChatCompletionsOptions
        {
            DeploymentName = _completionDeployment,
            Messages =
            {
                new ChatRequestSystemMessage(systemPrompt),
                new ChatRequestUserMessage(userPrompt)
            },
            Temperature = options.Temperature,
            MaxTokens = options.MaxResponseTokens,
            NucleusSamplingFactor = options.TopP,
            FrequencyPenalty = options.FrequencyPenalty,
            PresencePenalty = options.PresencePenalty
        };

        var response = await _openAIClient.GetChatCompletionsAsync(completionOptions);
        return response.Value.Choices[0].Message.Content;
    }

    /// <summary>
    /// Rough token estimate used only for context budgeting.
    /// </summary>
    private int EstimateTokenCount(string text)
    {
        // The old "~4 characters per token" rule is an English-text heuristic;
        // Japanese typically tokenizes to roughly 1-2 characters per token, so
        // dividing by 4 under-counted by 2-4x and risked overflowing the model
        // context window. chars/2 is still cheap but much safer. For
        // production, measure with a real tokenizer (tiktoken / SharpToken).
        if (string.IsNullOrEmpty(text))
        {
            return 0;
        }

        return text.Length / 2;
    }
}

// Configuration classes
/// <summary>
/// Tuning knobs for a single RAG request (retrieval and generation).
/// </summary>
public class RAGOptions
{
    /// <summary>Maximum number of documents to retrieve from search.</summary>
    public int TopK { get; set; } = 5;
    /// <summary>Hits scoring below this are dropped before context building.</summary>
    public double MinRelevanceScore { get; set; } = 0.7;
    /// <summary>Token budget for the retrieved context placed in the prompt.</summary>
    public int MaxContextTokens { get; set; } = 2000;
    /// <summary>Sampling temperature; kept low for factual, grounded answers.</summary>
    public float Temperature { get; set; } = 0.3f;
    /// <summary>Maximum tokens the model may generate for the answer.</summary>
    public int MaxResponseTokens { get; set; } = 1000;
    /// <summary>Nucleus-sampling probability mass.</summary>
    public float TopP { get; set; } = 0.95f;
    /// <summary>Penalty for repeating tokens already frequent in the output.</summary>
    public float FrequencyPenalty { get; set; } = 0;
    /// <summary>Penalty for reusing tokens already present in the output.</summary>
    public float PresencePenalty { get; set; } = 0;
}

/// <summary>
/// Result of a RAG request: the answer plus the evidence behind it.
/// </summary>
public class RAGResponse
{
    /// <summary>The model-generated answer text.</summary>
    public string Answer { get; set; }
    /// <summary>Documents retrieved for the query, with scores and excerpts.</summary>
    public List<Source> Sources { get; set; }
    /// <summary>The exact context string that was placed in the prompt.</summary>
    public string Context { get; set; }
}

/// <summary>
/// One retrieved document cited as evidence for an answer.
/// </summary>
public class Source
{
    /// <summary>Document title from the search index ("title" field).</summary>
    public string Title { get; set; }
    /// <summary>Link to the original document ("url" field).</summary>
    public string Url { get; set; }
    /// <summary>Document content as stored in the index ("content" field).</summary>
    public string Excerpt { get; set; }
    /// <summary>Search relevance score (0 when the engine returned none).</summary>
    public double Score { get; set; }
}

ドキュメント処理パイプライン

1. インテリジェントなチャンキング

# document_processor.py
import tiktoken
from typing import List, Dict, Tuple
import re
from dataclasses import dataclass
import numpy as np

@dataclass
class Chunk:
    """One contiguous slice of a source document, ready for embedding/indexing."""

    content: str  # The chunk text itself.
    metadata: Dict  # Document-level metadata attached to the chunk.
    start_index: int  # Character offset of the chunk start (approximate; see chunker).
    end_index: int  # Character offset just past the chunk end (approximate).
    token_count: int  # Tokens in `content` as counted by the chunker's encoder.

class IntelligentChunker:
    """Token-aware document chunker that respects paragraph and sentence
    boundaries, with configurable overlap between consecutive chunks.

    Note: the start/end character offsets recorded on chunks are approximate
    (overlap text and join separators make exact offsets ambiguous).
    """

    def __init__(
        self,
        model_name: str = "gpt-4",
        chunk_size: int = 512,
        chunk_overlap: int = 128
    ):
        # tiktoken gives real token counts for the target model, so chunk
        # budgets match what the embedding/completion models will see.
        self.encoder = tiktoken.encoding_for_model(model_name)
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def chunk_document(
        self,
        text: str,
        metadata: Dict
    ) -> List[Chunk]:
        """
        Intelligently chunk document while preserving semantic boundaries.

        Args:
            text: Full document text.
            metadata: Document-level metadata copied onto every chunk.

        Returns:
            List of Chunk objects, each within ``chunk_size`` tokens.
        """
        # Split into paragraphs first
        paragraphs = self._split_paragraphs(text)

        chunks = []
        current_chunk = []
        current_tokens = 0
        start_index = 0

        for i, paragraph in enumerate(paragraphs):
            paragraph_tokens = len(self.encoder.encode(paragraph))

            # If paragraph itself is too large, split it at sentence level.
            # Pass metadata through: previously sub-chunks were created with
            # an empty metadata dict, silently losing document metadata for
            # any document containing an oversized paragraph.
            if paragraph_tokens > self.chunk_size:
                sub_chunks = self._split_large_paragraph(
                    paragraph,
                    start_index,
                    metadata
                )
                chunks.extend(sub_chunks)
                start_index += len(paragraph)
                continue

            # If adding paragraph exceeds chunk size, close the current chunk
            if current_tokens + paragraph_tokens > self.chunk_size:
                if current_chunk:
                    chunk_content = '\n\n'.join(current_chunk)
                    chunks.append(Chunk(
                        content=chunk_content,
                        metadata=metadata,
                        start_index=start_index - len(chunk_content),
                        end_index=start_index,
                        token_count=current_tokens
                    ))

                # Start new chunk, seeded with trailing paragraphs for overlap
                overlap_chunks = self._get_overlap_chunks(
                    current_chunk,
                    self.chunk_overlap
                )
                current_chunk = overlap_chunks + [paragraph]
                current_tokens = sum(
                    len(self.encoder.encode(c)) for c in current_chunk
                )
            else:
                current_chunk.append(paragraph)
                current_tokens += paragraph_tokens

            start_index += len(paragraph) + 2  # Account for the '\n\n' join

        # Flush the final partial chunk
        if current_chunk:
            chunk_content = '\n\n'.join(current_chunk)
            chunks.append(Chunk(
                content=chunk_content,
                metadata=metadata,
                start_index=start_index - len(chunk_content),
                end_index=start_index,
                token_count=current_tokens
            ))

        return chunks

    def _split_paragraphs(self, text: str) -> List[str]:
        """Split text into semantic paragraphs, keeping headers as their own
        paragraph so they never get glued into an unrelated body chunk."""
        lines = text.split('\n')
        paragraphs = []
        current_paragraph = []

        for line in lines:
            line = line.strip()

            # A header terminates the current paragraph and stands alone
            if self._is_header(line):
                if current_paragraph:
                    paragraphs.append('\n'.join(current_paragraph))
                    current_paragraph = []
                paragraphs.append(line)
            elif line:
                current_paragraph.append(line)
            elif current_paragraph:
                # Blank line = paragraph boundary
                paragraphs.append('\n'.join(current_paragraph))
                current_paragraph = []

        if current_paragraph:
            paragraphs.append('\n'.join(current_paragraph))

        return [p for p in paragraphs if p.strip()]

    def _is_header(self, line: str) -> bool:
        """Heuristically detect whether a line is a section header."""
        header_patterns = [
            r'^#{1,6}\s',  # Markdown headers
            r'^[A-Z][^.!?]*:$',  # Title case with colon
            r'^\d+\.\s+[A-Z]',  # Numbered sections
            r'^[A-Z][A-Z\s]+$'  # All caps
        ]

        return any(re.match(pattern, line) for pattern in header_patterns)

    def _split_large_paragraph(
        self,
        paragraph: str,
        start_index: int,
        metadata: Dict = None
    ) -> List[Chunk]:
        """Split a paragraph that alone exceeds chunk_size at sentence
        boundaries.

        Args:
            paragraph: The oversized paragraph text.
            start_index: Character offset of the paragraph in the document.
            metadata: Document metadata to attach to every sub-chunk
                (defaults to an empty dict for backward compatibility).
        """
        metadata = metadata if metadata is not None else {}
        sentences = self._split_sentences(paragraph)
        chunks = []
        current_sentences = []
        current_tokens = 0

        for sentence in sentences:
            sentence_tokens = len(self.encoder.encode(sentence))

            if current_tokens + sentence_tokens > self.chunk_size:
                if current_sentences:
                    chunk_content = ' '.join(current_sentences)
                    chunks.append(Chunk(
                        content=chunk_content,
                        metadata=metadata,
                        start_index=start_index,
                        end_index=start_index + len(chunk_content),
                        token_count=current_tokens
                    ))
                    start_index += len(chunk_content) + 1

                current_sentences = [sentence]
                current_tokens = sentence_tokens
            else:
                current_sentences.append(sentence)
                current_tokens += sentence_tokens

        # Flush the trailing sentences
        if current_sentences:
            chunk_content = ' '.join(current_sentences)
            chunks.append(Chunk(
                content=chunk_content,
                metadata=metadata,
                start_index=start_index,
                end_index=start_index + len(chunk_content),
                token_count=current_tokens
            ))

        return chunks

    def _split_sentences(self, text: str) -> List[str]:
        """Split text into sentences on ./!/? boundaries.

        NOTE(review): this only handles ASCII sentence punctuation — Japanese
        '。' is not matched; use spaCy or a locale-aware splitter in production.
        """
        sentences = re.split(r'(?<=[.!?])\s+', text)
        return [s.strip() for s in sentences if s.strip()]

    def _get_overlap_chunks(
        self,
        chunks: List[str],
        overlap_tokens: int
    ) -> List[str]:
        """Collect trailing paragraphs (in order) whose combined token count
        fits within the overlap budget."""
        overlap_chunks = []
        total_tokens = 0

        for chunk in reversed(chunks):
            chunk_tokens = len(self.encoder.encode(chunk))
            if total_tokens + chunk_tokens <= overlap_tokens:
                overlap_chunks.insert(0, chunk)
                total_tokens += chunk_tokens
            else:
                break

        return overlap_chunks

2. メタデータ抽出とエンリッチメント

// Document Enrichment Service
/// <summary>
/// Enriches raw documents with LLM-derived metadata (entities, summary,
/// keywords, classification, Q&amp;A pairs) to improve downstream retrieval.
/// </summary>
public class DocumentEnrichmentService
{
    private readonly OpenAIClient _openAIClient;
    private readonly ILogger<DocumentEnrichmentService> _logger;

    // The prompts ask the model for lower-case JSON keys while the C# result
    // models use PascalCase properties; without case-insensitive matching
    // every Deserialize call silently produced empty objects.
    // NOTE(review): snake_case keys such as "qa_pairs" still need a
    // [JsonPropertyName] attribute on the result model — case-insensitivity
    // alone does not bridge the underscore. Verify QAGenerationResult.
    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
    {
        PropertyNameCaseInsensitive = true
    };

    /// <summary>
    /// Wires up dependencies. The original class declared readonly fields but
    /// no constructor, so both fields were always null and every enrichment
    /// call threw NullReferenceException.
    /// </summary>
    /// <param name="openAIClient">Azure OpenAI client used for all prompts.</param>
    /// <param name="logger">Logger; may be null in tests.</param>
    public DocumentEnrichmentService(
        OpenAIClient openAIClient,
        ILogger<DocumentEnrichmentService> logger)
    {
        _openAIClient = openAIClient ?? throw new ArgumentNullException(nameof(openAIClient));
        _logger = logger;
    }

    /// <summary>
    /// Runs the enrichment steps selected in <paramref name="options"/> and
    /// mirrors each result into the document's metadata dictionary.
    /// </summary>
    public async Task<EnrichedDocument> EnrichDocumentAsync(
        Document document,
        EnrichmentOptions options)
    {
        var enrichedDoc = new EnrichedDocument
        {
            Id = document.Id,
            OriginalContent = document.Content,
            Metadata = document.Metadata ?? new Dictionary<string, object>()
        };

        // Extract key entities
        if (options.ExtractEntities)
        {
            var entities = await ExtractEntitiesAsync(document.Content);
            enrichedDoc.Entities = entities;
            enrichedDoc.Metadata["entities"] = entities;
        }

        // Generate summary
        if (options.GenerateSummary)
        {
            var summary = await GenerateSummaryAsync(document.Content);
            enrichedDoc.Summary = summary;
            enrichedDoc.Metadata["summary"] = summary;
        }

        // Extract keywords (ExtractKeywordsAsync is implemented elsewhere;
        // omitted from this listing)
        if (options.ExtractKeywords)
        {
            var keywords = await ExtractKeywordsAsync(document.Content);
            enrichedDoc.Keywords = keywords;
            enrichedDoc.Metadata["keywords"] = keywords;
        }

        // Classify document (ClassifyDocumentAsync is implemented elsewhere;
        // omitted from this listing)
        if (options.ClassifyDocument)
        {
            var classification = await ClassifyDocumentAsync(document.Content);
            enrichedDoc.Classification = classification;
            enrichedDoc.Metadata["category"] = classification.Category;
            enrichedDoc.Metadata["confidence"] = classification.Confidence;
        }

        // Generate Q&A pairs for better retrieval
        if (options.GenerateQAPairs)
        {
            var qaPairs = await GenerateQAPairsAsync(document.Content);
            enrichedDoc.QAPairs = qaPairs;
        }

        return enrichedDoc;
    }

    /// <summary>
    /// Extracts named entities (people, organizations, locations, dates,
    /// technical terms) from the content via a JSON-mode prompt.
    /// </summary>
    private async Task<List<Entity>> ExtractEntitiesAsync(string content)
    {
        var prompt = @"
以下のテキストから重要なエンティティ(人名、組織名、場所、日付、技術用語など)を抽出してください。
JSON形式で出力してください。

テキスト:
{content}

出力形式:
{
  ""entities"": [
    {
      ""text"": ""エンティティテキスト"",
      ""type"": ""PERSON|ORGANIZATION|LOCATION|DATE|TECHNOLOGY|OTHER"",
      ""context"": ""エンティティが現れる文脈""
    }
  ]
}";

        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are an entity extraction expert."),
                    new ChatRequestUserMessage(prompt.Replace("{content}", content))
                },
                // Low temperature: extraction should be deterministic
                Temperature = 0.1f,
                ResponseFormat = new ChatCompletionsResponseFormatJSON()
            }
        );

        var json = response.Value.Choices[0].Message.Content;
        var result = JsonSerializer.Deserialize<EntityExtractionResult>(json, JsonOptions);

        // Never return null to callers that store this into metadata
        return result?.Entities ?? new List<Entity>();
    }

    /// <summary>Generates a short bullet-point summary of the content.</summary>
    private async Task<string> GenerateSummaryAsync(string content)
    {
        var prompt = @"
以下のテキストの要約を作成してください。
重要なポイントを箇条書きで3-5個挙げてください。

テキスト:
{content}

要約:";

        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are a summarization expert."),
                    new ChatRequestUserMessage(prompt.Replace("{content}", content))
                },
                Temperature = 0.3f,
                MaxTokens = 300
            }
        );

        return response.Value.Choices[0].Message.Content;
    }

    /// <summary>
    /// Generates likely question/answer pairs from the content; indexing
    /// these alongside the chunks improves question-style retrieval.
    /// </summary>
    private async Task<List<QAPair>> GenerateQAPairsAsync(string content)
    {
        var prompt = @"
以下のテキストから、ユーザーが尋ねそうな質問と回答のペアを3-5個生成してください。
JSON形式で出力してください。

テキスト:
{content}

出力形式:
{
  ""qa_pairs"": [
    {
      ""question"": ""質問文"",
      ""answer"": ""回答文"",
      ""relevance_score"": 0.9
    }
  ]
}";

        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are a Q&A generation expert."),
                    new ChatRequestUserMessage(prompt.Replace("{content}", content))
                },
                Temperature = 0.5f,
                ResponseFormat = new ChatCompletionsResponseFormatJSON()
            }
        );

        var json = response.Value.Choices[0].Message.Content;
        var result = JsonSerializer.Deserialize<QAGenerationResult>(json, JsonOptions);

        return result?.QAPairs ?? new List<QAPair>();
    }
}

// Models
/// <summary>
/// A document augmented with LLM-derived metadata for better retrieval.
/// </summary>
public class EnrichedDocument
{
    /// <summary>Source document identifier.</summary>
    public string Id { get; set; }
    /// <summary>The unmodified original document content.</summary>
    public string OriginalContent { get; set; }
    /// <summary>LLM-generated bullet-point summary (null if not requested).</summary>
    public string Summary { get; set; }
    /// <summary>Extracted named entities (null if not requested).</summary>
    public List<Entity> Entities { get; set; }
    /// <summary>Extracted keywords (null if not requested).</summary>
    public List<string> Keywords { get; set; }
    /// <summary>Category/confidence classification (null if not requested).</summary>
    public Classification Classification { get; set; }
    /// <summary>Generated question/answer pairs (null if not requested).</summary>
    public List<QAPair> QAPairs { get; set; }
    /// <summary>Metadata bag; enrichment results are mirrored in here too.</summary>
    public Dictionary<string, object> Metadata { get; set; }
}

/// <summary>
/// A named entity extracted from document content.
/// </summary>
public class Entity
{
    /// <summary>The entity surface text as it appears in the document.</summary>
    public string Text { get; set; }
    /// <summary>Entity category, e.g. PERSON, ORGANIZATION, LOCATION, DATE, TECHNOLOGY, OTHER.</summary>
    public string Type { get; set; }
    /// <summary>Surrounding text in which the entity occurs.</summary>
    public string Context { get; set; }
}

/// <summary>
/// A generated question/answer pair indexed to aid question-style retrieval.
/// </summary>
public class QAPair
{
    /// <summary>A question a user might plausibly ask about the document.</summary>
    public string Question { get; set; }
    /// <summary>The answer grounded in the document content.</summary>
    public string Answer { get; set; }
    /// <summary>Model-assigned relevance in [0, 1].</summary>
    public double RelevanceScore { get; set; }
}

高度な検索戦略

1. ハイブリッド検索の実装

// Hybrid Search Implementation
/// <summary>
/// Runs multiple search strategies (vector, keyword, semantic, optional Q&amp;A
/// matching) in parallel, merges their weighted scores per document, and
/// optionally re-ranks the top candidates with an LLM relevance judge.
/// Strategies not shown here (semantic, Q&amp;A matching, embedding, query
/// expansion) are implemented elsewhere in this class's file.
/// </summary>
public class HybridSearchService
{
    private readonly SearchClient _searchClient;
    private readonly OpenAIClient _openAIClient;
    private readonly IMemoryCache _cache;

    /// <summary>
    /// Executes all enabled strategies for <paramref name="query"/> and
    /// returns the merged, re-ranked result set.
    /// </summary>
    public async Task<HybridSearchResults> SearchAsync(
        string query,
        HybridSearchOptions options)
    {
        // Parallel execution of different search strategies
        var searchTasks = new List<Task<SearchStrategy>>();

        // 1. Vector search
        searchTasks.Add(ExecuteVectorSearchAsync(query, options));

        // 2. Keyword search
        searchTasks.Add(ExecuteKeywordSearchAsync(query, options));

        // 3. Semantic search
        searchTasks.Add(ExecuteSemanticSearchAsync(query, options));

        // 4. Q&A matching (if enabled)
        if (options.EnableQAMatching)
        {
            searchTasks.Add(ExecuteQAMatchingAsync(query, options));
        }

        var searchResults = await Task.WhenAll(searchTasks);

        // Combine and re-rank results
        var combinedResults = await CombineAndRerankAsync(
            searchResults,
            query,
            options
        );

        return new HybridSearchResults
        {
            Results = combinedResults,
            SearchStrategiesUsed = searchResults.Select(r => r.Name).ToList(),
            TotalMatches = combinedResults.Count,
            Query = query
        };
    }

    /// <summary>
    /// Pure k-NN vector search over the "contentVector" field. Over-fetches
    /// (TopK * 2) to give the re-ranking stage more candidates.
    /// </summary>
    private async Task<SearchStrategy> ExecuteVectorSearchAsync(
        string query,
        HybridSearchOptions options)
    {
        var queryEmbedding = await GenerateEmbeddingAsync(query);

        var searchOptions = new SearchOptions
        {
            Size = options.TopK * 2, // Get more for re-ranking
            VectorSearch = new()
            {
                Queries = { new VectorizedQuery(queryEmbedding)
                {
                    KNearestNeighborsCount = options.TopK * 2,
                    Fields = { "contentVector" }
                }}
            }
        };

        var results = await _searchClient.SearchAsync<SearchDocument>(
            searchText: null,
            searchOptions
        );

        var documents = new List<RankedDocument>();
        await foreach (var result in results.Value.GetResultsAsync())
        {
            documents.Add(new RankedDocument
            {
                Document = result.Document,
                Score = result.Score ?? 0,
                SearchType = "vector"
            });
        }

        return new SearchStrategy
        {
            Name = "VectorSearch",
            Weight = options.VectorSearchWeight,
            Results = documents
        };
    }

    /// <summary>
    /// Full-text keyword search with synonym-expanded query, a scoring
    /// profile, and hit highlighting.
    /// </summary>
    private async Task<SearchStrategy> ExecuteKeywordSearchAsync(
        string query,
        HybridSearchOptions options)
    {
        // Expand query with synonyms
        var expandedQuery = await ExpandQueryAsync(query);

        var searchOptions = new SearchOptions
        {
            Size = options.TopK * 2,
            QueryType = SearchQueryType.Full,
            SearchMode = SearchMode.All,
            // "keywordBoost" must exist as a scoring profile on the index.
            ScoringProfile = "keywordBoost",
            HighlightFields = { "content" },
            HighlightPreTag = "<mark>",
            HighlightPostTag = "</mark>"
        };

        var results = await _searchClient.SearchAsync<SearchDocument>(
            expandedQuery,
            searchOptions
        );

        var documents = new List<RankedDocument>();
        await foreach (var result in results.Value.GetResultsAsync())
        {
            documents.Add(new RankedDocument
            {
                Document = result.Document,
                Score = result.Score ?? 0,
                SearchType = "keyword",
                Highlights = result.Highlights
            });
        }

        return new SearchStrategy
        {
            Name = "KeywordSearch",
            Weight = options.KeywordSearchWeight,
            Results = documents
        };
    }

    /// <summary>
    /// Merges per-strategy results into one weighted score per document,
    /// optionally applies cross-encoder re-ranking, and returns the TopK.
    /// </summary>
    private async Task<List<RankedDocument>> CombineAndRerankAsync(
        SearchStrategy[] strategies,
        string query,
        HybridSearchOptions options)
    {
        // Collect all unique documents, accumulating weighted scores
        var documentScores = new Dictionary<string, DocumentScore>();

        foreach (var strategy in strategies)
        {
            foreach (var result in strategy.Results)
            {
                var docId = result.Document.GetString("id");

                if (!documentScores.ContainsKey(docId))
                {
                    documentScores[docId] = new DocumentScore
                    {
                        Document = result.Document,
                        Highlights = result.Highlights
                    };
                }

                // Apply strategy weight
                var weightedScore = result.Score * strategy.Weight;
                documentScores[docId].Scores[strategy.Name] = weightedScore;
                documentScores[docId].TotalScore += weightedScore;
            }
        }

        // Apply cross-encoder re-ranking for top candidates
        var topCandidates = documentScores.Values
            .OrderByDescending(d => d.TotalScore)
            .Take(options.RerankTopK)
            .ToList();

        // Seed FinalScore with the weighted total so the ordering below is
        // meaningful even when re-ranking is disabled. Previously FinalScore
        // stayed at its default of 0 in that case, making the final
        // OrderByDescending arbitrary.
        foreach (var candidate in topCandidates)
        {
            candidate.FinalScore = candidate.TotalScore;
        }

        if (options.EnableCrossEncoderReranking)
        {
            await RerankWithCrossEncoderAsync(topCandidates, query);
        }

        // Final ranking
        return topCandidates
            .OrderByDescending(d => d.FinalScore)
            .Take(options.TopK)
            .Select(d => new RankedDocument
            {
                Document = d.Document,
                Score = d.FinalScore,
                SearchType = "hybrid",
                Highlights = d.Highlights,
                ScoreBreakdown = d.Scores
            })
            .ToList();
    }

    /// <summary>
    /// Scores each candidate's relevance with the LLM (in parallel) and
    /// blends it with the retrieval score (30% retrieval, 70% LLM).
    /// NOTE(review): one chat completion per candidate with no concurrency
    /// cap — consider batching or a SemaphoreSlim under load.
    /// </summary>
    private async Task RerankWithCrossEncoderAsync(
        List<DocumentScore> candidates,
        string query)
    {
        var rerankingTasks = candidates.Select(async candidate =>
        {
            var content = candidate.Document.GetString("content");
            var relevanceScore = await CalculateRelevanceScoreAsync(query, content);
            candidate.CrossEncoderScore = relevanceScore;
            candidate.FinalScore = candidate.TotalScore * 0.3 + relevanceScore * 0.7;
        });

        await Task.WhenAll(rerankingTasks);
    }

    /// <summary>
    /// Asks the LLM for a 0-1 relevance score for (query, document);
    /// falls back to 0.5 when the reply is not a parseable number.
    /// </summary>
    private async Task<double> CalculateRelevanceScoreAsync(
        string query,
        string document)
    {
        var prompt = $@"
質問とドキュメントの関連性を0から1のスコアで評価してください。

質問: {query}
ドキュメント: {document.Substring(0, Math.Min(1000, document.Length))}

スコア(0-1):";

        var response = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages = {
                    new ChatRequestSystemMessage("You are a relevance scoring expert. Output only a number between 0 and 1."),
                    new ChatRequestUserMessage(prompt)
                },
                Temperature = 0,
                MaxTokens = 10
            }
        );

        // Parse with the invariant culture: the model emits "0.8"-style
        // decimals, which the culture-sensitive overload rejects on
        // comma-decimal locales (silently degrading every score to 0.5).
        if (double.TryParse(
                response.Value.Choices[0].Message.Content,
                System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture,
                out var score))
        {
            return Math.Max(0, Math.Min(1, score));
        }

        return 0.5; // Default score
    }
}

2. クエリ理解と拡張

# query_understanding.py
from typing import List, Dict, Tuple
import openai
from dataclasses import dataclass
import json

@dataclass
class QueryIntent:
    """Structured interpretation of a user query, as produced by
    QueryUnderstandingService.analyze_query."""

    intent_type: str  # factual, analytical, navigational, transactional
    entities: List[Dict]  # Extracted entities; downstream code reads "text"/"type" keys.
    temporal_context: str  # past, present, future, timeless
    required_depth: str  # shallow, medium, deep
    language: str  # Language code, e.g. "ja"

class QueryUnderstandingService:
    """LLM-backed query understanding: intent analysis, query expansion,
    and derivation of search filters from the analyzed intent."""

    def __init__(self, openai_client):
        # Injected async OpenAI client (mockable in tests).
        self.client = openai_client

    async def analyze_query(self, query: str) -> QueryIntent:
        """
        Analyze user query to understand intent and context.

        Returns a QueryIntent; any field the model omits falls back to a
        safe default.
        """
        prompt = f"""
Analyze the following query and extract:
1. Intent type (factual/analytical/navigational/transactional)
2. Key entities and their types
3. Temporal context
4. Required depth of answer
5. Language

Query: {query}

Output in JSON format.
"""

        response = await self.client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a query analysis expert."},
                {"role": "user", "content": prompt}
            ],
            temperature=0,
            response_format={"type": "json_object"}
        )

        analysis = json.loads(response.choices[0].message.content)

        return QueryIntent(
            intent_type=analysis.get("intent_type", "factual"),
            entities=analysis.get("entities", []),
            temporal_context=analysis.get("temporal_context", "timeless"),
            required_depth=analysis.get("required_depth", "medium"),
            language=analysis.get("language", "ja")
        )

    async def expand_query(
        self,
        query: str,
        intent: QueryIntent
    ) -> List[str]:
        """
        Expand query with variations and related terms.

        Always returns a non-empty list; falls back to [query].
        """
        expansion_prompt = f"""
Given the query: "{query}"
Intent: {intent.intent_type}
Entities: {intent.entities}

Generate 3-5 query variations that capture the same intent but with different phrasings.
Include relevant synonyms and related terms.

Output as JSON array of strings.
"""

        response = await self.client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a query expansion expert."},
                {"role": "user", "content": expansion_prompt}
            ],
            temperature=0.3,
            response_format={"type": "json_object"}
        )

        expansions = json.loads(response.choices[0].message.content)

        # The prompt asks for a JSON array, but response_format "json_object"
        # forces an object wrapper; accept either shape defensively. The old
        # code assumed {"queries": [...]} and raised AttributeError on a list.
        if isinstance(expansions, list):
            return [str(q) for q in expansions] or [query]
        return expansions.get("queries", [query])

    async def generate_filters(
        self,
        query: str,
        intent: QueryIntent
    ) -> Dict[str, object]:
        """
        Generate search filters based on query understanding.

        Note: the return annotation was ``Dict[str, any]`` — ``any`` is the
        builtin function, not a type; ``object`` is used instead.
        """
        filters = {}

        # Temporal filters
        if intent.temporal_context != "timeless":
            filters["temporal_filter"] = self._get_temporal_filter(
                intent.temporal_context
            )

        # Entity-based filters — use .get so a malformed entity dict from the
        # LLM degrades gracefully instead of raising KeyError.
        for entity in intent.entities:
            entity_type = entity.get("type")
            entity_text = entity.get("text", "")
            if entity_type == "ORGANIZATION":
                filters["organization"] = entity_text
            elif entity_type == "TECHNOLOGY":
                filters["technology_stack"] = entity_text
            elif entity_type == "DATE":
                filters["date_range"] = self._parse_date_entity(entity_text)

        # Depth-based filters
        if intent.required_depth == "deep":
            filters["min_content_length"] = 1000
            filters["include_technical_details"] = True
        elif intent.required_depth == "shallow":
            filters["max_content_length"] = 500
            filters["summary_only"] = True

        return filters

    def _parse_date_entity(self, text: str) -> Dict:
        """Minimal passthrough for DATE entities.

        NOTE(review): the original code called this method without ever
        defining it, so any DATE entity raised AttributeError. This stub
        preserves the raw text; replace with a real date parser (e.g.
        dateutil) to produce actual start/end bounds.
        """
        return {"raw": text}

    def _get_temporal_filter(self, context: str) -> Dict:
        """Map a temporal context label to a concrete date-range filter.

        Returns an empty dict for unknown contexts.
        """
        from datetime import datetime, timedelta

        now = datetime.now()

        if context == "past":
            # Older than the last 30 days, up to one year back
            return {
                "end_date": now - timedelta(days=30),
                "start_date": now - timedelta(days=365)
            }
        elif context == "present":
            # The most recent 30 days
            return {
                "start_date": now - timedelta(days=30),
                "end_date": now
            }
        elif context == "future":
            return {
                "start_date": now,
                "include_predictions": True
            }

        return {}

回答生成の最適化

1. コンテキスト管理と回答品質向上

// Advanced Response Generation
/// <summary>
/// Generates grounded answers from ranked retrieval results: packs a
/// token-budgeted context, calls Azure OpenAI, fact-checks the draft,
/// attaches [n]-style citations, and produces follow-up questions.
/// </summary>
public class AdvancedResponseGenerator
{
    private readonly OpenAIClient _openAIClient;
    // NOTE(review): _cache is currently unused in this class — confirm whether
    // response caching was meant to be wired in here.
    private readonly IMemoryCache _cache;
    private readonly ILogger<AdvancedResponseGenerator> _logger;

    /// <summary>
    /// Runs the full generation pipeline for one query.
    /// </summary>
    /// <param name="query">The user's question.</param>
    /// <param name="documents">Retrieved documents, already ranked by relevance.</param>
    /// <param name="options">Generation settings (model, token budgets, style).</param>
    /// <returns>The cited answer plus confidence, sources, follow-ups and metadata.</returns>
    public async Task<EnhancedResponse> GenerateResponseAsync(
        string query,
        List<RankedDocument> documents,
        ResponseGenerationOptions options)
    {
        // Pack retrieved documents into a token-budgeted, structured context.
        var context = BuildStructuredContext(documents, options);

        // First draft from the model.
        var response = await GenerateInitialResponseAsync(
            query,
            context,
            options
        );

        // Fact-check the draft; regenerate with stricter settings if it fails.
        var enhancedResponse = await EnhanceResponseAsync(
            response,
            query,
            documents,
            options
        );

        // Attach citation markers pointing back at the source documents.
        var citedResponse = AddCitations(enhancedResponse, documents);

        return new EnhancedResponse
        {
            Answer = citedResponse,
            Confidence = CalculateConfidence(documents),
            Sources = ExtractSources(documents),
            FollowUpQuestions = await GenerateFollowUpQuestionsAsync(
                query,
                citedResponse
            ),
            Metadata = new ResponseMetadata
            {
                TokensUsed = CountTokens(citedResponse),
                // NOTE(review): stores the completion timestamp, not a
                // duration, despite the property name — confirm semantics.
                ResponseTime = DateTime.UtcNow,
                Model = options.Model
            }
        };
    }

    /// <summary>
    /// Groups documents by their "type" field, orders groups by best score,
    /// and packs document contents into sections until the token budget
    /// (<see cref="ResponseGenerationOptions.MaxContextTokens"/>) is spent.
    /// The last document that does not fit is truncated to the remainder.
    /// </summary>
    private StructuredContext BuildStructuredContext(
        List<RankedDocument> documents,
        ResponseGenerationOptions options)
    {
        var context = new StructuredContext();
        var tokenBudget = options.MaxContextTokens;
        var usedTokens = 0;

        // Best-scoring type groups come first so high-value content survives
        // budget exhaustion.
        var groupedDocs = documents
            .GroupBy(d => d.Document.GetString("type") ?? "general")
            .OrderByDescending(g => g.Max(d => d.Score));

        foreach (var group in groupedDocs)
        {
            var section = new ContextSection
            {
                Type = group.Key,
                Documents = new List<ContextDocument>()
            };

            foreach (var doc in group.OrderByDescending(d => d.Score))
            {
                // Stop before adding zero-length entries once the budget is
                // exhausted (the original could append an empty document).
                var remaining = tokenBudget - usedTokens;
                if (remaining <= 0)
                    break;

                var content = doc.Document.GetString("content");
                var tokens = EstimateTokens(content);

                if (tokens > remaining)
                {
                    // Truncate the final document so the section fits exactly.
                    content = TruncateToTokenLimit(content, remaining);
                    tokens = remaining;
                }

                section.Documents.Add(new ContextDocument
                {
                    Id = doc.Document.GetString("id"),
                    Title = doc.Document.GetString("title"),
                    Content = content,
                    Score = doc.Score,
                    Metadata = doc.Document.GetObject("metadata")
                });

                usedTokens += tokens;
            }

            context.Sections.Add(section);

            if (usedTokens >= tokenBudget)
                break;
        }

        context.TotalTokens = usedTokens;
        return context;
    }

    /// <summary>
    /// Sends the system/user prompts to the chat model and returns the text of
    /// the first choice. If the model requests a function call, executes it
    /// once, feeds the result back, and returns the follow-up completion.
    /// </summary>
    private async Task<string> GenerateInitialResponseAsync(
        string query,
        StructuredContext context,
        ResponseGenerationOptions options)
    {
        var systemPrompt = BuildSystemPrompt(options);
        var userPrompt = BuildUserPrompt(query, context);

        var completionOptions = new ChatCompletionsOptions
        {
            DeploymentName = options.Model,
            Messages =
            {
                new ChatRequestSystemMessage(systemPrompt),
                new ChatRequestUserMessage(userPrompt)
            },
            Temperature = options.Temperature,
            MaxTokens = options.MaxResponseTokens,
            Functions = GetAvailableFunctions(options),
            FunctionCall = options.EnableFunctionCalling
                ? FunctionCallOption.Auto
                : FunctionCallOption.None
        };

        var response = await _openAIClient.GetChatCompletionsAsync(completionOptions);

        // Single round of function calling: execute the requested function,
        // append its result, and ask the model to finish the answer.
        if (response.Value.Choices[0].Message.FunctionCall != null)
        {
            var functionResult = await ExecuteFunctionCallAsync(
                response.Value.Choices[0].Message.FunctionCall
            );

            completionOptions.Messages.Add(
                new ChatRequestFunctionMessage(
                    response.Value.Choices[0].Message.FunctionCall.Name,
                    functionResult
                )
            );

            response = await _openAIClient.GetChatCompletionsAsync(completionOptions);
        }

        return response.Value.Choices[0].Message.Content;
    }

    /// <summary>
    /// Builds the (Japanese) system prompt: core grounding/citation rules,
    /// plus style- and confidence-specific guidelines from the options.
    /// </summary>
    private string BuildSystemPrompt(ResponseGenerationOptions options)
    {
        var prompt = new StringBuilder();

        prompt.AppendLine("あなたは正確で信頼性の高い情報を提供するAIアシスタントです。");
        prompt.AppendLine();
        prompt.AppendLine("回答する際の重要なガイドライン:");
        prompt.AppendLine("1. 提供されたコンテキスト情報のみに基づいて回答する");
        prompt.AppendLine("2. 情報源を明確に示す([1], [2]などの参照番号を使用)");
        prompt.AppendLine("3. 確信が持てない場合は、その旨を明記する");
        prompt.AppendLine("4. 段階的で論理的な説明を心がける");

        if (options.ResponseStyle == ResponseStyle.Technical)
        {
            prompt.AppendLine("5. 技術的な詳細を含め、専門用語を適切に使用する");
        }
        else if (options.ResponseStyle == ResponseStyle.Simple)
        {
            prompt.AppendLine("5. 専門用語を避け、分かりやすい説明を心がける");
        }

        if (options.IncludeConfidenceLevel)
        {
            prompt.AppendLine("6. 回答の確信度を明示する(高/中/低)");
        }

        return prompt.ToString();
    }

    /// <summary>
    /// Validates the draft against the source documents; when the fact check
    /// fails, regenerates with near-deterministic, strictly-grounded settings.
    /// Otherwise optionally runs a readability pass.
    /// </summary>
    private async Task<string> EnhanceResponseAsync(
        string initialResponse,
        string query,
        List<RankedDocument> documents,
        ResponseGenerationOptions options)
    {
        // Fact-check the response against its sources.
        var factCheckResult = await FactCheckResponseAsync(
            initialResponse,
            documents
        );

        if (!factCheckResult.IsAccurate)
        {
            _logger.LogWarning(
                "Fact check failed for response. Regenerating..."
            );

            // BUG FIX: the original used `options with { ... }`, which only
            // compiles when ResponseGenerationOptions is a record type. Copy
            // the settings explicitly so this works for class or record alike.
            var stricterOptions = new ResponseGenerationOptions
            {
                Model = options.Model,
                MaxContextTokens = options.MaxContextTokens,
                MaxResponseTokens = options.MaxResponseTokens,
                Temperature = 0.1f, // near-deterministic retry
                ResponseStyle = options.ResponseStyle,
                IncludeConfidenceLevel = options.IncludeConfidenceLevel,
                EnableFunctionCalling = options.EnableFunctionCalling,
                EnhanceReadability = options.EnhanceReadability,
                StrictFactChecking = true
            };

            return await GenerateInitialResponseAsync(
                query,
                BuildStructuredContext(documents, stricterOptions),
                stricterOptions
            );
        }

        // Enhance clarity and structure when requested.
        if (options.EnhanceReadability)
        {
            return await EnhanceReadabilityAsync(initialResponse);
        }

        return initialResponse;
    }

    /// <summary>
    /// Inserts [n]-style citation markers into sentences whose content matches
    /// a source document, then appends a numbered reference list.
    /// </summary>
    private string AddCitations(string response, List<RankedDocument> documents)
    {
        var citationMap = new Dictionary<string, int>();
        var citationIndex = 1;

        // BUG FIX: the original split on both '.' and '。' but re-joined with
        // '。' only — and did so once per document — corrupting '.'-terminated
        // text and appending spurious terminators. Split once, keep each
        // sentence's own terminator attached (lookbehind), and reassemble
        // losslessly with string.Concat.
        var sentences = System.Text.RegularExpressions.Regex
            .Split(response, "(?<=[.。])");

        foreach (var doc in documents)
        {
            var title = doc.Document.GetString("title");
            var content = doc.Document.GetString("content");

            for (int i = 0; i < sentences.Length; i++)
            {
                var core = sentences[i].TrimEnd('.', '。').Trim();
                if (core.Length == 0 || !IsContentMatch(core, content))
                    continue;

                if (!citationMap.TryGetValue(title, out var number))
                {
                    number = citationIndex++;
                    citationMap[title] = number;
                }

                var marker = $"[{number}]";
                if (sentences[i].Contains(marker))
                    continue; // already cited for this source

                // Insert the marker just before the sentence terminator.
                var sentence = sentences[i];
                var last = sentence.Length > 0 ? sentence[sentence.Length - 1] : '\0';
                sentences[i] = (last == '.' || last == '。')
                    ? sentence.Substring(0, sentence.Length - 1) + marker + last
                    : sentence + marker;
            }
        }

        var result = string.Concat(sentences);

        // Append the numbered reference list.
        if (citationMap.Any())
        {
            var refs = new StringBuilder(result);
            refs.Append("\n\n参考文献:\n");
            foreach (var citation in citationMap.OrderBy(c => c.Value))
            {
                refs.Append($"[{citation.Value}] {citation.Key}\n");
            }
            result = refs.ToString();
        }

        return result;
    }

    /// <summary>
    /// Asks the model for three likely follow-up questions, expected as a JSON
    /// array of strings. Returns an empty list when the output is unparseable.
    /// </summary>
    private async Task<List<string>> GenerateFollowUpQuestionsAsync(
        string query,
        string response)
    {
        var prompt = $@"
元の質問: {query}
回答: {response}

この回答を踏まえて、ユーザーが次に尋ねそうな関連質問を3つ生成してください。
JSON配列形式で出力してください。";

        var completionResponse = await _openAIClient.GetChatCompletionsAsync(
            new ChatCompletionsOptions
            {
                DeploymentName = "gpt-4",
                Messages =
                {
                    new ChatRequestSystemMessage("You are a helpful assistant that generates follow-up questions."),
                    new ChatRequestUserMessage(prompt)
                },
                Temperature = 0.7f,
                ResponseFormat = new ChatCompletionsResponseFormatJSON()
            }
        );

        var json = completionResponse.Value.Choices[0].Message.Content;

        // Robustness: JSON mode does not guarantee a bare top-level array;
        // degrade to an empty list instead of failing the whole response.
        try
        {
            var questions = JsonSerializer.Deserialize<List<string>>(json);
            return questions ?? new List<string>();
        }
        catch (JsonException)
        {
            _logger.LogWarning("Follow-up question output was not a JSON string array.");
            return new List<string>();
        }
    }
}

// Supporting classes
/// <summary>
/// The packed context handed to the model: document sections grouped by type,
/// plus the total number of tokens consumed by the packed content.
/// </summary>
public class StructuredContext
{
    /// <summary>Context sections, one per document type group.</summary>
    public List<ContextSection> Sections { get; set; } = new List<ContextSection>();

    /// <summary>Total estimated tokens used across all sections.</summary>
    public int TotalTokens { get; set; }
}

/// <summary>
/// One group of context documents sharing the same document "type" value.
/// </summary>
public class ContextSection
{
    /// <summary>The shared type key for this group (e.g. "general").</summary>
    public string Type { get; set; }

    /// <summary>Documents in this section; producers add them best-score first.</summary>
    public List<ContextDocument> Documents { get; set; }
}

/// <summary>
/// A single (possibly truncated) retrieved document as packed into the
/// prompt context, with its relevance score and source metadata.
/// </summary>
public class ContextDocument
{
    /// <summary>Source document identifier.</summary>
    public string Id { get; set; }

    /// <summary>Source document title.</summary>
    public string Title { get; set; }

    /// <summary>Document text, possibly truncated to fit the token budget.</summary>
    public string Content { get; set; }

    /// <summary>Relevance score from the retrieval stage.</summary>
    public double Score { get; set; }

    /// <summary>Arbitrary metadata carried over from the search index.</summary>
    public Dictionary<string, object> Metadata { get; set; }
}

/// <summary>
/// Tunable settings for answer generation.
/// BUG FIX: declared as a record (was a class) because existing code derives
/// stricter variants via a `with` expression, which only compiles for records.
/// Property shape and defaults are unchanged, so all existing initializers
/// keep working.
/// </summary>
public record ResponseGenerationOptions
{
    /// <summary>Deployment/model name used for chat completions.</summary>
    public string Model { get; set; } = "gpt-4";

    /// <summary>Token budget for retrieved context packed into the prompt.</summary>
    public int MaxContextTokens { get; set; } = 3000;

    /// <summary>Upper bound on the generated answer length, in tokens.</summary>
    public int MaxResponseTokens { get; set; } = 1000;

    /// <summary>Sampling temperature; low by default for factual answers.</summary>
    public float Temperature { get; set; } = 0.3f;

    /// <summary>Overall tone: technical, simple, or balanced.</summary>
    public ResponseStyle ResponseStyle { get; set; } = ResponseStyle.Balanced;

    /// <summary>Ask the model to state its confidence (high/medium/low).</summary>
    public bool IncludeConfidenceLevel { get; set; } = true;

    /// <summary>Enable OpenAI function calling during generation.</summary>
    public bool EnableFunctionCalling { get; set; } = false;

    /// <summary>Run an extra readability-improvement pass on the answer.</summary>
    public bool EnhanceReadability { get; set; } = true;

    /// <summary>Regenerate with stricter grounding when fact-checking fails.</summary>
    public bool StrictFactChecking { get; set; } = false;
}

/// <summary>Desired tone and verbosity of generated answers.</summary>
public enum ResponseStyle
{
    /// <summary>Include technical detail and domain terminology.</summary>
    Technical,

    /// <summary>Avoid jargon; favor plain-language explanations.</summary>
    Simple,

    /// <summary>Middle ground between Technical and Simple (the default).</summary>
    Balanced
}

評価とモニタリング

1. RAGシステムの評価メトリクス

# rag_evaluation.py
import numpy as np
from typing import List, Dict, Tuple
from dataclasses import dataclass
import asyncio
from sklearn.metrics.pairwise import cosine_similarity

@dataclass
class EvaluationResult:
    """Container for the results of one full RAG evaluation run."""
    # Aggregated retrieval metrics (e.g. precision@5, recall@5, MRR, NDCG).
    retrieval_metrics: Dict[str, float]
    # Aggregated generation metrics (e.g. faithfulness, relevance, fluency).
    generation_metrics: Dict[str, float]
    # Aggregated end-to-end answer-quality metrics.
    end_to_end_metrics: Dict[str, float]
    # Per-test-case breakdown: query plus its retrieval/generation/e2e scores.
    detailed_results: List[Dict]

class RAGEvaluator:
    """Evaluates a RAG system on three axes: retrieval quality,
    generation quality, and end-to-end answer quality.

    Some scoring helpers referenced here (e.g. ``_evaluate_end_to_end``,
    ``_score_relevance``, the ``_aggregate_*`` methods) are defined
    elsewhere / omitted in this excerpt.
    """

    def __init__(self, openai_client, search_service, rag_service):
        self.openai_client = openai_client
        self.search_service = search_service
        self.rag_service = rag_service

    async def evaluate_system(
        self,
        test_queries: List[Dict]
    ) -> "EvaluationResult":
        """
        Comprehensive evaluation of the RAG system.

        Each test case dict must have a "query" key and may supply
        "relevant_docs" (ground-truth document ids) and "expected_answer".
        Returns aggregated retrieval / generation / end-to-end metrics plus
        a per-case breakdown.
        """
        retrieval_scores = []
        generation_scores = []
        end_to_end_scores = []
        detailed_results = []

        for test_case in test_queries:
            query = test_case["query"]
            expected_docs = test_case.get("relevant_docs", [])
            expected_answer = test_case.get("expected_answer", "")

            # Retrieval quality (precision/recall/MRR/NDCG, or LLM-scored
            # relevance when no ground truth is available).
            retrieval_result = await self._evaluate_retrieval(
                query,
                expected_docs
            )
            retrieval_scores.append(retrieval_result)

            # Generation quality, grounded on what was actually retrieved.
            generation_result = await self._evaluate_generation(
                query,
                retrieval_result["retrieved_docs"],
                expected_answer
            )
            generation_scores.append(generation_result)

            # Full-pipeline answer quality.
            e2e_result = await self._evaluate_end_to_end(
                query,
                expected_answer
            )
            end_to_end_scores.append(e2e_result)

            detailed_results.append({
                "query": query,
                "retrieval": retrieval_result,
                "generation": generation_result,
                "end_to_end": e2e_result
            })

        return EvaluationResult(
            retrieval_metrics=self._aggregate_retrieval_metrics(retrieval_scores),
            generation_metrics=self._aggregate_generation_metrics(generation_scores),
            end_to_end_metrics=self._aggregate_e2e_metrics(end_to_end_scores),
            detailed_results=detailed_results
        )

    async def _evaluate_retrieval(
        self,
        query: str,
        expected_docs: List[str]
    ) -> Dict:
        """Evaluate retrieval performance for one query.

        With ground truth, computes precision@5, recall@5, MRR and NDCG.
        Without it, falls back to LLM-scored relevance and reports only an
        estimated precision@5 (the other metrics are None).
        """
        # Get retrieved documents
        search_results = await self.search_service.search(query, top_k=10)
        retrieved_docs = [r.document_id for r in search_results]

        if expected_docs:
            precision_at_k = self._calculate_precision_at_k(
                retrieved_docs,
                expected_docs,
                k=5
            )
            recall_at_k = self._calculate_recall_at_k(
                retrieved_docs,
                expected_docs,
                k=5
            )
            mrr = self._calculate_mrr(retrieved_docs, expected_docs)
            ndcg = self._calculate_ndcg(
                retrieved_docs,
                expected_docs,
                [r.score for r in search_results]
            )
        else:
            # No ground truth: score relevance with the LLM instead.
            relevance_scores = await self._score_relevance(
                query,
                search_results
            )
            # BUG FIX: `_score_relevance` may return a plain list, and
            # `list > float` is not a valid element-wise comparison;
            # coerce to an ndarray first.
            precision_at_k = float(
                np.mean(np.asarray(relevance_scores[:5]) > 0.7)
            )
            recall_at_k = None
            mrr = None
            ndcg = None

        return {
            "retrieved_docs": retrieved_docs[:5],
            "precision_at_5": precision_at_k,
            "recall_at_5": recall_at_k,
            "mrr": mrr,
            "ndcg": ndcg,
            "avg_score": np.mean([r.score for r in search_results[:5]])
        }

    async def _evaluate_generation(
        self,
        query: str,
        retrieved_docs: List[str],
        expected_answer: str
    ) -> Dict:
        """Evaluate generation quality for one query.

        Scores semantic similarity to the expected answer (if provided),
        faithfulness to the retrieved context, answer relevance, and fluency.
        """
        # Regenerate the answer from the retrieved context.
        context = await self._build_context(retrieved_docs)
        generated_answer = await self._generate_answer(query, context)

        metrics = {}

        # Semantic similarity (only meaningful with a reference answer).
        if expected_answer:
            metrics["semantic_similarity"] = await self._calculate_semantic_similarity(
                generated_answer,
                expected_answer
            )

        # Faithfulness to context
        metrics["faithfulness"] = await self._evaluate_faithfulness(
            generated_answer,
            context
        )

        # Answer relevance
        metrics["relevance"] = await self._evaluate_answer_relevance(
            query,
            generated_answer
        )

        # Fluency and coherence
        metrics["fluency"] = await self._evaluate_fluency(generated_answer)

        return metrics

    async def _evaluate_faithfulness(
        self,
        answer: str,
        context: str
    ) -> float:
        """LLM-as-judge faithfulness score in [0, 1].

        The context is truncated to 1000 chars to bound prompt size. Falls
        back to a neutral 0.5 when the judge returns non-numeric output.
        """
        prompt = f"""
Given the context and answer, evaluate if the answer is faithful to the context.
Score from 0 to 1, where 1 means completely faithful.

Context: {context[:1000]}
Answer: {answer}

Output only a number between 0 and 1.
"""

        response = await self.openai_client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an evaluation expert."},
                {"role": "user", "content": prompt}
            ],
            temperature=0
        )

        try:
            return float(response.choices[0].message.content.strip())
        except (ValueError, TypeError, AttributeError):
            # BUG FIX: the bare `except:` also swallowed KeyboardInterrupt /
            # SystemExit. Catch only the parse-related failures and keep the
            # best-effort neutral fallback.
            return 0.5

    def _calculate_ndcg(
        self,
        retrieved: List[str],
        relevant: List[str],
        scores: List[float]
    ) -> float:
        """Calculate Normalized Discounted Cumulative Gain.

        Uses binary relevance (1 if a retrieved doc is in ``relevant``).
        ``scores`` is accepted for interface compatibility but unused —
        graded relevance is not available from the binary ground truth.
        Returns 0.0 when there are no relevant documents in the list.
        """
        def dcg(relevances, k=None):
            if k is None:
                k = len(relevances)
            return sum(
                rel / np.log2(i + 2)
                for i, rel in enumerate(relevances[:k])
            )

        # Binary relevance per retrieved position.
        relevances = [1 if doc in relevant else 0 for doc in retrieved]

        dcg_score = dcg(relevances)

        # Ideal ordering puts every relevant doc first.
        ideal_relevances = sorted(relevances, reverse=True)
        idcg_score = dcg(ideal_relevances)

        if idcg_score == 0:
            return 0.0

        return dcg_score / idcg_score

2. 継続的改善のためのフィードバックループ

// Feedback and Improvement System
/// <summary>
/// Collects user feedback on RAG answers, tracks satisfaction metrics, and
/// kicks off targeted improvement workflows for poorly rated responses.
/// </summary>
public class RAGFeedbackSystem
{
    private readonly IFeedbackRepository _feedbackRepo;
    private readonly IRAGService _ragService;
    private readonly IMetricsService _metricsService;
    private readonly ILogger<RAGFeedbackSystem> _logger;

    /// <summary>
    /// Persists a piece of user feedback, updates satisfaction metrics, and
    /// triggers the improvement workflow for low-rated or unhelpful answers.
    /// </summary>
    /// <param name="feedback">The feedback to record; must not be null.</param>
    public async Task RecordFeedbackAsync(UserFeedback feedback)
    {
        ArgumentNullException.ThrowIfNull(feedback);

        // Store feedback
        await _feedbackRepo.SaveFeedbackAsync(feedback);

        // Update metrics
        await _metricsService.UpdateUserSatisfactionMetricsAsync(
            feedback.SessionId,
            feedback.Rating,
            feedback.WasHelpful
        );

        // Ratings below 3, or an explicit "not helpful", trigger remediation.
        if (feedback.Rating < 3 || !feedback.WasHelpful)
        {
            await TriggerImprovementWorkflowAsync(feedback);
        }
    }

    /// <summary>
    /// Classifies why an answer failed and dispatches the matching remedy
    /// (content coverage, search tuning, or prompt refinement).
    /// </summary>
    private async Task TriggerImprovementWorkflowAsync(UserFeedback feedback)
    {
        // Analyze the problematic query before choosing a remedy.
        var analysis = await AnalyzeFailureAsync(
            feedback.Query,
            feedback.Response,
            feedback.Sources
        );

        switch (analysis.FailureType)
        {
            case FailureType.InsufficientContext:
                await ImproveDocumentCoverageAsync(feedback.Query);
                break;

            case FailureType.PoorRetrieval:
                await OptimizeSearchParametersAsync(feedback.Query);
                break;

            case FailureType.IncorrectGeneration:
                await RefineGenerationPromptsAsync(feedback);
                break;

            default:
                // BUG FIX: new FailureType members previously fell through
                // silently; at least leave a trace so gaps are noticed.
                _logger.LogWarning(
                    "No improvement workflow defined for failure type {Type}",
                    analysis.FailureType
                );
                break;
        }

        // Log improvement action
        _logger.LogInformation(
            "Triggered improvement workflow for query: {Query}, Type: {Type}",
            feedback.Query,
            analysis.FailureType
        );
    }

    /// <summary>
    /// Scores retrieval and response quality for a failed interaction and
    /// derives the dominant failure type plus recommendations.
    /// </summary>
    private async Task<FailureAnalysis> AnalyzeFailureAsync(
        string query,
        string response,
        List<Source> sources)
    {
        // Analyze retrieval quality
        var retrievalScore = await EvaluateRetrievalQualityAsync(query, sources);

        // Analyze response quality
        var responseScore = await EvaluateResponseQualityAsync(
            query,
            response,
            sources
        );

        // Determine failure type
        var failureType = DetermineFailureType(retrievalScore, responseScore);

        return new FailureAnalysis
        {
            FailureType = failureType,
            RetrievalScore = retrievalScore,
            ResponseScore = responseScore,
            Recommendations = GenerateRecommendations(failureType)
        };
    }

    /// <summary>
    /// Fills knowledge-base gaps for a query: finds missing topics, indexes
    /// generated placeholder documents for them, and refreshes metadata.
    /// NOTE(review): indexing synthetic (LLM-generated) documents risks
    /// polluting the knowledge base with unverified content — confirm policy.
    /// </summary>
    private async Task ImproveDocumentCoverageAsync(string query)
    {
        // Identify missing content areas
        var missingTopics = await IdentifyMissingTopicsAsync(query);

        // Generate synthetic documents for missing topics
        foreach (var topic in missingTopics)
        {
            var syntheticDoc = await GenerateSyntheticDocumentAsync(topic);
            await IndexDocumentAsync(syntheticDoc);
        }

        // Update document metadata to improve retrieval
        await UpdateDocumentMetadataAsync(query);
    }

    /// <summary>
    /// A/B tests candidate search configurations for the failing query and
    /// applies the best-performing one to the RAG service.
    /// </summary>
    private async Task OptimizeSearchParametersAsync(string query)
    {
        // A/B test different search configurations
        var configurations = GenerateSearchConfigurations();
        var bestConfig = await RunSearchOptimizationAsync(
            query,
            configurations
        );

        // Update search service with optimized parameters
        await _ragService.UpdateSearchConfigurationAsync(
            query,
            bestConfig
        );
    }
}

// Monitoring Dashboard Data
/// <summary>
/// Aggregates RAG telemetry for a reporting window into one dashboard payload.
/// </summary>
public class RAGMonitoringService
{
    /// <summary>
    /// Collects performance, quality, usage, system-health and cost metrics
    /// for [startDate, endDate] and returns them as a single dashboard object.
    /// </summary>
    public async Task<RAGDashboardData> GetDashboardDataAsync(
        DateTime startDate,
        DateTime endDate)
    {
        // Performance metrics
        var averageResponseTime = await CalculateAverageResponseTimeAsync(startDate, endDate);
        var medianResponseTime = await CalculateMedianResponseTimeAsync(startDate, endDate);
        var p95ResponseTime = await CalculateP95ResponseTimeAsync(startDate, endDate);

        // Quality metrics
        var averageUserRating = await CalculateAverageRatingAsync(startDate, endDate);
        var answerAccuracy = await CalculateAnswerAccuracyAsync(startDate, endDate);
        var retrievalPrecision = await CalculateRetrievalPrecisionAsync(startDate, endDate);

        // Usage metrics
        var totalQueries = await GetTotalQueriesAsync(startDate, endDate);
        var uniqueUsers = await GetUniqueUsersAsync(startDate, endDate);
        var topQueries = await GetTopQueriesAsync(startDate, endDate, 10);

        // System health
        var errorRate = await CalculateErrorRateAsync(startDate, endDate);
        var cacheHitRate = await CalculateCacheHitRateAsync(startDate, endDate);
        var indexFreshness = await CalculateIndexFreshnessAsync();

        // Cost analysis
        var totalTokensUsed = await GetTotalTokensUsedAsync(startDate, endDate);
        var estimatedCost = await CalculateEstimatedCostAsync(startDate, endDate);

        return new RAGDashboardData
        {
            AverageResponseTime = averageResponseTime,
            MedianResponseTime = medianResponseTime,
            P95ResponseTime = p95ResponseTime,

            AverageUserRating = averageUserRating,
            AnswerAccuracy = answerAccuracy,
            RetrievalPrecision = retrievalPrecision,

            TotalQueries = totalQueries,
            UniqueUsers = uniqueUsers,
            TopQueries = topQueries,

            ErrorRate = errorRate,
            CacheHitRate = cacheHitRate,
            IndexFreshness = indexFreshness,

            TotalTokensUsed = totalTokensUsed,
            EstimatedCost = estimatedCost
        };
    }
}

パフォーマンス最適化

1. キャッシングとレスポンス最適化

// Intelligent Caching System
/// <summary>
/// Two-tier response cache for the RAG pipeline: exact-match lookups in a
/// distributed cache, plus embedding-based semantic matches in a vector store.
/// </summary>
public class RAGCacheManager
{
    private readonly IDistributedCache _cache;
    private readonly IVectorDatabase _vectorDb;
    private readonly ILogger<RAGCacheManager> _logger;

    /// <summary>
    /// Returns a cached response for <paramref name="query"/> if one exists:
    /// exact match first, then (optionally) a semantically similar query.
    /// Returns null on a miss.
    /// </summary>
    public async Task<CachedResponse?> GetCachedResponseAsync(
        string query,
        CacheOptions options)
    {
        // Check exact match cache
        var exactMatch = await GetExactMatchAsync(query);
        if (exactMatch != null)
        {
            return exactMatch;
        }

        // Check semantic similarity cache
        if (options.EnableSemanticCache)
        {
            return await GetSemanticMatchAsync(query, options.SimilarityThreshold);
        }

        return null;
    }

    /// <summary>
    /// Looks for cached responses whose original query is semantically close
    /// to <paramref name="query"/> (similarity >= <paramref name="threshold"/>),
    /// returning the first still-fresh hit.
    /// </summary>
    private async Task<CachedResponse?> GetSemanticMatchAsync(
        string query,
        float threshold)
    {
        // Generate query embedding
        var queryEmbedding = await GenerateEmbeddingAsync(query);

        // Search for similar cached queries
        var similarQueries = await _vectorDb.SearchSimilarAsync(
            queryEmbedding,
            "query_cache",
            topK: 5,
            minScore: threshold
        );

        foreach (var match in similarQueries)
        {
            var cacheKey = $"rag_response:{match.Id}";
            var cachedJson = await _cache.GetStringAsync(cacheKey);

            if (!string.IsNullOrEmpty(cachedJson))
            {
                var cached = JsonSerializer.Deserialize<CachedResponse>(cachedJson);

                // BUG FIX: honor the entry's own ExpiresAt instead of a
                // hardcoded 24h window, so per-entry CacheDuration settings
                // configured at write time are actually respected.
                if (cached != null && cached.ExpiresAt > DateTime.UtcNow)
                {
                    _logger.LogInformation(
                        "Semantic cache hit for query. Similarity: {Score}",
                        match.Score
                    );

                    return cached;
                }
            }
        }

        return null;
    }

    /// <summary>
    /// Stores a generated response in the distributed cache (with absolute +
    /// sliding expiry) and, when enabled, registers the query embedding in the
    /// vector store so future similar queries can find it.
    /// </summary>
    public async Task CacheResponseAsync(
        string query,
        RAGResponse response,
        CacheOptions options)
    {
        var cacheEntry = new CachedResponse
        {
            Query = query,
            Response = response,
            CachedAt = DateTime.UtcNow,
            ExpiresAt = DateTime.UtcNow.Add(options.CacheDuration),
            Metadata = new CacheMetadata
            {
                // Track source docs so document-level invalidation can find us.
                SourceDocuments = response.Sources.Select(s => s.Id).ToList(),
                ModelVersion = response.ModelVersion,
                QueryEmbedding = await GenerateEmbeddingAsync(query)
            }
        };

        // Store in distributed cache
        var cacheKey = $"rag_response:{GenerateCacheKey(query)}";
        await _cache.SetStringAsync(
            cacheKey,
            JsonSerializer.Serialize(cacheEntry),
            new DistributedCacheEntryOptions
            {
                AbsoluteExpiration = cacheEntry.ExpiresAt,
                SlidingExpiration = TimeSpan.FromHours(1)
            }
        );

        // Store in vector database for semantic search
        if (options.EnableSemanticCache)
        {
            await _vectorDb.UpsertAsync(
                "query_cache",
                new VectorDocument
                {
                    Id = GenerateCacheKey(query),
                    Vector = cacheEntry.Metadata.QueryEmbedding,
                    Metadata = new Dictionary<string, object>
                    {
                        ["query"] = query,
                        ["cached_at"] = cacheEntry.CachedAt,
                        ["expires_at"] = cacheEntry.ExpiresAt
                    }
                }
            );
        }
    }

    /// <summary>
    /// Invalidates cached responses: everything, entries referencing given
    /// documents, or entries older than a cutoff.
    /// </summary>
    public async Task InvalidateCacheAsync(InvalidationOptions options)
    {
        if (options.InvalidateAll)
        {
            // BUG FIX: IDistributedCache has no pattern/wildcard delete, so
            // the previous RemoveAsync("rag_response:*") only removed the
            // literal key "rag_response:*" — a no-op for real entries.
            // Clearing the semantic index below disables all semantic hits;
            // stale exact-match entries then age out via AbsoluteExpiration.
            // A true bulk purge requires store-specific support (e.g. Redis
            // key scans) or explicit key tracking.
            await _vectorDb.DeleteCollectionAsync("query_cache");
        }
        else if (options.DocumentIds?.Any() == true)
        {
            // Invalidate cache entries that reference specific documents
            var keysToInvalidate = await FindCacheKeysReferencingDocumentsAsync(
                options.DocumentIds
            );

            foreach (var key in keysToInvalidate)
            {
                await _cache.RemoveAsync(key);
            }
        }
        else if (options.OlderThan.HasValue)
        {
            // Invalidate old cache entries
            await InvalidateOldEntriesAsync(options.OlderThan.Value);
        }
    }
}

まとめ

Azure OpenAI ServiceとAzure AI Searchを活用したRAGシステムは、エンタープライズレベルの知識ベースシステムを構築するための強力なソリューションです。本記事で紹介した実装パターンにより、以下の利点が得られます:

  1. 高精度な情報検索: ハイブリッド検索とセマンティック理解
  2. 信頼性の高い回答生成: コンテキストに忠実な回答と引用
  3. スケーラビリティ: 効率的なキャッシングと最適化
  4. 継続的改善: フィードバックループと自動最適化

エンハンスド株式会社では、Azure OpenAI Serviceを活用したRAGシステムの設計・構築を支援しています。お客様のビジネスニーズに合わせたカスタマイズも承っております。


#AzureOpenAI #RAG #AzureAISearch #GPT4 #エンタープライズAI #知識管理 #自然言語処理 #ベクトル検索 #AI開発

執筆者: エンハンスド株式会社 技術チーム
公開日: 2024年12月24日