# raggo

Version 1.0.0

A flexible RAG (Retrieval-Augmented Generation) library designed to make document processing and context-aware AI interactions simple and effective.

Smart document search • Context-aware responses • Intelligent RAG

Quick start:
```go
package main

import (
	"context"
	"fmt"

	"github.com/teilomillet/raggo"
)

func main() {
	// Initialize RAG with default settings
	rag, err := raggo.NewSimpleRAG(raggo.DefaultConfig())
	if err != nil {
		fmt.Printf("Error: %v\n", err)
		return
	}
	defer rag.Close()

	// Add documents from a directory
	err = rag.AddDocuments(context.Background(), "./docs")
	if err != nil {
		fmt.Printf("Error: %v\n", err)
		return
	}

	// Search with natural language
	response, _ := rag.Search(context.Background(), "What are the key features?")
	fmt.Printf("Answer: %s\n", response)
}
```

Raggo provides a flexible configuration system that can load settings from several sources: environment variables, JSON files, or programmatic defaults.
```go
// Load configuration (automatically checks standard paths)
cfg, err := config.LoadConfig()
if err != nil {
	log.Fatal(err)
}

// Or create a custom configuration
cfg = &config.Config{
	Provider:   "milvus",                 // Vector store provider
	Model:      "text-embedding-3-small", // Embedding model
	Collection: "my_documents",

	// Search settings
	DefaultTopK:     5,   // Number of similar chunks to retrieve
	DefaultMinScore: 0.7, // Similarity threshold

	// Document processing
	DefaultChunkSize:    300, // Size of text chunks
	DefaultChunkOverlap: 50,  // Overlap between chunks
}

// Create a RAG instance with the config
rag, err := raggo.NewSimpleRAG(cfg)
```

A configuration can be saved for reuse:
```go
err := cfg.Save("~/.raggo/config.json")
```

Environment variables take precedence over configuration files:
- `RAGGO_PROVIDER`: service provider
- `RAGGO_MODEL`: model identifier
- `RAGGO_COLLECTION`: collection name
- `RAGGO_API_KEY`: default API key
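For example, every setting could be supplied from the environment; a minimal sketch with placeholder values:

```bash
export RAGGO_PROVIDER=milvus
export RAGGO_MODEL=text-embedding-3-small
export RAGGO_COLLECTION=my_documents
export RAGGO_API_KEY=your-api-key
```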
```bash
# Set API key
export OPENAI_API_KEY=your-api-key

# Install Raggo
go get github.com/teilomillet/raggo
```

```go
// Loader: fetch documents from URLs or files, with a configurable timeout
loader := raggo.NewLoader(raggo.SetTimeout(1 * time.Minute))
doc, err := loader.LoadURL(context.Background(), "https://example.com/doc.pdf")
```

```go
// Parser: extract text content from documents such as PDFs
parser := raggo.NewParser()
doc, err := parser.Parse("document.pdf")
```

```go
// Chunker: split parsed content into chunks of size 100
chunker := raggo.NewChunker(raggo.ChunkSize(100))
chunks := chunker.Chunk(doc.Content)
```

```go
// Embedder: turn chunks into vector embeddings
embedder := raggo.NewEmbedder(
	raggo.SetProvider("openai"),
	raggo.SetModel("text-embedding-3-small"),
)
```

```go
// Vector database: store and search embeddings via the Milvus backend
db := raggo.NewVectorDB(raggo.WithMilvus("collection"))
```
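These components compose into a manual indexing pipeline. Here is a minimal sketch chaining the calls shown above (parse, chunk, embed); it assumes those signatures and stops before the vector-store insert, whose method is not shown here:

```go
package main

import (
	"fmt"
	"log"

	"github.com/teilomillet/raggo"
)

func main() {
	// Parse a local document into text content
	parser := raggo.NewParser()
	doc, err := parser.Parse("document.pdf")
	if err != nil {
		log.Fatal(err)
	}

	// Split the content into chunks of size 100
	chunker := raggo.NewChunker(raggo.ChunkSize(100))
	chunks := chunker.Chunk(doc.Content)

	// Embed each chunk
	embedder := raggo.NewEmbedder(
		raggo.SetProvider("openai"),
		raggo.SetModel("text-embedding-3-small"),
	)
	embeddings, err := embedder.CreateEmbeddings(chunks)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("%d chunks -> %d embeddings\n", len(chunks), len(embeddings))
}
```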
Best suited for straightforward document Q&A:

```go
package main

import (
	"context"
	"log"

	"github.com/teilomillet/raggo"
)

func main() {
	// Initialize SimpleRAG
	rag, err := raggo.NewSimpleRAG(raggo.SimpleRAGConfig{
		Collection: "docs",
		Model:      "text-embedding-3-small",
		ChunkSize:  300,
		TopK:       3,
	})
	if err != nil {
		log.Fatal(err)
	}
	defer rag.Close()

	// Add documents
	err = rag.AddDocuments(context.Background(), "./documents")
	if err != nil {
		log.Fatal(err)
	}

	// Search with different strategies
	basicResponse, _ := rag.Search(context.Background(), "What is the main feature?")
	hybridResponse, _ := rag.SearchHybrid(context.Background(), "How does it work?", 0.7)

	log.Printf("Basic Search: %s\n", basicResponse)
	log.Printf("Hybrid Search: %s\n", hybridResponse)
}
```

For complex document understanding and context-aware responses:
```go
package main

import (
	"context"
	"fmt"
	"os"
	"path/filepath"

	"github.com/teilomillet/raggo"
)

func main() {
	// Initialize RAG with default settings
	rag, err := raggo.NewDefaultContextualRAG("basic_contextual_docs")
	if err != nil {
		fmt.Printf("Failed to initialize RAG: %v\n", err)
		os.Exit(1)
	}
	defer rag.Close()

	// Add documents - the system will automatically:
	// - Split documents into semantic chunks
	// - Generate rich context for each chunk
	// - Store embeddings with contextual information
	docsPath := filepath.Join("examples", "docs")
	if err := rag.AddDocuments(context.Background(), docsPath); err != nil {
		fmt.Printf("Failed to add documents: %v\n", err)
		os.Exit(1)
	}

	// Simple search with automatic context enhancement
	query := "What are the key features of the product?"
	response, err := rag.Search(context.Background(), query)
	if err != nil {
		fmt.Printf("Failed to search: %v\n", err)
		os.Exit(1)
	}
	fmt.Printf("\nQuery: %s\nResponse: %s\n", query, response)
}
```

```go
// Create a custom configuration
config := &raggo.ContextualRAGConfig{
	Collection:   "advanced_contextual_docs",
	Model:        "text-embedding-3-small", // Embedding model
	LLMModel:     "gpt-4o-mini",            // Model for context generation
	ChunkSize:    300,                      // Larger chunks for more context
	ChunkOverlap: 75,                       // 25% overlap for better continuity
	TopK:         5,                        // Number of similar chunks to retrieve
	MinScore:     0.7,                      // Higher threshold for better relevance
}

// Initialize RAG with the custom configuration
rag, err := raggo.NewContextualRAG(config)
if err != nil {
	log.Fatalf("Failed to initialize RAG: %v", err)
}
defer rag.Close()
```
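Searching works the same with a custom configuration; a short usage sketch continuing the snippet above:

```go
response, err := rag.Search(context.Background(), "How does chunk overlap affect retrieval?")
if err != nil {
	log.Fatal(err)
}
fmt.Println(response)
```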
For chat applications and long-term context retention:

```go
package main

import (
	"context"
	"log"
	"os"

	"github.com/teilomillet/gollm"
	"github.com/teilomillet/raggo"
)

func main() {
	// Initialize Memory Context
	memoryCtx, err := raggo.NewMemoryContext(
		os.Getenv("OPENAI_API_KEY"),
		raggo.MemoryTopK(5),
		raggo.MemoryCollection("chat"),
		raggo.MemoryStoreLastN(100),
		raggo.MemoryMinScore(0.7),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer memoryCtx.Close()

	// Initialize Contextual RAG
	rag, err := raggo.NewContextualRAG(&raggo.ContextualRAGConfig{
		Collection: "docs",
		Model:      "text-embedding-3-small",
	})
	if err != nil {
		log.Fatal(err)
	}
	defer rag.Close()

	// Example chat interaction
	messages := []gollm.MemoryMessage{
		{Role: "user", Content: "How does the authentication system work?"},
	}

	// Store the conversation
	err = memoryCtx.StoreMemory(context.Background(), messages)
	if err != nil {
		log.Fatal(err)
	}

	// Get an enhanced response with retrieved context
	prompt := &gollm.Prompt{Messages: messages}
	enhanced, _ := memoryCtx.EnhancePrompt(context.Background(), prompt, messages)
	response, _ := rag.Search(context.Background(), enhanced.Messages[0].Content)

	log.Printf("Response: %s\n", response)
}
```
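To keep the reply available for future turns, it can be written back through the same StoreMemory call. A minimal sketch continuing the example above; the "assistant" role value is an assumption, not confirmed by the snippets here:

```go
// Persist the assistant's reply so later turns can retrieve it
err = memoryCtx.StoreMemory(context.Background(), []gollm.MemoryMessage{
	{Role: "assistant", Content: response}, // role name assumed
})
if err != nil {
	log.Fatal(err)
}
```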
Process large document collections with rate limiting and concurrent workers:

```go
package main

import (
	"context"
	"log"
	"path/filepath"
	"sync"

	"github.com/teilomillet/raggo"
	"golang.org/x/time/rate"
)

const (
	GPT_RPM_LIMIT  = 5000    // Requests per minute
	GPT_TPM_LIMIT  = 4000000 // Tokens per minute
	MAX_CONCURRENT = 10      // Max concurrent goroutines
)

func main() {
	// Initialize components
	parser := raggo.NewParser()
	chunker := raggo.NewChunker(raggo.ChunkSize(500))
	embedder := raggo.NewEmbedder(
		raggo.SetProvider("openai"),
		raggo.SetModel("text-embedding-3-small"),
	)

	// Rate limiter: convert the per-minute request budget to per-second
	limiter := rate.NewLimiter(rate.Limit(GPT_RPM_LIMIT/60.0), GPT_RPM_LIMIT)

	// Process documents concurrently, bounded by a semaphore
	var wg sync.WaitGroup
	semaphore := make(chan struct{}, MAX_CONCURRENT)

	files, _ := filepath.Glob("./documents/*.pdf")
	for _, file := range files {
		wg.Add(1)
		semaphore <- struct{}{} // Acquire semaphore
		go func(file string) {
			defer wg.Done()
			defer func() { <-semaphore }() // Release semaphore

			// Wait for rate limit
			if err := limiter.Wait(context.Background()); err != nil {
				log.Printf("rate limiter: %v", err)
				return
			}

			// Process document: parse, chunk, embed
			doc, _ := parser.Parse(file)
			chunks := chunker.Chunk(doc.Content)
			embeddings, _ := embedder.CreateEmbeddings(chunks)
			log.Printf("Processed %s: %d chunks, %d embeddings\n", file, len(chunks), len(embeddings))
		}(file)
	}
	wg.Wait()
}
```
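GPT_TPM_LIMIT is declared above but never used. A token budget can be enforced the same way with the limiter's WaitN: create a second limiter once in main, then reserve capacity inside each worker before CreateEmbeddings. A sketch, assuming a rough four-characters-per-token estimate:

```go
// In main: a token-budget limiter refilling at GPT_TPM_LIMIT tokens per minute
tokenLimiter := rate.NewLimiter(rate.Limit(GPT_TPM_LIMIT/60.0), GPT_TPM_LIMIT)

// In each worker, before CreateEmbeddings: reserve the estimated token count
estTokens := len(doc.Content) / 4 // heuristic: ~4 characters per token
if err := tokenLimiter.WaitN(context.Background(), estTokens); err != nil {
	log.Printf("token limiter: %v", err)
	return
}
```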
Remember to `defer Close()` on every RAG instance and component you create. Check /examples for more information:
- /examples/simple/
- /examples/contextual/
- /examples/chat/
- /examples/memory_enhancer_example.go
- /examples/full_process.go
- /examples/process_embedding_benchmark.go

MIT License - see the LICENSE file.