- Support CLI
- Support python API
- Zero cost: No external dependencies, fully local
- Embedded (in-process) database including in memory and on-disk
- Core functions
- Add: Folders, files, text
- Search: Keyword, tags, path, hybrid
- Chunking: By headings, paragraphs, character length, separators
- CRUD: Full create, read, update, delete
- Export: Folders, files, text
- Tags: Add, remove, search by tags
- Filters: Path patterns, date ranges, tag exclusion
- Stats: Document counts, tag counts
git clone https://github.com/locchh/dkb
cd dkb
uv venv .venv
source .venv/bin/activate
uv pip install -e .

import dkb
import openai

# OpenAI client used for the chat-completion call below.
client = openai.OpenAI(api_key="your-api-key")

# Create a new local knowledge-base file.
db = dkb.create_dkb("knowledge.dkb")

# Add documents: each folder is ingested with its own set of tags.
db.add_folder("./requirements", tags=["requirements", "specification", "documentation"])
db.add_folder("./design", tags=["design", "architecture", "documentation"])
db.add_folder("./notes", tags=["notes", "meetings", "project"])

# User query
user_query = "Please provide me with the design document for the authentication feature?"

# Perform search (tag-based retrieval for the RAG context).
results = db.search_by_tags(["design", "authentication"])

# Build the LLM context: join chunk contents once instead of
# concatenating strings inside a loop (which is quadratic).
context = "\n".join(result["content"] for result in results)

# Generate response grounded in the retrieved context.
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are a helpful assistant that answers questions based on the provided context."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {user_query}"},
    ],
)
print(response.choices[0].message.content)

- Complete `BaseDocument` class ✅
- Complete `document` module ✅
- Complete `DKB` class ✅
- Support `search_by*` methods ✅
- Support `chunk` method 🔥
- Support `export` method
- Testing Python API 🔁
- Support CLI ✅
# Add documents
dkb add --folder ./docs --tags "documentation,project"
dkb add --file notes.md --tags "notes,important"
dkb add --text "Quick note" --tags "memo"
# Search knowledge base
dkb search --query "requirements testing" --match any --limit 5
dkb search --tags "documentation,design" --match all
dkb search --path "*/requirements/*"
# List and explore
dkb list # List all documents
dkb list --tags --count # List tags with counts
dkb stats # Show statistics
# Database options
dkb --database my_kb.dkb search --query "search terms"
dkb add --folder ./docs --override  # Override existing database

Commands:
- `add` - Add documents (folder, file, or text)
- `search` - Search by query, tags, or path patterns
- `list` - List documents or tags
- `stats` - Show knowledge base statistics

Global Options:
- `--database, -d` - Database file path (default: knowledge.dkb)
from dkb import DKB, create_dkb, connect_dkb
# ================================================================
# dkb Python API examples — each numbered section demonstrates one
# area of the API (create/open, add, search, read, update, delete,
# export, chunking, stats, context manager, result objects).
# NOTE(review): behavior notes follow the surrounding docs; the dkb
# package itself is not visible here — confirm against the library.
# ================================================================
# ============================================
# 1. CREATE / OPEN DATABASE
# ============================================
# Create new knowledge base
db: DKB = create_dkb('my_knowledge.dkb')
# Or connect to existing knowledge base (alternative to create_dkb)
db: DKB = connect_dkb('my_knowledge.dkb')
# ============================================
# 2. ADD CONTENT (Multiple Ways)
# ============================================
# Add entire folder (recursively)
db.add_folder('./docs')
# Restrict ingestion to files matching a glob pattern
db.add_folder('./docs', pattern='*.md')
# Add single document from file
db.add_document('notes/idea.md', tags=['ideas'])
# Add direct text
db.add_text('Just a thought...', tags=['ideas'])
# ============================================
# 3. SEARCH (Keyword + Tags + Filters)
# ============================================
# Search by text content (FTS5)
results = db.search_by_query(['authentication']) # AND logic by default
results = db.search_by_query(['machine', 'learning'], match='all') # "machine AND learning"
results = db.search_by_query(['python', 'javascript'], match='any') # "python OR javascript"
# Search with limit (cap number of results returned)
results = db.search_by_query(['machine', 'learning'], limit=5)
# Search by tags only
results = db.search_by_tags(['work', 'important']) # AND logic (must have both tags)
results = db.search_by_tags(['python', 'javascript'], match='any') # OR logic (either tag)
results = db.search_by_tags(['backend', 'security'], match='all') # AND logic (both tags)
# Search by path pattern (glob-style)
results = db.search_by_path('docs/api/*')
results = db.search_by_path('*.md', limit=10)
# ============================================
# 4. READ / RETRIEVE
# ============================================
# List all documents
all_docs = db.list_documents()
# Get single document by path
doc = db.get_document('notes/idea.md')
# Get document metadata
metadata = db.get_metadata('notes/idea.md')
# Returns: {path, created_at, modified_at, tags, length}
# ============================================
# 5. UPDATE
# ============================================
# Update document content (replaces the stored text)
db.update_document('notes/idea.md', content='# Updated content...')
# Add tags
db.add_tags('notes/idea.md', ['important', 'review'])
# Remove tags
db.remove_tags('notes/idea.md', ['draft'])
# Update arbitrary metadata fields
db.update_metadata('notes/idea.md', metadata={'author': 'John', 'priority': 'high'})
# ============================================
# 6. DELETE
# ============================================
# Remove single document
db.remove_document('notes/idea.md')
# Remove multiple by path pattern
db.remove_by_path('drafts/*')
# Remove every document carrying a tag
db.remove_by_tags(['archived'])
# Clear entire database (irreversible)
db.clear_all()
# ============================================
# 7. EXPORT
# ============================================
# Export back to markdown folder
db.export('./output_folder')
# Export specific documents (filtered by tag)
db.export('./output', tags=['important'])
# ============================================
# 8. CHUNKING OPERATIONS
# ============================================
# `strategy` selects the splitting rule; see the feature list above.
# Chunk document by headings
chunks = db.chunk(
    path='docs/api.md',
    strategy='headings',
)
# Chunk document by paragraphs
chunks = db.chunk(
    path='docs/api.md',
    strategy='paragraphs',
)
# Chunk document by a custom separator string
chunks = db.chunk(
    path='docs/api.md',
    strategy='separator',
    separator='----',
)
# ============================================
# 9. UTILITY / STATS
# ============================================
# Get statistics for the whole knowledge base
stats = db.stats()
# Returns: {
#   total_documents: 150,
#   total_length: 125000,
#   total_tags: 23,
#   size_bytes: 2048000
# }
# List all tags
all_tags = db.list_tags()
# Count documents by tag
tag_counts = db.count_by_tags(['work', 'ideas', 'draft'])
# Returns: {'work': 45, 'ideas': 23, 'draft': 12}
# ============================================
# 10. CONTEXT MANAGER (Auto-close)
# ============================================
with DKB('knowledge.dkb') as db:
    db.add_folder('./docs')
    results = db.search_by_query(['query'], limit=5)
# Automatically closes connection
# ============================================
# 11. RESULT OBJECT
# ============================================
# NOTE(review): field names below are as documented by dkb — confirm
# against the library (e.g. whether 'content' holds a chunk or the
# full document depends on how the match was produced).
results = db.search_by_query(['authentication'], limit=5)
for result in results:
    print(result.metadata['path'])     # "docs/api.md#authentication"
    print(result.metadata['content'])  # Chunk or full content
    print(result.metadata['score'])    # 0.87 (relevance)
    print(result.metadata['length'])   # 245
    print(result.metadata['tags'])     # ['backend', 'security']
    print(result.metadata['metadata']) # {created_at, modified_at, ...}