diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..a2db687
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,146 @@
+name: Corebrain SDK CI/CD
+
+on:
+ push:
+ branches: [ main, develop ]
+ tags:
+ - 'v*'
+ pull_request:
+ branches: [ main, develop ]
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ['3.8', '3.9', '3.10', '3.11']
+
+ services:
+ # PostgreSQL service for integration tests
+ postgres:
+ image: postgres:13
+ env:
+ POSTGRES_PASSWORD: postgres
+ POSTGRES_USER: postgres
+ POSTGRES_DB: test_db
+ ports:
+ - 5432:5432
+ options: >-
+ --health-cmd pg_isready
+ --health-interval 10s
+ --health-timeout 5s
+ --health-retries 5
+
+ # MongoDB service for NoSQL integration tests
+ mongodb:
+ image: mongo:4.4
+ ports:
+ - 27017:27017
+ options: >-
+ --health-cmd "mongo --eval 'db.runCommand({ ping: 1 })'"
+ --health-interval 10s
+ --health-timeout 5s
+ --health-retries 5
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ cache: 'pip'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install .[dev,all_db]
+
+ - name: Lint with flake8
+ run: |
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+
+ - name: Type check with mypy
+ run: |
+ mypy core db cli utils
+
+ - name: Format check with black
+ run: |
+ black --check .
+
+ - name: Test with pytest
+ run: |
+ pytest --cov=. --cov-report=xml
+
+ - name: Upload coverage to Codecov
+ uses: codecov/codecov-action@v3
+ with:
+ file: ./coverage.xml
+ fail_ci_if_error: false
+
+ build-and-publish:
+ needs: test
+ runs-on: ubuntu-latest
+ if: startsWith(github.ref, 'refs/tags/v')
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build twine
+
+ - name: Build package
+ run: |
+ python -m build
+
+ - name: Publish to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ user: __token__
+ password: ${{ secrets.PYPI_API_TOKEN }}
+ skip_existing: true
+
+ docker:
+ needs: test
+ runs-on: ubuntu-latest
+ if: |
+ (github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')) ||
+ startsWith(github.ref, 'refs/tags/v')
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+
+ - name: Login to DockerHub
+ uses: docker/login-action@v2
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Extract metadata for Docker
+ id: meta
+ uses: docker/metadata-action@v4
+ with:
+ images: corebrain/sdk
+ tags: |
+ type=ref,event=branch
+ type=ref,event=tag
+ type=semver,pattern={{version}}
+ type=sha,format=short
+
+ - name: Build and push
+ uses: docker/build-push-action@v3
+ with:
+ context: .
+ push: true
+ tags: ${{ steps.meta.outputs.tags }}
+ labels: ${{ steps.meta.outputs.labels }}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 0a19790..cf98110 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,10 @@
__pycache__/
*.py[cod]
*$py.class
+venv/
+.tofix/
+README-no-valid.md
+requirements.txt
# C extensions
*.so
@@ -14,7 +18,7 @@ dist/
downloads/
eggs/
.eggs/
-lib/
+#lib/
lib64/
parts/
sdist/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..a034212
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "corebrain/CLI-UI"]
+ path = corebrain/CLI-UI
+ url = https://github.com/Luki20091/CLI-UI.git
diff --git a/1 b/1
new file mode 100644
index 0000000..e69de29
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..47e6927
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,147 @@
+# How to Contribute to Corebrain SDK
+
+Thank you for your interest in contributing to the Corebrain SDK! This document provides guidelines for contributing to the project.
+
+## Code of Conduct
+
+By participating in this project, you commit to maintaining a respectful and collaborative environment.
+
+## How to Contribute
+
+### Reporting Bugs
+
+1. Verify that the bug hasn't already been reported in the [issues](https://github.com/ceoweggo/Corebrain/issues)
+2. Use the bug template to create a new issue
+3. Include as much detail as possible: steps to reproduce, environment, versions, etc.
+4. If possible, include a minimal example that reproduces the problem
+
+### Suggesting Improvements
+
+1. Check the [issues](https://github.com/ceoweggo/Corebrain/issues) to see if it has already been suggested
+2. Use the feature template to create a new issue
+3. Clearly describe the improvement and justify its value
+
+### Submitting Changes
+
+1. Fork the repository
+2. Create a branch for your change (`git checkout -b feature/amazing-feature`)
+3. Make your changes following the code conventions
+4. Write tests for your changes
+5. Ensure all tests pass
+6. Commit your changes (`git commit -m 'Add amazing feature'`)
+7. Push your branch (`git push origin feature/amazing-feature`)
+8. Open a Pull Request
+
+## Development Environment
+
+### Installation for Development
+
+```bash
+# Clone the repository
+git clone https://github.com/ceoweggo/Corebrain.git
+cd Corebrain
+
+# Create virtual environment
+python -m venv venv
+source venv/bin/activate # On Windows: venv\Scripts\activate
+
+# Install for development
+pip install -e ".[dev]"
+```
+
+### Project Structure
+
+```
+v1/
+├── corebrain/            # Main package
+│   ├── __init__.py
+│   ├── __pycache__/
+│   ├── cli/              # Command-line interface
+│   ├── config/           # Configuration management
+│   ├── core/             # Core functionality
+│   ├── db/               # Database interactions
+│   ├── lib/              # Library components
+│   ├── SSO/              # Globodain SSO Authentication
+│   ├── network/          # Network functionality
+│   ├── services/         # Service implementations
+│   ├── utils/            # Utility functions
+│   ├── cli.py            # CLI entry point
+│   └── sdk.py            # SDK entry point
+├── corebrain.egg-info/   # Package metadata
+├── docs/                 # Documentation
+├── examples/             # Usage examples
+├── screenshots/          # Project screenshots
+├── venv/                 # Virtual environment (not to be committed)
+├── .github/              # GitHub files directory
+├── __pycache__/          # Python cache files
+├── .tofix/               # Files to be fixed
+├── .gitignore            # Git ignore rules
+├── CONTRIBUTING.md       # Contribution guidelines
+├── health.py             # Health check script
+├── LICENSE               # License information
+├── pyproject.toml        # Project configuration
+├── README-no-valid.md    # Outdated README
+├── README.md             # Project overview
+├── requirements.txt      # Production dependencies
+└── setup.py              # Package setup
+
+### Running Tests
+
+```bash
+# Run all tests
+pytest
+
+# Run specific test file
+pytest tests/test_specific.py
+
+# Run tests with coverage
+pytest --cov=corebrain
+```
+
+## Coding Standards
+
+### Style Guide
+
+- We follow [PEP 8](https://www.python.org/dev/peps/pep-0008/) for Python code
+- Use 4 spaces for indentation
+- Maximum line length is 88 characters
+- Use descriptive variable and function names
+
+### Documentation
+
+- All modules, classes, and functions should have docstrings
+- Follow the [Google docstring format](https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings) (see the example below)
+- Keep documentation up-to-date with code changes
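+
+For example, a minimal function docstring in the Google style (illustrative only):
+
+```python
+def add(a: int, b: int) -> int:
+    """Adds two integers.
+
+    Args:
+        a: First operand.
+        b: Second operand.
+
+    Returns:
+        The sum of a and b.
+    """
+    return a + b
+```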
+
+### Commit Messages
+
+- Use clear, concise commit messages
+- Start with a verb in the present tense (e.g., "Add feature" not "Added feature")
+- Reference issue numbers when applicable (e.g., "Fix #123: Resolve memory leak")
+
+## Pull Request Process
+
+1. Update documentation if necessary
+2. Add or update tests as needed
+3. Ensure CI checks pass
+4. Request a review from maintainers
+5. Address review feedback
+6. Maintainers will merge your PR once approved
+
+## Release Process
+
+Our maintainers follow semantic versioning (MAJOR.MINOR.PATCH):
+- MAJOR version for incompatible API changes
+- MINOR version for backward-compatible functionality
+- PATCH version for backward-compatible bug fixes
+
+For example, going from 1.4.2 to 1.5.0 adds backward-compatible functionality, while going to 2.0.0 signals a breaking change.
+
+## Getting Help
+
+If you need help with anything:
+- Join our [Discord community](https://discord.gg/m2AXjPn2yV)
+- Join our [Whatsapp Channel](https://whatsapp.com/channel/0029Vap43Vy5EjxvR4rncQ1I)
+- Ask questions in the GitHub Discussions
+- Contact the maintainers at ruben@globodain.com
+
+Thank you for contributing to Corebrain SDK!
\ No newline at end of file
diff --git a/FETCH_HEAD b/FETCH_HEAD
new file mode 100644
index 0000000..e69de29
diff --git a/README.md b/README.md
index 22ebdd7..01a3ad5 100644
--- a/README.md
+++ b/README.md
@@ -1,106 +1,211 @@
-# Corebrain
+# Corebrain SDK
-
-
-
+
+[![PyPI version](https://badge.fury.io/py/corebrain.svg)](https://badge.fury.io/py/corebrain)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-## What is Corebrain?
+SDK for natural language queries against relational and non-relational databases. It lets you interact with your data by asking questions in plain language.
-Corebrain is an open-source enterprise solution designed to centralize and optimize corporate data management. The project offers a scalable architecture for processing, analyzing, and visualizing critical information for decision-making.
+## ✨ Features
-**IMPORTANT NOTE**: In the current version (0.1.0-alpha), only the SQL code is functional. Other modules are under development.
+- **Natural Language Queries**: Transforms human language questions into database queries (SQL/NoSQL)
+- **Multi-Database Support**: Compatible with SQLite, MySQL, PostgreSQL, and MongoDB
+- **Unified Interface**: Consistent API across different database types
+- **Built-in CLI**: Interact with your databases directly from the terminal
+- **Strong Security**: Robust authentication and secure credential management
+- **Highly Extensible**: Designed for easy integration with new engines and features
+- **Comprehensive Documentation**: Usage examples, API reference, and step-by-step guides
-## Current Status
+## 📋 Requirements
-- ✅ SQL queries for data extraction
-- ✅ Database schemas
-- ✅ Authentication service
-- ❌ NoSQL (in development)
-- ❌ Frontend (in development)
-- ❌ REST API (in development)
+- Python 3.8+
+- Specific dependencies based on the database engine:
+ - **SQLite**: Included in Python
+ - **PostgreSQL**: `psycopg2-binary`
+ - **MySQL**: `mysql-connector-python`
+ - **MongoDB**: `pymongo`
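+
+To check which database engines are usable in your current environment, the SDK exports a helper — a minimal sketch, assuming the returned value prints readably:
+
+```python
+from corebrain import get_available_engines
+
+# Lists the engines the SDK can use with the currently installed drivers
+print(get_available_engines())
+```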
-## SDK Integration
-Corebrain provides SDKs for multiple programming languages, making it easy to integrate with your existing systems. While only SQL in Python functionality is currently available, this SDK will support all features and most common languages as they are developed.
+## 🔧 Installation
-
+### From PyPI (recommended)
-## Available Versions
+```bash
+# Minimal installation
+pip install corebrain
+```
-- **`main` Branch**: Stable version with verified functionality (currently only SQL is functional)
-- **`pre-release` Branch**: Initial version with all features in development (may contain errors)
+### From source code
-## Getting Started
+```bash
-### Installation
+git clone https://github.com/ceoweggo/Corebrain.git
+cd Corebrain
+git submodule update --init --recursive
+pip install -e .
-```bash
-# Clone the repository
-git clone https://github.com/your-organization/corebrain.git
+```
-# Enter the directory
-cd corebrain
+## 🚀 Quick Start Guide
+
+### Initialization
+
+> **⚠️ IMPORTANT:**
+> * If you don't have an existing configuration, first run `corebrain --configure`
+> * If you need to generate a new API key, use `corebrain --create-api-key`
+> * Never share your API key in public repositories. Use environment variables instead.
+
+
+```python
+from corebrain import init
+
+# Initialize with a previously saved configuration
+client = init(
+ api_key="your_api_key",
+ config_id="your_config_id"
+)
+```
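+
+If you keep credentials in environment variables (as recommended above), initialization might look like the following sketch — the variable names `COREBRAIN_API_KEY` and `COREBRAIN_CONFIG_ID` are illustrative, not names the SDK reads automatically:
+
+```python
+import os
+
+from corebrain import init
+
+# Read credentials from the environment instead of hardcoding them
+client = init(
+    api_key=os.environ["COREBRAIN_API_KEY"],
+    config_id=os.environ["COREBRAIN_CONFIG_ID"],
+)
+```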
+
+### Making Natural Language Queries
+
+```python
+# Simple query
+result = client.ask("How many active users are there?")
+print(result["explanation"]) # Natural language explanation
+print(result["query"]) # Generated SQL/NoSQL query
+print(result["results"]) # Query results
+
+# Query with additional parameters
+result = client.ask(
+ "Show the last 5 orders",
+ collection_name="orders",
+ limit=5,
+ filters={"status": "completed"}
+)
+
+# Iterate over the results
+for item in result["results"]:
+ print(item)
+```
-# Install dependencies
-npm install
+### Getting the Database Schema
+
+```python
+# Get the complete schema
+schema = client.db_schema
+
+# List all tables/collections
+tables = client.list_collections_name()
+print(tables)
```
-### Configuration
+### Closing the Connection
-1. Use `corebrain --configure` to start the configuration.
-2. Once configuration has been completed, copy the config_id and replace in your example code (see 'examples' folder).
-3. Run the example code in Python and enjoy!
+```python
+# It's recommended to close the connection when finished
+client.close()
-### Basic Usage
+# Or use the with context
+with init(api_key="your_api_key", config_id="your_config_id") as client:
+ result = client.ask("How many users are there?")
+ print(result["explanation"])
+```
+
+## 🖥️ Command Line Interface Usage
+
+### Configure Connection
```bash
-# Run SQL migrations
-npm run migrate
+# Init configuration
+corebrain --configure
+```
-# Start the SQL service
-npm run sql:start
+### Display Database Schema
+
+```bash
+# Show complete schema
+corebrain --show-schema
```
-## Accessing the Pre-release Version
+### List Configurations
-If you want to test all features under development (including unstable components), you can switch to the pre-release branch:
+```bash
+# List all configurations
+corebrain --list-configs
+```
+
+## 📝 Advanced Documentation
+
+### Configuration Management
+
+```python
+from corebrain import list_configurations, remove_configuration, get_config
+
+# List all configurations
+configs = list_configurations(api_key="your_api_key")
+print(configs)
+
+# Get details of a configuration
+config = get_config(api_key="your_api_key", config_id="your_config_id")
+print(config)
+
+# Remove a configuration
+removed = remove_configuration(api_key="your_api_key", config_id="your_config_id")
+print(f"Configuration removed: {removed}")
+```
+
+## 🧪 Testing and Development
+
+### Development Installation
```bash
-git checkout pre-release
-npm install
+# Clone the repository
+git clone https://github.com/ceoweggo/Corebrain.git
+cd Corebrain
+
+# Install in development mode with extra tools
+
+# On Windows (use powershell)
+.\setup.ps1
+
+# On Linux/macOS (use bash)
+./setup.sh
```
-**Warning**: The pre-release version contains experimental features with bugs or unexpected behaviors. Not recommended for production environments.
+### Verifying Style and Typing
-## Contributing
+```bash
+# Check style with flake8
+flake8 .
-Corebrain is an open-source project, and we welcome all contributions. To contribute:
+# Check typing with mypy
+mypy core db cli utils
-1. Fork the repository
-2. Create a new branch (`git checkout -b feature/new-feature`)
-3. Make your changes
-4. Run tests (`npm test`)
-5. Commit your changes (`git commit -m 'Add new feature'`)
-6. Push to your fork (`git push origin feature/new-feature`)
-7. Open a Pull Request
+# Format code with black
+black .
+```
-Please read our [contribution guidelines](CONTRIBUTING.md) before you start.
+### Continuous Integration and Deployment (CI/CD)
-## Roadmap
+The project uses GitHub Actions to automate:
-- **0.1.0**: Basic SQL operation. OpenAI connected. Authentication service Globodain SSO integrated. API Keys configuration integrated.
-- **0.2.0**: NoSQL (MongoDB) fixed. API Key creation by command "Corebrain --configure". Functional version.
-- **0.3.0**: API deployment and integration at source. Functional version for third parties.
-...
-- **1.0.0**: First stable version with all features.
+1. **Testing**: Runs tests on multiple Python versions (3.8-3.11)
+2. **Quality Verification**: Checks style, typing, and formatting
+3. **Coverage**: Generates code coverage reports
+4. **Automatic Publication**: Publishes new versions to PyPI when tags are created
+5. **Docker Images**: Builds and publishes Docker images with each version
-You can see the full report at [Project Roadmap](https://github.com/users/ceoweggo/projects/4/views/2)
+You can see the complete configuration in `.github/workflows/ci.yml`.
-## License
+## 🛠️ Contributions
-This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+Contributions are welcome! To contribute:
+
+1. Fork the repository
+2. Create a branch for your feature (`git checkout -b feature/amazing-feature`)
+3. Commit your changes (`git commit -m 'Add some amazing feature'`)
+4. Push to the branch (`git push origin feature/amazing-feature`)
+5. Open a Pull Request
-## Contact
+Please make sure your changes pass all tests and comply with the style guidelines.
-- **Email**: [ruben@globodain.com](mailto:ruben@globodain.com)
-- **Issues**: [Report a problem](https://github.com/ceoweggo/corebrain/issues)
+## 📄 License
+Distributed under the MIT License. See `LICENSE` for more information.
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..5092964
--- /dev/null
+++ b/config.json
@@ -0,0 +1,10 @@
+{
+ "type": "nosql",
+ "engine": "mongodb",
+ "host": "localhost",
+ "port": 27017,
+ "database": "baza",
+ "config_id": "a1e0694f-112d-4ade-aa31-68e6d83abab6",
+ "excluded_tables": [],
+ "active": true
+}
\ No newline at end of file
diff --git a/corebrain/.github/workflows/ci.yml b/corebrain/.github/workflows/ci.yml
new file mode 100644
index 0000000..a2db687
--- /dev/null
+++ b/corebrain/.github/workflows/ci.yml
@@ -0,0 +1,146 @@
+name: Corebrain SDK CI/CD
+
+on:
+ push:
+ branches: [ main, develop ]
+ tags:
+ - 'v*'
+ pull_request:
+ branches: [ main, develop ]
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ['3.8', '3.9', '3.10', '3.11']
+
+ services:
+ # PostgreSQL service for integration tests
+ postgres:
+ image: postgres:13
+ env:
+ POSTGRES_PASSWORD: postgres
+ POSTGRES_USER: postgres
+ POSTGRES_DB: test_db
+ ports:
+ - 5432:5432
+ options: >-
+ --health-cmd pg_isready
+ --health-interval 10s
+ --health-timeout 5s
+ --health-retries 5
+
+ # MongoDB service for NoSQL integration tests
+ mongodb:
+ image: mongo:4.4
+ ports:
+ - 27017:27017
+ options: >-
+ --health-cmd "mongo --eval 'db.runCommand({ ping: 1 })'"
+ --health-interval 10s
+ --health-timeout 5s
+ --health-retries 5
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ cache: 'pip'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install .[dev,all_db]
+
+ - name: Lint with flake8
+ run: |
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+
+ - name: Type check with mypy
+ run: |
+ mypy core db cli utils
+
+ - name: Format check with black
+ run: |
+ black --check .
+
+ - name: Test with pytest
+ run: |
+ pytest --cov=. --cov-report=xml
+
+ - name: Upload coverage to Codecov
+ uses: codecov/codecov-action@v3
+ with:
+ file: ./coverage.xml
+ fail_ci_if_error: false
+
+ build-and-publish:
+ needs: test
+ runs-on: ubuntu-latest
+ if: startsWith(github.ref, 'refs/tags/v')
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build twine
+
+ - name: Build package
+ run: |
+ python -m build
+
+ - name: Publish to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ user: __token__
+ password: ${{ secrets.PYPI_API_TOKEN }}
+ skip_existing: true
+
+ docker:
+ needs: test
+ runs-on: ubuntu-latest
+ if: |
+ (github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop')) ||
+ startsWith(github.ref, 'refs/tags/v')
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+
+ - name: Login to DockerHub
+ uses: docker/login-action@v2
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Extract metadata for Docker
+ id: meta
+ uses: docker/metadata-action@v4
+ with:
+ images: corebrain/sdk
+ tags: |
+ type=ref,event=branch
+ type=ref,event=tag
+ type=semver,pattern={{version}}
+ type=sha,format=short
+
+ - name: Build and push
+ uses: docker/build-push-action@v3
+ with:
+ context: .
+ push: true
+ tags: ${{ steps.meta.outputs.tags }}
+ labels: ${{ steps.meta.outputs.labels }}
\ No newline at end of file
diff --git a/corebrain/CLI-UI b/corebrain/CLI-UI
new file mode 160000
index 0000000..98143ed
--- /dev/null
+++ b/corebrain/CLI-UI
@@ -0,0 +1 @@
+Subproject commit 98143ed62dd0ec68a5bd181292af4bea88f0218b
diff --git a/corebrain/__init__.py b/corebrain/__init__.py
new file mode 100644
index 0000000..6d2bc2e
--- /dev/null
+++ b/corebrain/__init__.py
@@ -0,0 +1,83 @@
+"""
+Corebrain SDK.
+
+This package provides a Python SDK for interacting with the Corebrain API
+and enables natural language queries to relational and non-relational databases.
+"""
+import logging
+from typing import Dict, Any, List, Optional
+
+# Basic logging configuration
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+
+# Package version (exported via __all__)
+__version__ = "0.2.0"
+
+# Safe imports (no circular dependencies)
+from corebrain.db.engines import get_available_engines
+from corebrain.core.client import Corebrain
+from corebrain.config.manager import ConfigManager
+
+# Explicit export of public components
+__all__ = [
+ 'init',
+ 'extract_db_schema',
+ 'list_configurations',
+ 'remove_configuration',
+ 'get_available_engines',
+ 'get_config',
+ '__version__'
+]
+
+def init(api_key: str, config_id: str, skip_verification: bool = False) -> Corebrain:
+ """
+ Initialize the Corebrain SDK with the provided API key and configuration.
+
+ Args:
+ api_key: Corebrain API key
+ config_id: ID of the configuration to use
+ skip_verification: If True, skips verification during initialization
+
+ Returns:
+ A configured Corebrain instance
+ """
+ return Corebrain(api_key=api_key, config_id=config_id, skip_verification=skip_verification)
+
+# Package-level convenience features
+def list_configurations(api_key: str) -> List[str]:
+ """
+ Lists the available configurations for an API key.
+
+ Args:
+ api_key: Corebrain API Key
+
+ Returns:
+ List of available configuration IDs
+ """
+ config_manager = ConfigManager()
+ return config_manager.list_configs(api_key)
+
+def remove_configuration(api_key: str, config_id: str) -> bool:
+ """
+ Deletes a specific configuration.
+
+ Args:
+ api_key: Corebrain API Key
+ config_id: ID of the configuration to delete
+
+ Returns:
+ True if deleted successfully, False otherwise
+ """
+ config_manager = ConfigManager()
+ return config_manager.remove_config(api_key, config_id)
+
+def get_config(api_key: str, config_id: str) -> Optional[Dict[str, Any]]:
+ """
+ Retrieves a specific configuration.
+
+ Args:
+ api_key: Corebrain API Key
+ config_id: ID of the configuration to retrieve
+
+ Returns:
+ Dictionary with the configuration or None if it does not exist
+ """
+ config_manager = ConfigManager()
+ return config_manager.get_config(api_key, config_id)
\ No newline at end of file
diff --git a/corebrain/cli.py b/corebrain/cli.py
new file mode 100644
index 0000000..7e17025
--- /dev/null
+++ b/corebrain/cli.py
@@ -0,0 +1,8 @@
+"""
+Entry point for the Corebrain CLI for compatibility.
+"""
+from corebrain.cli.__main__ import main
+
+if __name__ == "__main__":
+ import sys
+ sys.exit(main())
\ No newline at end of file
diff --git a/corebrain/cli/__init__.py b/corebrain/cli/__init__.py
new file mode 100644
index 0000000..108326c
--- /dev/null
+++ b/corebrain/cli/__init__.py
@@ -0,0 +1,57 @@
+"""
+Command-line interface for the Corebrain SDK.
+
+This module provides a command-line interface to configure
+and use the Corebrain SDK for natural language queries to databases.
+"""
+import sys
+from typing import Optional, List
+
+# Import core components for CLI
+from corebrain.cli.commands import main_cli
+from corebrain.cli.utils import print_colored, ProgressTracker, get_free_port
+from corebrain.cli.config import (
+ configure_sdk,
+ get_db_type,
+ get_db_engine,
+ get_connection_params,
+ test_database_connection,
+ select_excluded_tables
+)
+from corebrain.cli.auth import (
+ authenticate_with_sso,
+ fetch_api_keys,
+ exchange_sso_token_for_api_token,
+ verify_api_token
+)
+
+
+# Explicit export of public components
+__all__ = [
+ 'main_cli',
+ 'run_cli',
+ 'print_colored',
+ 'ProgressTracker',
+ 'get_free_port',
+ 'configure_sdk',
+ 'authenticate_with_sso',
+ 'fetch_api_keys',
+ 'exchange_sso_token_for_api_token',
+ 'verify_api_token'
+]
+
+# Convenience function for running CLI
+def run_cli(argv: Optional[List[str]] = None) -> int:
+ """
+ Run the CLI with the provided arguments.
+
+ Args:
+ argv: List of arguments (use sys.argv if None)
+
+ Returns:
+ Exit code
+ """
+ if argv is None:
+ argv = sys.argv[1:]
+
+ return main_cli(argv)
\ No newline at end of file
diff --git a/corebrain/cli/__main__.py b/corebrain/cli/__main__.py
new file mode 100644
index 0000000..db91155
--- /dev/null
+++ b/corebrain/cli/__main__.py
@@ -0,0 +1,12 @@
+"""
+Entry point to run the CLI as a module.
+"""
+import sys
+from corebrain.cli.commands import main_cli
+
+def main():
+ """Main function for the entry point in pyproject.toml."""
+ return main_cli()
+
+if __name__ == "__main__":
+ sys.exit(main())
\ No newline at end of file
diff --git a/corebrain/cli/auth/__init__.py b/corebrain/cli/auth/__init__.py
new file mode 100644
index 0000000..a601c7b
--- /dev/null
+++ b/corebrain/cli/auth/__init__.py
@@ -0,0 +1,22 @@
+"""
+Authentication modules for the Corebrain CLI.
+
+This package provides functionality for authentication,
+token management, and API keys in the Corebrain CLI.
+"""
+from corebrain.cli.auth.sso import authenticate_with_sso, TokenHandler
+from corebrain.cli.auth.api_keys import (
+ fetch_api_keys,
+ exchange_sso_token_for_api_token,
+ verify_api_token,
+ get_api_key_id_from_token
+)
+# Explicit export of public components
+__all__ = [
+ 'authenticate_with_sso',
+ 'TokenHandler',
+ 'fetch_api_keys',
+ 'exchange_sso_token_for_api_token',
+ 'verify_api_token',
+ 'get_api_key_id_from_token'
+]
\ No newline at end of file
diff --git a/corebrain/cli/auth/api_keys.py b/corebrain/cli/auth/api_keys.py
new file mode 100644
index 0000000..5be72f0
--- /dev/null
+++ b/corebrain/cli/auth/api_keys.py
@@ -0,0 +1,299 @@
+"""
+API Keys Management for the CLI.
+"""
+import uuid
+import httpx
+
+from typing import Optional, Dict, Any, Tuple
+
+from corebrain.cli.utils import print_colored
+from corebrain.network.client import http_session
+from corebrain.core.client import Corebrain
+
+def verify_api_token(token: str, api_url: Optional[str] = None, user_data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Optional[Dict[str, Any]]]:
+ """
+ Verifies if an API token is valid.
+
+ Args:
+ token (str): API token to verify.
+ api_url (str, optional): API URL. Defaults to None.
+ user_data (dict, optional): User data. Defaults to None.
+
+ Returns:
+ tuple: (validity (bool), user information (dict)) if valid, else (False, None).
+ """
+ try:
+ # Create a temporary SDK instance to verify the token
+ config = {"type": "test", "config_id": str(uuid.uuid4())}
+ kwargs = {"api_token": token, "db_config": config}
+
+ if user_data:
+ kwargs["user_data"] = user_data
+
+ if api_url:
+ kwargs["api_url"] = api_url
+
+ sdk = Corebrain(**kwargs)
+ return True, sdk.user_info
+ except Exception as e:
+ print_colored(f"Error verifying API token: {str(e)}", "red")
+ return False, None
+
+def fetch_api_keys(api_url: str, api_token: str, user_data: Dict[str, Any]) -> Optional[str]:
+ """
+ Retrieves the available API keys for the user and allows selecting one.
+
+ Args:
+ api_url: Base URL of the Corebrain API
+ api_token: API token (exchanged from SSO token)
+ user_data: User data
+
+ Returns:
+ Selected API key or None if none is selected
+ """
+ if not user_data or 'id' not in user_data:
+ print_colored("Could not identify the user to retrieve their API keys.", "yellow")
+ return None
+
+ try:
+ # Ensure protocol in URL
+ if not api_url.startswith(("http://", "https://")):
+ api_url = "https://" + api_url
+
+ # Remove trailing slash if it exists
+ if api_url.endswith('/'):
+ api_url = api_url[:-1]
+
+ # Build endpoint to get API keys
+ endpoint = f"{api_url}/api/auth/api-keys"
+
+ print_colored(f"Requesting user's API keys...", "blue")
+
+ # Configure client with timeout and error handling
+ headers = {
+ "Authorization": f"Bearer {api_token}",
+ "Content-Type": "application/json"
+ }
+
+ response = http_session.get(endpoint, headers=headers)
+
+ # Verify response
+ if response.status_code == 200:
+ try:
+ api_keys_data = response.json()
+ # Verify response format
+ if not isinstance(api_keys_data, (list, dict)):
+ print_colored(f"Unexpected response format: {type(api_keys_data)}", "yellow")
+ return None
+
+ # Handle both direct list and dictionary with list
+ api_keys = api_keys_data if isinstance(api_keys_data, list) else api_keys_data.get("data", [])
+
+ if not api_keys:
+ print_colored("No API keys available for this user.", "yellow")
+ return None
+
+ print_colored(f"\nFound {len(api_keys)} API keys", "green")
+ print_colored("\n=== Available API Keys ===", "blue")
+
+ # Show available API keys
+ for i, key_info in enumerate(api_keys, 1):
+ key_id = key_info.get('id', 'No ID')
+ key_value = key_info.get('key', 'No value')
+ key_name = key_info.get('name', 'No name')
+ key_active = key_info.get('active')
+
+ # Show status with color
+ status_color = "green" if key_active else "red"
+ status_text = "Active" if key_active else "Inactive"
+
+ print(f"{i}. {key_name} - {print_colored(status_text, status_color, return_str=True)} (Value: {key_value})")
+
+ # Ask user to select an API key
+ while True:
+ try:
+ choice = input(f"\nSelect an API key (1-{len(api_keys)}) or press Enter to cancel: ").strip()
+
+ # Allow canceling and using API token
+ if not choice:
+ print_colored("No API key selected.", "yellow")
+ return None
+
+ choice_num = int(choice)
+ if 1 <= choice_num <= len(api_keys):
+ selected_key = api_keys[choice_num - 1]
+
+ # Verify if the key is active
+ if not selected_key.get('active'):
+ print_colored("⚠️ The selected API key is not active. Select another one.", "yellow")
+ continue
+
+ # Get information of the selected key
+ key_name = selected_key.get('name', 'Unknown')
+ key_value = selected_key.get('key', None)
+
+ if not key_value:
+ print_colored("⚠️ The selected API key does not have a valid value.", "yellow")
+ continue
+
+ print_colored(f"✅ You selected: {key_name}", "green")
+ print_colored("Wait while we assign the API key to your SDK...", "yellow")
+
+ return key_value
+ else:
+ print_colored("Invalid option. Try again.", "red")
+ except ValueError:
+ print_colored("Please enter a valid number.", "red")
+ except Exception as e:
+ print_colored(f"Error processing JSON response: {str(e)}", "red")
+ return None
+ else:
+ # Handle error by status code
+ error_message = f"Error retrieving API keys: {response.status_code}"
+
+ try:
+ error_data = response.json()
+ if "message" in error_data:
+ error_message += f" - {error_data['message']}"
+ elif "detail" in error_data:
+ error_message += f" - {error_data['detail']}"
+ except Exception:
+ # If we can't parse JSON, use the full text
+ error_message += f" - {response.text[:100]}..."
+
+ print_colored(error_message, "red")
+
+ # Try to identify common problems
+ if response.status_code == 401:
+ print_colored("The authentication token has expired or is invalid.", "yellow")
+ elif response.status_code == 403:
+ print_colored("You don't have permissions to access the API keys.", "yellow")
+ elif response.status_code == 404:
+ print_colored("The API keys endpoint doesn't exist. Verify the API URL.", "yellow")
+ elif response.status_code >= 500:
+ print_colored("Server error. Try again later.", "yellow")
+
+ return None
+
+ except httpx.RequestError as e:
+ print_colored(f"Connection error: {str(e)}", "red")
+ print_colored("Verify the API URL and your internet connection.", "yellow")
+ return None
+ except Exception as e:
+ print_colored(f"Unexpected error retrieving API keys: {str(e)}", "red")
+ return None
+
+def get_api_key_id_from_token(sso_token: str, api_token: str, api_url: str) -> Optional[str]:
+ """
+ Gets the ID of an API key from its token.
+
+ Args:
+ sso_token: SSO token
+ api_token: API token
+ api_url: API URL
+
+ Returns:
+ API key ID or None if it cannot be obtained
+ """
+ try:
+ # Endpoint to get information of the current user
+ endpoint = f"{api_url}/api/auth/api-keys/{api_token}"
+
+ headers = {
+ "Authorization": f"Bearer {api_token}",
+ "Content-Type": "application/json"
+ }
+
+ response = httpx.get(
+ endpoint,
+ headers=headers
+ )
+
+ print("API keys response: ", response.json())
+
+ if response.status_code == 200:
+ key_data = response.json()
+ key_id = key_data.get("id")
+ return key_id
+ else:
+ print_colored("⚠️ Could not find the API key ID", "yellow")
+ return None
+
+ except Exception as e:
+ print_colored(f"Error getting API key ID: {str(e)}", "red")
+ return None
+
+def exchange_sso_token_for_api_token(api_url: str, sso_token: str, user_data: Dict[str, Any]) -> Optional[str]:
+ """
+ Exchanges a Globodain SSO token for a Corebrain API token.
+
+ Args:
+ api_url: Base URL of the Corebrain API
+ sso_token: Globodain SSO token
+ user_data: User data
+
+ Returns:
+ API token or None if it fails
+ """
+ try:
+ # Ensure protocol in URL
+ if not api_url.startswith(("http://", "https://")):
+ api_url = "https://" + api_url
+
+ # Remove trailing slash if it exists
+ if api_url.endswith('/'):
+ api_url = api_url[:-1]
+
+ # Endpoint to exchange token
+ endpoint = f"{api_url}/api/auth/sso/token"
+
+ print_colored(f"Exchanging SSO token for API token...", "blue")
+
+ # Configure client with timeout and error handling
+ headers = {
+ 'Authorization': f'Bearer {sso_token}',
+ 'Content-Type': 'application/json'
+ }
+ body = {
+ "user_data": user_data
+ }
+
+ response = http_session.post(endpoint, json=body, headers=headers)
+
+ if response.status_code == 200:
+ try:
+ token_data = response.json()
+ api_token = token_data.get("access_token")
+
+ if not api_token:
+ print_colored("The response does not contain a valid API token", "red")
+ return None
+
+ print_colored("✅ API token successfully obtained", "green")
+ return api_token
+ except Exception as e:
+ print_colored(f"Error processing JSON response: {str(e)}", "red")
+ return None
+ else:
+ # Handle error by status code
+ error_message = f"Error exchanging token: {response.status_code}"
+
+ try:
+ error_data = response.json()
+ if "message" in error_data:
+ error_message += f" - {error_data['message']}"
+ elif "detail" in error_data:
+ error_message += f" - {error_data['detail']}"
+ except Exception:
+ # If we can't parse JSON, use the full text
+ error_message += f" - {response.text[:100]}..."
+
+ print_colored(error_message, "red")
+ return None
+
+ except httpx.RequestError as e:
+ print_colored(f"Connection error: {str(e)}", "red")
+ return None
+ except Exception as e:
+ print_colored(f"Unexpected error exchanging token: {str(e)}", "red")
+ return None
\ No newline at end of file
diff --git a/corebrain/cli/auth/sso.py b/corebrain/cli/auth/sso.py
new file mode 100644
index 0000000..4f97ce4
--- /dev/null
+++ b/corebrain/cli/auth/sso.py
@@ -0,0 +1,469 @@
+"""
+SSO Authentication for the CLI.
+"""
+import os
+import webbrowser
+import http.server
+import socketserver
+import threading
+import urllib.parse
+import time
+import json
+
+from typing import Tuple, Dict, Any, Optional
+
+from corebrain.cli.common import DEFAULT_API_URL, DEFAULT_SSO_URL, DEFAULT_PORT, SSO_CLIENT_ID, SSO_CLIENT_SECRET
+from corebrain.cli.utils import print_colored
+from corebrain.lib.sso.auth import GlobodainSSOAuth
+
+class TokenHandler(http.server.SimpleHTTPRequestHandler):
+ """
+ Handler for the local HTTP server that processes the SSO authentication callback.
+ """
+ def __init__(self, *args, **kwargs):
+ self.sso_auth = kwargs.pop('sso_auth', None)
+ self.result = kwargs.pop('result', {})
+ self.session_data = kwargs.pop('session_data', {})
+ self.auth_completed = kwargs.pop('auth_completed', None)
+ super().__init__(*args, **kwargs)
+
+ def do_GET(self):
+ # Parse the URL to get the parameters
+ parsed_path = urllib.parse.urlparse(self.path)
+
+ # Check if it's the callback path
+ if parsed_path.path == "/auth/sso/callback":
+ query = urllib.parse.parse_qs(parsed_path.query)
+
+ if "code" in query:
+ code = query["code"][0]
+
+ try:
+ # Exchange code for token using the sso_auth object
+ token_data = self.sso_auth.exchange_code_for_token(code)
+
+ if not token_data:
+ raise ValueError("Could not obtain the token")
+
+ # Save token in the result and session
+ access_token = token_data.get('access_token')
+ if not access_token:
+ raise ValueError("The received token does not contain an access_token")
+
+ # Updated: save as sso_token for clarity
+ self.result["sso_token"] = access_token
+ self.session_data['sso_token'] = token_data
+
+ # Get user information
+ user_info = self.sso_auth.get_user_info(access_token)
+ if user_info:
+ self.session_data['user'] = user_info
+ # Extract email to identify the user
+ if 'email' in user_info:
+ self.session_data['email'] = user_info['email']
+
+ # Signal that authentication has completed
+ self.auth_completed.set()
+
+ # Send a success response to the browser
+ self.send_response(200)
+ self.send_header("Content-type", "text/html")
+ self.end_headers()
+ success_html = """
+ <html>
+ <head><title>Corebrain - Authentication Completed</title></head>
+ <body>
+ <h1>Authentication Completed</h1>
+ <p>You have successfully logged in to Corebrain CLI.</p>
+ <p>You can close this window and return to the terminal.</p>
+ </body>
+ </html>
+ """
+ self.wfile.write(success_html.encode())
+ except Exception as e:
+ # If there's an error, show error message
+ self.send_response(400)
+ self.send_header("Content-type", "text/html")
+ self.end_headers()
+ error_html = f"""
+ <html>
+ <head><title>Corebrain - Authentication Error</title></head>
+ <body>
+ <h1>Authentication Error</h1>
+ <p>Error: {str(e)}</p>
+ <p>Please close this window and try again.</p>
+ </body>
+ </html>
+ """
+ self.wfile.write(error_html.encode())
+ else:
+ # If there's no code, it's an error
+ self.send_response(400)
+ self.send_header("Content-type", "text/html")
+ self.end_headers()
+ error_html = """
+ <html>
+ <head><title>Corebrain - Authentication Error</title></head>
+ <body>
+ <h1>Authentication Error</h1>
+ <p>Could not complete the authentication process.</p>
+ <p>Please close this window and try again.</p>
+ </body>
+ </html>
+ """
+ self.wfile.write(error_html.encode())
+ else:
+ # For any other path, show a 404 error
+ self.send_response(404)
+ self.end_headers()
+ self.wfile.write(b"Not Found")
+
+ def log_message(self, format, *args):
+ # Silence server logs
+ return
+
+def authenticate_with_sso(sso_url: str) -> Tuple[Optional[str], Optional[Dict[str, Any]]]:
+ """
+ Initiates an SSO authentication flow through the browser and uses the callback system.
+
+ Args:
+ sso_url: Base URL of the SSO service
+
+ Returns:
+ Tuple with (sso_token, user_data) or (None, None) if it fails
+ - sso_token: SSO access token obtained via the callback
+ - user_data: Authenticated user data
+ """
+
+ # Token to store the result
+ result = {"sso_token": None} # Renamed for clarity
+ auth_completed = threading.Event()
+ session_data = {}
+
+ # Find an available port
+ #port = get_free_port(DEFAULT_PORT)
+
+ # SSO client configuration
+ auth_config = {
+ 'GLOBODAIN_SSO_URL': sso_url or DEFAULT_SSO_URL,
+ 'GLOBODAIN_CLIENT_ID': SSO_CLIENT_ID,
+ 'GLOBODAIN_CLIENT_SECRET': SSO_CLIENT_SECRET,
+ 'GLOBODAIN_REDIRECT_URI': f"http://localhost:{DEFAULT_PORT}/auth/sso/callback",
+ 'GLOBODAIN_SUCCESS_REDIRECT': 'https://sso.globodain.com/cli/success'
+ }
+
+ sso_auth = GlobodainSSOAuth(config=auth_config)
+
+ # Factory to create TokenHandler instances with the desired parameters
+ def handler_factory(*args, **kwargs):
+ return TokenHandler(
+ *args,
+ sso_auth=sso_auth,
+ result=result,
+ session_data=session_data,
+ auth_completed=auth_completed,
+ **kwargs
+ )
+
+ # Start server in the background
+ server = socketserver.TCPServer(("", DEFAULT_PORT), handler_factory)
+ server_thread = threading.Thread(target=server.serve_forever)
+ server_thread.daemon = True
+ server_thread.start()
+
+ try:
+ # Build complete URL with protocol if missing
+ if sso_url and not sso_url.startswith(("http://", "https://")):
+ sso_url = "https://" + sso_url
+
+ # URL to start the SSO flow
+ login_url = sso_auth.get_login_url()
+ auth_url = login_url
+
+ print_colored(f"Opening browser for SSO authentication...", "blue")
+ print_colored(f"If the browser doesn't open automatically, visit:", "blue")
+ print_colored(f"{auth_url}", "bold")
+
+ # Try to open the browser
+ if not webbrowser.open(auth_url):
+ print_colored("Could not open the browser automatically.", "yellow")
+ print_colored(f"Please copy and paste the following URL into your browser:", "yellow")
+ print_colored(f"{auth_url}", "bold")
+
+ # Tell the user to wait
+ print_colored("\nWaiting for you to complete authentication in the browser...", "blue")
+
+ # Wait for authentication to complete (with timeout)
+ timeout_seconds = 60
+ start_time = time.time()
+
+ # We use a loop with better feedback
+ while not auth_completed.is_set() and (time.time() - start_time < timeout_seconds):
+ elapsed = int(time.time() - start_time)
+ if elapsed % 5 == 0: # Every 5 seconds we show a message
+ remaining = timeout_seconds - elapsed
+ #print_colored(f"Waiting for authentication... ({remaining}s remaining)", "yellow")
+
+ # Check every 0.5 seconds for better reactivity
+ auth_completed.wait(0.5)
+
+ # Verify if authentication was completed
+ if auth_completed.is_set():
+ print_colored("✅ SSO authentication completed successfully!", "green")
+ return result["sso_token"], session_data.get('user')
+ else:
+ print_colored(f"❌ Could not complete SSO authentication in {timeout_seconds} seconds.", "red")
+ print_colored("You can try again or use a token manually.", "yellow")
+ return None, None
+ except Exception as e:
+ print_colored(f"❌ Error during SSO authentication: {str(e)}", "red")
+ return None, None
+ finally:
+ # Stop the server
+ try:
+ server.shutdown()
+ server.server_close()
+ except Exception:
+ # If there's any error closing the server, we ignore it
+ pass
+
+def authenticate_with_sso_and_api_key_request(sso_url: str) -> Tuple[Optional[str], Optional[Dict[str, Any]], Optional[str]]:
+ """
+ Initiates an SSO authentication flow through the browser and uses the callback system.
+
+ Args:
+ sso_url: Base URL of the SSO service
+
+ Returns:
+ Tuple with (api_key, user_data, api_token) or (None, None, None) if it fails
+ - api_key: Selected API key to use with the SDK
+ - user_data: Authenticated user data
+ - api_token: API token obtained from SSO for general authentication
+ """
+ # Import inside the function to avoid circular dependencies
+ from corebrain.cli.auth.api_keys import fetch_api_keys, exchange_sso_token_for_api_token
+
+ # Token to store the result
+ result = {"sso_token": None} # Renamed for clarity
+ auth_completed = threading.Event()
+ session_data = {}
+
+ # Find an available port
+ #port = get_free_port(DEFAULT_PORT)
+
+ # SSO client configuration
+ auth_config = {
+ 'GLOBODAIN_SSO_URL': sso_url or DEFAULT_SSO_URL,
+ 'GLOBODAIN_CLIENT_ID': SSO_CLIENT_ID,
+ 'GLOBODAIN_CLIENT_SECRET': SSO_CLIENT_SECRET,
+ 'GLOBODAIN_REDIRECT_URI': f"http://localhost:{DEFAULT_PORT}/auth/sso/callback",
+ 'GLOBODAIN_SUCCESS_REDIRECT': 'https://sso.globodain.com/cli/success'
+ }
+
+ sso_auth = GlobodainSSOAuth(config=auth_config)
+
+ # Factory to create TokenHandler instances with the desired parameters
+ def handler_factory(*args, **kwargs):
+ return TokenHandler(
+ *args,
+ sso_auth=sso_auth,
+ result=result,
+ session_data=session_data,
+ auth_completed=auth_completed,
+ **kwargs
+ )
+
+ # Start server in the background
+ server = socketserver.TCPServer(("", DEFAULT_PORT), handler_factory)
+ server_thread = threading.Thread(target=server.serve_forever)
+ server_thread.daemon = True
+ server_thread.start()
+
+ try:
+ # Build complete URL with protocol if missing
+ if sso_url and not sso_url.startswith(("http://", "https://")):
+ sso_url = "https://" + sso_url
+
+ # URL to start the SSO flow
+ login_url = sso_auth.get_login_url()
+ auth_url = login_url
+
+ print_colored(f"Opening browser for SSO authentication...", "blue")
+ print_colored(f"If the browser doesn't open automatically, visit:", "blue")
+ print_colored(f"{auth_url}", "bold")
+
+ # Try to open the browser
+ if not webbrowser.open(auth_url):
+ print_colored("Could not open the browser automatically.", "yellow")
+ print_colored(f"Please copy and paste the following URL into your browser:", "yellow")
+ print_colored(f"{auth_url}", "bold")
+
+ # Tell the user to wait
+ print_colored("\nWaiting for you to complete authentication in the browser...", "blue")
+
+ # Wait for authentication to complete (with timeout)
+ timeout_seconds = 60
+ start_time = time.time()
+
+ # We use a loop with better feedback
+ while not auth_completed.is_set() and (time.time() - start_time < timeout_seconds):
+ elapsed = int(time.time() - start_time)
+ if elapsed % 5 == 0: # Every 5 seconds we show a message
+ remaining = timeout_seconds - elapsed
+ #print_colored(f"Waiting for authentication... ({remaining}s remaining)", "yellow")
+
+ # Check every 0.5 seconds for better reactivity
+ auth_completed.wait(0.5)
+
+ # Verify if authentication was completed
+ if auth_completed.is_set():
+ user_data = None
+ if 'user' in session_data:
+ user_data = session_data['user']
+
+ print_colored("✅ SSO authentication completed successfully!", "green")
+
+ # Get and select an API key
+ api_url = os.environ.get("COREBRAIN_API_URL", DEFAULT_API_URL)
+
+ # Now we use the SSO token to get an API token and then the API keys
+ # First we verify that we have a token
+ if result["sso_token"]:
+ api_token = exchange_sso_token_for_api_token(api_url, result["sso_token"], user_data)
+
+ if not api_token:
+ print_colored("⚠️ Could not obtain an API Token with the SSO Token", "yellow")
+ return None, None, None
+
+ # Now that we have the API Token, we get the available API Keys
+ api_key_selected = fetch_api_keys(api_url, api_token, user_data)
+
+ if api_key_selected:
+ # We return the selected api_key
+ return api_key_selected, user_data, api_token
+ else:
+ print_colored("⚠️ Could not obtain an API Key. Create a new one using the command", "yellow")
+ return None, user_data, api_token
+ else:
+ print_colored("❌ No valid token was obtained during authentication.", "red")
+ return None, None, None
+ else:
+ print_colored(f"❌ Could not complete SSO authentication in {timeout_seconds} seconds.", "red")
+ print_colored("You can try again or use a token manually.", "yellow")
+ return None, None, None
+ except Exception as e:
+ print_colored(f"❌ Error during SSO authentication: {str(e)}", "red")
+ return None, None, None
+ finally:
+ # Stop the server
+ try:
+ server.shutdown()
+ server.server_close()
+ except Exception:
+ # If there's any error closing the server, we ignore it
+ pass
+
+def save_api_token(api_token: str):
+ config_dir = os.path.join(os.path.expanduser("~"), ".corebrain")
+ os.makedirs(config_dir, exist_ok=True)
+
+ token_path = os.path.join(config_dir, "token.json")
+ with open(token_path, "w") as f:
+ json.dump({"api_token": api_token}, f)
+
+def load_api_token() -> Optional[str]:
+ token_path = os.path.join(os.path.expanduser("~"), ".corebrain", "token.json")
+ if os.path.exists(token_path):
+ with open(token_path, "r") as f:
+ return json.load(f).get("api_token")
+
+ return None
\ No newline at end of file
diff --git a/corebrain/cli/commands.py b/corebrain/cli/commands.py
new file mode 100644
index 0000000..ba12d91
--- /dev/null
+++ b/corebrain/cli/commands.py
@@ -0,0 +1,1186 @@
+"""
+Main commands for the Corebrain CLI.
+"""
+import argparse
+import os
+import sys
+import webbrowser
+import requests
+import random
+import string
+
+from typing import Optional, List
+
+from corebrain.cli.common import DEFAULT_API_URL, DEFAULT_SSO_URL, DEFAULT_PORT, SSO_CLIENT_ID, SSO_CLIENT_SECRET
+from corebrain.cli.auth.sso import authenticate_with_sso, authenticate_with_sso_and_api_key_request, load_api_token, save_api_token
+from corebrain.cli.config import configure_sdk, get_api_credential
+from corebrain.cli.utils import print_colored
+from corebrain.config.manager import ConfigManager
+from corebrain.lib.sso.auth import GlobodainSSOAuth
+
+def main_cli(argv: Optional[List[str]] = None) -> int:
+ """
+ Main entry point for the Corebrain CLI.
+
+ Args:
+ argv: List of command line arguments (defaults to sys.argv[1:])
+
+ Returns:
+ Exit code (0 for success, other value for error)
+ """
+
+ # Package version
+ __version__ = "0.2.0"
+
+ try:
+ print_colored("Corebrain CLI started. Version ", __version__, "blue")
+
+ if argv is None:
+ argv = sys.argv[1:]
+
+ # Functions
+ def authentication_api_token():
+ sso_url = os.environ.get("COREBRAIN_SSO_URL") or DEFAULT_SSO_URL
+ api_key_selected, user_data, api_token = authenticate_with_sso_and_api_key_request(sso_url)
+
+ if api_token:
+ save_api_token(api_token)
+ print_colored("✅ API token saved.", "green")
+ print_colored("✅ Returning User data.", "green")
+ print_colored(f"{user_data}", "blue")
+ return api_token, user_data
+ else:
+ print_colored("❌ Could not authenticate with SSO.", "red")
+ return None, None
+
+ def authentication():
+ sso_url = os.environ.get("COREBRAIN_SSO_URL") or DEFAULT_SSO_URL
+ sso_token, sso_user = authenticate_with_sso(sso_url)
+ if sso_token:
+ try:
+ print_colored("✅ Returning SSO Token.", "green")
+ print_colored(f"{sso_token}", "blue")
+ print_colored("✅ Returning User data.", "green")
+ print_colored(f"{sso_user}", "blue")
+
+ # Saving api token
+ save_api_token(sso_token)
+ print_colored("✅ API token saved.", "green")
+
+ return sso_token, sso_user
+
+ except Exception as e:
+ print_colored("❌ Could not return SSO Token or SSO User data.", "red")
+ return sso_token, sso_user
+
+ else:
+ print_colored("❌ Could not authenticate with SSO.", "red")
+ return None, None
+
+ def authentication_with_api_key_return():
+ sso_url = os.environ.get("COREBRAIN_SSO_URL") or DEFAULT_SSO_URL
+ api_key_selected, user_data, api_token = authenticate_with_sso_and_api_key_request(sso_url)
+
+ if api_token:
+ try:
+ print_colored("✅ User authenticated and SDK is now connected to API.", "green")
+ print_colored("✅ Returning User data.", "green")
+ print_colored(f"{user_data}", "blue")
+ return api_key_selected, user_data, api_token
+
+ except Exception as e:
+ print_colored("❌ Could not return SSO Token or SSO User data.", "red")
+ return api_key_selected, user_data, api_token
+
+ else:
+ print_colored("❌ Could not authenticate with SSO.", "red")
+ return None, None, None
+
+ # Argument parser configuration
+ parser = argparse.ArgumentParser(description="Corebrain SDK CLI")
+
+ # Arguments for development
+ parser.add_argument("--version", action="store_true", help="Show SDK version")
+ parser.add_argument("--check-status",action="store_true",help="Checks status of task")
+ parser.add_argument("--authentication", action="store_true", help="Authenticate with SSO")
+ parser.add_argument("--test-auth", action="store_true", help="Test SSO authentication system") # Is this command really useful?
+ parser.add_argument("--test-connection",action="store_true",help="Tests the connection to the Corebrain API using the provide credentials")
+ parser.add_argument("--create-api-key", action="store_true", help="Create a new API Key")
+ parser.add_argument("--key-name", help="Sets name of the new API Key")
+ parser.add_argument("--key-level", choices=["read", "write", "admin"], default="read", help="Specifies access level for the new API Key")
+
+ # Arguments to use the SDK
+ parser.add_argument("--create-user", action="store_true", help="Create an user and API Key by default")
+ parser.add_argument("--configure", action="store_true", help="Configure the Corebrain SDK")
+ parser.add_argument("--list-configs", action="store_true", help="List available configurations")
+ parser.add_argument("--show-schema", action="store_true", help="Display database schema for a configuration")
+ parser.add_argument("--whoami",action="store_true",help="Display information about the current user")
+ parser.add_argument("--gui", action="store_true", help="Check setup and launch the web interface")
+
+ args = parser.parse_args(argv)
+
+ # Common variables
+ api_url = os.environ.get("COREBRAIN_API_URL", DEFAULT_API_URL)
+ sso_url = os.environ.get("COREBRAIN_SSO_URL", DEFAULT_SSO_URL)
+
+ ## ** For development ** ##
+ if args.version:
+ """
+ Display the current version of the Corebrain SDK.
+
+ This command shows the version of the installed Corebrain SDK package.
+ It attempts to get the version from the package metadata first, and if that fails,
+ it falls back to the hardcoded version in the CLI module.
+
+ Usage: corebrain --version
+
+ Example output:
+ Corebrain SDK version 0.2.0
+ """
+ try:
+ from importlib.metadata import version
+ sdk_version = version("corebrain")
+ print(f"Corebrain SDK version {sdk_version}")
+ except Exception:
+ print(f"Corebrain SDK version {__version__}")
+ return 0
+
+ if args.check_status:
+ """
+ If you're in development mode, check that everything needed for developing
+ code and running tests or other functions is accessible:
+ - The API server is running
+ - Redis is running on port 6379
+ - The SSO server is reachable (sso.globodain.com)
+ - MongoDB is running on port 27017
+ - All required libraries are installed:
+
+ httpx>=0.23.0
+ pymongo>=4.3.0
+ psycopg2-binary>=2.9.5
+ mysql-connector-python>=8.0.31
+ sqlalchemy>=2.0.0
+ cryptography>=39.0.0
+ pydantic>=1.10.0
+
+
+ If you're in production mode, check that:
+
+ - The API server is reachable (api.etedata.com)
+ - The SSO server is reachable (sso.globodain.com)
+ - All required libraries are installed (same list as above)
+
+ """
+
+ import socket
+ import subprocess
+ import importlib.util
+
+ def check_port(host, port, service_name):
+ """Check if a service is running on a specific port"""
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.settimeout(3)
+ result = sock.connect_ex((host, port))
+ sock.close()
+ if result == 0:
+ print_colored(f"✅ {service_name} is running on {host}:{port}", "green")
+ return True
+ else:
+ print_colored(f"❌ {service_name} is not accessible on {host}:{port}", "red")
+ return False
+ except Exception as e:
+ print_colored(f"❌ Error checking {service_name}: {str(e)}", "red")
+ return False
+
+ def check_url(url, service_name):
+ """Check if a service is accessible via HTTP"""
+ try:
+ response = requests.get(url, timeout=10)
+ if response.status_code < 500:
+ print_colored(f"✅ {service_name} is accessible at {url}", "green")
+ return True
+ else:
+ print_colored(f"❌ {service_name} returned status {response.status_code} at {url}", "red")
+ return False
+ except Exception as e:
+ print_colored(f"❌ {service_name} is not accessible at {url}: {str(e)}", "red")
+ return False
+
+ def check_library(library_name, min_version):
+ """Check if a library is installed with minimum version"""
+ # Mapping of PyPI package names to import names
+ package_import_mapping = {
+ 'psycopg2-binary': 'psycopg2',
+ 'mysql-connector-python': 'mysql.connector',
+ 'httpx': 'httpx',
+ 'pymongo': 'pymongo',
+ 'sqlalchemy': 'sqlalchemy',
+ 'cryptography': 'cryptography',
+ 'pydantic': 'pydantic'
+ }
+
+ package_name = library_name.split('>=')[0]
+ import_name = package_import_mapping.get(package_name, package_name)
+
+ try:
+ # Check if the module can be imported
+ if '.' in import_name:
+ # For modules like mysql.connector
+ parts = import_name.split('.')
+ spec = importlib.util.find_spec(parts[0])
+ if spec is None:
+ print_colored(f"❌ {package_name} is not installed", "red")
+ return False
+ # Try to import the full module path
+ try:
+ __import__(import_name)
+ except ImportError:
+ print_colored(f"❌ {package_name} is not installed", "red")
+ return False
+ else:
+ spec = importlib.util.find_spec(import_name)
+ if spec is None:
+ print_colored(f"❌ {package_name} is not installed", "red")
+ return False
+
+ # Try to get version using different methods
+ try:
+ from importlib.metadata import version
+ # Try with the package name first
+ try:
+ installed_version = version(package_name)
+ except Exception:
+ # If that fails, try with common alternative names
+ alternative_names = {
+ 'psycopg2-binary': ['psycopg2', 'psycopg2-binary'],
+ 'mysql-connector-python': ['mysql-connector-python', 'mysql-connector']
+ }
+
+ installed_version = None
+ for alt_name in alternative_names.get(package_name, [package_name]):
+ try:
+ installed_version = version(alt_name)
+ break
+ except Exception:
+ continue
+
+ if installed_version is None:
+ raise Exception("Version not found")
+
+ print_colored(f"✅ {package_name} {installed_version} is installed", "green")
+ return True
+
+ except Exception:
+ # If version check fails, at least we know the module can be imported
+ print_colored(f"✅ {package_name} is installed (version check failed)", "yellow")
+ return True
+
+ except Exception as e:
+ print_colored(f"❌ Error checking {package_name}: {str(e)}", "red")
+ return False
+
+ # Determine if in development or production mode
+ api_url = os.environ.get("COREBRAIN_API_URL") or DEFAULT_API_URL
+ is_development = "localhost" in api_url or "127.0.0.1" in api_url or api_url == DEFAULT_API_URL
+
+ print_colored("🔍 Checking system status...", "blue")
+ print_colored(f"Mode: {'Development' if is_development else 'Production'}", "blue")
+ print_colored(f"API URL: {api_url}", "blue")
+ print()
+
+ all_checks_passed = True
+
+ # Required libraries for both modes
+ required_libraries = [
+ "httpx>=0.23.0",
+ "pymongo>=4.3.0",
+ "psycopg2-binary>=2.9.5",
+ "mysql-connector-python>=8.0.31",
+ "sqlalchemy>=2.0.0",
+ "cryptography>=39.0.0",
+ "pydantic>=1.10.0"
+ ]
+
+ # Check libraries
+ print_colored("📚 Checking required libraries:", "blue")
+ for library in required_libraries:
+ if not check_library(library, library.split('>=')[1] if '>=' in library else None):
+ all_checks_passed = False
+ print()
+
+ # Check services based on mode
+ if is_development:
+ print_colored("🔧 Development mode - Checking local services:", "blue")
+
+ # Check local API server
+ if not check_url(api_url, "API Server"):
+ all_checks_passed = False
+
+ # Check Redis
+ if not check_port("localhost", 6379, "Redis"):
+ all_checks_passed = False
+
+ # Check MongoDB
+ if not check_port("localhost", 27017, "MongoDB"):
+ all_checks_passed = False
+
+ else:
+ print_colored("🌐 Production mode - Checking remote services:", "blue")
+
+ # Check production API server
+ if not check_url("https://api.etedata.com", "API Server (Production)"):
+ all_checks_passed = False
+
+ # Check SSO service for both modes
+ sso_url = os.environ.get("COREBRAIN_SSO_URL") or DEFAULT_SSO_URL
+ if not check_url(sso_url, "SSO Server"):
+ all_checks_passed = False
+
+ print()
+ if all_checks_passed:
+ print_colored("✅ All system checks passed!", "green")
+ return 0
+ else:
+ print_colored("❌ Some system checks failed. Please review the issues above.", "red")
+ return 1
+
+ if args.authentication:
+ """
+ Perform SSO authentication and display the obtained tokens and user data.
+
+ This command initiates the SSO (Single Sign-On) authentication flow through the browser.
+ It opens a browser window for the user to authenticate with their Globodain SSO credentials
+ and returns the authentication token and user information.
+
+ This is primarily used for testing authentication or when you need to see the raw
+ authentication data. For normal usage, prefer --login which also obtains API keys.
+
+ Usage: corebrain --authentication [--sso-url <URL>]
+
+ Returns:
+ - SSO authentication token
+ - User profile data (name, email, etc.)
+
+ Note: This command only authenticates but doesn't save credentials for future use.
+ """
+ authentication_api_token()
+ return 0
+
+ if args.test_auth:
+ """
+ Test the SSO (Single Sign-On) authentication system.
+
+ This command performs a comprehensive test of the SSO authentication flow
+ without saving any credentials or performing any actual operations. It's useful
+ for diagnosing authentication issues and verifying that the SSO system is working.
+
+ The test process:
+ 1. Configures the SSO authentication client
+ 2. Generates a login URL
+ 3. Opens the browser for user authentication
+ 4. Waits for user to complete the authentication process
+ 5. Reports success or failure
+
+ Usage: corebrain --test-auth [--sso-url <URL>]
+
+ What it tests:
+ - SSO server connectivity
+ - Client configuration validity
+ - Authentication flow completion
+ - Browser integration
+
+ Note: This is a diagnostic tool and doesn't save any authentication data.
+ For actual login, use --login instead.
+ """
+ sso_url = os.environ.get("COREBRAIN_SSO_URL") or DEFAULT_SSO_URL
+
+ print_colored("Testing SSO authentication...", "blue")
+
+ # Authentication configuration
+ auth_config = {
+ 'GLOBODAIN_SSO_URL': sso_url,
+ 'GLOBODAIN_CLIENT_ID': SSO_CLIENT_ID,
+ 'GLOBODAIN_CLIENT_SECRET': SSO_CLIENT_SECRET,
+ 'GLOBODAIN_REDIRECT_URI': f"http://localhost:{DEFAULT_PORT}/auth/sso/callback",
+ 'GLOBODAIN_SUCCESS_REDIRECT': f"http://localhost:{DEFAULT_PORT}/auth/sso/callback"
+ }
+
+ try:
+ # Instantiate authentication client
+ sso_auth = GlobodainSSOAuth(config=auth_config)
+
+ # Get login URL
+ login_url = sso_auth.get_login_url()
+
+ print_colored(f"Login URL: {login_url}", "blue")
+ print_colored("Opening browser for login...", "blue")
+
+ # Open browser
+ webbrowser.open(login_url)
+
+ print_colored("Please complete the login process in the browser.", "blue")
+ input("\nPress Enter when you've completed the process or to cancel...")
+
+ print_colored("✅ SSO authentication test completed!", "green")
+ return 0
+ except Exception as e:
+ print_colored(f"❌ Error during test: {str(e)}", "red")
+ return 1
+
+ if args.test_connection:
+ """
+ Test the connection to the Corebrain API using the provided credentials.
+
+ This command verifies that the Corebrain SDK can successfully connect to the
+ Corebrain API server using the provided API key or token. It checks if the
+ API is reachable and responds correctly.
+
+ Usage: corebrain --test-connection [--api-key <KEY>] [--api-url <URL>]
+ """
+ # Test connection to the Corebrain API
+ api_url = os.environ.get("COREBRAIN_API_URL", DEFAULT_API_URL)
+ sso_url = os.environ.get("COREBRAIN_SSO_URL", DEFAULT_SSO_URL)
+
+ try:
+ # Retrieve API credentials
+ token_arg = args.api_key if args.api_key else args.token
+ api_key, user_data, api_token = get_api_credential(token_arg, sso_url)
+ except Exception as e:
+ # Handle errors while retrieving credentials
+ print_colored(f"Error while retrieving API credentials: {e}", "red")
+ return 1
+
+ if not api_key:
+ # If no API key is provided, print an error message
+ # and return an error code
+ print_colored(
+ "Error: An API key is required. You can generate one at dashboard.corebrain.com.",
+ "red"
+ )
+ return 1
+
+ try:
+ # Test the connection
+ # Import the test_connection function from the schema_file module
+ # and call it with the provided API key and URL
+ from corebrain.db.schema_file import test_connection
+ test_connection(api_key, api_url)
+ print_colored("Successfully connected to Corebrain API.", "green")
+ except Exception as e:
+ print_colored(f"Failed to connect to Corebrain API: {e}", "red")
+ return 1
+
+ ## ** SDK ** ##
+
+ if args.create_user:
+ """
+ Create a new user account and generate an associated API Key.
+
+ This command performs a complete user registration process:
+ 1. Authenticates the user through SSO (Single Sign-On)
+ 2. Creates a new user account in the Corebrain system using SSO data
+ 3. Automatically generates an API Key for the new user
+
+ The user can choose to use their SSO password or create a new password
+ specifically for their Corebrain account. If using SSO password fails,
+ a random secure password will be generated.
+
+ Usage: corebrain --create-user [--api-url <URL>] [--sso-url <URL>]
+
+ Interactive prompts:
+ - SSO authentication (browser-based)
+ - Password choice (use SSO password or create new)
+ - Password confirmation (if creating new)
+
+ Requirements:
+ - Valid Globodain SSO account
+ - Internet connection for API communication
+
+ On success: Creates user account and displays confirmation
+ On failure: Shows specific error message
+ """
+ sso_token, sso_user = authentication() # Authenticate the user via SSO
+
+ if sso_token and sso_user:
+ print_colored("✅ Enter to create an user and API Key.", "green")
+ save_api_token(sso_token)
+ print_colored("✅ SSO token saved.", "green")
+
+ # Get API URL from environment or use default
+ api_url = os.environ.get("COREBRAIN_API_URL", DEFAULT_API_URL)
+
+ """
+ Create user data with SSO information.
+ If the user wants to use a different password than their SSO account,
+ they can specify it here.
+ """
+ # Ask if user wants to use SSO password or create a new one
+ use_sso_password = input("Do you want to use your SSO password? (y/n): ").lower().strip() == 'y'
+
+ if use_sso_password:
+ random_password = ''.join(random.choices(string.ascii_letters + string.digits, k=12))
+ password = sso_user.get("password", random_password)
+ else:
+ while True:
+ password = input("Enter new password: ").strip()
+ if len(password) >= 8:
+ break
+ print_colored("Password must be at least 8 characters long", "yellow")
+
+ user_data = {
+ "email": sso_user["email"],
+ "name": f"{sso_user['first_name']} {sso_user['last_name']}",
+ "password": password
+ }
+
+ try:
+ # Make the API request
+ response = requests.post(
+ f"{api_url}/api/auth/users",
+ json=user_data,
+ headers={
+ "Authorization": f"Bearer {sso_token}",
+ "Content-Type": "application/json"
+ }
+ )
+
+ # Check if the request was successful
+ print("response API: ", response)
+ if response.status_code == 200:
+ print_colored("✅ User and API Key created successfully!", "green")
+ return 0
+ else:
+ print_colored(f"❌ Error creating user: {response.text}", "red")
+ return 1
+
+ except requests.exceptions.RequestException as e:
+ print_colored(f"❌ Error connecting to API: {str(e)}", "red")
+ return 1
+
+ else:
+ print_colored("❌ Could not create the user or the API KEY.", "red")
+ return 1
+
+ if args.configure or args.list_configs or args.show_schema:
+ """
+ Configure, list, or show the schema of a configured database.
+
+ The same authentication flow is reused for all three operations.
+ """
+
+ api_key_selected, user_data, api_token = authentication_with_api_key_return() # Authenticate the user via SSO and select an API key
+
+ if not api_key_selected:
+ print_colored("Error: An API Key is required. You can generate one at dashboard.etedata.com", "red")
+ print_colored("Or use the 'corebrain --create-api-key' command to create a new one using CLI.", "blue")
+ return 1
+
+ from corebrain.db.schema_file import show_db_schema
+
+ # Execute the selected operation
+ if args.configure:
+ """
+ Launch the comprehensive SDK configuration wizard.
+
+ This is the main configuration command that guides you through setting up
+ a complete database connection for use with the Corebrain SDK. The wizard
+ walks you through each step of the configuration process interactively.
+
+ Configuration phases:
+ 1. Authentication verification (already completed)
+ 2. Database type selection (SQL or MongoDB)
+ 3. Database engine selection (PostgreSQL, MySQL, SQLite, etc.)
+ 4. Connection parameters input (host, port, credentials, etc.)
+ 5. Database connection testing and validation
+ 6. Schema accessibility configuration (excluded tables/collections)
+ 7. Configuration saving and server synchronization
+ 8. Optional natural language query testing
+
+ Usage: corebrain --configure [--api-key <KEY>] [--api-url <URL>] [--sso-url <URL>]
+
+ Interactive prompts guide you through:
+ - Database type (sql/mongodb)
+ - Engine selection (postgresql, mysql, sqlite, etc.)
+ - Connection details (host, port, database name)
+ - Authentication credentials (username, password)
+ - Connection string (alternative to individual parameters)
+ - Table/collection exclusions for security
+ - Configuration naming and saving
+
+ Supported databases:
+ SQL:
+ - PostgreSQL (local and remote)
+ - MySQL/MariaDB (local and remote)
+ - SQLite (file-based and in-memory)
+
+ NoSQL:
+ - MongoDB (local and remote, with or without authentication)
+
+ Security features:
+ - Encrypted local storage of configurations
+ - Secure credential handling
+ - Table/collection access control
+ - Server synchronization with encrypted transport
+
+ After successful configuration:
+ - Configuration is saved locally with encryption
+ - Synchronization with Corebrain API server
+ - Ready to use with SDK (init function)
+ - Available for natural language queries
+
+ Example usage after configuration:
+ ```python
+ from corebrain import init
+
+ client = init(
+ api_key="your_api_key",
+ config_id="generated_config_id"
+ )
+
+ result = client.ask("How many users are in the database?")
+ ```
+
+ Prerequisites:
+ - Valid API key (obtain via --login or --api-key)
+ - Network access to target database
+ - Appropriate database permissions for schema reading
+ - Internet connectivity for API synchronization
+ """
+ configure_sdk(api_token, api_key_selected, api_url, sso_url, user_data)
+
+ elif args.list_configs:
+ """
+ List and manage all saved database configurations for your API key.
+
+ This command provides an interactive interface to view and manage all
+ database configurations associated with your API key. It serves as a
+ central hub for configuration management operations.
+
+ Main features:
+ - View all saved configurations with details
+ - Interactive selection and management
+ - Multiple management operations per configuration
+ - Safe deletion with confirmation prompts
+ - Configuration validation and testing
+ - Import/export capabilities
+
+ Usage: corebrain --list-configs [--api-key <KEY>]
+
+ Available operations for each configuration:
+ 1. Show Schema: Display detailed database structure
+ - Tables/collections list
+ - Column details and types
+ - Indexes and relationships
+ - Safe read-only operation
+
+ 2. Validate Config: Comprehensive configuration validation
+ - Structure and format verification
+ - Database connectivity testing
+ - Permission and access verification
+ - Error reporting and diagnostics
+
+ 3. Remove Config: Safe configuration deletion
+ - Confirmation prompts
+ - Local storage cleanup
+ - Server synchronization
+ - Irreversible operation warning
+
+ 4. Modify Config: Update existing configuration
+ - Interactive parameter editing
+ - Connection parameter updates
+ - Excluded tables management
+ - Automatic validation after changes
+
+ 5. Export Config: Backup configuration to file
+ - JSON format export
+ - Credential handling options
+ - Shareable format creation
+ - Backup and migration support
+
+ 6. Import Config: Load configuration from file
+ - JSON file import
+ - Validation before saving
+ - Conflict resolution
+ - Batch import support
+
+ 7. Configure New: Launch configuration wizard
+ - Full setup process
+ - Database connection setup
+ - Testing and validation
+ - Save new configuration
+
+ Information displayed for each configuration:
+ - Configuration ID (unique identifier)
+ - Database type and engine
+ - Connection details (host, database name)
+ - Creation and last modified dates
+ - Validation status
+ - Usage statistics
+
+ Interactive navigation:
+ - Arrow keys or numbers for selection
+ - Enter to confirm operations
+ - ESC or 'q' to exit
+ - Help available with '?' key
+
+ Security considerations:
+ - Configurations stored with encryption
+ - Sensitive data masked in display
+ - Secure credential handling
+ - Server synchronization with HTTPS
+
+ Use cases:
+ - Review existing database connections
+ - Maintain multiple database configurations
+ - Troubleshoot connection issues
+ - Backup and restore configurations
+ - Share configurations between environments
+ - Clean up unused configurations
+
+ Prerequisites:
+ - Valid API key for authentication
+ - Internet connectivity for server operations
+ - Appropriate permissions for configuration management
+
+ Note: This command provides a safe environment for configuration
+ management with confirmation prompts for destructive operations.
+ """
+ manager = ConfigManager()
+ manager.list_configs(api_key_selected, user_data, api_token)
+
+ elif args.show_schema:
+ """
+ Display the schema of a configured database without connecting through the SDK.
+
+ This command allows you to explore the structure of a database by showing
+ detailed information about tables, columns, indexes, and relationships.
+ It's useful for understanding the database structure before writing queries.
+
+ The command can work in two ways:
+ 1. With a saved configuration (using --config-id)
+ 2. By prompting you to select from available configurations
+
+ Usage: corebrain --show-schema [--config-id <ID>]
+
+ Information displayed:
+ - Database type and engine
+ - List of all tables/collections
+ - Column details (name, type, constraints)
+ - Primary keys and foreign keys
+ - Indexes and their properties
+ - Table relationships and dependencies
+
+ Supported databases:
+ - SQL: PostgreSQL, MySQL, SQLite
+ - NoSQL: MongoDB
+
+ Note: This command only reads schema information and doesn't modify
+ the database in any way. It's safe to run on production databases.
+ """
+ show_db_schema(api_key_selected, args.config_id, api_url)
+
+ return 0
+
+ # TODO: move the commented-out validate-config and export-config blocks below
+ # into ConfigManager (inside the --list-configs command)
+
+ # Handle validate-config and export-config commands
+ #if args.validate_config:
+ """
+ Validate a saved configuration without executing any operations.
+
+ This command performs comprehensive validation of a database configuration
+ to ensure it's correctly formatted and all required parameters are present.
+ It checks the configuration syntax, required fields, and optionally tests
+ the database connection.
+
+ Validation checks performed:
+ 1. Configuration format and structure
+ 2. Required fields presence (type, engine, credentials)
+ 3. Field value validity (ports, hostnames, database names)
+ 4. Database connection test (optional)
+ 5. Authentication and permissions verification
+
+ Usage: corebrain --validate-config --config-id <ID> [--api-key <KEY>]
+
+ Validation levels:
+ - Structure: Validates configuration format and required fields
+ - Connection: Tests actual database connectivity
+ - Permissions: Verifies database access permissions
+ - Schema: Checks if the database schema can be read
+
+ Exit codes:
+ - 0: Configuration is valid
+ - 1: Configuration has errors
+
+ Use cases:
+ - Verify configuration before deployment
+ - Troubleshoot connection issues
+ - Validate imported configurations
+ - Check configuration after database changes
+
+ Note: This command requires a valid API key to access saved configurations.
+ """
+ # if not args.config_id:
+ # print_colored("Error: --config-id is required for validation", "red")
+ # return 1
+
+ # Get credentials
+ # api_url = os.environ.get("COREBRAIN_API_URL") or DEFAULT_API_URL
+ # sso_url = os.environ.get("COREBRAIN_SSO_URL") or DEFAULT_SSO_URL
+ # token_arg = args.api_key if args.api_key else args.token
+ # api_key, user_data, api_token = get_api_credential(token_arg, sso_url)
+
+ # if not api_key:
+ # print_colored("Error: An API Key is required. Use --api-key or login via --login", "red")
+ # return 1
+
+ # Validate the configuration
+ # try:
+ # config_manager = ConfigManager()
+ # config = config_manager.get_config(api_key, args.config_id)
+
+ # if not config:
+ # print_colored(f"Configuration with ID '{args.config_id}' not found", "red")
+ # return 1
+
+ # print_colored(f"✅ Validating configuration: {args.config_id}", "blue")
+
+ # Create a temporary Corebrain instance to validate
+ # from corebrain.core.client import Corebrain
+ # try:
+ # temp_client = Corebrain(
+ # api_key=api_key,
+ # db_config=config,
+ # skip_verification=True
+ # )
+ # print_colored("✅ Configuration validation passed!", "green")
+ # print_colored(f"Database type: {config.get('type', 'Unknown')}", "blue")
+ # print_colored(f"Engine: {config.get('engine', 'Unknown')}", "blue")
+ # return 0
+ # except Exception as validation_error:
+ # print_colored(f"❌ Configuration validation failed: {str(validation_error)}", "red")
+ # return 1
+
+ # except Exception as e:
+ # print_colored(f"❌ Error during validation: {str(e)}", "red")
+ # return 1
+
+ #if args.export_config:
+ """
+ Export a saved configuration to a JSON file.
+
+ This command exports a database configuration from the local storage
+ to a JSON file that can be shared, backed up, or imported on another system.
+ The exported file contains all connection parameters and settings needed
+ to recreate the configuration.
+
+ The export process:
+ 1. Retrieves the specified configuration from local storage
+ 2. Decrypts sensitive information (if encrypted)
+ 3. Formats the configuration as readable JSON
+ 4. Saves to the specified output file
+ 5. Optionally removes sensitive data for sharing
+
+ Usage: corebrain --export-config --config-id <ID> [--output-file <PATH>] [--api-key <KEY>]
+
+ Options:
+ --config-id: ID of the configuration to export (required)
+ --output-file: Path for the exported file (default: config_<config_id>.json)
+ --remove-credentials: Remove sensitive data for sharing (optional)
+ --pretty-print: Format JSON with indentation for readability
+
+ Exported data includes:
+ - Database connection parameters
+ - Engine and type information
+ - Configuration metadata
+ - Excluded tables/collections list
+ - Custom settings and preferences
+
+ Security considerations:
+ - Exported files may contain sensitive credentials
+ - Use --remove-credentials flag when sharing configurations
+ - Store exported files in secure locations
+ - Consider encrypting exported files for transmission
+
+ Use cases:
+ - Backup configurations before changes
+ - Share configurations between team members
+ - Migrate configurations to different environments
+ - Create configuration templates
+ - Document database connection settings
+ """
+ # if not args.config_id:
+ # print_colored("Error: --config-id is required for export", "red")
+ # return 1
+
+ # Get credentials
+ # api_url = os.environ.get("COREBRAIN_API_URL") or DEFAULT_API_URL
+ # sso_url = os.environ.get("COREBRAIN_SSO_URL") or DEFAULT_SSO_URL
+ # token_arg = args.api_key if args.api_key else args.token
+ # api_key, user_data, api_token = get_api_credential(token_arg, sso_url)
+
+ # if not api_key:
+ # print_colored("Error: An API Key is required. Use --api-key or login via --login", "red")
+ # return 1
+
+ # Export the configuration
+ # try:
+ # config_manager = ConfigManager()
+ # config = config_manager.get_config(api_key, args.config_id)
+
+ # if not config:
+ # print_colored(f"Configuration with ID '{args.config_id}' not found", "red")
+ # return 1
+
+ # Generate output filename if not provided
+ # output_file = getattr(args, 'output_file', None) or f"config_{args.config_id}.json"
+
+ # Export to file
+ # import json
+ # with open(output_file, 'w', encoding='utf-8') as f:
+ # json.dump(config, f, indent=2, default=str)
+
+ # print_colored(f"✅ Configuration exported to: {output_file}", "green")
+ # return 0
+
+ # except Exception as e:
+ # print_colored(f"❌ Error exporting configuration: {str(e)}", "red")
+ # return 1
+
+
+ if args.whoami:
+ """
+ Display information about the currently authenticated user.
+
+ This command shows detailed information about the user associated with the
+ current authentication credentials. It's similar to the Unix 'whoami' command
+ but for the Corebrain system.
+
+ The command attempts to retrieve user data using the following credential sources
+ (in order of priority):
+ 1. API key provided via --api-key argument
+ 2. Token provided via --token argument
+ 3. COREBRAIN_API_KEY environment variable
+ 4. COREBRAIN_API_TOKEN environment variable
+ 5. SSO authentication (if no other credentials found)
+
+ Usage: corebrain --whoami [--api-key <KEY>] [--token <TOKEN>] [--sso-url <URL>]
+
+ Information displayed:
+ - User ID and email
+ - Name and profile details
+ - Account creation and last login dates
+ - Associated roles and permissions
+ - Any other profile metadata from SSO
+
+ Use cases:
+ - Verify which user account is currently active
+ - Debug authentication issues
+ - Check user permissions and profile data
+ - Confirm successful login
+
+ Note: Requires valid authentication credentials to work.
+ """
+ try:
+ # Download user data
+ sso_url = os.environ.get("COREBRAIN_SSO_URL") or DEFAULT_SSO_URL
+ token_arg = args.api_key if args.api_key else args.token
+
+ # Use saved user data
+ api_key, user_data, api_token = get_api_credential(token_arg, sso_url)
+ # Print user data
+ if user_data:
+ print_colored("User Data:", "blue")
+ for k, v in user_data.items():
+ print(f"{k}: {v}")
+ else:
+ print_colored("❌ Can't find data about user, be sure that you are logged into --login.", "red")
+ return 1
+
+ return 0
+ except Exception as e:
+ print_colored(f"❌ Error when downloading data about user {str(e)}", "red")
+ return 1
+
+ if args.gui:
+ """
+ Check setup and launch the web-based graphical user interface.
+
+ This command sets up and launches a complete web-based GUI for the Corebrain SDK,
+ providing a user-friendly alternative to the command-line interface. The GUI includes
+ both frontend and backend components and integrates with the Corebrain API.
+
+ Components launched:
+ 1. React Frontend (client) - User interface running on port 5173
+ 2. Express Backend (server) - API server for the frontend
+ 3. Corebrain API wrapper (C#) - Additional API integration
+
+ Setup process:
+ 1. Validates required directory structure
+ 2. Installs Node.js dependencies if not present
+ 3. Configures development tools (Vite, TypeScript)
+ 4. Starts all services concurrently
+ 5. Opens browser to the GUI automatically
+
+ Usage: corebrain --gui
+
+ Directory structure required:
+ - CLI-UI/client/ (React frontend)
+ - CLI-UI/server/ (Express backend)
+ - wrappers/csharp_cli_api/ (C# API wrapper)
+
+ Dependencies installed automatically:
+ Frontend (React):
+ - Standard React dependencies
+ - History library for routing
+ - Vite for development and building
+ - Concurrently for running multiple processes
+
+ Backend (Express):
+ - Standard Express dependencies
+ - TypeScript development tools
+ - ts-node-dev for hot reloading
+
+ Access points:
+ - Frontend GUI: http://localhost:5173/
+ - Backend API: Usually http://localhost:3000/
+ - C# API wrapper: Usually http://localhost:5000/
+
+ Use cases:
+ - Visual configuration of database connections
+ - Interactive query building and testing
+ - Graphical schema exploration
+ - User-friendly alternative to CLI commands
+ - Debugging and development interface
+
+ Note: Requires Node.js, npm, and .NET runtime to be installed on the system.
+ """
+ import subprocess
+ from pathlib import Path
+
+ def run_cmd(cmd, cwd=None):
+ print_colored(f"▶ {cmd}", "yellow")
+ subprocess.run(cmd, shell=True, cwd=cwd, check=True)
+
+ print("Checking GUI setup...")
+
+ commands_path = Path(__file__).resolve()
+ corebrain_root = commands_path.parents[1]
+
+ cli_ui_path = corebrain_root / "CLI-UI"
+ client_path = cli_ui_path / "client"
+ server_path = cli_ui_path / "server"
+ api_path = corebrain_root / "wrappers" / "csharp_cli_api"
+
+ # Path validation
+ if not client_path.exists():
+ print_colored(f"Folder {client_path} does not exist!", "red")
+ return 1
+ if not server_path.exists():
+ print_colored(f"Folder {server_path} does not exist!", "red")
+ return 1
+ if not api_path.exists():
+ print_colored(f"Folder {api_path} does not exist!", "red")
+ return 1
+
+ # Setup client
+ if not (client_path / "node_modules").exists():
+ print_colored("Installing frontend (React) dependencies...", "cyan")
+ run_cmd("npm install", cwd=client_path)
+ run_cmd("npm install history", cwd=client_path)
+ run_cmd("npm install --save-dev vite", cwd=client_path)
+ run_cmd("npm install concurrently --save-dev", cwd=client_path)
+
+ # Setup server
+ if not (server_path / "node_modules").exists():
+ print_colored("Installing backend (Express) dependencies...", "cyan")
+ run_cmd("npm install", cwd=server_path)
+ run_cmd("npm install --save-dev ts-node-dev", cwd=server_path)
+
+ # Start GUI: CLI UI + Corebrain API
+ print("Starting GUI (CLI-UI + Corebrain API)...")
+
+ def run_in_background_silent(cmd, cwd):
+ return subprocess.Popen(
+ cmd,
+ cwd=cwd,
+ shell=True,
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.DEVNULL
+ )
+
+ run_in_background_silent("dotnet run", cwd=api_path)
+ run_in_background_silent(
+ 'npx concurrently "npm --prefix server run dev" "npm --prefix client run dev"',
+ cwd=cli_ui_path
+ )
+
+ url = "http://localhost:5173/"
+ print_colored(f"GUI: {url}", "cyan")
+ webbrowser.open(url)
+ return 0
+
+
+ # Handles the CLI command to create a new API key using stored credentials (token from SSO)
+ #
+ # Usage example:
+ # corebrain --create-api-key --key-name "Name of key" --key-level {read|write|admin}
+ #
+
+ if args.create_api_key:
+ sso_token = load_api_token()
+ if not sso_token:
+ print_colored("❌ No saved SSO token found. Log in first with 'corebrain --login'.", "red")
+ return 1
+
+ key_name = args.key_name or "default-key"
+ key_level = args.key_level or "read"
+
+ api_url = os.environ.get("COREBRAIN_API_URL") or DEFAULT_API_URL
+
+ # Sending request to Corebrain-API
+ payload = {
+ "name": key_name,
+ "access_level": key_level
+ }
+
+ headers = {
+ "Authorization": f"Bearer {sso_token}",
+ "Content-Type": "application/json"
+ }
+
+ try:
+ response = requests.post(
+ f"{api_url}/api/auth/api-keys",
+ json=payload,
+ headers=headers
+ )
+
+ if response.status_code == 200:
+ key_data = response.json()
+ print_colored("✅ API Key was created successfully:", "green")
+ print_colored(f"Name: {key_data['name']}", "blue")
+ print_colored(f"Key: {key_data['key']}", "blue")
+ else:
+ print_colored(f"❌ Error while creating API Key: {response.text}", "red")
+ return 1
+
+ except Exception as e:
+ print_colored(f"❌ Exception occurred while creating API Key: {str(e)}", "red")
+ return 1
+
+ return 0
+
+ else:
+ # If no option was specified, show help
+ parser.print_help()
+ print_colored("\nTip: Use 'corebrain --login' to login via SSO.", "blue")
+ return 0
+ except Exception as e:
+ print_colored(f"Error: {str(e)}", "red")
+ import traceback
+ traceback.print_exc()
+ return 1
diff --git a/corebrain/cli/common.py b/corebrain/cli/common.py
new file mode 100644
index 0000000..022e221
--- /dev/null
+++ b/corebrain/cli/common.py
@@ -0,0 +1,15 @@
+"""
+Default values for SSO and API connection
+"""
+
+DEFAULT_API_URL = "http://localhost:5000" # Use 5000 in Windows / 1000 in MacOS by default
+#DEFAULT_SSO_URL = "http://localhost:3000" # localhost
+DEFAULT_SSO_URL = "https://sso.globodain.com" # remote
+DEFAULT_PORT = 8765
+DEFAULT_TIMEOUT = 10
+#SSO_CLIENT_ID = '401dca6e-3f3b-4458-b3ef-f87eaae0398d' # localhost
+#SSO_CLIENT_SECRET = 'f9d315ea-5a65-4e3f-be35-b27a933dfb5b' # localhost
+SSO_CLIENT_ID = '63d767e9-5a06-4890-a194-8608ae29d426' # remote
+SSO_CLIENT_SECRET = '06cf39f6-ca93-466e-955e-cb6ea0a02d4d' # remote
+SSO_REDIRECT_URI = 'http://localhost:8765/oauth/callback'
+SSO_SERVICE_ID = 2
\ No newline at end of file
diff --git a/corebrain/cli/config.py b/corebrain/cli/config.py
new file mode 100644
index 0000000..9777a78
--- /dev/null
+++ b/corebrain/cli/config.py
@@ -0,0 +1,490 @@
+"""
+Configuration functions for the CLI.
+"""
+import json
+import uuid
+import getpass
+import os
+from typing import Dict, Any, List, Optional, Tuple
+from datetime import datetime
+
+from corebrain.cli.common import DEFAULT_API_URL, DEFAULT_SSO_URL
+from corebrain.cli.auth.sso import authenticate_with_sso, authenticate_with_sso_and_api_key_request
+from corebrain.cli.utils import print_colored, ProgressTracker
+from corebrain.db.engines import get_available_engines
+from corebrain.config.manager import ConfigManager
+from corebrain.network.client import http_session
+from corebrain.core.test_utils import test_natural_language_query
+from corebrain.db.schema_file import extract_db_schema
+
+def get_api_credential(args_token: Optional[str] = None, sso_url: Optional[str] = None) -> Tuple[Optional[str], Optional[Dict[str, Any]], Optional[str]]:
+ """
+ Obtains the API credential (API key), trying several methods in order:
+ 1. Token provided as argument
+ 2. Environment variable
+ 3. SSO authentication
+ 4. Manual user input
+
+ Args:
+ args_token: Token provided as argument
+ sso_url: SSO service URL
+
+ Returns:
+ Tuple with (api_key, user_data, api_token) or (None, None, None) if couldn't be obtained
+ - api_key: API key to use with SDK
+ - user_data: User data
+ - api_token: API token for general authentication
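+
+ Example (illustrative; assumes COREBRAIN_API_KEY is set in the environment):
+
+ api_key, user_data, api_token = get_api_credential()
+ # user_data is None and api_token equals api_key in this case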
+ """
+ # 1. Check if provided as argument
+ if args_token:
+ print_colored("Using token provided as argument.", "blue")
+ # Assume the provided token is directly an API key
+ return args_token, None, args_token
+
+ # 2. Check environment variable for API key
+ env_api_key = os.environ.get("COREBRAIN_API_KEY")
+ if env_api_key:
+ print_colored("Using API key from COREBRAIN_API_KEY environment variable.", "blue")
+ return env_api_key, None, env_api_key
+
+ # 3. Check environment variable for API token
+ env_api_token = os.environ.get("COREBRAIN_API_TOKEN")
+ if env_api_token:
+ print_colored("Using API token from COREBRAIN_API_TOKEN environment variable.", "blue")
+ # Note: Here we return the same value as api_key and api_token
+ # because we have no way to obtain a specific api_key
+ return env_api_token, None, env_api_token
+
+ # 4. Try SSO authentication
+ print_colored("Attempting authentication via SSO...", "blue")
+ api_key, user_data, api_token = authenticate_with_sso_and_api_key_request(sso_url or DEFAULT_SSO_URL)
+ print("Exit from authenticate_with_sso: ", datetime.now())
+ if api_key:
+ # Save for future use
+ os.environ["COREBRAIN_API_KEY"] = api_key
+ os.environ["COREBRAIN_API_TOKEN"] = api_token
+ return api_key, user_data, api_token
+
+ # 5. Request manually
+ print_colored("\nCouldn't complete SSO authentication.", "yellow")
+ print_colored("You can directly enter an API key:", "blue")
+ manual_input = input("Enter your Corebrain API key: ").strip()
+ if manual_input:
+ # Assume manual input is an API key
+ return manual_input, None, manual_input
+
+ # If we got here, we couldn't get a credential
+ return None, None, None
+
+def get_db_type() -> str:
+ """
+ Prompts the user to select a database type.
+
+ Returns:
+ Selected database type
+ """
+ print_colored("\n=== Select the database type ===", "blue")
+ print("1. SQL (SQLite, MySQL, PostgreSQL)")
+ print("2. NoSQL (MongoDB)")
+
+ while True:
+ try:
+ choice = int(input("\nSelect an option (1-2): ").strip())
+ if choice == 1:
+ return "sql"
+ elif choice == 2:
+ return "nosql"
+ else:
+ print_colored("Invalid option. Try again.", "red")
+ except ValueError:
+ print_colored("Please enter a number.", "red")
+
+def get_db_engine(db_type: str) -> str:
+ """
+ Prompts the user to select a database engine.
+
+ Args:
+ db_type: Selected database type
+
+ Returns:
+ Selected database engine
+ """
+ engines = get_available_engines()
+
+ if db_type == "sql":
+ available_engines = engines["sql"]
+ print_colored("\n=== Select the SQL engine ===", "blue")
+ for i, engine in enumerate(available_engines, 1):
+ print(f"{i}. {engine.capitalize()}")
+
+ while True:
+ try:
+ choice = int(input(f"\nSelect an option (1-{len(available_engines)}): ").strip())
+ if 1 <= choice <= len(available_engines):
+ return available_engines[choice - 1]
+ else:
+ print_colored("Invalid option. Try again.", "red")
+ except ValueError:
+ print_colored("Please enter a number.", "red")
+ else:
+ # For NoSQL, we only have MongoDB for now
+ return "mongodb"
+
+def get_connection_params(db_type: str, engine: str) -> Dict[str, Any]:
+ """
+ Prompts for connection parameters according to the database type and engine.
+
+ Args:
+ db_type: Database type
+ engine: Database engine
+
+ Returns:
+ Dictionary with connection parameters
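+
+ Example result (illustrative values for a local PostgreSQL setup):
+
+ {
+ "type": "sql",
+ "engine": "postgresql",
+ "host": "localhost",
+ "port": 5432,
+ "user": "app_user",
+ "password": "<prompted via getpass>",
+ "database": "app_db",
+ "config_id": "<generated uuid4>",
+ "excluded_tables": []
+ }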
+ """
+ params = {"type": db_type, "engine": engine}
+
+ # Specific parameters by type and engine
+ if db_type == "sql":
+ if engine == "sqlite":
+ path = input("\nPath to SQLite database file: ").strip()
+ params["database"] = path
+ else:
+ # MySQL or PostgreSQL
+ print_colored("\n=== Connection Parameters ===", "blue")
+ params["host"] = input("Host (default: localhost): ").strip() or "localhost"
+
+ if engine == "mysql":
+ params["port"] = int(input("Port (default: 3306): ").strip() or "3306")
+ else: # PostgreSQL
+ params["port"] = int(input("Port (default: 5432): ").strip() or "5432")
+
+ params["user"] = input("User: ").strip()
+ params["password"] = getpass.getpass("Password: ")
+ params["database"] = input("Database name: ").strip()
+ else:
+ # MongoDB
+ print_colored("\n=== MongoDB Connection Parameters ===", "blue")
+ use_connection_string = input("Use connection string? (y/n): ").strip().lower() == "y"
+
+ if use_connection_string:
+ params["connection_string"] = input("MongoDB connection string: ").strip()
+ else:
+ params["host"] = input("Host (default: localhost): ").strip() or "localhost"
+ params["port"] = int(input("Port (default: 27017): ").strip() or "27017")
+
+ use_auth = input("Use authentication? (y/n): ").strip().lower() == "y"
+ if use_auth:
+ params["user"] = input("User: ").strip()
+ params["password"] = getpass.getpass("Password: ")
+
+ params["database"] = input("Database name: ").strip()
+
+ # Add configuration ID
+ params["config_id"] = str(uuid.uuid4())
+ params["excluded_tables"] = []
+
+ return params
+
+def test_database_connection(api_token: str, db_config: Dict[str, Any], api_url: Optional[str] = None, user_data: Optional[Dict[str, Any]] = None) -> bool:
+ """
+ Tests the database connection without verifying the API token.
+
+ Args:
+ api_token: API token
+ db_config: Database configuration
+ api_url: Optional API URL
+ user_data: User data
+
+ Returns:
+ True if connection is successful, False otherwise
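+
+ Example (illustrative; db_config as returned by get_connection_params):
+
+ if test_database_connection(api_token, db_config):
+ print("ready to configure")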
+ """
+ try:
+ print_colored("\nTesting database connection...", "blue")
+
+ db_type = db_config["type"].lower()
+ engine = db_config.get("engine", "").lower()
+
+ if db_type == "sql":
+ if engine == "sqlite":
+ import sqlite3
+ conn = sqlite3.connect(db_config.get("database", ""))
+ cursor = conn.cursor()
+ cursor.execute("SELECT 1")
+ cursor.close()
+ conn.close()
+
+ elif engine == "mysql":
+ import mysql.connector
+ if "connection_string" in db_config:
+ conn = mysql.connector.connect(connection_string=db_config["connection_string"])
+ else:
+ conn = mysql.connector.connect(
+ host=db_config.get("host", "localhost"),
+ user=db_config.get("user", ""),
+ password=db_config.get("password", ""),
+ database=db_config.get("database", ""),
+ port=db_config.get("port", 3306)
+ )
+ cursor = conn.cursor()
+ cursor.execute("SELECT 1")
+ cursor.close()
+ conn.close()
+
+ elif engine == "postgresql":
+ import psycopg2
+ if "connection_string" in db_config:
+ conn = psycopg2.connect(db_config["connection_string"])
+ else:
+ conn = psycopg2.connect(
+ host=db_config.get("host", "localhost"),
+ user=db_config.get("user", ""),
+ password=db_config.get("password", ""),
+ dbname=db_config.get("database", ""),
+ port=db_config.get("port", 5432)
+ )
+ cursor = conn.cursor()
+ cursor.execute("SELECT 1")
+ cursor.close()
+ conn.close()
+
+ elif db_type == "nosql" and engine == "mongodb":
+ import pymongo
+ if "connection_string" in db_config:
+ client = pymongo.MongoClient(db_config["connection_string"])
+ else:
+ client = pymongo.MongoClient(
+ host=db_config.get("host", "localhost"),
+ port=db_config.get("port", 27017),
+ username=db_config.get("user"),
+ password=db_config.get("password")
+ )
+
+ # Verify the connection by accessing the database; when a connection
+ # string is used, the database name may be embedded in it, so fall
+ # back to listing database names instead
+ if db_config.get("database"):
+ db = client[db_config["database"]]
+ _ = db.list_collection_names()
+ else:
+ _ = client.list_database_names()
+ client.close()
+
+ # If we got here, the connection was successful
+ print_colored("✅ Database connection successful!", "green")
+ return True
+ except Exception as e:
+ print_colored(f"❌ Error connecting to the database: {str(e)}", "red")
+ return False
+
+def select_excluded_tables(api_token: str, db_config: Dict[str, Any], api_url: Optional[str] = None, user_data: Optional[Dict[str, Any]] = None) -> List[str]:
+ """
+ Allows the user to select tables/collections to exclude.
+
+ Args:
+ api_token: API token
+ db_config: Database configuration
+ api_url: Optional API URL
+ user_data: User data
+
+ Returns:
+ List of excluded tables/collections
+ """
+ print_colored("\nRetrieving database schema...", "blue")
+
+ # Get the database schema directly
+ schema = extract_db_schema(db_config)
+
+ if not schema or not schema.get("tables"):
+ print_colored("No tables/collections found.", "yellow")
+ return []
+
+ print_colored("\n=== Tables/Collections found ===", "blue")
+ print("Mark with 'n' the tables that should NOT be accessible (y for accessible)")
+
+ # Use the tables list instead of the dictionary
+ tables_list = schema.get("tables_list", [])
+ excluded_tables = []
+
+ if not tables_list:
+ # If there's no table list, convert the tables dictionary to a list
+ tables = schema.get("tables", {})
+ for table_name in tables:
+ choice = input(f"{table_name} (accessible? y/n): ").strip().lower()
+ if choice == "n":
+ excluded_tables.append(table_name)
+ else:
+ # If there's a table list, use it directly
+ for i, table in enumerate(tables_list, 1):
+ table_name = table["name"]
+ choice = input(f"{i}. {table_name} (accessible? y/n): ").strip().lower()
+ if choice == "n":
+ excluded_tables.append(table_name)
+
+ print_colored(f"\n{len(excluded_tables)} tables/collections have been excluded", "green")
+ return excluded_tables
+
+def save_configuration(sso_token: str, api_key: str, db_config: Dict[str, Any], api_url: Optional[str] = None) -> bool:
+ """
+ Saves the configuration locally and syncs it with the API server.
+
+ Args:
+ sso_token: SSO authentication token
+ api_key: API Key to identify the configuration
+ db_config: Database configuration
+ api_url: Optional API URL
+
+ Returns:
+ True if saved correctly, False otherwise
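+
+ Example (illustrative; db_config as produced by get_connection_params):
+
+ if save_configuration(sso_token, api_key, db_config, "https://api.etedata.com"):
+ print("configuration stored and synced")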
+ """
+ config_id = db_config.get("config_id")
+ if not config_id:
+ config_id = str(uuid.uuid4())
+ db_config["config_id"] = config_id
+
+ print_colored(f"\nSaving configuration with ID: {config_id}...", "blue")
+
+ try:
+ config_manager = ConfigManager()
+ config_manager.add_config(api_key, db_config, config_id)
+
+ # 2. Verify that the configuration was saved locally
+ saved_config = config_manager.get_config(api_key, config_id)
+ if not saved_config:
+ print_colored("⚠️ Could not verify local saving of configuration", "yellow")
+ else:
+ print_colored("✅ Configuration saved locally successfully", "green")
+
+ # 3. Try to sync with the server
+ try:
+ if api_url:
+ print_colored("Syncing configuration with server...", "blue")
+
+ # Prepare URL
+ if not api_url.startswith(("http://", "https://")):
+ api_url = "https://" + api_url
+
+ if api_url.endswith('/'):
+ api_url = api_url[:-1]
+
+ # Endpoint to update API key
+ endpoint = f"{api_url}/api/auth/api-keys/{api_key}"
+
+ # Create ApiKeyUpdate object according to your model
+ update_data = {
+ "metadata": {
+ "config_id": config_id,
+ "db_config": db_config,
+ "corebrain_sdk": {
+ "version": "1.0.0",
+ "updated_at": datetime.now().isoformat()
+ }
+ }
+ }
+
+ print_colored(f"Updating API key with ID: {api_key}", "blue")
+
+ # Send to server
+ headers = {
+ "Authorization": f"Bearer {sso_token}",
+ "Content-Type": "application/json"
+ }
+
+ response = http_session.put(
+ endpoint,
+ headers=headers,
+ json=update_data,
+ timeout=5.0
+ )
+
+ if response.status_code in [200, 201, 204]:
+ print_colored("✅ Configuration successfully synced with server", "green")
+ else:
+ print_colored(f"⚠️ Error syncing with server (Code: {response.status_code})", "yellow")
+ print_colored(f"Response: {response.text[:200]}...", "yellow")
+
+ except Exception as e:
+ print_colored(f"⚠️ Error syncing with server: {str(e)}", "yellow")
+ print_colored("The configuration is still saved locally", "green")
+
+ return True
+
+ except Exception as e:
+ print_colored(f"❌ Error saving configuration: {str(e)}", "red")
+ return False
+
+def configure_sdk(api_token: str, api_key: str, api_url: Optional[str] = None, sso_url: Optional[str] = None, user_data: Optional[Dict[str, Any]] = None) -> None:
+ """
+ Configures the Corebrain SDK with a step-by-step wizard.
+
+ Args:
+ api_token: API token for general authentication (obtained from SSO)
+ api_key: Specific API key selected to use with the SDK
+ api_url: Corebrain API URL
+ sso_url: Globodain SSO service URL
+ user_data: User data obtained from SSO
+ """
+ # Ensure default values for URLs
+ api_url = api_url or DEFAULT_API_URL
+ sso_url = sso_url or DEFAULT_SSO_URL
+
+ print_colored("\n=== COREBRAIN SDK CONFIGURATION WIZARD ===", "bold")
+
+ # PHASE 1: Already completed - user authentication
+
+ # PHASE 2: Select database type
+ print_colored("\n2. Selecting database type...", "blue")
+ db_type = get_db_type()
+
+ # PHASE 3: Select database engine
+ print_colored("\n3. Selecting database engine...", "blue")
+ engine = get_db_engine(db_type)
+
+ # PHASE 4: Configure connection parameters
+ print_colored("\n4. Configuring connection parameters...", "blue")
+ db_config = get_connection_params(db_type, engine)
+
+ # PHASE 5: Verify database connection
+ print_colored("\n5. Verifying database connection...", "blue")
+ if not test_database_connection(api_key, db_config, api_url, user_data):
+ print_colored("❌ Configuration not completed due to connection errors.", "red")
+ return
+
+ # PHASE 6: Define non-accessible tables/collections
+ print_colored("\n6. Defining non-accessible tables/collections...", "blue")
+ excluded_tables = select_excluded_tables(api_key, db_config, api_url, user_data)
+ db_config["excluded_tables"] = excluded_tables
+
+ # PHASE 7: Save configuration
+ print_colored("\n7. Saving configuration...", "blue")
+ config_id = db_config["config_id"]
+
+ # Save the configuration
+ if not save_configuration(api_token, api_key, db_config, api_url):
+ print_colored("❌ Error saving configuration.", "red")
+ return
+
+ """ # * --> Deactivated
+ # PHASE 8: Test natural language query (optional depending on API status)
+ try:
+ print_colored("\n8. Testing natural language query...", "blue")
+ test_natural_language_query(api_key, db_config, api_url, user_data)
+ except Exception as e:
+ print_colored(f"⚠️ Could not perform the query test: {str(e)}", "yellow")
+ print_colored("This does not affect the saved configuration.", "yellow")
+ """
+
+ # Final message
+ print_colored("\n✅ Configuration completed successfully!", "green")
+ print_colored(f"\nYou can use this SDK in your code with:", "blue")
+ print(f"""
+ from corebrain import init
+
+ # Initialize the SDK with API key and configuration ID
+ corebrain = init(
+ api_key="{api_key}",
+ config_id="{config_id}"
+ )
+
+ # Perform natural language queries
+ result = corebrain.ask("Your question in natural language")
+ print(result["explanation"])
+ """
+ )
+
diff --git a/corebrain/cli/utils.py b/corebrain/cli/utils.py
new file mode 100644
index 0000000..6c0ccac
--- /dev/null
+++ b/corebrain/cli/utils.py
@@ -0,0 +1,595 @@
+"""
+Utilities for the Corebrain CLI.
+
+This module provides utility functions and classes for the
+Corebrain command-line interface.
+"""
+import sys
+import time
+import socket
+import random
+import logging
+import threading
+import socketserver
+
+from typing import Optional, Dict, Any, List, Union
+from pathlib import Path
+
+from corebrain.cli.common import DEFAULT_PORT, DEFAULT_TIMEOUT
+
+logger = logging.getLogger(__name__)
+
+# Terminal color definitions
+COLORS = {
+ "default": "\033[0m",
+ "bold": "\033[1m",
+ "green": "\033[92m",
+ "red": "\033[91m",
+ "yellow": "\033[93m",
+ "blue": "\033[94m",
+ "magenta": "\033[95m",
+ "cyan": "\033[96m",
+ "white": "\033[97m",
+ "black": "\033[30m",
+ "bg_green": "\033[42m",
+ "bg_red": "\033[41m",
+ "bg_yellow": "\033[43m",
+ "bg_blue": "\033[44m",
+}
+
+def print_colored(text: str, color: str = "default", return_str: bool = False) -> Optional[str]:
+ """
+ Prints colored text in the terminal or returns the colored text.
+
+ Args:
+ text: Text to color
+ color: Color to use (default, green, red, yellow, blue, bold, etc.)
+ return_str: If True, returns the colored text instead of printing it
+
+ Returns:
+ If return_str is True, returns the colored text, otherwise None
+ """
+ try:
+ # Get color code
+ start_color = COLORS.get(color, COLORS["default"])
+ end_color = COLORS["default"]
+
+ # Compose colored text
+ colored_text = f"{start_color}{text}{end_color}"
+
+ # Return or print
+ if return_str:
+ return colored_text
+ else:
+ print(colored_text)
+ return None
+ except Exception as e:
+ # If there's an error with colors (e.g., terminal that doesn't support them)
+ logger.debug(f"Error using colors: {e}")
+ if return_str:
+ return text
+ else:
+ print(text)
+ return None
+
+def format_table(data: List[Dict[str, Any]], columns: Optional[List[str]] = None,
+ max_width: int = 80) -> str:
+ """
+ Formats data as a text table for display in the terminal.
+
+ Args:
+ data: List of dictionaries with the data
+ columns: List of columns to display (if None, uses all columns)
+ max_width: Maximum width of the table
+
+ Returns:
+ Table formatted as text
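+
+ Example (illustrative):
+
+ rows = [{"id": 1, "name": "users"}, {"id": 2, "name": "orders"}]
+ print(format_table(rows, columns=["id", "name"]))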
+ """
+ if not data:
+ return "No data to display"
+
+ # Determine columns to display
+ if not columns:
+ # Use all columns from the first element
+ columns = list(data[0].keys())
+
+ # Get the maximum width for each column
+ widths = {col: len(col) for col in columns}
+ for row in data:
+ for col in columns:
+ if col in row:
+ val = str(row[col])
+ widths[col] = max(widths[col], min(len(val), 30)) # Limit to 30 characters
+
+ # Adjust widths if they exceed the maximum
+ total_width = sum(widths.values()) + (3 * len(columns)) - 1
+ if total_width > max_width:
+ # Reduce proportionally
+ ratio = max_width / total_width
+ for col in widths:
+ widths[col] = max(8, int(widths[col] * ratio))
+
+ # Header
+ header = " | ".join(col.ljust(widths[col]) for col in columns)
+ separator = "-+-".join("-" * widths[col] for col in columns)
+
+ # Rows
+ rows = []
+ for row in data:
+ row_str = " | ".join(
+ str(row.get(col, "")).ljust(widths[col])[:widths[col]]
+ for col in columns
+ )
+ rows.append(row_str)
+
+ # Compose table
+ return "\n".join([header, separator] + rows)
+
+def get_free_port(start_port: int = DEFAULT_PORT) -> int:
+ """
+ Finds an available port, starting with the suggested port.
+
+ Args:
+ start_port: Initial port to try
+
+ Returns:
+ Available port
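+
+ Example (illustrative):
+
+ port = get_free_port(8765) # 8765 if free, otherwise a random fallback port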
+ """
+ try:
+ # Try with the suggested port first; the handler class is irrelevant
+ # here because the server is only created to test whether binding succeeds
+ with socketserver.TCPServer(("", start_port), None) as _:
+ return start_port # The port is available
+ except OSError:
+ # If the suggested port is busy, look for a free one
+ for _ in range(10): # Try 10 times
+ # Choose a random port between 8000 and 9000
+ port = random.randint(8000, 9000)
+ try:
+ with socketserver.TCPServer(("", port), None) as _:
+ return port # Port available
+ except OSError:
+ continue # Try with another port
+
+ # If we can't find a free port, use a default high one
+ return 10000 + random.randint(0, 1000)
+
+def is_port_in_use(port: int) -> bool:
+ """
+ Checks if a port is in use.
+
+ Args:
+ port: Port number to check
+
+ Returns:
+ True if the port is in use
+ """
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+ return s.connect_ex(('localhost', port)) == 0
+
+def is_interactive() -> bool:
+ """
+ Determines if the current session is interactive.
+
+ Returns:
+ True if the session is interactive
+ """
+ return sys.stdin.isatty() and sys.stdout.isatty()
+
+def confirm_action(message: str, default: bool = False) -> bool:
+ """
+ Asks the user for confirmation of an action.
+
+ Args:
+ message: Confirmation message
+ default: Default value if the user just presses Enter
+
+ Returns:
+ True if the user confirms, False otherwise
+ """
+ if not is_interactive():
+ return default
+
+ default_text = "Y/n" if default else "y/N"
+ response = input(f"{message} ({default_text}): ").strip().lower()
+
+ if not response:
+ return default
+
+ return response.startswith('y')
+
+def get_input_with_default(prompt: str, default: Optional[str] = None) -> str:
+ """
+ Requests input from the user with a default value.
+
+ Args:
+ prompt: Request message
+ default: Default value
+
+ Returns:
+ Value entered by the user or default value
+ """
+ if default:
+ full_prompt = f"{prompt} (default: {default}): "
+ else:
+ full_prompt = f"{prompt}: "
+
+ response = input(full_prompt).strip()
+
+ return response if response else (default or "")
+
+def get_password_input(prompt: str = "Password") -> str:
+ """
+ Requests a password from the user without displaying it on screen.
+
+ Args:
+ prompt: Request message
+
+ Returns:
+ Password entered
+ """
+ import getpass
+ return getpass.getpass(f"{prompt}: ")
+
+def truncate_text(text: str, max_length: int = 100, suffix: str = "...") -> str:
+ """
+ Truncates text if it exceeds the maximum length.
+
+ Args:
+ text: Text to truncate
+ max_length: Maximum length
+ suffix: Suffix to add if the text is truncated
+
+ Returns:
+ Truncated text if necessary
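+
+ Example:
+
+ truncate_text("abcdefghij", max_length=8) # -> "abcde..."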
+ """
+ if not text or len(text) <= max_length:
+ return text
+
+ return text[:max_length - len(suffix)] + suffix
+
+def ensure_dir(path: Union[str, Path]) -> Path:
+ """
+ Ensures that a directory exists, creating it if necessary.
+
+ Args:
+ path: Directory path
+
+ Returns:
+ Path object of the directory
+ """
+ path_obj = Path(path)
+ path_obj.mkdir(parents=True, exist_ok=True)
+ return path_obj
+
+class ProgressTracker:
+ """
+ Displays progress of CLI operations with colors and timing.
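+
+ Example (illustrative):
+
+ tracker = ProgressTracker(verbose=True)
+ tracker.start("Extracting schema", total=3)
+ tracker.update("tables read")
+ tracker.finish("Schema extracted")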
+ """
+
+ def __init__(self, verbose: bool = False, spinner: bool = True):
+ """
+ Initializes the progress tracker.
+
+ Args:
+ verbose: Whether to show detailed information
+ spinner: Whether to show an animated spinner
+ """
+ self.verbose = verbose
+ self.use_spinner = spinner and is_interactive()
+ self.start_time = None
+ self.current_task = None
+ self.total = None
+ self.steps = 0
+ self.spinner_thread = None
+ self.stop_spinner = threading.Event()
+ self.last_update_time = 0
+ self.update_interval = 0.2 # Seconds between updates
+
+ def _run_spinner(self):
+ """Displays an animated spinner in the console."""
+ spinner_chars = "|/-\\"
+ idx = 0
+
+ while not self.stop_spinner.is_set():
+ if self.current_task:
+ elapsed = time.time() - self.start_time
+ status = f"{self.steps}/{self.total}" if self.total else f"step {self.steps}"
+ sys.stdout.write(f"\r{COLORS['blue']}[{spinner_chars[idx]}] {self.current_task} ({status}, {elapsed:.1f}s){COLORS['default']} ")
+ sys.stdout.flush()
+ idx = (idx + 1) % len(spinner_chars)
+ time.sleep(0.1)
+
+ def start(self, task: str, total: Optional[int] = None) -> None:
+ """
+ Starts tracking a task.
+
+ Args:
+ task: Task description
+ total: Total number of steps (optional)
+ """
+ self.reset() # Ensure there's no previous task
+
+ self.current_task = task
+ self.total = total
+ self.start_time = time.time()
+ self.steps = 0
+ self.last_update_time = self.start_time
+
+ # Show initial message
+ print_colored(f"▶ {task}...", "blue")
+
+ # Start spinner if enabled
+ if self.use_spinner:
+ self.stop_spinner.clear()
+ self.spinner_thread = threading.Thread(target=self._run_spinner)
+ self.spinner_thread.daemon = True
+ self.spinner_thread.start()
+
+ def update(self, message: Optional[str] = None, increment: int = 1) -> None:
+ """
+ Updates progress with optional message.
+
+ Args:
+ message: Progress message
+ increment: Step increment
+ """
+ if not self.start_time:
+ return # No active task
+
+ self.steps += increment
+ current_time = time.time()
+
+ # Limit update frequency to avoid saturating the output
+ if (current_time - self.last_update_time < self.update_interval) and not message:
+ return
+
+ self.last_update_time = current_time
+
+ # If there's an active spinner, temporarily stop it to show the message
+ if self.use_spinner and self.spinner_thread and self.spinner_thread.is_alive():
+ sys.stdout.write("\r" + " " * 80 + "\r") # Clear current line
+ sys.stdout.flush()
+
+ if message or self.verbose:
+ elapsed = current_time - self.start_time
+ status = f"{self.steps}/{self.total}" if self.total else f"step {self.steps}"
+
+ if message:
+ print_colored(f" • {message} ({status}, {elapsed:.1f}s)", "blue")
+ elif self.verbose:
+ print_colored(f" • Progress: {status}, {elapsed:.1f}s", "blue")
+
+ def finish(self, message: Optional[str] = None) -> None:
+ """
+ Finishes a task with success message.
+
+ Args:
+ message: Final message
+ """
+ if not self.start_time:
+ return # No active task
+
+ # Stop spinner if active
+ self._stop_spinner()
+
+ elapsed = time.time() - self.start_time
+ msg = message or f"{self.current_task} completed"
+ print_colored(f"✅ {msg} in {elapsed:.2f}s", "green")
+
+ self.reset()
+
+ def fail(self, message: Optional[str] = None) -> None:
+ """
+ Marks a task as failed.
+
+ Args:
+ message: Error message
+ """
+ if not self.start_time:
+ return # No active task
+
+ # Stop spinner if active
+ self._stop_spinner()
+
+ elapsed = time.time() - self.start_time
+ msg = message or f"{self.current_task} failed"
+ print_colored(f"❌ {msg} after {elapsed:.2f}s", "red")
+
+ self.reset()
+
+ def _stop_spinner(self) -> None:
+ """Stops the spinner if active."""
+ if self.use_spinner and self.spinner_thread and self.spinner_thread.is_alive():
+ self.stop_spinner.set()
+ self.spinner_thread.join(timeout=0.5)
+
+ # Clear spinner line
+ sys.stdout.write("\r" + " " * 80 + "\r")
+ sys.stdout.flush()
+
+ def reset(self) -> None:
+ """Resets the tracker."""
+ self._stop_spinner()
+ self.start_time = None
+ self.current_task = None
+ self.total = None
+ self.steps = 0
+ self.spinner_thread = None
+
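+# Illustrative usage sketch for ProgressTracker (task names are examples only):
+#
+#     tracker = ProgressTracker(verbose=True)
+#     tracker.start("Extracting schema", total=3)
+#     for step in ("tables", "columns", "indexes"):
+#         tracker.update(f"Reading {step}")
+#     tracker.finish("Schema extracted")
+#
+# On failure, call tracker.fail("reason") instead of finish() to stop the
+# spinner and print the elapsed time in red.
+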
+class CliConfig:
+ """
+ Manages the CLI configuration.
+ """
+
+ def __init__(self, config_dir: Optional[Union[str, Path]] = None):
+ """
+ Initializes the CLI configuration.
+
+ Args:
+ config_dir: Directory for configuration files
+ """
+ if config_dir:
+ self.config_dir = Path(config_dir)
+ else:
+ self.config_dir = Path.home() / ".corebrain" / "cli"
+
+ self.config_file = self.config_dir / "config.json"
+ self.config = self._load_config()
+
+ def _load_config(self) -> Dict[str, Any]:
+ """
+ Loads configuration from file.
+
+ Returns:
+ Loaded configuration
+ """
+ if not self.config_file.exists():
+ return self._create_default_config()
+
+ try:
+ import json
+ with open(self.config_file, 'r') as f:
+ return json.load(f)
+ except Exception as e:
+ logger.warning(f"Error loading configuration: {e}")
+ return self._create_default_config()
+
+ def _create_default_config(self) -> Dict[str, Any]:
+ """
+ Creates a default configuration.
+
+ Returns:
+ Default configuration
+ """
+ from corebrain.cli.common import DEFAULT_API_URL, DEFAULT_SSO_URL
+
+ config = {
+ "api_url": DEFAULT_API_URL,
+ "sso_url": DEFAULT_SSO_URL,
+ "verbose": False,
+ "timeout": DEFAULT_TIMEOUT,
+ "last_used": {
+ "api_key": None,
+ "config_id": None
+ },
+ "ui": {
+ "use_colors": True,
+ "use_spinner": True,
+ "verbose": False
+ }
+ }
+
+ # Ensure the directory exists
+ ensure_dir(self.config_dir)
+
+ # Save default configuration
+ try:
+ import json
+ with open(self.config_file, 'w') as f:
+ json.dump(config, f, indent=2)
+ except Exception as e:
+ logger.warning(f"Error saving configuration: {e}")
+
+ return config
+
+ def save(self) -> bool:
+ """
+ Saves current configuration.
+
+ Returns:
+ True if saved correctly
+ """
+ try:
+ # Ensure the directory exists
+ ensure_dir(self.config_dir)
+
+ import json
+ with open(self.config_file, 'w') as f:
+ json.dump(self.config, f, indent=2)
+ return True
+ except Exception as e:
+ logger.error(f"Error saving configuration: {e}")
+ return False
+
+ def get(self, key: str, default: Any = None) -> Any:
+ """
+ Gets a configuration value.
+
+ Args:
+ key: Configuration key
+ default: Default value
+
+ Returns:
+ Configuration value
+ """
+ # Support for nested keys with dots
+ if "." in key:
+ parts = key.split(".")
+ current = self.config
+ for part in parts:
+ if part not in current:
+ return default
+ current = current[part]
+ return current
+
+ return self.config.get(key, default)
+
+ def set(self, key: str, value: Any) -> bool:
+ """
+ Sets a configuration value.
+
+ Args:
+ key: Configuration key
+ value: Value to set
+
+ Returns:
+ True if set correctly
+ """
+ # Support for nested keys with dots
+ if "." in key:
+ parts = key.split(".")
+ current = self.config
+ for part in parts[:-1]:
+ if part not in current:
+ current[part] = {}
+ current = current[part]
+ current[parts[-1]] = value
+ else:
+ self.config[key] = value
+
+ return self.save()
+
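+    # Nested keys use dot notation; a minimal sketch (the key below exists in
+    # the default configuration):
+    #
+    #     cfg = CliConfig()
+    #     cfg.set("ui.use_colors", False)   # writes config["ui"]["use_colors"]
+    #     cfg.get("ui.use_colors", True)    # -> False
+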
+ def update(self, config_dict: Dict[str, Any]) -> bool:
+ """
+ Updates configuration with a dictionary.
+
+ Args:
+ config_dict: Configuration dictionary
+
+ Returns:
+ True if updated correctly
+ """
+ self.config.update(config_dict)
+ return self.save()
+
+ def update_last_used(self, api_key: Optional[str] = None, config_id: Optional[str] = None) -> bool:
+ """
+ Updates the last used configuration.
+
+ Args:
+ api_key: API key used
+ config_id: Configuration ID used
+
+ Returns:
+ True if updated correctly
+ """
+ if not self.config.get("last_used"):
+ self.config["last_used"] = {}
+
+ if api_key:
+ self.config["last_used"]["api_key"] = api_key
+
+ if config_id:
+ self.config["last_used"]["config_id"] = config_id
+
+ return self.save()
\ No newline at end of file
diff --git a/corebrain/config/__init__.py b/corebrain/config/__init__.py
new file mode 100644
index 0000000..3363886
--- /dev/null
+++ b/corebrain/config/__init__.py
@@ -0,0 +1,10 @@
+"""
+Configuration management for the Corebrain SDK.
+
+This package provides functionality to manage database connection configurations
+and SDK preferences.
+"""
+from .manager import ConfigManager
+
+# Explicit export of public components
+__all__ = ['ConfigManager']
\ No newline at end of file
diff --git a/corebrain/config/manager.py b/corebrain/config/manager.py
new file mode 100644
index 0000000..d2899e4
--- /dev/null
+++ b/corebrain/config/manager.py
@@ -0,0 +1,550 @@
+"""
+Configuration manager for the Corebrain SDK.
+"""
+
+import json
+import uuid
+import os
+import tomli
+from pathlib import Path
+from typing import Dict, Any, List, Optional
+from cryptography.fernet import Fernet
+from corebrain.utils.serializer import serialize_to_json
+from corebrain.core.common import logger
+from corebrain.cli.config import DEFAULT_API_URL, DEFAULT_SSO_URL
+from corebrain.db.schema_file import show_db_schema
+from corebrain.cli.utils import print_colored
+# tkinter is imported lazily inside import_config() so that headless
+# environments can use the rest of this module without a display.
+
+
+# Made by Lukasz
+# get data from pyproject.toml
+def load_project_metadata():
+    # pyproject.toml sits at the repository root, three levels above corebrain/config/manager.py
+    pyproject_path = Path(__file__).resolve().parents[2] / "pyproject.toml"
+ try:
+ with open(pyproject_path, "rb") as f:
+ data = tomli.load(f)
+ return data.get("project", {})
+ except (FileNotFoundError, tomli.TOMLDecodeError) as e:
+ print(f"Warning: Could not load project metadata: {e}")
+ return {}
+
+
+# Made by Lukasz
+# get the name, version, etc.
+def get_config():
+    metadata = load_project_metadata()
+ return {
+ "model": metadata.get("name", "unknown"),
+ "version": metadata.get("version", "0.0.0"),
+ "debug": False,
+ "logging": {"level": "info"},
+ }
+
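+# Illustrative result of get_config(), assuming a standard [project] table in
+# pyproject.toml (values below are examples only):
+#
+#     {"model": "corebrain", "version": "1.0.0", "debug": False,
+#      "logging": {"level": "info"}}
+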
+
+# Made by Lukasz
+# export config to file
+def export_config(config, filepath="config.json", include_credentials=False, shareable=False):
+ """
+ Export configuration to a file with options for credentials and shareable formats.
+
+ Args:
+ config (dict): The configuration dictionary to export.
+ filepath (str): Path to the file to export.
+ include_credentials (bool): Whether to include sensitive fields like passwords or tokens.
+ shareable (bool): Whether to create a sanitized, shareable version (removes credentials).
+ """
+ config_to_export = config.copy()
+
+ sensitive_keys = {"password", "api_key", "token", "secret", "access_token", "credentials"}
+
+ if shareable or not include_credentials:
+ config_to_export = {
+ k: ("***REDACTED***" if k in sensitive_keys else v)
+ for k, v in config_to_export.items()
+ }
+
+ try:
+ with open(filepath, "w") as f:
+ json.dump(config_to_export, f, indent=4)
+ _print_colored(f"Configuration exported to {filepath}", "green")
+ except Exception as e:
+ _print_colored(f"Failed to export configuration: {e}", "red")
+
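+# Sketch of the redaction behaviour above (values are examples only):
+#
+#     cfg = {"host": "localhost", "password": "hunter2", "api_key": "abc123"}
+#     export_config(cfg, "shared.json", shareable=True)
+#     # shared.json -> {"host": "localhost", "password": "***REDACTED***",
+#     #                 "api_key": "***REDACTED***"}
+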
+
+# Function to print colored messages
+def _print_colored(message: str, color: str) -> None:
+ """Simplified version of _print_colored that does not depend on cli.utils."""
+ colors = {
+ "red": "\033[91m",
+ "green": "\033[92m",
+ "yellow": "\033[93m",
+ "blue": "\033[94m",
+ "default": "\033[0m",
+ }
+ color_code = colors.get(color, colors["default"])
+ print(f"{color_code}{message}{colors['default']}")
+
+
+class ConfigManager:
+ """SDK configuration manager with improved security and performance."""
+
+ CONFIG_DIR = Path.home() / ".corebrain"
+ CONFIG_FILE = CONFIG_DIR / "config.json"
+ SECRET_KEY_FILE = CONFIG_DIR / "secret.key"
+ ACTIVE_CONFIG_FILE = CONFIG_DIR / "active_config.json"
+
+ def __init__(self):
+ self.configs = {}
+ self.cipher = None
+ self._ensure_config_dir()
+ self._load_secret_key()
+ self._load_configs()
+
+ def _ensure_config_dir(self) -> None:
+ """Ensures that the configuration directory exists."""
+ try:
+ self.CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+ logger.debug(f"Configuration directory ensured: {self.CONFIG_DIR}")
+ _print_colored(f"Configuration dire:ctory ensured: {self.CONFIG_DIR}", "blue")
+ except Exception as e:
+ logger.error(f"Error creating configuration directory: {str(e)}")
+ _print_colored(f"Error creating configuration directory: {str(e)}", "red")
+
+ def _load_secret_key(self) -> None:
+ """Loads or generates the secret key to encrypt sensitive data."""
+ try:
+ if not self.SECRET_KEY_FILE.exists():
+ key = Fernet.generate_key()
+ with open(self.SECRET_KEY_FILE, "wb") as key_file:
+ key_file.write(key)
+ _print_colored(f"New secret key generated in: {self.SECRET_KEY_FILE}", "green")
+
+ with open(self.SECRET_KEY_FILE, "rb") as key_file:
+ self.secret_key = key_file.read()
+
+ self.cipher = Fernet(self.secret_key)
+ except Exception as e:
+ _print_colored(f"Error loading/generating secret key: {str(e)}", "red")
+            # Fall back to a temporary key (less secure, but keeps the CLI functional)
+ self.secret_key = Fernet.generate_key()
+ self.cipher = Fernet(self.secret_key)
+
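+    # For reference, the encryption round-trip used by _save_configs and
+    # _load_configs (a minimal sketch with a throwaway key):
+    #
+    #     cipher = Fernet(Fernet.generate_key())
+    #     token = cipher.encrypt(b'{"configs": {}}')   # bytes -> opaque token
+    #     assert cipher.decrypt(token) == b'{"configs": {}}'
+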
+ def _load_configs(self) -> Dict[str, Dict[str, Any]]:
+ """Loads the saved configurations."""
+ if not self.CONFIG_FILE.exists():
+ _print_colored(f"Configuration file not found: {self.CONFIG_FILE}", "yellow")
+ return {}
+
+ try:
+ with open(self.CONFIG_FILE, "r") as f:
+ encrypted_data = f.read()
+
+ if not encrypted_data:
+ _print_colored("Configuration file is empty", "yellow")
+ return {}
+
+ try:
+                # Try to decrypt the data
+ decrypted_data = self.cipher.decrypt(encrypted_data.encode()).decode()
+ configs = json.loads(decrypted_data)
+ except Exception as e:
+ # If decryption fails, attempt to load as plain JSON
+ logger.warning(f"Error decrypting configuration: {e}")
+ configs = json.loads(encrypted_data)
+
+ if isinstance(configs, str):
+ configs = json.loads(configs)
+
+ _print_colored(f"Configuration loaded", "green")
+ self.configs = configs
+ return configs
+ except Exception as e:
+ _print_colored(f"Error loading configurations: {str(e)}", "red")
+ return {}
+
+ def _save_configs(self) -> None:
+ """Saves the current configurations."""
+ try:
+ configs_json = serialize_to_json(self.configs)
+ encrypted_data = self.cipher.encrypt(json.dumps(configs_json).encode()).decode()
+
+ with open(self.CONFIG_FILE, "w") as f:
+ f.write(encrypted_data)
+
+ _print_colored(f"Configurations saved in: {self.CONFIG_FILE}", "green")
+ except Exception as e:
+ _print_colored(f"Error saving configurations: {str(e)}", "red")
+
+ def add_config(
+ self, api_key: str, db_config: Dict[str, Any], config_id: Optional[str] = None
+ ) -> str:
+ """
+ Adds a new configuration.
+
+ Args:
+ api_key: Selected API Key
+ db_config: Database configuration
+ config_id: Optional ID for the configuration (one is generated if not provided)
+
+ Returns:
+ Configuration ID
+ """
+        if not config_id:
+            config_id = str(uuid.uuid4())
+        # Record the ID on the config itself so callers can round-trip it
+        db_config["config_id"] = config_id
+
+ # Create or update the entry for this token
+ if api_key not in self.configs:
+ self.configs[api_key] = {}
+
+ # Add the configuration
+ self.configs[api_key][config_id] = db_config
+ self._save_configs()
+
+ _print_colored(f"Configuration added: {config_id} for API Key: {api_key[:8]}...", "green")
+ return config_id
+
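+    # Resulting in-memory/on-disk structure (before encryption), for reference;
+    # keys and values below are illustrative:
+    #
+    #     {
+    #         "<api_key>": {
+    #             "<config_id>": {"type": "sql", "engine": "sqlite",
+    #                             "database": "app.db", "config_id": "<config_id>"}
+    #         }
+    #     }
+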
+ def get_config(self, api_key_selected: str, config_id: str) -> Optional[Dict[str, Any]]:
+ """
+ Retrieves a specific configuration.
+
+ Args:
+ api_key_selected: Selected API Key
+ config_id: Configuration ID
+
+ Returns:
+ Configuration or None if it does not exist
+ """
+ return self.configs.get(api_key_selected, {}).get(config_id)
+
+ """ --> Default version
+ def list_configs(self, api_key_selected: str) -> List[str]:
+
+ Lists the available configuration IDs for an API Key.
+
+ Args:
+ api_key_selected: Selected API Key
+
+ Returns:
+ List of configuration IDs
+
+ return list(self.configs.get(api_key_selected, {}).keys())
+ """
+
+ def set_active_config(self, config_id_to_activate: str) -> bool:
+ """
+ Sets a given config as active, regardless of which API key it's under.
+
+ Args:
+ config_id_to_activate: The config ID to set as active globally.
+
+ Returns:
+ True if the config was found and activated, False otherwise.
+ """
+        found = False
+        active_api_key = None
+
+        for api_key, configs in self.configs.items():
+            for config_id, config_data in configs.items():
+                if config_id == config_id_to_activate:
+                    config_data["active"] = True
+                    found = True
+                    active_api_key = api_key
+                else:
+                    config_data.pop("active", None)
+
+        if found:
+            self._save_configs()
+            # Persist the selection so get_active_config_id() can read it back
+            try:
+                with open(self.ACTIVE_CONFIG_FILE, "w") as f:
+                    json.dump({"api_key": active_api_key, "config_id": config_id_to_activate}, f)
+            except Exception as e:
+                _print_colored(f"Could not persist active configuration: {e}", "yellow")
+            _print_colored(f"Activated configuration {config_id_to_activate}", "green")
+            return True
+ else:
+ _print_colored(f"Invalid Config ID: {config_id_to_activate}", "red")
+ return False
+
+ def get_active_config_id(self, api_key: str) -> Optional[str]:
+ """
+ Retrieve the currently active configuration ID for a given API key.
+
+ Returns None if not set.
+ """
+ try:
+ if self.ACTIVE_CONFIG_FILE.exists():
+ with open(self.ACTIVE_CONFIG_FILE, "r") as f:
+ data = json.load(f)
+ if data.get("api_key") == api_key:
+ return data.get("config_id")
+ except Exception as e:
+ _print_colored(f"Could not load active configuration: {e}", "yellow")
+ return None
+
+    def list_configs(self, api_key_selected: str, user_data=None, api_token=None) -> Optional[str]:
+ """
+ Interactively select an API key, then display and manage its configurations.
+
+ Returns:
+ ID of the selected or activated configuration (or None if nothing selected).
+ """
+ if not self.configs:
+ print("No saved configurations found.")
+ return None
+
+ api_keys = list(self.configs.keys())
+ print("\nAvailable API Keys:")
+ for idx, key in enumerate(api_keys, 1):
+ print(f" {idx}. {key}")
+
+ try:
+ selected_index = int(input("Select an API Key by number: ").strip())
+ selected_api_key = api_keys[selected_index - 1]
+ except (ValueError, IndexError):
+ _print_colored("Invalid selection.", "red")
+ return None
+
+ configs = self.configs[selected_api_key]
+ if not configs:
+ _print_colored("No configurations found for the selected API Key.", "yellow")
+ return None
+
+ print(f"\nConfigurations for API Key {selected_api_key}.")
+ config_ids = list(configs.keys())
+        for idx, config_id in enumerate(config_ids, 1):
+            is_active = configs[config_id].get("active", False)
+            status = " [ACTIVE]" if is_active else ""
+            if is_active:
+                _print_colored(f"  {idx}. {config_id}{status}", "blue")
+            else:
+                print(f"  {idx}. {config_id}{status}")
+
+            for k, v in configs[config_id].items():
+                print(f"    {k}: {v}")
+
+ action_prompt = input("\nWould you like to perform an action? (y/n): ").strip().lower()
+ if action_prompt == "y":
+ print("\nAvailable actions:")
+ print(" 1. Activate configuration")
+ print(" 2. Delete configuration")
+ print(" 3. Show schema")
+ print(" 4. Validate configuration")
+ print(" 5. Edit configuration")
+ print(" 6. Export configuration")
+ print(" 7. Improt configuration")
+ print(" 8. Create configuration")
+ print(" q. Exit")
+
+ choice = input("Enter your choice (1/2/3/4/5/6/7/8/q): ").strip()
+ if choice == "1":
+ selected_idx = input("Enter the number of the configuration to activate: ").strip()
+ try:
+ config_id = config_ids[int(selected_idx) - 1]
+ self.set_active_config(config_id)
+ return config_id
+ except (ValueError, IndexError):
+ _print_colored("Invalid configuration number.", "red")
+ elif choice == "2":
+ selected_idx = input("Enter the number of the configuration to delete: ").strip()
+ try:
+ config_id = config_ids[int(selected_idx) - 1]
+ self.remove_config(selected_api_key, config_id)
+ except (ValueError, IndexError):
+ _print_colored("Invalid configuration number.", "red")
+ elif choice == "3":
+ api_url = os.environ.get("COREBRAIN_API_URL") or DEFAULT_API_URL
+ selected_idx = input("Enter the number of the configuration to show: ").strip()
+ try:
+ config_id = config_ids[int(selected_idx) - 1]
+                    config = self.get_config(selected_api_key, config_id)
+ show_db_schema(config, selected_api_key, config_id, api_url)
+ except (ValueError, IndexError):
+ _print_colored("Invalid configuration number.", "red")
+ elif choice == "4":
+ selected_idx = input("Enter the number of the configuration to validate: ").strip()
+ try:
+ config_id = config_ids[int(selected_idx) - 1]
+ self.validate_config(selected_api_key, config_id)
+ except (ValueError, IndexError):
+ _print_colored("Invalid configuration number.", "red")
+ elif choice == "5":
+ selected_idx = input("Enter the number of the configuration to modify: ").strip()
+ try:
+ config_id = config_ids[int(selected_idx) - 1]
+ self.modify_config(selected_api_key, config_id)
+ except (ValueError, IndexError):
+ _print_colored("Invalid configuration number.", "red")
+ elif choice == "6":
+ selected_idx = input("Enter the number of the configuration to export: ").strip()
+ try:
+ config_id = config_ids[int(selected_idx) - 1]
+                    config = self.get_config(selected_api_key, config_id)
+ include_credentials = input("Include credentials in export? (y/n): ").strip().lower() == "y"
+ shareable = input("Export as shareable version? (y/n): ").strip().lower() == "y"
+ export_config(config, include_credentials=include_credentials, shareable=shareable)
+
+ except (ValueError, IndexError):
+ _print_colored("Invalid configuration number.", "red")
+ elif choice == "7":
+                try:
+                    self.import_config(selected_api_key)
+ except (ValueError, IndexError):
+ _print_colored("Invalid configuration number.", "red")
+ elif choice == "8":
+ api_url = os.environ.get("COREBRAIN_API_URL") or DEFAULT_API_URL
+ sso_url = os.environ.get("COREBRAIN_SSO_URL") or DEFAULT_SSO_URL
+ from corebrain.cli.config import configure_sdk
+
+                configure_sdk(api_token, selected_api_key, api_url, sso_url, user_data)
+
+ elif choice == "q":
+ print("Exit selected.")
+ else:
+ print("Invalid action.")
+ elif action_prompt != "n":
+ print("Invalid input. Please enter 'y' or 'n'.")
+
+ return None
+
+ def modify_config(self, api_key_selected: str, config_id: str) -> None:
+ """
+ Allows the user to interactively modify multiple parameters of an existing configuration.
+ """
+ config = self.get_config(api_key_selected, config_id)
+ if not config:
+ _print_colored(f"Configuration with ID '{config_id}' not found", "red")
+ return
+
+ print_colored(f"\nEditing configuration: {config_id}", "blue")
+
+ while True:
+ keys = [key for key in config.keys() if key != "config_id"]
+ print("\nCurrent parameters:")
+ for idx, key in enumerate(keys, 1):
+ print(f" {idx}. {key}: {config[key]}")
+
+ print(" 0. Exit edit mode")
+
+ try:
+ key_idx = int(input("\nSelect parameter number to edit (or 0 to exit): ").strip())
+ if key_idx == 0:
+ _print_colored("Exiting edit mode.", "yellow")
+ break
+
+ key_to_edit = keys[key_idx - 1]
+ new_value = input(f"Enter new value for '{key_to_edit}': ").strip()
+
+ # Try to interpret value types
+ if new_value.lower() in ["true", "false"]:
+ new_value = new_value.lower() == "true"
+ elif new_value.isdigit():
+ new_value = int(new_value)
+ elif new_value.lower() == "null":
+ new_value = None
+
+ config[key_to_edit] = new_value
+ self.configs[api_key_selected][config_id] = config
+ self._save_configs()
+ _print_colored(f"Updated '{key_to_edit}'", "green")
+
+ except (ValueError, IndexError):
+ _print_colored("Invalid selection.", "red")
+
+ validate = (
+ input("Would you like to validate the modified config now? (y/n): ").strip().lower()
+ )
+ if validate == "y":
+ self.validate_config(api_key_selected, config_id)
+
+    def import_config(self, api_key: str) -> Optional[str]:
+ """
+ Opens a file dialog to select a JSON config file and imports it.
+
+ Args:
+ api_key: The API key under which to store the configuration.
+
+ Returns:
+ The ID of the imported configuration, or None if import failed or cancelled.
+ """
+        # Import tkinter lazily: it requires a display, and importing it at
+        # module level would break the SDK on headless systems.
+        import tkinter as tk
+        from tkinter import filedialog
+
+        # Open a file dialog for selecting the config JSON file
+        root = tk.Tk()
+        root.withdraw()  # Hide the empty root window behind the dialog
+        filepath = filedialog.askopenfilename(
+            title="Select configuration JSON file",
+            filetypes=[("JSON files", "*.json"), ("All files", "*.*")],
+        )
+        root.destroy()
+
+ if not filepath:
+ _print_colored("Import cancelled or no file selected.", "yellow")
+ return None
+
+ try:
+ with open(filepath, "r") as f:
+ config = json.load(f)
+
+ config_id = config.get("id") or str(uuid.uuid4())
+ config["id"] = config_id
+
+ self.add_config(api_key, config, config_id)
+ _print_colored(f"Configuration imported as {config_id}", "green")
+ return config_id
+
+ except Exception as e:
+ _print_colored(f"Failed to import configuration: {e}", "red")
+ return None
+
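+    # Expected shape of an imported JSON file (illustrative; any extra keys
+    # are stored verbatim):
+    #
+    #     {"config_id": "a1b2...", "type": "sql", "engine": "sqlite",
+    #      "database": "my_database.db"}
+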
+    def validate_config(self, api_key: str, config_id: str) -> int:
+        """
+        Validates a configuration by instantiating a temporary Corebrain client.
+
+        Returns:
+            0 if the configuration is valid, 1 otherwise.
+        """
+        if not api_key:
+            print_colored(
+                "Error: An API Key is required. Use --api-key or login via --login", "red"
+            )
+            return 1
+        try:
+            config = self.get_config(api_key, config_id)
+            if not config:
+                print_colored(f"Configuration with ID '{config_id}' not found", "red")
+                return 1
+            print_colored(f"✅ Validating configuration: {config_id}", "blue")
+            # Create a temporary Corebrain instance to validate the configuration
+ from corebrain.core.client import Corebrain
+
+ try:
+ temp_client = Corebrain(api_key=api_key, db_config=config, skip_verification=True)
+ print_colored("✅ Configuration validation passed!", "green")
+ print_colored(f"Database type: {config.get('type', 'Unknown')}", "blue")
+ print_colored(f"Engine: {config.get('engine', 'Unknown')}", "blue")
+ return 0
+ except Exception as validation_error:
+ print_colored(f"❌ Configuration validation failed: {str(validation_error)}", "red")
+ return 1
+ except Exception as e:
+ print_colored(f"❌ Error during validation: {str(e)}", "red")
+ return 1
+
+ def remove_config(self, api_key_selected: str, config_id: str) -> bool:
+ """
+ Deletes a configuration.
+
+ Args:
+ api_key_selected: Selected API Key
+ config_id: Configuration ID
+
+ Returns:
+ True if deleted successfully, False otherwise
+ """
+ if api_key_selected in self.configs and config_id in self.configs[api_key_selected]:
+ del self.configs[api_key_selected][config_id]
+
+ # If there are no configurations for this token, delete the entry
+ if not self.configs[api_key_selected]:
+ del self.configs[api_key_selected]
+
+ self._save_configs()
+ _print_colored(
+ f"Configuration {config_id} removed for API Key: {api_key_selected[:8]}...", "green"
+ )
+ return True
+
+ _print_colored(
+ f"Configuration {config_id} not found for API Key: {api_key_selected[:8]}...", "yellow"
+ )
+ return False
diff --git a/corebrain/core/__init__.py b/corebrain/core/__init__.py
new file mode 100644
index 0000000..d10b67f
--- /dev/null
+++ b/corebrain/core/__init__.py
@@ -0,0 +1,20 @@
+"""
+Corebrain SDK main components.
+
+This package contains the core components of the SDK,
+including the main client and schema handling.
+"""
+from corebrain.core.client import Corebrain, init
+from corebrain.core.query import QueryCache, QueryAnalyzer, QueryTemplate
+from corebrain.core.test_utils import test_natural_language_query, generate_test_question_from_schema
+
+# Explicit export of public components
+__all__ = [
+ 'Corebrain',
+ 'init',
+ 'QueryCache',
+ 'QueryAnalyzer',
+ 'QueryTemplate',
+ 'test_natural_language_query',
+ 'generate_test_question_from_schema'
+]
\ No newline at end of file
diff --git a/corebrain/core/client.py b/corebrain/core/client.py
new file mode 100644
index 0000000..c96e3a0
--- /dev/null
+++ b/corebrain/core/client.py
@@ -0,0 +1,1362 @@
+"""
+Corebrain SDK Main Client.
+
+This module provides the main interface to interact with the Corebrain API
+and enables natural language queries to relational and non-relational databases.
+"""
+import uuid
+import re
+import logging
+import requests
+import httpx
+import sqlite3
+import mysql.connector
+import psycopg2
+import pymongo
+import json
+from typing import Dict, Any, List, Optional
+from sqlalchemy import create_engine, inspect
+from pathlib import Path
+from datetime import datetime
+
+from corebrain.core.common import logger, CorebrainError
+
+class Corebrain:
+ """
+ Main client for the Corebrain SDK for natural language database queries.
+
+ This class provides a unified interface to interact with different types of databases
+ (SQL and NoSQL) using natural language. It manages the connection, schema extraction,
+ and query processing through the Corebrain API.
+
+ Attributes:
+ api_key (str): Authentication key for the Corebrain API.
+ db_config (Dict[str, Any]): Database connection configuration.
+ config_id (str): Unique identifier for the current configuration.
+ api_url (str): Base URL for the Corebrain API.
+ user_info (Dict[str, Any]): Information about the authenticated user.
+ db_connection: Active database connection.
+ db_schema (Dict[str, Any]): Extracted database schema.
+
+ Examples:
+ SQLite initialization:
+ ```python
+ from corebrain import init
+
+ # Connect to a SQLite database
+ client = init(
+ api_key="your_api_key",
+ db_config={
+ "type": "sql",
+ "engine": "sqlite",
+ "database": "my_database.db"
+ }
+ )
+
+ # Make a query
+ result = client.ask("How many registered users are there?")
+ print(result["explanation"])
+ ```
+
+ PostgreSQL initialization:
+ ```python
+ # Connect to PostgreSQL
+ client = init(
+ api_key="your_api_key",
+ db_config={
+ "type": "sql",
+ "engine": "postgresql",
+ "host": "localhost",
+ "port": 5432,
+ "user": "postgres",
+ "password": "your_password",
+ "database": "my_database"
+ }
+ )
+ ```
+
+ MongoDB initialization:
+ ```python
+ # Connect to MongoDB
+ client = init(
+ api_key="your_api_key",
+ db_config={
+ "type": "mongodb",
+ "host": "localhost",
+ "port": 27017,
+ "database": "my_database"
+ }
+ )
+ ```
+ """
+
+ def __init__(
+ self,
+ api_key: str,
+ db_config: Optional[Dict[str, Any]] = None,
+ config_id: Optional[str] = None,
+ user_data: Optional[Dict[str, Any]] = None,
+ api_url: str = "http://localhost:5000",
+ skip_verification: bool = False
+ ):
+ """
+ Initialize the Corebrain SDK client.
+
+ Args:
+ api_key (str): Required API key for authentication with the Corebrain service.
+ Can be generated from the dashboard at https://dashboard.corebrain.com.
+
+ db_config (Dict[str, Any], optional): Database configuration to query.
+ This parameter is required if config_id is not provided. Must contain at least:
+ - "type": Database type ("sql" or "mongodb")
+ - For SQL: "engine" ("sqlite", "postgresql", "mysql")
+ - Specific connection parameters depending on type and engine
+
+ Example for SQLite:
+ ```
+ {
+ "type": "sql",
+ "engine": "sqlite",
+ "database": "path/to/database.db"
+ }
+ ```
+
+ Example for PostgreSQL:
+ ```
+ {
+ "type": "sql",
+ "engine": "postgresql",
+ "host": "localhost",
+ "port": 5432,
+ "user": "postgres",
+ "password": "password",
+ "database": "db_name"
+ }
+ ```
+
+ config_id (str, optional): Identifier for a previously saved configuration.
+ If provided, this configuration will be used instead of db_config.
+ Useful for maintaining persistent configurations between sessions.
+
+ user_data (Dict[str, Any], optional): Additional user information for verification.
+ Can contain data like "email" for more precise token validation.
+
+ api_url (str, optional): Base URL for the Corebrain API.
+ Defaults to "http://localhost:5000" for local development.
+ In production, it is typically "https://api.corebrain.com".
+
+ skip_verification (bool, optional): If True, skips token verification with the server.
+ Useful in offline environments or for local testing.
+ Defaults to False.
+
+ Raises:
+ ValueError: If required parameters are missing or if the configuration is invalid.
+ CorebrainError: If there are issues with the API connection or database.
+
+ Example:
+ ```python
+ from corebrain import Corebrain
+
+ # Basic initialization with SQLite
+ client = Corebrain(
+ api_key="your_api_key",
+ db_config={
+ "type": "sql",
+ "engine": "sqlite",
+ "database": "my_db.db"
+ }
+ )
+ ```
+ """
+ self.api_key = api_key
+ self.user_data = user_data
+ self.api_url = api_url.rstrip('/')
+ self.db_connection = None
+ self.db_schema = None
+
+ # Import ConfigManager dynamically to avoid circular dependency
+ try:
+ from corebrain.config.manager import ConfigManager
+ self.config_manager = ConfigManager()
+ except ImportError as e:
+ logger.error(f"Error importing ConfigManager: {e}")
+ raise CorebrainError(f"Could not load configuration manager: {e}")
+
+ # Determine which configuration to use
+ if config_id:
+ saved_config = self.config_manager.get_config(api_key, config_id)
+ if not saved_config:
+ # Try to load from old format
+ old_config = self._load_old_config(api_key, config_id)
+ if old_config:
+ self.db_config = old_config
+ self.config_id = config_id
+ # Save in new format
+ self.config_manager.add_config(api_key, old_config, config_id)
+ else:
+ raise ValueError(f"Configuration with ID {config_id} not found for the provided key")
+ else:
+ self.db_config = saved_config
+ self.config_id = config_id
+ elif db_config:
+ self.db_config = db_config
+
+ # Generate config ID if it doesn't exist
+ if "config_id" in db_config:
+ self.config_id = db_config["config_id"]
+ else:
+ self.config_id = str(uuid.uuid4())
+ db_config["config_id"] = self.config_id
+
+ # Save the configuration
+ self.config_manager.add_config(api_key, db_config, self.config_id)
+ else:
+ raise ValueError("db_config or config_id must be provided")
+
+ # Validate configuration
+ self._validate_config()
+
+ # Verify the API token (only if necessary)
+ if not skip_verification:
+ self._verify_api_token()
+ else:
+ # Initialize user_info with basic information if not verifying
+ self.user_info = {"token": api_key}
+
+ # Connect to the database
+ self._connect_to_database()
+
+ # Extract database schema
+ self.db_schema = self._extract_db_schema()
+
+ self.metadata = {
+ "config_id": self.config_id,
+ "api_key": api_key,
+ "db_config": self.db_config
+ }
+
+ def _load_old_config(self, api_key: str, config_id: str) -> Optional[Dict[str, Any]]:
+ """
+ Try to load configuration from old format.
+
+ Args:
+ api_key: API key
+ config_id: Configuration ID
+
+ Returns:
+ Configuration dictionary if found, None otherwise
+ """
+ try:
+ # Try to load from old config file
+ old_config_path = Path.home() / ".corebrain" / "config.json"
+ if old_config_path.exists():
+ with open(old_config_path, 'r') as f:
+ old_configs = json.load(f)
+ if api_key in old_configs and config_id in old_configs[api_key]:
+ return old_configs[api_key][config_id]
+ except Exception as e:
+ logger.warning(f"Error loading old config: {e}")
+ return None
+
+ def _validate_config(self) -> None:
+ """
+ Validate the provided configuration.
+
+ This internal function verifies that the database configuration
+ contains all necessary fields according to the specified database type.
+
+ Raises:
+ ValueError: If the database configuration is invalid or incomplete.
+ """
+ if not self.api_key:
+ raise ValueError("API key is required. Generate one at dashboard.corebrain.com")
+
+ if not self.db_config:
+ raise ValueError("Database configuration is required")
+
+ if "type" not in self.db_config:
+ raise ValueError("Database type is required in db_config")
+
+ if "connection_string" not in self.db_config and self.db_config["type"] != "sqlite_memory":
+ if self.db_config["type"] == "sql":
+ if "engine" not in self.db_config:
+ raise ValueError("Database engine is required for 'sql' type")
+
+ # Verify alternative configuration for SQL engines
+ if self.db_config["engine"] == "mysql" or self.db_config["engine"] == "postgresql":
+ if not ("host" in self.db_config and "user" in self.db_config and
+ "password" in self.db_config and "database" in self.db_config):
+ raise ValueError("host, user, password, and database are required for MySQL/PostgreSQL")
+ elif self.db_config["engine"] == "sqlite":
+ if "database" not in self.db_config:
+ raise ValueError("database field is required for SQLite")
+ elif self.db_config["type"] == "mongodb":
+ if "database" not in self.db_config:
+ raise ValueError("database field is required for MongoDB")
+
+ if "connection_string" not in self.db_config:
+ if not ("host" in self.db_config and "port" in self.db_config):
+ raise ValueError("host and port are required for MongoDB without connection_string")
+
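+    # Minimal configurations accepted by the checks above (illustrative):
+    #
+    #     {"type": "sql", "engine": "sqlite", "database": "app.db"}
+    #     {"type": "sql", "engine": "postgresql", "host": "localhost",
+    #      "user": "postgres", "password": "...", "database": "app"}
+    #     {"type": "mongodb", "database": "app", "host": "localhost", "port": 27017}
+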
+ def _verify_api_token(self) -> None:
+ """
+ Verify the API token with the server.
+
+ This internal function sends a request to the Corebrain server
+ to validate that the provided API token is valid.
+ If the user provided additional information (like email),
+ it will be used for more precise verification.
+
+ The verification results are stored in self.user_info.
+
+ Raises:
+ ValueError: If the API token is invalid.
+ """
+ try:
+ # Use the user's email for verification if available
+ if self.user_data and 'email' in self.user_data:
+ endpoint = f"{self.api_url}/api/auth/users/{self.user_data['email']}"
+
+ response = httpx.get(
+ endpoint,
+ headers={"X-API-Key": self.api_key},
+ timeout=10.0
+ )
+
+ if response.status_code != 200:
+ raise ValueError(f"Invalid API token. Error code: {response.status_code}")
+
+ # Store user information
+ self.user_info = response.json()
+ else:
+ # If no email, do a simple verification with a generic endpoint
+ endpoint = f"{self.api_url}/api/auth/verify"
+
+ try:
+ response = httpx.get(
+ endpoint,
+ headers={"X-API-Key": self.api_key},
+ timeout=5.0
+ )
+
+ if response.status_code == 200:
+ self.user_info = response.json()
+ else:
+ # If it fails, just store basic information
+ self.user_info = {"token": self.api_key}
+ except Exception as e:
+ # If there's a connection error, don't fail, just store basic info
+ logger.warning(f"Could not verify token: {str(e)}")
+ self.user_info = {"token": self.api_key}
+
+ except httpx.RequestError as e:
+ # Connection error shouldn't be fatal if we already have a configuration
+ logger.warning(f"Error connecting to API: {str(e)}")
+ self.user_info = {"token": self.api_key}
+ except Exception as e:
+ # Other errors are logged but not fatal
+ logger.warning(f"Error in token verification: {str(e)}")
+ self.user_info = {"token": self.api_key}
+
+ def _connect_to_database(self) -> None:
+ """
+ Establish a connection to the database according to the configuration.
+
+ This internal function creates a database connection using the parameters
+ defined in self.db_config. It supports various database types:
+ - SQLite (file or in-memory)
+ - PostgreSQL
+ - MySQL
+ - MongoDB
+
+ The connection is stored in self.db_connection for later use.
+
+ Raises:
+ CorebrainError: If the connection to the database cannot be established.
+            ValueError: If the database type or engine is not supported.
+ """
+ db_type = self.db_config["type"].lower()
+
+ try:
+ if db_type == "sql":
+ engine = self.db_config.get("engine", "").lower()
+
+ if engine == "sqlite":
+ database = self.db_config.get("database", "")
+ if database:
+ self.db_connection = sqlite3.connect(database)
+ else:
+ self.db_connection = sqlite3.connect(self.db_config.get("connection_string", ""))
+
+ elif engine == "mysql":
+ if "connection_string" in self.db_config:
+ self.db_connection = mysql.connector.connect(
+ connection_string=self.db_config["connection_string"]
+ )
+ else:
+ self.db_connection = mysql.connector.connect(
+ host=self.db_config.get("host", "localhost"),
+ user=self.db_config.get("user", ""),
+ password=self.db_config.get("password", ""),
+ database=self.db_config.get("database", ""),
+ port=self.db_config.get("port", 3306)
+ )
+
+ elif engine == "postgresql":
+ if "connection_string" in self.db_config:
+ self.db_connection = psycopg2.connect(self.db_config["connection_string"])
+ else:
+ self.db_connection = psycopg2.connect(
+ host=self.db_config.get("host", "localhost"),
+ user=self.db_config.get("user", ""),
+ password=self.db_config.get("password", ""),
+ dbname=self.db_config.get("database", ""),
+ port=self.db_config.get("port", 5432)
+ )
+
+ else:
+ # Use SQLAlchemy for other engines
+ self.db_connection = create_engine(self.db_config["connection_string"])
+
+            # MongoDB connection handling
+ elif db_type == "nosql" or db_type == "mongodb":
+ # If engine is mongodb or the type is directly mongodb
+ engine = self.db_config.get("engine", "").lower()
+ if not engine or engine == "mongodb":
+ # Create connection parameters
+ mongo_params = {}
+
+ if "connection_string" in self.db_config:
+ # Save the MongoDB client to be able to close it correctly later
+ self.mongo_client = pymongo.MongoClient(self.db_config["connection_string"])
+ else:
+ # Configure host and port
+ mongo_params["host"] = self.db_config.get("host", "localhost")
+ if "port" in self.db_config:
+ mongo_params["port"] = self.db_config.get("port")
+
+ # Add credentials if available
+ if "user" in self.db_config and self.db_config["user"]:
+ mongo_params["username"] = self.db_config["user"]
+ if "password" in self.db_config and self.db_config["password"]:
+ mongo_params["password"] = self.db_config["password"]
+
+ # Create MongoDB client
+ self.mongo_client = pymongo.MongoClient(**mongo_params)
+
+ # Get the database
+ db_name = self.db_config.get("database", "")
+ if db_name:
+ # Save reference to the database
+ self.db_connection = self.mongo_client[db_name]
+ else:
+ # If there's no database name, use 'admin' as fallback
+ logger.warning("Database name not specified for MongoDB, using 'admin'")
+ self.db_connection = self.mongo_client["admin"]
+ else:
+ raise ValueError(f"Unsupported NoSQL database engine: {engine}")
+
+ elif db_type == "sqlite_memory":
+ self.db_connection = sqlite3.connect(":memory:")
+
+ else:
+ raise ValueError(f"Unsupported database type: {db_type}. Valid types: 'sql', 'nosql', 'mongodb'")
+
+ except Exception as e:
+ logger.error(f"Error connecting to database: {str(e)}")
+            raise CorebrainError(f"Error connecting to database: {str(e)}")
+
+    def _extract_db_schema(self, detail_level: str = "full", specific_collections: Optional[List[str]] = None) -> Dict[str, Any]:
+ """
+ Extracts the database schema to provide context to the AI.
+
+ Returns:
+ Dictionary with the database structure organized by tables/collections
+ """
+ logger.info(f"Extracting database schema. Type: {self.db_config['type']}, Engine: {self.db_config.get('engine')}")
+
+ db_type = self.db_config["type"].lower()
+ schema = {
+ "type": db_type,
+ "database": self.db_config.get("database", ""),
+ "tables": {},
+ "total_collections": 0, # Add total counter
+ "included_collections": 0 # Counter for included ones
+ }
+ excluded_tables = set(self.db_config.get("excluded_tables", []))
+ logger.info(f"Excluded tables: {excluded_tables}")
+
+ try:
+ if db_type == "sql":
+ engine = self.db_config.get("engine", "").lower()
+ logger.info(f"Processing SQL database with engine: {engine}")
+
+ if engine in ["sqlite", "mysql", "postgresql"]:
+ cursor = self.db_connection.cursor()
+
+ if engine == "sqlite":
+ logger.info("Getting SQLite tables")
+ # Get table listing
+ cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+ tables = cursor.fetchall()
+ logger.info(f"Tables found in SQLite: {tables}")
+
+ elif engine == "mysql":
+ logger.info("Getting MySQL tables")
+ cursor.execute("SHOW TABLES;")
+ tables = cursor.fetchall()
+ logger.info(f"Tables found in MySQL: {tables}")
+
+ elif engine == "postgresql":
+ logger.info("Getting PostgreSQL tables")
+ cursor.execute("""
+ SELECT table_name FROM information_schema.tables
+ WHERE table_schema = 'public';
+ """)
+ tables = cursor.fetchall()
+ logger.info(f"Tables found in PostgreSQL: {tables}")
+
+ # Process the found tables
+ for table in tables:
+ table_name = table[0]
+ logger.info(f"Processing table: {table_name}")
+
+ # Skip excluded tables
+ if table_name in excluded_tables:
+ logger.info(f"Skipping excluded table: {table_name}")
+ continue
+
+ try:
+ # Get column information according to engine
+ if engine == "sqlite":
+ cursor.execute(f"PRAGMA table_info({table_name});")
+ elif engine == "mysql":
+ cursor.execute(f"DESCRIBE {table_name};")
+ elif engine == "postgresql":
+ cursor.execute(f"""
+ SELECT column_name, data_type
+ FROM information_schema.columns
+ WHERE table_name = '{table_name}';
+ """)
+
+ columns = cursor.fetchall()
+ logger.info(f"Columns found for {table_name}: {columns}")
+
+ # Column structure according to engine
+ if engine == "sqlite":
+ column_info = [{"name": col[1], "type": col[2]} for col in columns]
+ elif engine == "mysql":
+ column_info = [{"name": col[0], "type": col[1]} for col in columns]
+ elif engine == "postgresql":
+ column_info = [{"name": col[0], "type": col[1]} for col in columns]
+
+ # Save table information
+ schema["tables"][table_name] = {
+ "columns": column_info,
+ "sample_data": [] # We don't get sample data by default
+ }
+
+ except Exception as e:
+ logger.error(f"Error processing table {table_name}: {str(e)}")
+
+ else:
+ # Using SQLAlchemy
+ logger.info("Using SQLAlchemy to get schema")
+ inspector = inspect(self.db_connection)
+ table_names = inspector.get_table_names()
+ logger.info(f"Tables found with SQLAlchemy: {table_names}")
+
+ for table_name in table_names:
+ if table_name in excluded_tables:
+ logger.info(f"Skipping excluded table: {table_name}")
+ continue
+
+ try:
+ columns = inspector.get_columns(table_name)
+ column_info = [{"name": col["name"], "type": str(col["type"])} for col in columns]
+
+ schema["tables"][table_name] = {
+ "columns": column_info,
+ "sample_data": []
+ }
+ except Exception as e:
+ logger.error(f"Error processing table {table_name} with SQLAlchemy: {str(e)}")
+
+ elif db_type in ["nosql", "mongodb"]:
+ logger.info("Processing MongoDB database")
+ if not hasattr(self, 'db_connection') or self.db_connection is None:
+ logger.error("MongoDB connection is not available")
+ return schema
+
+ try:
+ collection_names = []
+ try:
+ collection_names = self.db_connection.list_collection_names()
+ schema["total_collections"] = len(collection_names)
+ logger.info(f"Collections found in MongoDB: {collection_names}")
+ except Exception as e:
+ logger.error(f"Error getting MongoDB collections: {str(e)}")
+ return schema
+
+ # If we only want the names
+ if detail_level == "names_only":
+ schema["collection_names"] = collection_names
+ return schema
+
+ # Process each collection
+ for collection_name in collection_names:
+ if collection_name in excluded_tables:
+ logger.info(f"Skipping excluded collection: {collection_name}")
+ continue
+
+ try:
+ collection = self.db_connection[collection_name]
+ # Get a document to infer structure
+ first_doc = collection.find_one()
+
+ if first_doc:
+ fields = []
+ for field, value in first_doc.items():
+ if field != "_id":
+ field_type = type(value).__name__
+ fields.append({"name": field, "type": field_type})
+
+ schema["tables"][collection_name] = {
+ "fields": fields,
+ "doc_count": collection.estimated_document_count()
+ }
+ logger.info(f"Processed collection {collection_name} with {len(fields)} fields")
+ else:
+ logger.info(f"Collection {collection_name} is empty")
+ schema["tables"][collection_name] = {
+ "fields": [],
+ "doc_count": 0
+ }
+ except Exception as e:
+ logger.error(f"Error processing collection {collection_name}: {str(e)}")
+
+ except Exception as e:
+ logger.error(f"General error processing MongoDB: {str(e)}")
+
+ # Convert the table dictionary to a list
+ table_list = []
+ for table_name, table_info in schema["tables"].items():
+ table_data = {"name": table_name}
+ table_data.update(table_info)
+ table_list.append(table_data)
+
+ schema["tables_list"] = table_list
+ logger.info(f"Final schema - Tables found: {len(schema['tables'])}")
+ logger.info(f"Table names: {list(schema['tables'].keys())}")
+
+ return schema
+
+ except Exception as e:
+ logger.error(f"Error extracting database schema: {str(e)}")
+ return {"type": db_type, "tables": {}, "tables_list": []}
+
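+    # Shape of the schema returned above (illustrative, SQL case):
+    #
+    #     {
+    #         "type": "sql",
+    #         "database": "app.db",
+    #         "tables": {"users": {"columns": [{"name": "id", "type": "INTEGER"}],
+    #                              "sample_data": []}},
+    #         "tables_list": [{"name": "users", "columns": [...], "sample_data": []}],
+    #         "total_collections": 0,
+    #         "included_collections": 0
+    #     }
+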
+ def list_collections_name(self) -> List[str]:
+ """
+ Returns a list of the available collections or tables in the database.
+
+ Returns:
+            List of collection or table names
+        """
+        logger.debug(f"Excluded tables: {self.db_config.get('excluded_tables', [])}")
+        return list(self.db_schema.get("tables", {}).keys())
+
+ def ask(self, question: str, **kwargs) -> Dict:
+ """
+ Perform a natural language query to the database.
+
+ Args:
+ question: The natural language question
+ **kwargs: Additional parameters:
+ - collection_name: For MongoDB, the collection to query
+ - limit: Maximum number of results
+ - detail_level: Schema detail level ("names_only", "structure", "full")
+ - auto_select: Whether to automatically select collections
+ - max_collections: Maximum number of collections to include
+ - execute_query: Whether to execute the query (True by default)
+ - explain_results: Whether to generate an explanation of results (True by default)
+
+ Returns:
+ Dictionary with the query results and explanation
+ """
+ try:
+ # Check behavior options
+ execute_query = kwargs.get("execute_query", True)
+ explain_results = kwargs.get("explain_results", True)
+
+ # Obtain an outline with the appropriate level of detail
+ detail_level = kwargs.get("detail_level", "full")
+ schema = self._extract_db_schema(detail_level=detail_level)
+
+ # Validate that the schema has tables/collections
+ if not schema.get("tables"):
+ print("Error: No tables/collections found in the database")
+ return {"error": True, "explanation": "No tables/collections found in the database"}
+
+ # Get table names available for validation
+ available_tables = set()
+ if isinstance(schema.get("tables"), dict):
+ available_tables.update(schema["tables"].keys())
+ elif isinstance(schema.get("tables_list"), list):
+ available_tables.update(table["name"] for table in schema["tables_list"])
+
+ # Prepare application data with enhanced schema information
+ request_data = {
+ "question": question,
+ "db_schema": schema,
+ "config_id": self.config_id,
+ "metadata": {
+ "type": self.db_config["type"].lower(),
+ "engine": self.db_config.get("engine", "").lower(),
+ "database": self.db_config.get("database", ""),
+ "available_tables": list(available_tables),
+ "collections": list(available_tables)
+ }
+ }
+
+ # Add database configuration to the request
+ # This allows the API to directly execute queries if needed.
+ if execute_query:
+ request_data["db_config"] = self.db_config
+
+ # Add user data if available
+ if self.user_data:
+ request_data["user_data"] = self.user_data
+
+ # Prepare headers for the request
+ headers = {
+ "X-API-Key": self.api_key,
+ "Content-Type": "application/json"
+ }
+
+ # Determine the appropriate endpoint based on the execution mode
+ if execute_query:
+ # Use the full execution endpoint
+ endpoint = f"{self.api_url}/api/database/sdk/query"
+ else:
+ # Use the query-only generation endpoint
+ endpoint = f"{self.api_url}/api/database/generate"
+
+ # Make a request to the API
+ response = httpx.post(
+ endpoint,
+ headers=headers,
+ content=json.dumps(request_data, default=str),
+ timeout=60.0
+ )
+
+ # Check answer
+ if response.status_code != 200:
+ error_msg = f"Error {response.status_code} while performing query"
+ try:
+ error_data = response.json()
+ if isinstance(error_data, dict):
+ error_msg += f": {error_data.get('detail', error_data.get('message', response.text))}"
+                except Exception:
+ error_msg += f": {response.text}"
+ return {"error": True, "explanation": error_msg}
+
+ # Process API response
+ api_response = response.json()
+
+ # Check if the API reported an error
+ if api_response.get("error", False):
+ return api_response
+
+ # Check if a valid query was generated
+ if "query" not in api_response:
+ return {
+ "error": True,
+ "explanation": "The API did not generate a valid query."
+ }
+
+ # If the query should be executed but the API did not
+ # (this would only occur in the case of configuration changes or fallbacks)
+ if execute_query and "result" not in api_response:
+ try:
+ # Prepare the query for local execution
+ query_type = self.db_config.get("engine", "").lower() if self.db_config["type"].lower() == "sql" else self.db_config["type"].lower()
+ query_value = api_response["query"]
+
+ # For SQL, make sure the query is a string
+ if query_type in ["sqlite", "mysql", "postgresql"]:
+ if isinstance(query_value, dict):
+ sql_candidate = query_value.get("sql") or query_value.get("query")
+ if isinstance(sql_candidate, str):
+ query_value = sql_candidate
+ else:
+ raise CorebrainError(f"The generated SQL query is not a string: {query_value}")
+
+ # Prepare the consultation with the appropriate format
+ query_to_execute = {
+ "type": query_type,
+ "query": query_value
+ }
+
+ # For MongoDB, add specific information
+ if query_type in ["nosql", "mongodb"]:
+ # Get collection name
+ collection_name = None
+ if isinstance(api_response["query"], dict):
+ collection_name = api_response["query"].get("collection")
+ if not collection_name and "collection_name" in kwargs:
+ collection_name = kwargs["collection_name"]
+ if not collection_name and "collection" in self.db_config:
+ collection_name = self.db_config["collection"]
+ if not collection_name and available_tables:
+ collection_name = list(available_tables)[0]
+
+ # Validate collection name
+ if not collection_name:
+ raise CorebrainError("No collection specified and no collections found in schema")
+ if not isinstance(collection_name, str) or not collection_name.strip():
+ raise CorebrainError("Invalid collection name: must be a non-empty string")
+
+ # Add collection to query
+ query_to_execute["collection"] = collection_name
+
+ # Add operation type
+ if isinstance(api_response["query"], dict):
+ query_to_execute["operation"] = api_response["query"].get("operation", "find")
+
+ # Add limit if specified
+ if "limit" in kwargs:
+ query_to_execute["limit"] = kwargs["limit"]
+
+ # Run the query
+ start_time = datetime.now()
+ query_result = self._execute_query(query_to_execute)
+ query_time_ms = int((datetime.now() - start_time).total_seconds() * 1000)
+
+ # Update the response with the results
+ api_response["result"] = {
+ "data": query_result,
+ "count": len(query_result) if isinstance(query_result, list) else 1,
+ "query_time_ms": query_time_ms,
+ "has_more": False
+ }
+
+ # If explanation should be generated but API didn't do it
+ if explain_results and (
+ "explanation" not in api_response or
+ not isinstance(api_response.get("explanation"), str) or
+ len(str(api_response.get("explanation", ""))) < 15 # Detect numerical or very short explanations
+ ):
+ # Prepare data for explanation
+ explanation_data = {
+ "question": question,
+ "query": api_response["query"],
+ "result": query_result,
+ "query_time_ms": query_time_ms,
+ "config_id": self.config_id,
+ "metadata": {
+ "collections_used": [query_to_execute.get("collection")] if query_to_execute.get("collection") else [],
+ "execution_time_ms": query_time_ms,
+ "available_tables": list(available_tables)
+ }
+ }
+
+ try:
+ # Get API explanation
+ explanation_response = httpx.post(
+ f"{self.api_url}/api/database/sdk/query/explain",
+ headers=headers,
+ content=json.dumps(explanation_data, default=str),
+ timeout=30.0
+ )
+
+ if explanation_response.status_code == 200:
+ explanation_result = explanation_response.json()
+ api_response["explanation"] = explanation_result.get("explanation", "Could not generate an explanation.")
+ else:
+ api_response["explanation"] = self._generate_fallback_explanation(query_to_execute, query_result)
+ except Exception as explain_error:
+ logger.error(f"Error getting explanation: {str(explain_error)}")
+ api_response["explanation"] = self._generate_fallback_explanation(query_to_execute, query_result)
+
+ except Exception as e:
+ error_msg = f"Error executing query: {str(e)}"
+ logger.error(error_msg)
+ return {
+ "error": True,
+ "explanation": error_msg,
+ "query": api_response.get("query", {}),
+ "metadata": {
+ "available_tables": list(available_tables)
+ }
+ }
+
+ # Check if the explanation is a number (probably the runtime) and correct it
+ if "explanation" in api_response and not isinstance(api_response["explanation"], str):
+ # If the explanation is a number, replace it with a generated explanation
+ try:
+ is_sql = False
+ if "query" in api_response:
+ if isinstance(api_response["query"], dict) and "sql" in api_response["query"]:
+ is_sql = True
+
+ if "result" in api_response:
+ result_data = api_response["result"]
+ if isinstance(result_data, dict) and "data" in result_data:
+ result_data = result_data["data"]
+
+ if is_sql:
+ sql_query = api_response["query"].get("sql", "")
+ api_response["explanation"] = self._generate_sql_explanation(sql_query, result_data)
+ else:
+ # For MongoDB or generic
+ api_response["explanation"] = self._generate_generic_explanation(api_response["query"], result_data)
+ else:
+ api_response["explanation"] = "The query executed successfully."
+ except Exception as exp_fix_error:
+ logger.error(f"Error correcting explanation: {str(exp_fix_error)}")
+ api_response["explanation"] = "The query executed successfully."
+
+ # Prepare the final response
+ result = {
+ "question": question,
+ "query": api_response["query"],
+ "config_id": self.config_id,
+ "metadata": {
+ "available_tables": list(available_tables)
+ }
+ }
+
+ # Add results if available
+ if "result" in api_response:
+ if isinstance(api_response["result"], dict) and "data" in api_response["result"]:
+ result["result"] = api_response["result"]
+ else:
+ result["result"] = {
+ "data": api_response["result"],
+ "count": len(api_response["result"]) if isinstance(api_response["result"], list) else 1,
+ "query_time_ms": api_response.get("query_time_ms", 0),
+ "has_more": False
+ }
+
+ # Add explanation if available
+ if "explanation" in api_response:
+ result["explanation"] = api_response["explanation"]
+
+ return result
+
+ except httpx.TimeoutException:
+ return {"error": True, "explanation": "Timeout waiting to connect to server."}
+
+ except httpx.RequestError as e:
+ return {"error": True, "explanation": f"Connection error with server: {str(e)}"}
+
+ except Exception as e:
+ import traceback
+ error_details = traceback.format_exc()
+ logger.error(f"Unexpected error in ask(): {error_details}")
+ return {"error": True, "explanation": f"Unexpected error: {str(e)}"}
+
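+    # Typical call and response shape (a sketch; the question is an example):
+    #
+    #     result = client.ask("How many users signed up this week?", limit=10)
+    #     result["query"]            # generated SQL string or MongoDB query dict
+    #     result["result"]["data"]   # rows/documents when execute_query=True
+    #     result["explanation"]      # natural-language summary of the results
+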
+ def _generate_fallback_explanation(self, query, results):
+ """
+ Generates a fallback explanation when the explanation generation fails.
+
+ Args:
+ query: The executed query
+ results: The obtained results
+
+ Returns:
+ Generated explanation
+ """
+ # Determine if it is SQL or MongoDB
+ if isinstance(query, dict):
+ query_type = query.get("type", "").lower()
+
+ if query_type in ["sqlite", "mysql", "postgresql"]:
+ return self._generate_sql_explanation(query.get("query", ""), results)
+ elif query_type in ["nosql", "mongodb"]:
+ return self._generate_mongodb_explanation(query, results)
+
+ # Generic Fallback
+ result_count = len(results) if isinstance(results, list) else (1 if results else 0)
+ return f"The query returned {result_count} results."
+
+ def _generate_sql_explanation(self, sql_query, results):
+ """
+ Generates a simple explanation for SQL queries.
+
+ Args:
+ sql_query: The executed SQL query
+ results: The obtained results
+
+ Returns:
+ Generated explanation
+ """
+ sql_lower = sql_query.lower() if isinstance(sql_query, str) else ""
+ result_count = len(results) if isinstance(results, list) else (1 if results else 0)
+
+ # Extract table names if possible
+ tables = []
+ from_match = re.search(r'from\s+([a-zA-Z0-9_]+)', sql_lower)
+ if from_match:
+ tables.append(from_match.group(1))
+
+ join_matches = re.findall(r'join\s+([a-zA-Z0-9_]+)', sql_lower)
+ if join_matches:
+ tables.extend(join_matches)
+
+ # Detect query type
+ if "select" in sql_lower:
+ if "join" in sql_lower:
+ if len(tables) > 1:
+ if "where" in sql_lower:
+ return f"Found {result_count} records that meet the specified criteria, relating information from tables {', '.join(tables)}."
+ else:
+ return f"Found {result_count} records relating information from tables {', '.join(tables)}."
+ else:
+ return f"Found {result_count} records relating data between tables."
+
+ elif "where" in sql_lower:
+ return f"Found {result_count} records that meet the search criteria."
+
+ else:
+ return f"The query returned {result_count} records from the database."
+
+ # For other types of queries (INSERT, UPDATE, DELETE)
+ if "insert" in sql_lower:
+ return "Data inserted successfully into the database."
+ elif "update" in sql_lower:
+ return "Data updated successfully in the database."
+ elif "delete" in sql_lower:
+ return "Data deleted successfully from the database."
+
+ # Generic fallback
+ return f"The SQL query executed successfully and returned {result_count} results."
+
+
+ def _generate_mongodb_explanation(self, query, results):
+ """
+ Generates a simple explanation for MongoDB queries.
+
+ Args:
+ query: The executed MongoDB query
+ results: The obtained results
+
+ Returns:
+ Generated explanation
+ """
+ collection = query.get("collection", "the collection")
+ operation = query.get("operation", "find")
+ result_count = len(results) if isinstance(results, list) else (1 if results else 0)
+
+ # Generate explanation according to the operation
+ if operation == "find":
+ return f"Found {result_count} documents in the {collection} that meet the search criteria."
+ elif operation == "findOne":
+ if result_count > 0:
+ return f"Found the requested document in the {collection}."
+ else:
+ return f"No documents found in the {collection} that meet the criteria."
+ elif operation == "aggregate":
+ return f"The aggregation in the {collection} returned {result_count} results."
+ elif operation == "insertOne":
+ return f"A new document inserted successfully into the {collection}."
+ elif operation == "updateOne":
+ return f"A document updated successfully in the {collection}."
+ elif operation == "deleteOne":
+ return f"A document deleted successfully from the {collection}."
+
+ # Fallback genérico
+ return f"The {operation} operation executed successfully in the {collection} and returned {result_count} results."
+
+
+ def _generate_generic_explanation(self, query, results):
+ """
+ Generates a generic explanation when the query type cannot be determined.
+
+ Args:
+ query: The executed query
+ results: The obtained results
+
+ Returns:
+ Generated explanation
+ """
+ result_count = len(results) if isinstance(results, list) else (1 if results else 0)
+
+ if result_count == 0:
+ return "The query returned no results."
+ elif result_count == 1:
+ return "The query returned 1 result."
+ else:
+ return f"The query returned {result_count} results."
+
+
+ def close(self) -> None:
+ """
+ Close the database connection and release resources.
+
+ This method should be called when the client is no longer needed to
+ ensure proper cleanup of resources.
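+
+ Example (illustrative):
+ >>> client = init(api_key="your_api_key", db_config={"type": "sql", "engine": "sqlite", "database": "example.db"})
+ >>> try:
+ ... result = client.ask("How many users are there?")
+ ... finally:
+ ... client.close()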
+ """
+ if self.db_connection:
+ db_type = self.db_config["type"].lower()
+
+ try:
+ if db_type == "sql":
+ engine = self.db_config.get("engine", "").lower()
+ if engine in ["sqlite", "mysql", "postgresql"]:
+ self.db_connection.close()
+ else:
+ # SQLAlchemy engine
+ self.db_connection.dispose()
+
+ elif db_type == "nosql" or db_type == "mongodb":
+ # For MongoDB, we close the client
+ if hasattr(self, 'mongo_client') and self.mongo_client:
+ self.mongo_client.close()
+
+ elif db_type == "sqlite_memory":
+ self.db_connection.close()
+
+ except Exception as e:
+ logger.warning(f"Error closing database connection: {str(e)}")
+
+ self.db_connection = None
+ logger.info("Database connection closed")
+
+ def _execute_query(self, query: Dict[str, Any]) -> List[Dict[str, Any]]:
+ """
+ Execute a query based on its type.
+
+ Args:
+ query: Dictionary containing query information
+
+ Returns:
+ List of dictionaries containing query results
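+
+ The query dict is produced by the API; illustrative shapes:
+ SQL: {"type": "postgresql", "query": "SELECT * FROM users LIMIT 10"}
+ MongoDB: {"type": "mongodb", "collection": "users",
+ "operation": "find", "query": {}, "limit": 10}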
+ """
+ query_type = query.get("type", "").lower()
+
+ if query_type in ["sqlite", "mysql", "postgresql"]:
+ return self._execute_sql_query(query)
+ elif query_type in ["nosql", "mongodb"]:
+ return self._execute_mongodb_query(query)
+ else:
+ raise CorebrainError(f"Unsupported query type: {query_type}")
+
+ def _execute_sql_query(self, query: Dict[str, Any]) -> List[Dict[str, Any]]:
+ """
+ Execute a SQL query.
+
+ Args:
+ query: Dictionary containing SQL query information
+
+ Returns:
+ List of dictionaries containing query results
+ """
+ query_type = query.get("type", "").lower()
+
+ if query_type in ["sqlite", "mysql", "postgresql"]:
+ sql_query = query.get("query", "")
+ if not sql_query:
+ raise CorebrainError("No SQL query provided")
+
+ engine = self.db_config.get("engine", "").lower()
+
+ if engine == "sqlite":
+ return self._execute_sqlite_query(sql_query)
+ elif engine == "mysql":
+ return self._execute_mysql_query(sql_query)
+ elif engine == "postgresql":
+ return self._execute_postgresql_query(sql_query)
+ else:
+ raise CorebrainError(f"Unsupported SQL engine: {engine}")
+
+ else:
+ raise CorebrainError(f"Unsupported SQL query type: {query_type}")
+
+ def _execute_sqlite_query(self, sql_query: str) -> List[Dict[str, Any]]:
+ """
+ Execute a SQLite query.
+
+ Args:
+ sql_query (str): SQL query to execute
+
+ Returns:
+ List[Dict[str, Any]]: List of results as dictionaries
+ """
+ cursor = self.db_connection.cursor()
+ cursor.execute(sql_query)
+
+ # Get column names
+ columns = [description[0] for description in cursor.description]
+
+ # Convert results to list of dictionaries
+ results = []
+ for row in cursor.fetchall():
+ result = {}
+ for i, value in enumerate(row):
+ # Convert datetime objects to strings
+ if hasattr(value, 'isoformat'):
+ result[columns[i]] = value.isoformat()
+ else:
+ result[columns[i]] = value
+ results.append(result)
+
+ return results
+
+ def _execute_mysql_query(self, sql_query: str) -> List[Dict[str, Any]]:
+ """
+ Execute a MySQL query.
+
+ Args:
+ sql_query (str): SQL query to execute
+
+ Returns:
+ List[Dict[str, Any]]: List of results as dictionaries
+ """
+ cursor = self.db_connection.cursor(dictionary=True)
+ cursor.execute(sql_query)
+
+ # Convert results to list of dictionaries
+ results = []
+ for row in cursor.fetchall():
+ result = {}
+ for key, value in row.items():
+ # Convert datetime objects to strings
+ if hasattr(value, 'isoformat'):
+ result[key] = value.isoformat()
+ else:
+ result[key] = value
+ results.append(result)
+
+ return results
+
+ def _execute_postgresql_query(self, sql_query: str) -> List[Dict[str, Any]]:
+ """
+ Execute a PostgreSQL query.
+
+ Args:
+ sql_query (str): SQL query to execute
+
+ Returns:
+ List[Dict[str, Any]]: List of results as dictionaries
+ """
+ cursor = self.db_connection.cursor()
+ cursor.execute(sql_query)
+
+ # Get column names
+ columns = [description[0] for description in cursor.description]
+
+ # Convert results to list of dictionaries
+ results = []
+ for row in cursor.fetchall():
+ result = {}
+ for i, value in enumerate(row):
+ # Convert datetime objects to strings
+ if hasattr(value, 'isoformat'):
+ result[columns[i]] = value.isoformat()
+ else:
+ result[columns[i]] = value
+ results.append(result)
+
+ return results
+
+ def _execute_mongodb_query(self, query: Dict[str, Any]) -> List[Dict[str, Any]]:
+ """
+ Execute a MongoDB query.
+
+ Args:
+ query: Dictionary containing MongoDB query information
+
+ Returns:
+ List of dictionaries containing query results
+ """
+ try:
+ # Get collection name from query or use default
+ collection_name = query.get("collection")
+ if not collection_name:
+ raise CorebrainError("No collection specified for MongoDB query")
+
+ # Get MongoDB collection
+ collection = self.mongo_client[self.db_config.get("database", "")][collection_name]
+
+ # Execute query based on operation type
+ operation = query.get("operation", "find")
+
+ if operation == "find":
+ # Handle find operation
+ cursor = collection.find(
+ query.get("query", {}),
+ projection=query.get("projection"),
+ sort=query.get("sort"),
+ limit=query.get("limit", 10),
+ skip=query.get("skip", 0)
+ )
+ results = list(cursor)
+
+ elif operation == "aggregate":
+ # Handle aggregate operation
+ pipeline = query.get("pipeline", [])
+ cursor = collection.aggregate(pipeline)
+ results = list(cursor)
+
+ else:
+ raise CorebrainError(f"Unsupported MongoDB operation: {operation}")
+
+ # Convert results to dictionaries and handle datetime serialization
+ serialized_results = []
+ for doc in results:
+ # Convert ObjectId to string
+ if "_id" in doc:
+ doc["_id"] = str(doc["_id"])
+
+ # Handle datetime objects
+ for key, value in doc.items():
+ if hasattr(value, 'isoformat'):
+ doc[key] = value.isoformat()
+
+ serialized_results.append(doc)
+
+ return serialized_results
+
+ except Exception as e:
+ raise CorebrainError(f"Error executing MongoDB query: {str(e)}")
+
+def init(
+ api_key: str = None,
+ db_config: Dict = None,
+ config_id: str = None,
+ user_data: Dict = None,
+ api_url: str = None,
+ skip_verification: bool = False
+) -> Corebrain:
+ """
+ Initialize and return a Corebrain client instance.
+
+ This function creates a new Corebrain SDK client with the provided configuration.
+ It's a convenient factory function that wraps the Corebrain class initialization.
+
+ Args:
+ api_key (str, optional): Corebrain API key. If not provided, it will attempt
+ to read from the COREBRAIN_API_KEY environment variable.
+ db_config (Dict, optional): Database configuration dictionary. If not provided,
+ it will attempt to read from the COREBRAIN_DB_CONFIG environment variable
+ (expected in JSON format).
+ config_id (str, optional): Configuration ID for saving/loading configurations.
+ user_data (Dict, optional): Optional user data for personalization.
+ api_url (str, optional): Corebrain API URL. Defaults to the production API.
+ skip_verification (bool, optional): Skip API token verification. Default False.
+
+ Returns:
+ Corebrain: An initialized Corebrain client instance.
+
+ Example:
+ >>> client = init(api_key="your_api_key", db_config={"type": "sql", "engine": "sqlite", "database": "example.db"})
+ """
+ return Corebrain(
+ api_key=api_key,
+ db_config=db_config,
+ config_id=config_id,
+ user_data=user_data,
+ api_url=api_url,
+ skip_verification=skip_verification
+ )
\ No newline at end of file
diff --git a/corebrain/core/common.py b/corebrain/core/common.py
new file mode 100644
index 0000000..3d75c8e
--- /dev/null
+++ b/corebrain/core/common.py
@@ -0,0 +1,225 @@
+"""
+Core functionalities shared across the Corebrain SDK.
+
+This module contains common elements used throughout the SDK, including:
+- Logging system configuration
+- Common type definitions and aliases
+- Custom exceptions for better error handling
+- Component registry system for dependency management
+
+These elements provide a common foundation for implementing
+the rest of the SDK modules, ensuring consistency and facilitating
+maintenance.
+"""
+import logging
+from typing import Dict, Any, Optional, List, Callable, TypeVar, Union
+
+# Global logging configuration
+logger = logging.getLogger("corebrain")
+logger.addHandler(logging.NullHandler())
+
+# Type aliases to improve readability and maintenance
+ConfigDict = Dict[str, Any]
+"""
+Type representing a configuration as a key-value dictionary.
+
+Example:
+```python
+config: ConfigDict = {
+ "type": "sql",
+ "engine": "postgresql",
+ "host": "localhost",
+ "port": 5432,
+ "user": "postgres",
+ "password": "password",
+ "database": "mydatabase"
+}
+```
+"""
+
+SchemaDict = Dict[str, Any]
+"""
+Type representing a database schema as a dictionary.
+
+Example:
+```python
+schema: SchemaDict = {
+ "tables": [
+ {
+ "name": "users",
+ "columns": [
+ {"name": "id", "type": "INTEGER", "primary_key": True},
+ {"name": "name", "type": "TEXT"},
+ {"name": "email", "type": "TEXT"}
+ ]
+ }
+ ]
+}
+```
+"""
+
+# Generic component for typing
+T = TypeVar('T')
+
+# SDK exceptions
+class CorebrainError(Exception):
+ """
+ Base exception for all Corebrain SDK errors.
+
+ All other specific exceptions inherit from this class,
+ allowing you to catch any SDK error with a single
+ except block.
+
+ Example:
+ ```python
+ try:
+ result = client.ask("How many users are there?")
+ except CorebrainError as e:
+ print(f"Corebrain error: {e}")
+ ```
+ """
+ pass
+
+class ConfigError(CorebrainError):
+ """
+ Error related to SDK configuration.
+
+ Raised when there are issues with the provided configuration,
+ such as invalid credentials, missing parameters, or incorrect formats.
+
+ Example:
+ ```python
+ try:
+ client = init(api_key="invalid_key", db_config={})
+ except ConfigError as e:
+ print(f"Configuration error: {e}")
+ ```
+ """
+ pass
+
+class DatabaseError(CorebrainError):
+ """
+ Error related to database connection or query.
+
+ Raised when there are problems connecting to the database,
+ executing queries, or extracting schema information.
+
+ Example:
+ ```python
+ try:
+ result = client.ask("select * from a_table_that_does_not_exist")
+ except DatabaseError as e:
+ print(f"Database error: {e}")
+ ```
+ """
+ pass
+
+class APIError(CorebrainError):
+ """
+ Error related to communication with the Corebrain API.
+
+ Raised when there are issues in communicating with the service,
+ such as network errors, authentication failures, or unexpected responses.
+
+ Example:
+ ```python
+ try:
+ result = client.ask("How many users are there?")
+ except APIError as e:
+ print(f"API error: {e}")
+ if e.status_code == 401:
+ print("Please verify your API key")
+ ```
+ """
+ def __init__(self, message: str, status_code: Optional[int] = None, response: Optional[Dict[str, Any]] = None):
+ """
+ Initialize an APIError exception.
+
+ Args:
+ message: Descriptive error message
+ status_code: Optional HTTP status code (e.g., 401, 404, 500)
+ response: Server response content if available
+ """
+ self.status_code = status_code
+ self.response = response
+ super().__init__(message)
+
+# Component registry (to avoid circular imports)
+_registry: Dict[str, Any] = {}
+
+def register_component(name: str, component: Any) -> None:
+ """
+ Register a component in the global registry.
+
+ This mechanism resolves circular dependencies between modules
+ by providing a way to access components without importing them directly.
+
+ Args:
+ name: Unique name to identify the component
+ component: The component to register (can be any object)
+
+ Example:
+ ```python
+ # In the module that defines the component
+ from corebrain.core.common import register_component
+
+ class DatabaseConnector:
+ def connect(self):
+ pass
+
+ # Register the component
+ connector = DatabaseConnector()
+ register_component("db_connector", connector)
+ ```
+ """
+ _registry[name] = component
+
+def get_component(name: str) -> Any:
+ """
+ Get a component from the global registry.
+
+ Args:
+ name: Name of the component to retrieve
+
+ Returns:
+ The registered component or None if it doesn't exist
+
+ Example:
+ ```python
+ # In another module that needs to use the component
+ from corebrain.core.common import get_component
+
+ # Get the component
+ connector = get_component("db_connector")
+ if connector:
+ connector.connect()
+ ```
+ """
+ return _registry.get(name)
+
+def safely_get_component(name: str, default: Optional[T] = None) -> Union[Any, T]:
+ """
+ Safely get a component from the global registry.
+
+ If the component doesn't exist, it returns the provided default
+ value instead of None.
+
+ Args:
+ name: Name of the component to retrieve
+ default: Default value to return if the component doesn't exist
+
+ Returns:
+ The registered component or the default value
+
+ Example:
+ ```python
+ # In another module
+ from corebrain.core.common import safely_get_component
+
+ # Get the component with a default value
+ connector = safely_get_component("db_connector", MyDefaultConnector())
+ connector.connect() # Guaranteed not to be None
+ ```
+ """
+ component = _registry.get(name)
+ return component if component is not None else default
\ No newline at end of file
diff --git a/corebrain/core/query.py b/corebrain/core/query.py
new file mode 100644
index 0000000..4d9f87b
--- /dev/null
+++ b/corebrain/core/query.py
@@ -0,0 +1,1037 @@
+"""
+Components for query handling and analysis.
+"""
+import os
+import json
+import time
+import re
+import sqlite3
+import pickle
+import hashlib
+
+from typing import Dict, Any, List, Optional, Tuple, Callable
+from datetime import datetime
+from pathlib import Path
+
+from corebrain.cli.utils import print_colored
+
+class QueryCache:
+ """Multilevel cache system for queries."""
+
+ def __init__(self, cache_dir: Optional[str] = None, ttl: int = 86400, memory_limit: int = 100):
+ """
+ Initializes the cache system.
+
+ Args:
+ cache_dir: Directory for persistent cache
+ ttl: Time-to-live of the cache in seconds (default: 24 hours)
+ memory_limit: Memory cache entry limit
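+
+ Example (illustrative usage sketch; the query text and config ID are placeholders):
+ ```python
+ cache = QueryCache(ttl=3600)
+ result = cache.get("cuántos usuarios hay", "config_123")
+ if result is None:
+ result = {"explanation": "..."} # computed by the SDK
+ cache.set("cuántos usuarios hay", "config_123", result)
+ ```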
+ """
+ # In-memory cache (faster, but volatile)
+ self.memory_cache = {}
+ self.memory_timestamps = {}
+ self.memory_limit = memory_limit
+ self.memory_lru = [] # Least recently used tracking list
+
+ # Persistent cache (slower, but permanent)
+ self.ttl = ttl
+ if cache_dir:
+ self.cache_dir = Path(cache_dir)
+ else:
+ self.cache_dir = Path.home() / ".corebrain_cache"
+
+ # Create cache directory if it does not exist
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+ # Initialize SQLite database for metadata
+ self.db_path = self.cache_dir / "cache_metadata.db"
+ self._init_db()
+
+ print_colored(f"Caché inicializado en {self.cache_dir}", "blue")
+
+ def _init_db(self):
+ """Initializes the SQLite database for cache metadata."""
+ conn = sqlite3.connect(str(self.db_path))
+ cursor = conn.cursor()
+
+ # Create metadata table if it does not exist
+ cursor.execute('''
+ CREATE TABLE IF NOT EXISTS cache_metadata (
+ query_hash TEXT PRIMARY KEY,
+ query TEXT,
+ config_id TEXT,
+ created_at TIMESTAMP,
+ last_accessed TIMESTAMP,
+ hit_count INTEGER DEFAULT 1
+ )
+ ''')
+
+ conn.commit()
+ conn.close()
+
+ def _get_hash(self, query: str, config_id: str, collection_name: Optional[str] = None) -> str:
+ """Generates a unique hash for the query."""
+ # Normalize the query (remove extra spaces, convert to lowercase)
+ normalized_query = re.sub(r'\s+', ' ', query.lower().strip())
+
+ # Create composite string for the hash
+ hash_input = f"{normalized_query}|{config_id}"
+ if collection_name:
+ hash_input += f"|{collection_name}"
+
+ # Generate the hash
+ return hashlib.md5(hash_input.encode()).hexdigest()
+
+ def _get_cache_path(self, query_hash: str) -> Path:
+ """Gets the cache file path for a given hash."""
+ # Use the first characters of the hash to create subdirectories
+ # This prevents having too many files in a single directory
+ subdir = query_hash[:2]
+ cache_subdir = self.cache_dir / subdir
+ cache_subdir.mkdir(exist_ok=True)
+
+ return cache_subdir / f"{query_hash}.cache"
+
+ def _update_metadata(self, query_hash: str, query: str, config_id: str):
+ """Updates the metadata in the database."""
+ conn = sqlite3.connect(str(self.db_path))
+ cursor = conn.cursor()
+
+ now = datetime.now().isoformat()
+
+ # Check if the hash already exists
+ cursor.execute("SELECT hit_count FROM cache_metadata WHERE query_hash = ?", (query_hash,))
+ result = cursor.fetchone()
+
+ if result:
+ # Update existing entry
+ hit_count = result[0] + 1
+ cursor.execute('''
+ UPDATE cache_metadata
+ SET last_accessed = ?, hit_count = ?
+ WHERE query_hash = ?
+ ''', (now, hit_count, query_hash))
+ else:
+ # Insert new entry
+ cursor.execute('''
+ INSERT INTO cache_metadata (query_hash, query, config_id, created_at, last_accessed, hit_count)
+ VALUES (?, ?, ?, ?, ?, 1)
+ ''', (query_hash, query, config_id, now, now))
+
+ conn.commit()
+ conn.close()
+
+ def _update_memory_lru(self, query_hash: str):
+ """Updates the LRU (Least Recently Used) list for the in-memory cache."""
+ if query_hash in self.memory_lru:
+ # Move to end (most recently used)
+ self.memory_lru.remove(query_hash)
+
+ self.memory_lru.append(query_hash)
+
+ # If we exceed the limit, delete the least recently used item
+ if len(self.memory_lru) > self.memory_limit:
+ oldest_hash = self.memory_lru.pop(0)
+ if oldest_hash in self.memory_cache:
+ del self.memory_cache[oldest_hash]
+ del self.memory_timestamps[oldest_hash]
+
+ def get(self, query: str, config_id: str, collection_name: Optional[str] = None) -> Optional[Dict[str, Any]]:
+ """
+ Retrieves a cached result if it exists and has not expired.
+
+ Args:
+ query: Natural language query
+ config_id: Database configuration ID
+ collection_name: Name of the collection/table (optional)
+
+ Returns:
+ Cached result or None if it does not exist or has expired
+ """
+ query_hash = self._get_hash(query, config_id, collection_name)
+
+ # 1. Check in-memory cache (faster)
+ if query_hash in self.memory_cache:
+ timestamp = self.memory_timestamps[query_hash]
+ if (time.time() - timestamp) < self.ttl:
+ self._update_memory_lru(query_hash)
+ self._update_metadata(query_hash, query, config_id)
+ print_colored(f"Cache hit (memory): {query[:30]}...", "green")
+ return self.memory_cache[query_hash]
+ else:
+ # Expired in memory
+ del self.memory_cache[query_hash]
+ del self.memory_timestamps[query_hash]
+ if query_hash in self.memory_lru:
+ self.memory_lru.remove(query_hash)
+
+ # 2. Check disk cache
+ cache_path = self._get_cache_path(query_hash)
+ if cache_path.exists():
+ # Check file age
+ file_age = time.time() - cache_path.stat().st_mtime
+ if file_age < self.ttl:
+ try:
+ with open(cache_path, 'rb') as f:
+ result = pickle.load(f)
+
+ # Also save in memory cache
+ self.memory_cache[query_hash] = result
+ self.memory_timestamps[query_hash] = time.time()
+ self._update_memory_lru(query_hash)
+ self._update_metadata(query_hash, query, config_id)
+
+ print_colored(f"Cache hit (disk): {query[:30]}...", "green")
+ return result
+ except Exception as e:
+ print_colored(f"Error al cargar caché: {str(e)}", "red")
+ # If there is an error when uploading, delete the corrupted file
+ cache_path.unlink(missing_ok=True)
+ else:
+ # Expired file, delete it
+ cache_path.unlink(missing_ok=True)
+
+ return None
+
+ def set(self, query: str, config_id: str, result: Dict[str, Any], collection_name: Optional[str] = None):
+ """
+ Saves a result in the cache.
+
+ Args:
+ query: Natural language query
+ config_id: Configuration ID
+ result: Result to cache
+ collection_name: Name of the collection/table (optional)
+ """
+ query_hash = self._get_hash(query, config_id, collection_name)
+
+ # 1. Save to memory cache
+ self.memory_cache[query_hash] = result
+ self.memory_timestamps[query_hash] = time.time()
+ self._update_memory_lru(query_hash)
+
+ # 2. Save to persistent cache
+ try:
+ cache_path = self._get_cache_path(query_hash)
+ with open(cache_path, 'wb') as f:
+ pickle.dump(result, f)
+
+ # 3. Update metadata
+ self._update_metadata(query_hash, query, config_id)
+
+ print_colored(f"Cached: {query[:30]}...", "green")
+ except Exception as e:
+ print_colored(f"Error al guardar en caché: {str(e)}", "red")
+
+ def clear(self, older_than: Optional[int] = None):
+ """
+ Clears the cache.
+
+ Args:
+ older_than: Only clear entries older than this number of seconds
+ """
+ # Clear cache in memory
+ if older_than:
+ current_time = time.time()
+ keys_to_remove = [
+ k for k, timestamp in self.memory_timestamps.items()
+ if (current_time - timestamp) > older_than
+ ]
+
+ for k in keys_to_remove:
+ if k in self.memory_cache:
+ del self.memory_cache[k]
+ if k in self.memory_timestamps:
+ del self.memory_timestamps[k]
+ if k in self.memory_lru:
+ self.memory_lru.remove(k)
+ else:
+ self.memory_cache.clear()
+ self.memory_timestamps.clear()
+ self.memory_lru.clear()
+
+ # Clear disk cache
+ if older_than:
+ cutoff_time = time.time() - older_than
+
+ # Using the database to find old files
+ conn = sqlite3.connect(str(self.db_path))
+ cursor = conn.cursor()
+
+ # Convert cutoff_time to ISO format
+ cutoff_datetime = datetime.fromtimestamp(cutoff_time).isoformat()
+
+ cursor.execute(
+ "SELECT query_hash FROM cache_metadata WHERE last_accessed < ?",
+ (cutoff_datetime,)
+ )
+
+ old_hashes = [row[0] for row in cursor.fetchall()]
+
+ # Delete old files
+ for query_hash in old_hashes:
+ cache_path = self._get_cache_path(query_hash)
+ if cache_path.exists():
+ cache_path.unlink()
+
+ # Delete from the database
+ cursor.execute(
+ "DELETE FROM cache_metadata WHERE query_hash = ?",
+ (query_hash,)
+ )
+
+ conn.commit()
+ conn.close()
+ else:
+ # Delete all cache files
+ for subdir in self.cache_dir.iterdir():
+ if subdir.is_dir():
+ for cache_file in subdir.glob("*.cache"):
+ cache_file.unlink()
+
+ # Reset the metadata database
+ conn = sqlite3.connect(str(self.db_path))
+ cursor = conn.cursor()
+ cursor.execute("DELETE FROM cache_metadata")
+ conn.commit()
+ conn.close()
+
+ def get_stats(self) -> Dict[str, Any]:
+ """Gets cache statistics."""
+ # Count files on disk
+ disk_count = 0
+ for subdir in self.cache_dir.iterdir():
+ if subdir.is_dir():
+ disk_count += len(list(subdir.glob("*.cache")))
+
+ # Get statistics from the database
+ conn = sqlite3.connect(str(self.db_path))
+ cursor = conn.cursor()
+
+ # Total entries
+ cursor.execute("SELECT COUNT(*) FROM cache_metadata")
+ total_entries = cursor.fetchone()[0]
+
+ # Most frequent queries
+ cursor.execute(
+ "SELECT query, hit_count FROM cache_metadata ORDER BY hit_count DESC LIMIT 5"
+ )
+ top_queries = cursor.fetchall()
+
+ # Average age
+ cursor.execute(
+ "SELECT AVG(strftime('%s', 'now') - strftime('%s', created_at)) FROM cache_metadata"
+ )
+ avg_age = cursor.fetchone()[0]
+
+ conn.close()
+
+ return {
+ "memory_cache_size": len(self.memory_cache),
+ "disk_cache_size": disk_count,
+ "total_entries": total_entries,
+ "top_queries": top_queries,
+ "average_age_seconds": avg_age,
+ "cache_directory": str(self.cache_dir)
+ }
+
+class QueryTemplate:
+ """Predefined query template for common patterns."""
+
+ def __init__(self, pattern: str, description: str,
+ sql_template: Optional[str] = None,
+ generator_func: Optional[Callable] = None,
+ db_type: str = "sql",
+ applicable_tables: Optional[List[str]] = None):
+ """
+ Initializes a query template.
+
+ Args:
+ pattern: Natural language pattern that matches this template
+ description: Description of the template
+ sql_template: SQL template with placeholders for parameters
+ generator_func: Alternative function to generate the query
+ db_type: Database type (sql, mongodb)
+ applicable_tables: List of tables to which this template applies
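+
+ Example (illustrative sketch using one of the predefined Spanish patterns):
+ ```python
+ template = QueryTemplate(
+ pattern="muestra todos los {table}",
+ description="List all records in a table",
+ sql_template="SELECT * FROM $1 LIMIT 100",
+ )
+ matched, params = template.matches("muestra todos los usuarios")
+ # matched == True, params == ["usuarios"]
+ ```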
+ """
+ self.pattern = pattern
+ self.description = description
+ self.sql_template = sql_template
+ self.generator_func = generator_func
+ self.db_type = db_type
+ self.applicable_tables = applicable_tables or []
+
+ # Compile regular expression for the pattern
+ self.regex = self._compile_pattern(pattern)
+
+ def _compile_pattern(self, pattern: str) -> re.Pattern:
+ """Compiles the pattern into a regular expression."""
+ # Replace special markers with capture groups
+ regex_pattern = pattern
+
+ # {table} becomes a capturing group for the table name
+ regex_pattern = regex_pattern.replace("{table}", r"(\w+)")
+
+ # {field} becomes a capturing group for the field name
+ regex_pattern = regex_pattern.replace("{field}", r"(\w+)")
+
+ # {value} becomes a capturing group for a value
+ regex_pattern = regex_pattern.replace("{value}", r"([^,.\s]+)")
+
+ # {number} becomes a capture group for a number
+ regex_pattern = regex_pattern.replace("{number}", r"(\d+)")
+
+ # Match the entire pattern
+ regex_pattern = f"^{regex_pattern}$"
+
+ return re.compile(regex_pattern, re.IGNORECASE)
+
+ def matches(self, query: str) -> Tuple[bool, List[str]]:
+ """
+ Checks if a query matches this template.
+
+ Args:
+ query: Query to check
+
+ Returns:
+ Tuple of (match, [captured parameters])
+ """
+ match = self.regex.match(query)
+ if match:
+ return True, list(match.groups())
+ return False, []
+
+ def generate_query(self, params: List[str], db_schema: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+ """
+ Generates a query from the captured parameters.
+
+ Args:
+ params: Captured parameters from the pattern
+ db_schema: Database schema
+
+ Returns:
+ Generated query or None if it cannot be generated
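+
+ Example (illustrative; assumes a template whose sql_template is "SELECT * FROM $1 LIMIT 100"):
+ ```python
+ query = template.generate_query(["users"], db_schema={})
+ # query == {"sql": "SELECT * FROM users LIMIT 100"}
+ ```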
+ """
+ if self.generator_func:
+ # Use custom function
+ return self.generator_func(params, db_schema)
+
+ if not self.sql_template:
+ return None
+
+ # Try to apply the SQL template with the parameters
+ try:
+ sql_query = self.sql_template
+
+ # Replace parameters in the template
+ for i, param in enumerate(params):
+ placeholder = f"${i+1}"
+ sql_query = sql_query.replace(placeholder, param)
+
+ # Check if there are any unreplaced parameters
+ if "$" in sql_query:
+ return None
+
+ return {"sql": sql_query}
+ except Exception:
+ return None
+
+class QueryAnalyzer:
+ """Analyzes query patterns to suggest optimizations."""
+
+ def __init__(self, query_log_path: Optional[str] = None, template_path: Optional[str] = None):
+ """
+ Initializes the query analyzer.
+
+ Args:
+ query_log_path: Path to the query log file
+ template_path: Path to the template file
+ """
+ self.query_log_path = query_log_path or os.path.join(
+ Path.home(), ".corebrain_cache", "query_log.db"
+ )
+
+ self.template_path = template_path or os.path.join(
+ Path.home(), ".corebrain_cache", "templates.json"
+ )
+
+ # Initialize database
+ self._init_db()
+
+ # Predefined templates for common queries
+ self.templates = self._load_default_templates()
+
+ # Load custom templates
+ self._load_custom_templates()
+
+ # Common regex patterns for identifying Spanish-language query patterns
+ self.common_patterns = [
+ r"muestra\s+(?:todos\s+)?los\s+(\w+)",
+ r"lista\s+(?:de\s+)?(?:todos\s+)?los\s+(\w+)",
+ r"busca\s+(\w+)\s+donde",
+ r"cu[aá]ntos\s+(\w+)\s+hay",
+ r"total\s+de\s+(\w+)"
+ ]
+
+ def _init_db(self):
+ """Initializes the database for query logging."""
+ # Ensure that the directory exists
+ os.makedirs(os.path.dirname(self.query_log_path), exist_ok=True)
+
+ conn = sqlite3.connect(self.query_log_path)
+ cursor = conn.cursor()
+
+ # Create log table if it does not exist
+ cursor.execute('''
+ CREATE TABLE IF NOT EXISTS query_log (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ query TEXT,
+ config_id TEXT,
+ collection_name TEXT,
+ timestamp TIMESTAMP,
+ execution_time REAL,
+ cost REAL,
+ result_count INTEGER,
+ pattern TEXT
+ )
+ ''')
+
+ # Create table of detected patterns
+ cursor.execute('''
+ CREATE TABLE IF NOT EXISTS query_patterns (
+ pattern TEXT PRIMARY KEY,
+ count INTEGER,
+ avg_execution_time REAL,
+ avg_cost REAL,
+ last_updated TIMESTAMP
+ )
+ ''')
+
+ conn.commit()
+ conn.close()
+
+ def _load_default_templates(self) -> List[QueryTemplate]:
+ """Carga las plantillas predefinidas para consultas comunes."""
+ templates = []
+
+ # List all records in a table
+ templates.append(
+ QueryTemplate(
+ pattern="muestra todos los {table}",
+ description="Listar todos los registros de una tabla",
+ sql_template="SELECT * FROM $1 LIMIT 100",
+ db_type="sql"
+ )
+ )
+
+ # Count records
+ templates.append(
+ QueryTemplate(
+ pattern="cuántos {table} hay",
+ description="Contar registros en una tabla",
+ sql_template="SELECT COUNT(*) FROM $1",
+ db_type="sql"
+ )
+ )
+
+ # Search by ID
+ templates.append(
+ QueryTemplate(
+ pattern="busca el {table} con id {value}",
+ description="Buscar registro por ID",
+ sql_template="SELECT * FROM $1 WHERE id = $2",
+ db_type="sql"
+ )
+ )
+
+ # List sorted
+ templates.append(
+ QueryTemplate(
+ pattern="lista los {table} ordenados por {field}",
+ description="Listar registros ordenados por campo",
+ sql_template="SELECT * FROM $1 ORDER BY $2 LIMIT 100",
+ db_type="sql"
+ )
+ )
+
+ # Search by email
+ templates.append(
+ QueryTemplate(
+ pattern="busca el usuario con email {value}",
+ description="Buscar usuario por email",
+ sql_template="SELECT * FROM users WHERE email = '$2'",
+ db_type="sql"
+ )
+ )
+
+ # Count by field
+ templates.append(
+ QueryTemplate(
+ pattern="cuántos {table} hay por {field}",
+ description="Contar registros agrupados por campo",
+ sql_template="SELECT $2, COUNT(*) FROM $1 GROUP BY $2",
+ db_type="sql"
+ )
+ )
+
+ # Count active users
+ templates.append(
+ QueryTemplate(
+ pattern="cuántos usuarios activos hay",
+ description="Contar usuarios activos",
+ sql_template="SELECT COUNT(*) FROM users WHERE is_active = TRUE",
+ db_type="sql",
+ applicable_tables=["users"]
+ )
+ )
+
+ # List users by registration date
+ templates.append(
+ QueryTemplate(
+ pattern="usuarios registrados en los últimos {number} días",
+ description="Listar usuarios recientes",
+ sql_template="""
+ SELECT * FROM users
+ WHERE created_at >= datetime('now', '-$2 days')
+ ORDER BY created_at DESC
+ LIMIT 100
+ """,
+ db_type="sql",
+ applicable_tables=["users"]
+ )
+ )
+
+ # Search companies
+ templates.append(
+ QueryTemplate(
+ pattern="usuarios que tienen empresa",
+ description="Buscar usuarios con empresa asignada",
+ sql_template="""
+ SELECT u.* FROM users u
+ INNER JOIN businesses b ON u.id = b.owner_id
+ WHERE u.is_business = TRUE
+ LIMIT 100
+ """,
+ db_type="sql",
+ applicable_tables=["users", "businesses"]
+ )
+ )
+
+ # Find businesses
+ templates.append(
+ QueryTemplate(
+ pattern="busca negocios en {value}",
+ description="Buscar negocios por ubicación",
+ sql_template="""
+ SELECT * FROM businesses
+ WHERE address_city LIKE '%$1%' OR address_province LIKE '%$1%'
+ LIMIT 100
+ """,
+ db_type="sql",
+ applicable_tables=["businesses"]
+ )
+ )
+
+ # MongoDB: List documents
+ templates.append(
+ QueryTemplate(
+ pattern="muestra todos los documentos de {table}",
+ description="Listar documentos en una colección",
+ db_type="mongodb",
+ generator_func=lambda params, schema: {
+ "collection": params[0],
+ "operation": "find",
+ "query": {},
+ "limit": 100
+ }
+ )
+ )
+
+ return templates
+
+ def _load_custom_templates(self):
+ """Loads custom templates from the file."""
+ if not os.path.exists(self.template_path):
+ return
+
+ try:
+ with open(self.template_path, 'r') as f:
+ custom_templates = json.load(f)
+
+ for template_data in custom_templates:
+ # Create template from JSON data
+ template = QueryTemplate(
+ pattern=template_data.get("pattern", ""),
+ description=template_data.get("description", ""),
+ sql_template=template_data.get("sql_template"),
+ db_type=template_data.get("db_type", "sql"),
+ applicable_tables=template_data.get("applicable_tables", [])
+ )
+
+ self.templates.append(template)
+
+ except Exception as e:
+ print_colored(f"Error al cargar plantillas personalizadas: {str(e)}", "red")
+
+ def save_custom_template(self, template: QueryTemplate) -> bool:
+ """
+ Saves a custom template.
+
+ Args:
+ template: Template to save
+
+ Returns:
+ True if saved successfully
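+
+ Example (illustrative sketch):
+ ```python
+ analyzer = QueryAnalyzer()
+ template = QueryTemplate(
+ pattern="busca el {table} con id {value}",
+ description="Find a record by ID",
+ sql_template="SELECT * FROM $1 WHERE id = $2",
+ )
+ saved = analyzer.save_custom_template(template)
+ ```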
+ """
+ # Load existing templates
+ custom_templates = []
+ if os.path.exists(self.template_path):
+ try:
+ with open(self.template_path, 'r') as f:
+ custom_templates = json.load(f)
+ except Exception:
+ custom_templates = []
+
+ # Convert template to dictionary
+ template_data = {
+ "pattern": template.pattern,
+ "description": template.description,
+ "sql_template": template.sql_template,
+ "db_type": template.db_type,
+ "applicable_tables": template.applicable_tables
+ }
+
+ # Check if a template with the same pattern already exists
+ for i, existing in enumerate(custom_templates):
+ if existing.get("pattern") == template.pattern:
+ # Update existing
+ custom_templates[i] = template_data
+ break
+ else:
+ # Add new
+ custom_templates.append(template_data)
+
+ # Save templates
+ try:
+ with open(self.template_path, 'w') as f:
+ json.dump(custom_templates, f, indent=2)
+
+ # Update template list
+ self.templates.append(template)
+
+ return True
+ except Exception as e:
+ print_colored(f"Error al guardar plantilla personalizada: {str(e)}", "red")
+ return False
+
+ def find_matching_template(self, query: str, db_schema: Dict[str, Any]) -> Optional[Tuple[QueryTemplate, List[str]]]:
+ """
+ Searches for a template that matches the query.
+
+ Args:
+ query: Natural language query
+ db_schema: Database schema
+
+ Returns:
+ Tuple of (template, parameters) or None if no match is found
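+
+ Example (illustrative; assumes an initialized QueryAnalyzer named analyzer,
+ and a schema shaped like the SDK's SchemaDict):
+ ```python
+ match = analyzer.find_matching_template(
+ "cuántos usuarios hay", {"tables": {"usuarios": {}}}
+ )
+ if match:
+ template, params = match
+ query = template.generate_query(params, {})
+ ```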
+ """
+ for template in self.templates:
+ matches, params = template.matches(query)
+ if matches:
+ # Check if the template is applicable to existing tables
+ if template.applicable_tables:
+ available_tables = set(db_schema.get("tables", {}).keys())
+ if not any(table in available_tables for table in template.applicable_tables):
+ continue
+
+ return template, params
+
+ return None
+
+ def log_query(self, query: str, config_id: str, collection_name: str = None,
+ execution_time: float = 0, cost: float = 0.09, result_count: int = 0):
+ """
+ Registers a query for analysis.
+
+ Args:
+ query: Natural language query
+ config_id: Configuration ID
+ collection_name: Name of the collection/table
+ execution_time: Execution time in seconds
+ cost: Estimated cost of the query
+ result_count: Number of results obtained
+ """
+ # Detect pattern
+ pattern = self._detect_pattern(query)
+
+ # Register in the database
+ conn = sqlite3.connect(self.query_log_path)
+ cursor = conn.cursor()
+
+ cursor.execute('''
+ INSERT INTO query_log (query, config_id, collection_name, timestamp, execution_time, cost, result_count, pattern)
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+ ''', (
+ query, config_id, collection_name, datetime.now().isoformat(),
+ execution_time, cost, result_count, pattern
+ ))
+
+ # Update pattern statistics
+ if pattern:
+ cursor.execute(
+ "SELECT count, avg_execution_time, avg_cost FROM query_patterns WHERE pattern = ?",
+ (pattern,)
+ )
+ result = cursor.fetchone()
+
+ if result:
+ # Update existing pattern
+ count, avg_exec_time, avg_cost = result
+ new_count = count + 1
+ new_avg_exec_time = (avg_exec_time * count + execution_time) / new_count
+ new_avg_cost = (avg_cost * count + cost) / new_count
+
+ cursor.execute('''
+ UPDATE query_patterns
+ SET count = ?, avg_execution_time = ?, avg_cost = ?, last_updated = ?
+ WHERE pattern = ?
+ ''', (new_count, new_avg_exec_time, new_avg_cost, datetime.now().isoformat(), pattern))
+ else:
+ # Insert new pattern
+ cursor.execute('''
+ INSERT INTO query_patterns (pattern, count, avg_execution_time, avg_cost, last_updated)
+ VALUES (?, 1, ?, ?, ?)
+ ''', (pattern, execution_time, cost, datetime.now().isoformat()))
+
+ conn.commit()
+ conn.close()
+
+ def _detect_pattern(self, query: str) -> Optional[str]:
+ """
+ Detects a pattern in the query.
+
+ Args:
+ query: Query to analyze
+
+ Returns:
+ Detected pattern or None
+ """
+ normalized_query = query.lower()
+
+ # Check predefined patterns
+ for pattern in self.common_patterns:
+ match = re.search(pattern, normalized_query)
+ if match:
+ # Return the pattern with wildcards
+ entity = match.group(1)
+ return pattern.replace(r'(\w+)', entity)
+
+ # If no predefined pattern is detected, try to generalize
+ words = normalized_query.split()
+ if len(words) < 3:
+ return None
+
+ # Try to generalize simple queries
+ if "mostrar" in words or "muestra" in words or "listar" in words or "lista" in words:
+ for i, word in enumerate(words):
+ if word in ["de", "los", "las", "todos", "todas"]:
+ if i+1 < len(words):
+ return f"lista_de_{words[i+1]}"
+
+ return None
+
+ def get_common_patterns(self, limit: int = 5) -> List[Dict[str, Any]]:
+ """
+ Retrieves the most common query patterns.
+
+ Args:
+ limit: Maximum number of patterns to return
+
+ Returns:
+ List of the most common patterns
+ """
+ conn = sqlite3.connect(self.query_log_path)
+ cursor = conn.cursor()
+
+ cursor.execute('''
+ SELECT pattern, count, avg_execution_time, avg_cost
+ FROM query_patterns
+ ORDER BY count DESC
+ LIMIT ?
+ ''', (limit,))
+
+ patterns = []
+ for row in cursor.fetchall():
+ pattern, count, avg_time, avg_cost = row
+ patterns.append({
+ "pattern": pattern,
+ "count": count,
+ "avg_execution_time": avg_time,
+ "avg_cost": avg_cost,
+ "estimated_monthly_cost": round(avg_cost * count * 30 / 7, 2) # Estimación mensual
+ })
+
+ conn.close()
+ return patterns
+
+ def suggest_new_template(self, query: str, sql_query: str) -> Optional[QueryTemplate]:
+ """
+ Suggests a new template based on a successful query.
+
+ Args:
+ query: Natural language query
+ sql_query: Generated SQL query
+
+ Returns:
+ Suggested template or None
+ """
+ # Detect pattern
+ pattern = self._detect_pattern(query)
+ if not pattern:
+ return None
+
+ # Generalize the SQL query
+ generalized_sql = sql_query
+
+ # Replace specific values with markers
+ # This is a simplification; ideally, you would use an SQL parser
+ tokens = query.lower().split()
+
+ # Identify possible values to parameterize
+ for i, token in enumerate(tokens):
+ if token.isdigit():
+ # Replace numbers
+ generalized_sql = re.sub(r'\b' + re.escape(token) + r'\b', '$1', generalized_sql)
+ pattern = pattern.replace(token, "{number}")
+ elif '@' in token and '.' in token:
+ # Replace emails
+ generalized_sql = re.sub(r'\b' + re.escape(token) + r'\b', '$1', generalized_sql)
+ pattern = pattern.replace(token, "{value}")
+ elif token.startswith('"') or token.startswith("'"):
+ # Replace strings
+ value = token.strip('"\'')
+ if len(value) > 2: # Avoid replacing very short strings
+ generalized_sql = re.sub(r'[\'"]' + re.escape(value) + r'[\'"]', "'$1'", generalized_sql)
+ pattern = pattern.replace(token, "{value}")
+
+ # Create template
+ return QueryTemplate(
+ pattern=pattern,
+ description=f"Plantilla generada automáticamente para: {pattern}",
+ sql_template=generalized_sql,
+ db_type="sql"
+ )
+
+ def get_optimization_suggestions(self) -> List[Dict[str, Any]]:
+ """
+ Generates suggestions to optimize queries.
+
+ Returns:
+ List of optimization suggestions
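+
+ Each suggestion is a dict; an illustrative entry (values are made up):
+ ```python
+ {
+ "type": "precompile",
+ "pattern": "lista_de_usuarios",
+ "count": 12,
+ "estimated_savings": 0.97,
+ "suggestion": "Create an SQL template for queries of type 'lista_de_usuarios'"
+ }
+ ```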
+ """
+ suggestions = []
+
+ # Calculate general statistics
+ conn = sqlite3.connect(self.query_log_path)
+ cursor = conn.cursor()
+
+ # Total queries and cost over the last 30 days
+ cursor.execute('''
+ SELECT COUNT(*) as query_count, SUM(cost) as total_cost
+ FROM query_log
+ WHERE timestamp > datetime('now', '-30 day')
+ ''')
+
+ row = cursor.fetchone()
+ if row:
+ query_count, total_cost = row
+
+ if query_count and query_count > 100:
+ # If there are many queries in total, suggest volume plan
+ suggestions.append({
+ "type": "volume_plan",
+ "query_count": query_count,
+ "total_cost": round(total_cost, 2) if total_cost else 0,
+ "suggestion": f"Considerar negociar un plan por volumen. Actualmente ~{query_count} consultas/mes."
+ })
+
+ # Suggest adjusting cache TTL based on frequency
+ avg_queries_per_day = query_count / 30
+ suggested_ttl = max(3600, min(86400 * 3, 86400 * (100 / avg_queries_per_day)))
+
+ suggestions.append({
+ "type": "cache_adjustment",
+ "current_rate": f"{avg_queries_per_day:.1f} consultas/día",
+ "suggestion": f"Ajustar TTL del caché a {suggested_ttl/3600:.1f} horas basado en su patrón de uso"
+ })
+
+ # Get common patterns
+ common_patterns = self.get_common_patterns(10)
+
+ for pattern in common_patterns:
+ if pattern["count"] >= 5:
+ # If a pattern repeats a lot, suggest precompilation
+ suggestions.append({
+ "type": "precompile",
+ "pattern": pattern["pattern"],
+ "count": pattern["count"],
+ "estimated_savings": round(pattern["avg_cost"] * pattern["count"] * 0.9, 2), # 90% savings
+ "suggestion": f"Crear una plantilla SQL para consultas del tipo '{pattern['pattern']}'"
+ })
+
+ # If a pattern is expensive but rare
+ if pattern["avg_cost"] > 0.1 and pattern["count"] < 5:
+ suggestions.append({
+ "type": "analyze",
+ "pattern": pattern["pattern"],
+ "avg_cost": pattern["avg_cost"],
+ "suggestion": f"Revisar manualmente consultas del tipo '{pattern['pattern']}' para optimizar"
+ })
+
+ # Find periods with high load to adjust parameters
+ cursor.execute('''
+ SELECT strftime('%Y-%m-%d %H', timestamp) as hour, COUNT(*) as count, SUM(cost) as total_cost
+ FROM query_log
+ WHERE timestamp > datetime('now', '-7 day')
+ GROUP BY hour
+ ORDER BY count DESC
+ LIMIT 5
+ ''')
+
+ for row in cursor.fetchall():
+ hour, count, total_cost = row
+ if count > 20: # If there are more than 20 queries in an hour
+ suggestions.append({
+ "type": "load_balancing",
+ "hour": hour,
+ "query_count": count,
+ "total_cost": round(total_cost, 2),
+ "suggestion": f"Alta carga de consultas detectada el {hour} ({count} consultas). Considerar técnicas de agrupación."
+ })
+
+ # Find redundant queries (same query in a short time)
+ cursor.execute('''
+ SELECT query, COUNT(*) as count
+ FROM query_log
+ WHERE timestamp > datetime('now', '-1 day')
+ GROUP BY query
+ HAVING COUNT(*) > 3
+ ORDER BY COUNT(*) DESC
+ LIMIT 5
+ ''')
+
+ for row in cursor.fetchall():
+ query, count = row
+ suggestions.append({
+ "type": "redundant",
+ "query": query,
+ "count": count,
+ "estimated_savings": round(0.09 * (count - 1), 2), # Ahorro por no repetir
+ "suggestion": f"Implementar caché para la consulta '{query[:50]}...' que se repitió {count} veces"
+ })
+
+ conn.close()
+ return suggestions
+
+
+
\ No newline at end of file
diff --git a/corebrain/core/test_utils.py b/corebrain/core/test_utils.py
new file mode 100644
index 0000000..00c1ec3
--- /dev/null
+++ b/corebrain/core/test_utils.py
@@ -0,0 +1,157 @@
+"""
+Utilities for testing and validating components.
+"""
+import json
+import random
+
+import httpx  # http_session is assumed to wrap an httpx client, so its exceptions come from httpx
+
+from typing import Dict, Any, Optional
+
+from corebrain.cli.utils import print_colored
+from corebrain.cli.common import DEFAULT_API_URL
+from corebrain.network.client import http_session
+
+def generate_test_question_from_schema(schema: Dict[str, Any]) -> str:
+ """
+ Generates a test question based on the database schema.
+
+ Args:
+ schema: Database schema
+
+ Returns:
+ Generated test question
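+
+ Example (illustrative; the question is picked at random):
+ >>> schema = {"tables": [{"name": "users", "columns": [{"name": "id"}]}]}
+ >>> generate_test_question_from_schema(schema)
+ 'How many records are in the users table?'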
+ """
+ if not schema or not schema.get("tables"):
+ return "What are the available tables?"
+
+ tables = schema["tables"]
+
+ if not tables:
+ return "What are the available tables?"
+
+ # Select a random table
+ table = random.choice(tables)
+ table_name = table["name"]
+
+ # Determine the type of question
+ question_types = [
+ f"How many records are in the {table_name} table?",
+ f"Show the first 5 records from {table_name}",
+ f"What are the fields in the {table_name} table?",
+ ]
+
+ # Get columns according to structure (SQL vs NoSQL)
+ columns = []
+ if "columns" in table and table["columns"]:
+ columns = table["columns"]
+ elif "fields" in table and table["fields"]:
+ columns = table["fields"]
+
+ if columns:
+ # If we have information from columns/fields
+ column_name = columns[0]["name"] if columns else "id"
+
+ # Add specific questions with columns
+ question_types.extend([
+ f"What is the maximum value of {column_name} in {table_name}?",
+ f"What are the unique values of {column_name} in {table_name}?",
+ ])
+
+ return random.choice(question_types)
+
+def test_natural_language_query(api_token: str, db_config: Dict[str, Any], api_url: Optional[str] = None, user_data: Optional[Dict[str, Any]] = None) -> bool:
+ """
+ Tests a natural language query.
+
+ Args:
+ api_token: API token
+ db_config: Database configuration
+ api_url: Optional API URL
+ user_data: User data
+
+ Returns:
+ True if the test is successful, False otherwise
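+
+ Example (illustrative; the token and config values are placeholders):
+ >>> ok = test_natural_language_query(
+ ... api_token="your_api_token",
+ ... db_config={"type": "sql", "engine": "sqlite",
+ ... "database": "example.db", "config_id": "cfg_123"},
+ ... )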
+ """
+ try:
+ print_colored("\nPerforming natural language query test...", "blue")
+
+ # Dynamic import to avoid circular imports
+ from corebrain.db.schema_file import extract_db_schema
+
+ # Generate a test question based on the directly extracted schema
+ schema = extract_db_schema(db_config)
+ print("Retrieved schema: ", schema)
+ question = generate_test_question_from_schema(schema)
+ print(f"Test question: {question}")
+
+ # Prepare the data for the request
+ api_url = api_url or DEFAULT_API_URL
+ if not api_url.startswith(("http://", "https://")):
+ api_url = "https://" + api_url
+
+ if api_url.endswith('/'):
+ api_url = api_url[:-1]
+
+ # Build endpoint for the query
+ endpoint = f"{api_url}/api/database/sdk/query"
+
+ # Data for the query
+ request_data = {
+ "question": question,
+ "db_schema": schema,
+ "config_id": db_config["config_id"]
+ }
+
+ # Make the request to the API
+ headers = {
+ "Authorization": f"Bearer {api_token}",
+ "Content-Type": "application/json"
+ }
+
+ timeout = 15.0 # Reduced maximum waiting time
+
+ try:
+ print_colored("Sending query to API...", "blue")
+ response = http_session.post(
+ endpoint,
+ headers=headers,
+ json=request_data,
+ timeout=timeout
+ )
+
+ # Check the response
+ if response.status_code == 200:
+ result = response.json()
+
+ # Check if there is an explanation in the result
+ if "explanation" in result:
+ print_colored("\nResponse:", "green")
+ print(result["explanation"])
+
+ print_colored("\n✅ Query test successful!", "green")
+ return True
+ else:
+ # If there is no explanation but the API responds, it may be a different format
+ print_colored("\nRespuesta recibida del API (formato diferente al esperado):", "yellow")
+ print(json.dumps(result, indent=2))
+ print_colored("\n⚠️ The API responded, but with a different format than expected.", "yellow")
+ return True
+ else:
+ print_colored(f"❌ Error in response: Code {response.status_code}", "red")
+ try:
+ error_data = response.json()
+ print(json.dumps(error_data, indent=2))
+ except Exception:
+ print(response.text[:500])
+ return False
+
+ except httpx.TimeoutException:
+ print_colored("⚠️ Timeout while performing query. The API may be busy or unavailable.", "yellow")
+ print_colored("This does not affect the saved configuration.", "yellow")
+ return False
+ except httpx.RequestError as e:
+ print_colored(f"⚠️ Connection error: {str(e)}", "yellow")
+ print_colored("Check the API URL and your internet connection.", "yellow")
+ return False
+
+ except Exception as e:
+ print_colored(f"❌ Error performing query: {str(e)}", "red")
+ return False
\ No newline at end of file
diff --git a/corebrain/db/__init__.py b/corebrain/db/__init__.py
new file mode 100644
index 0000000..23252a8
--- /dev/null
+++ b/corebrain/db/__init__.py
@@ -0,0 +1,26 @@
+"""
+Database connectors for Corebrain SDK.
+
+This package provides connectors for different types and
+database engines supported by Corebrain.
+"""
+from corebrain.db.connector import DatabaseConnector
+from corebrain.db.factory import get_connector
+from corebrain.db.engines import get_available_engines
+from corebrain.db.connectors.sql import SQLConnector
+from corebrain.db.connectors.nosql import NoSQLConnector
+from corebrain.db.schema_file import get_schema_with_dynamic_import
+from corebrain.db.schema.optimizer import SchemaOptimizer
+from corebrain.db.schema.extractor import extract_db_schema
+
+# Export public components
+__all__ = [
+ 'DatabaseConnector',
+ 'get_connector',
+ 'get_available_engines',
+ 'SQLConnector',
+ 'NoSQLConnector',
+ 'SchemaOptimizer',
+ 'extract_db_schema',
+ 'get_schema_with_dynamic_import'
+]
\ No newline at end of file
diff --git a/corebrain/db/connector.py b/corebrain/db/connector.py
new file mode 100644
index 0000000..886a2a9
--- /dev/null
+++ b/corebrain/db/connector.py
@@ -0,0 +1,33 @@
+"""
+Base connectors for different types of databases.
+"""
+from typing import Dict, Any, List, Optional, Callable
+
+class DatabaseConnector:
+ """Base class for all database connectors."""
+
+ def __init__(self, config: Dict[str, Any], timeout: int = 10):
+ self.config = config
+ self.timeout = timeout
+ self.connection = None
+
+ def connect(self):
+ """Establishes a connection to the database."""
+ raise NotImplementedError
+
+ def extract_schema(self, sample_limit: int = 5, table_limit: Optional[int] = None,
+ progress_callback: Optional[Callable] = None) -> Dict[str, Any]:
+ """Extracts the database schema."""
+ raise NotImplementedError
+
+ def execute_query(self, query: str) -> List[Dict[str, Any]]:
+ """Executes a query on the database."""
+ raise NotImplementedError
+
+ def close(self):
+ """Closes the connection."""
+ if self.connection:
+ try:
+ self.connection.close()
+ except Exception:
+ pass
\ No newline at end of file
diff --git a/corebrain/db/connectors/NoSQL/mongodb.py b/corebrain/db/connectors/NoSQL/mongodb.py
new file mode 100644
index 0000000..57dd992
--- /dev/null
+++ b/corebrain/db/connectors/NoSQL/mongodb.py
@@ -0,0 +1,208 @@
+import time
+import json
+import re
+
+from typing import Dict, Any, List, Optional, Callable, Tuple
+from corebrain.db.connectors.nosql import PYMONGO_IMPORTED
+
+def extract_schema(self, sample_limit: int = 5, collection_limit: Optional[int] = None,
+ progress_callback: Optional[Callable] = None) -> Dict[str, Any]:
+ '''
+ Extracts the schema for MongoDB collections.
+ Args:
+ sample_limit (int): Number of samples to extract from each collection.
+ collection_limit (Optional[int]): Maximum number of collections to process.
+ progress_callback (Optional[Callable]): Function to call for progress updates.
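+ Returns:
+ Dict[str, Any]: Schema dict; illustrative shape:
+ {"type": "mongodb", "database": "mydb",
+ "tables": {collection_name: {"fields": [...], "sample_data": [...], "count": 42}},
+ "tables_list": [...]}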
+ '''
+ schema = {
+ "type": self.engine,
+ "database": self.db.name,
+ "tables": {}, # Depends on DB
+ }
+
+ try:
+ collections = self.db.list_collection_names()
+ if collection_limit is not None and collection_limit > 0:
+ collections = collections[:collection_limit]
+ total_collections = len(collections)
+ for i, collection_name in enumerate(collections):
+ if progress_callback:
+ progress_callback(i, total_collections, f"Processing collection: {collection_name}")
+ collection = self.db[collection_name]
+
+ try:
+ doc_count = collection.count_documents({})
+ if doc_count <= 0:
+ schema["tables"][collection_name] = {
+ "fields": [],
+ "sample_data": [],
+ "count": 0,
+ "empty": True
+ }
+ else:
+ sample_docs = list(collection.find().limit(sample_limit))
+ fields = {}
+ sample_data = []
+
+ for doc in sample_docs:
+ self._extract_document_fields(doc, fields)
+ processed_doc = self._process_document_for_serialization(doc)
+ sample_data.append(processed_doc)
+
+ formatted_fields = [{"name": field, "type": type_name} for field, type_name in fields.items()]
+
+ schema["tables"][collection_name] = {
+ "fields": formatted_fields,
+ "sample_data": sample_data,
+ "count": doc_count,
+ }
+ except Exception as e:
+ print(f"Error processing collection {collection_name}: {e}")
+ schema["tables"][collection_name] = {
+ "fields": [],
+ "error": str(e)
+ }
+ # Convert the schema to a list of tables
+ table_list = []
+ for collection_name, collection_info in schema["tables"].items():
+ table_data = {"name": collection_name}
+ table_data.update(collection_info)
+ table_list.append(table_data)
+ schema["tables_list"] = table_list
+ return schema
+ except Exception as e:
+ print(f"Error extracting schema: {e}")
+ return {
+ "type": "mongodb",
+ "tables": {},
+ "tabbles_list": []
+ }
+
+def _extract_document_fields(self, doc: Dict[str, Any], fields: Dict[str, str],
+ prefix: str = "", max_depth: int = 3, current_depth: int = 0) -> None:
+ '''
+ Recursively extract fields from a document and determine their types.
+ Args:
+ doc (Dict[str, Any]): The document to extract fields from.
+ fields (Dict[str, str]): Dictionary to store field names and types.
+ prefix (str): Prefix for nested fields.
+ max_depth (int): Maximum depth for nested fields.
+ current_depth (int): Current depth in the recursion.
+ '''
+ if not PYMONGO_IMPORTED:
+ raise ImportError("pymongo is not installed. Please install it to use MongoDB connector.")
+ if current_depth >= max_depth:
+ return
+ for field, value in doc.items():
+ if field == "_id":
+ field_type = "ObjectId"
+ elif isinstance(value, dict):
+ if value and current_depth < max_depth - 1:
+ self._extract_document_fields(value, fields, f"{prefix}{field}.", max_depth, current_depth + 1)
+ continue
+ else:
+ field_type = f"object"
+ elif isinstance(value, list):
+ if value and isinstance(value[0], dict) and current_depth < max_depth - 1:
+ self._extract_document_fields(value[0], fields, f"{prefix}{field}[].", max_depth, current_depth + 1)
+ field_type = f"array