From 4a61593118ea0aa358883440c195e4d5af44f281 Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Wed, 15 May 2024 20:09:38 +0800 Subject: [PATCH 1/7] feat - duckdb --- src/aloha/db/duckdb.py | 44 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 src/aloha/db/duckdb.py diff --git a/src/aloha/db/duckdb.py b/src/aloha/db/duckdb.py new file mode 100644 index 0000000..98b001c --- /dev/null +++ b/src/aloha/db/duckdb.py @@ -0,0 +1,44 @@ +__all__ = ('DuckOperator',) + +import duckdb +from sqlalchemy import create_engine +from sqlalchemy.sql import text + +from ..logger import LOG + +LOG.debug('duckdb: duckdb version = %s' % duckdb.__version__) + + +class DuckOperator: + def __init__(self, db_config, **kwargs): + self._config = { + 'path': db_config['path'], + 'dbname': db_config['dbname'], + } + connect_args = {} + if 'schema' in db_config: + connect_args['options'] = '-csearch_path={}'.format(db_config['schema']) + + try: + self.engine = create_engine( + 'postgresql+psycopg2://{user}:{password}@{host}:{port}/{dbname}'.format(**self._config), + connect_args=connect_args, client_encoding='utf8', encoding='utf-8', + pool_size=20, max_overflow=10, pool_pre_ping=True, **kwargs + ) + LOG.debug("PostgresSQL connected: {host}:{port}/{dbname}".format(**self._config)) + except Exception as e: + LOG.error(e) + raise RuntimeError('Failed to connect to PostgresSQL') + + @property + def connection(self): + return self.engine + + def execute_query(self, sql, *args, **kwargs): + with self.engine.connect() as conn: + cur = conn.execute(text(sql), *args, **kwargs) + return cur + + @property + def connection_str(self) -> str: + return "duckdb:///{user}:{password}@{host}:{port}/{dbname}".format(**self._config) From e1a3b4f137bd74b738d2f53cc5f7fb89b9df6a42 Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Sat, 2 Nov 2024 19:28:46 +0000 Subject: [PATCH 2/7] fix subfolders of exclude folder --- src/aloha/script/compile.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/aloha/script/compile.py b/src/aloha/script/compile.py index 2438c6e..552a064 100644 --- a/src/aloha/script/compile.py +++ b/src/aloha/script/compile.py @@ -49,11 +49,19 @@ def build(base: str = None, dist: str = 'build', exclude: list = None, keep: lis for dir_path, dir_names, file_names in os.walk(path_base): dir_name = dir_path.split(os.sep)[-1] # name of the current directory - if dir_path.startswith(path_build) or dir_path in files_exclude: - continue # skip: folder for build output, and excluded folders - + flag_skip: bool = False if dir_name.startswith('.') or (os.sep + '.' in dir_path): - continue # hidden folders and sub-folders + flag_skip = True # hidden folders and sub-folders + elif dir_path.startswith(path_build): + flag_skip = True # skip: folder for build output, and excluded folders + else: + for f in files_exclude: + if dir_path.startswith(f): + flag_skip = True + break + + if flag_skip: + continue for file in file_names: (name, extension), path = os.path.splitext(file), os.path.join(dir_path, file) From 50726ca50216dd5094628bed0176c36c15e01cee Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Sun, 3 Nov 2024 04:01:24 +0800 Subject: [PATCH 3/7] Update postgres.py Signed-off-by: Bibo Hao --- src/aloha/db/postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aloha/db/postgres.py b/src/aloha/db/postgres.py index a0e8a81..e7394ca 100644 --- a/src/aloha/db/postgres.py +++ b/src/aloha/db/postgres.py @@ -7,7 +7,7 @@ from .base import PasswordVault from ..logger import LOG -LOG.debug('postgres: psycopg2 version = %s' % psycopg2.__version__) +LOG.debug('postgres: psycopg version = %s' % psycopg2.__version__) class PostgresOperator: From c76875b37a694e9881e560b0c27a6a54188822dc Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Mon, 18 Aug 2025 21:40:11 +0800 Subject: [PATCH 4/7] Update duckdb.py Signed-off-by: Bibo Hao --- src/aloha/db/duckdb.py | 124 +++++++++++++++++++++++++++++++++-------- 1 file changed, 101 insertions(+), 23 deletions(-) diff --git a/src/aloha/db/duckdb.py b/src/aloha/db/duckdb.py index 98b001c..7935291 100644 --- a/src/aloha/db/duckdb.py +++ b/src/aloha/db/duckdb.py @@ -1,44 +1,122 @@ __all__ = ('DuckOperator',) -import duckdb -from sqlalchemy import create_engine -from sqlalchemy.sql import text +from pathlib import Path -from ..logger import LOG +import duckdb +import duckdb_engine +from aloha.logger import LOG +from sqlalchemy import create_engine, text -LOG.debug('duckdb: duckdb version = %s' % duckdb.__version__) +LOG.debug('duckdb version = %s, duckdb_engine = %s ' % (duckdb.__version__, duckdb_engine.__version__)) class DuckOperator: def __init__(self, db_config, **kwargs): + """ + db_config example: + { + "path": "/path/to/db.duckdb", # 数据库文件路径,使用 ":memory:" 表示内存模式 + "schema": "sales", # 可选,默认 'main' + "read_only": True, # 可选,默认 False (内存模式下强制为 False) + "config": {"memory_limit": "500mb"}# 可选,DuckDB 连接配置 + } + """ self._config = { - 'path': db_config['path'], - 'dbname': db_config['dbname'], + 'path': db_config.get('path', ':memory:'), + 'schema': db_config.get('schema', 'main'), + 'read_only': bool(db_config.get('read_only', False)), + 'config': db_config.get('config', {}), } - connect_args = {} - if 'schema' in db_config: - connect_args['options'] = '-csearch_path={}'.format(db_config['schema']) + + if not self._config['path'] or self._config['path'] == ':memory:': # 标准化内存模式路径 + self._config['path'] = ':memory:' + + if self._config['read_only']: # 内存数据库不支持只读模式 + LOG.warning("In-memory database cannot be read-only. Setting read_only=False.") + self._config['read_only'] = False + + else: + self._prepare_database() try: - self.engine = create_engine( - 'postgresql+psycopg2://{user}:{password}@{host}:{port}/{dbname}'.format(**self._config), - connect_args=connect_args, client_encoding='utf8', encoding='utf-8', - pool_size=20, max_overflow=10, pool_pre_ping=True, **kwargs + str_connection = f"duckdb:///{self._config['path']}" + self.conn = create_engine( + str_connection, + connect_args={ + 'read_only': self._config['read_only'], + 'config': self._config['config'] + }, + **kwargs + ).connect() + + self._initialize_schema() + + LOG.debug( + f"DuckDB connected: {self._config['path']} [schema={self._config['schema']}, read_only={self._config['read_only']}]" ) - LOG.debug("PostgresSQL connected: {host}:{port}/{dbname}".format(**self._config)) except Exception as e: - LOG.error(e) - raise RuntimeError('Failed to connect to PostgresSQL') + LOG.exception(e) + raise RuntimeError('Failed to connect to DuckDB') + + def _prepare_database(self): + """准备数据库文件和目录""" + path = self._config['path'] + path_obj = Path(path) + + parent_dir = path_obj.parent + if not parent_dir.exists(): + if self._config['read_only']: + raise RuntimeError( + f"Directory '{parent_dir}' does not exist and read_only=True" + ) + try: + parent_dir.mkdir(parents=True, exist_ok=True) + LOG.debug(f"Created directory: {parent_dir}") + except Exception as e: + raise RuntimeError(f"Failed to create directory '{parent_dir}': {e}") + + if not path_obj.exists(): + if self._config['read_only']: + raise RuntimeError( + f"DuckDB file '{path}' does not exist and read_only=True" + ) + try: + LOG.debug(f"Database file not found, creating: {path}") + duckdb.connect(path).close() + except Exception as e: + raise RuntimeError(f"Failed to create database file '{path}': {e}") + + def _initialize_schema(self): + if self._config['schema'] == 'main': + return + + try: + if self._config['read_only']: + result = self.conn.execute( + text("SELECT schema_name FROM information_schema.schemata WHERE schema_name = :schema"), + {'schema': self._config['schema']} + ) + if not result.fetchone(): + raise RuntimeError( + f"Schema '{self._config['schema']}' does not exist and read_only=True" + ) + else: + self.conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {self._config['schema']}")) + + self.conn.execute(text(f"SET schema '{self._config['schema']}'")) + except Exception as e: + raise RuntimeError(f'Failed to initialize schema: {e}') @property def connection(self): - return self.engine + return self.conn - def execute_query(self, sql, *args, **kwargs): - with self.engine.connect() as conn: - cur = conn.execute(text(sql), *args, **kwargs) - return cur + def execute_query(self, sql, auto_commit: bool = True, *args, **kwargs): + cur = self.conn.execute(text(sql), *args, **kwargs) + if auto_commit: + self.conn.commit() + return cur @property def connection_str(self) -> str: - return "duckdb:///{user}:{password}@{host}:{port}/{dbname}".format(**self._config) + return f"duckdb:///{self._config['path']} [schema={self._config['schema']}, read_only={self._config['read_only']}]" From ef4e7e825a0704068e651c8c7495148f4371b767 Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Mon, 18 Aug 2025 21:43:41 +0800 Subject: [PATCH 5/7] Update postgres.py Signed-off-by: Bibo Hao --- src/aloha/db/postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aloha/db/postgres.py b/src/aloha/db/postgres.py index 021a623..76f030c 100644 --- a/src/aloha/db/postgres.py +++ b/src/aloha/db/postgres.py @@ -7,7 +7,7 @@ from .base import PasswordVault from ..logger import LOG -LOG.debug('postgres: psycopg version = %s' % psycopg2.__version__) +LOG.debug('postgres: psycopg version = %s' % psycopg.__version__) class PostgresOperator: From 4fd873e2c19f5c24f1f52c98815c6dcdddaf88f1 Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Sat, 23 Aug 2025 17:52:00 +0000 Subject: [PATCH 6/7] update logger --- src/aloha/logger/__init__.py | 3 ++- src/aloha/logger/logger.py | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/aloha/logger/__init__.py b/src/aloha/logger/__init__.py index 06ed0ff..958a90c 100644 --- a/src/aloha/logger/__init__.py +++ b/src/aloha/logger/__init__.py @@ -2,5 +2,6 @@ from ..settings import SETTINGS LOG = get_logger( - level=SETTINGS.config.get('deploy', {}).get('log_level', 10) # 10 = logging.DEBUG + level=SETTINGS.config.get('deploy', {}).get('log_level', 10), # 10 = logging.DEBUG + # logger_name='default', ) diff --git a/src/aloha/logger/logger.py b/src/aloha/logger/logger.py index 578b2b9..5e78430 100644 --- a/src/aloha/logger/logger.py +++ b/src/aloha/logger/logger.py @@ -17,8 +17,8 @@ def setup_logger(logger: logging.Logger, level: int = logging.DEBUG, logger_name from ..settings import SETTINGS module = SETTINGS.config.get('APP_MODULE') or os.environ.get('APP_MODULE', 'default') - if logger_name is not None and len(logger_name) > 0: - module = '%s_%s' % (logger_name, module) + # if logger_name is not None and len(logger_name) > 0: + # module = '%s_%s' % (logger_name, module) path_file = [module, socket.gethostname(), 'p%s' % os.getpid()] # module, hostname, pid path_file = '_'.join(str(i) for i in path_file if i is not None) @@ -34,7 +34,10 @@ def setup_logger(logger: logging.Logger, level: int = logging.DEBUG, logger_name logger.setLevel(level) -def get_logger(level=logging.DEBUG, logger_name: str = None, *args, **kwargs) -> logging.Logger: +def get_logger(level=logging.DEBUG, logger_name: str | None = None, *args, **kwargs) -> logging.Logger: + if logger_name is None: + logger_name = 'default' + logger = logging.getLogger(logger_name) if isinstance(level, str): From 5c80f8671627a63aacaeb079c6933413368cbb31 Mon Sep 17 00:00:00 2001 From: Bibo Hao Date: Sat, 23 Aug 2025 18:22:51 +0000 Subject: [PATCH 7/7] update pipeline --- .github/workflows/pip.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index 339da71..2e934fe 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -37,18 +37,17 @@ jobs: - name: pypi-publish env: - TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} - TWINE_USERNAME_TEST: ${{ secrets.TWINE_USERNAME_TEST }} TWINE_PASSWORD_TEST: ${{ secrets.TWINE_PASSWORD_TEST }} run: | env | sort -f && cd src && ls -alh sudo python3 -c "import fcntl; fcntl.fcntl(1, fcntl.F_SETFL, 0)" sudo python3 setup.py sdist bdist_wheel ls -alh ./dist + if [ "${GITHUB_REPOSITORY}" = "QPod/aloha-python" ] && [ "${GITHUB_REF_NAME}" = "main" ] ; then - twine upload dist/* --verbose -u "${TWINE_USERNAME}" -p "${TWINE_PASSWORD}" ; - elif [ ! -z "${TWINE_USERNAME_TEST}" ]; then - twine upload dist/* --verbose -u "${TWINE_USERNAME_TEST}" -p "${TWINE_PASSWORD_TEST}" \ - --repository-url "https://test.pypi.org/legacy/" ; + URL_REPOSITORY="https://upload.pypi.org/legacy/" + else + URL_REPOSITORY="https://test.pypi.org/legacy/" fi + twine upload dist/* --verbose -u "__token__" -p "${TWINE_PASSWORD_TEST}" --repository-url "${URL_REPOSITORY}" ;