Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 2 additions & 13 deletions pyathena/aio/arrow/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pyathena.arrow.result_set import AthenaArrowResultSet
from pyathena.common import CursorIterator
from pyathena.error import OperationalError, ProgrammingError
from pyathena.model import AthenaCompression, AthenaFileFormat, AthenaQueryExecution
from pyathena.model import AthenaQueryExecution

if TYPE_CHECKING:
import polars as pl
Expand Down Expand Up @@ -110,18 +110,7 @@ async def execute( # type: ignore[override]
Self reference for method chaining.
"""
self._reset_state()
if self._unload:
s3_staging_dir = s3_staging_dir if s3_staging_dir else self._s3_staging_dir
if not s3_staging_dir:
raise ProgrammingError("If the unload option is used, s3_staging_dir is required.")
operation, unload_location = self._formatter.wrap_unload(
operation,
s3_staging_dir=s3_staging_dir,
format_=AthenaFileFormat.FILE_FORMAT_PARQUET,
compression=AthenaCompression.COMPRESSION_SNAPPY,
)
else:
unload_location = None
operation, unload_location = self._prepare_unload(operation, s3_staging_dir)
self.query_id = await self._execute(
operation,
parameters=parameters,
Expand Down
15 changes: 2 additions & 13 deletions pyathena/aio/pandas/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pyathena.aio.common import WithAsyncFetch
from pyathena.common import CursorIterator
from pyathena.error import OperationalError, ProgrammingError
from pyathena.model import AthenaCompression, AthenaFileFormat, AthenaQueryExecution
from pyathena.model import AthenaQueryExecution
from pyathena.pandas.converter import (
DefaultPandasTypeConverter,
DefaultPandasUnloadTypeConverter,
Expand Down Expand Up @@ -133,18 +133,7 @@ async def execute( # type: ignore[override]
Self reference for method chaining.
"""
self._reset_state()
if self._unload:
s3_staging_dir = s3_staging_dir if s3_staging_dir else self._s3_staging_dir
if not s3_staging_dir:
raise ProgrammingError("If the unload option is used, s3_staging_dir is required.")
operation, unload_location = self._formatter.wrap_unload(
operation,
s3_staging_dir=s3_staging_dir,
format_=AthenaFileFormat.FILE_FORMAT_PARQUET,
compression=AthenaCompression.COMPRESSION_SNAPPY,
)
else:
unload_location = None
operation, unload_location = self._prepare_unload(operation, s3_staging_dir)
self.query_id = await self._execute(
operation,
parameters=parameters,
Expand Down
15 changes: 2 additions & 13 deletions pyathena/aio/polars/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pyathena.aio.common import WithAsyncFetch
from pyathena.common import CursorIterator
from pyathena.error import OperationalError, ProgrammingError
from pyathena.model import AthenaCompression, AthenaFileFormat, AthenaQueryExecution
from pyathena.model import AthenaQueryExecution
from pyathena.polars.converter import (
DefaultPolarsTypeConverter,
DefaultPolarsUnloadTypeConverter,
Expand Down Expand Up @@ -115,18 +115,7 @@ async def execute( # type: ignore[override]
Self reference for method chaining.
"""
self._reset_state()
if self._unload:
s3_staging_dir = s3_staging_dir if s3_staging_dir else self._s3_staging_dir
if not s3_staging_dir:
raise ProgrammingError("If the unload option is used, s3_staging_dir is required.")
operation, unload_location = self._formatter.wrap_unload(
operation,
s3_staging_dir=s3_staging_dir,
format_=AthenaFileFormat.FILE_FORMAT_PARQUET,
compression=AthenaCompression.COMPRESSION_SNAPPY,
)
else:
unload_location = None
operation, unload_location = self._prepare_unload(operation, s3_staging_dir)
self.query_id = await self._execute(
operation,
parameters=parameters,
Expand Down
15 changes: 2 additions & 13 deletions pyathena/arrow/async_cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from pyathena.arrow.result_set import AthenaArrowResultSet
from pyathena.async_cursor import AsyncCursor
from pyathena.common import CursorIterator
from pyathena.model import AthenaCompression, AthenaFileFormat, AthenaQueryExecution
from pyathena.model import AthenaQueryExecution

_logger = logging.getLogger(__name__) # type: ignore

Expand Down Expand Up @@ -182,18 +182,7 @@ def execute(
paramstyle: Optional[str] = None,
**kwargs,
) -> Tuple[str, "Future[Union[AthenaArrowResultSet, Any]]"]:
if self._unload:
s3_staging_dir = s3_staging_dir if s3_staging_dir else self._s3_staging_dir
if not s3_staging_dir:
raise ProgrammingError("If the unload option is used, s3_staging_dir is required.")
operation, unload_location = self._formatter.wrap_unload(
operation,
s3_staging_dir=s3_staging_dir,
format_=AthenaFileFormat.FILE_FORMAT_PARQUET,
compression=AthenaCompression.COMPRESSION_SNAPPY,
)
else:
unload_location = None
operation, unload_location = self._prepare_unload(operation, s3_staging_dir)
query_id = self._execute(
operation,
parameters=parameters,
Expand Down
15 changes: 2 additions & 13 deletions pyathena/arrow/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pyathena.arrow.result_set import AthenaArrowResultSet
from pyathena.common import CursorIterator
from pyathena.error import OperationalError, ProgrammingError
from pyathena.model import AthenaCompression, AthenaFileFormat, AthenaQueryExecution
from pyathena.model import AthenaQueryExecution
from pyathena.result_set import WithFetch

if TYPE_CHECKING:
Expand Down Expand Up @@ -166,18 +166,7 @@ def execute(
>>> table = cursor.as_arrow() # Returns Apache Arrow Table
"""
self._reset_state()
if self._unload:
s3_staging_dir = s3_staging_dir if s3_staging_dir else self._s3_staging_dir
if not s3_staging_dir:
raise ProgrammingError("If the unload option is used, s3_staging_dir is required.")
operation, unload_location = self._formatter.wrap_unload(
operation,
s3_staging_dir=s3_staging_dir,
format_=AthenaFileFormat.FILE_FORMAT_PARQUET,
compression=AthenaCompression.COMPRESSION_SNAPPY,
)
else:
unload_location = None
operation, unload_location = self._prepare_unload(operation, s3_staging_dir)
self.query_id = self._execute(
operation,
parameters=parameters,
Expand Down
28 changes: 28 additions & 0 deletions pyathena/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
from pyathena.model import (
AthenaCalculationExecution,
AthenaCalculationExecutionStatus,
AthenaCompression,
AthenaDatabase,
AthenaFileFormat,
AthenaQueryExecution,
AthenaTableMetadata,
)
Expand Down Expand Up @@ -652,6 +654,32 @@ def _prepare_query(
_logger.debug(query)
return query, execution_parameters

def _prepare_unload(
    self,
    operation: str,
    s3_staging_dir: Optional[str],
) -> Tuple[str, Optional[str]]:
    """Rewrite ``operation`` as an UNLOAD statement when unload mode is on.

    Cursors that do not define ``_unload`` (or define it as falsy) get the
    operation back untouched together with ``None`` as the unload location.

    Args:
        operation: SQL query string to execute.
        s3_staging_dir: Per-call S3 staging location. When falsy, the
            cursor-level ``self._s3_staging_dir`` is used instead.

    Returns:
        Tuple of (operation, unload_location). When unload is enabled this
        is whatever ``self._formatter.wrap_unload`` returns; otherwise the
        original operation paired with ``None``.

    Raises:
        ProgrammingError: If unload is enabled and neither the argument nor
            the cursor provides a staging directory.
    """
    # getattr with a default: not every cursor class sets ``_unload``.
    unload_enabled = getattr(self, "_unload", False)
    if unload_enabled:
        staging_dir = s3_staging_dir or self._s3_staging_dir
        if staging_dir:
            # Unload output is requested as Parquet with Snappy compression.
            return self._formatter.wrap_unload(
                operation,
                s3_staging_dir=staging_dir,
                format_=AthenaFileFormat.FILE_FORMAT_PARQUET,
                compression=AthenaCompression.COMPRESSION_SNAPPY,
            )
        raise ProgrammingError("If the unload option is used, s3_staging_dir is required.")
    return operation, None

def _execute(
self,
operation: str,
Expand Down
4 changes: 2 additions & 2 deletions pyathena/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from copy import deepcopy
from datetime import date, datetime, timezone
from decimal import Decimal
from typing import Any, Callable, Dict, Optional, Type
from typing import Any, Callable, Dict, Optional, Tuple, Type

from pyathena.error import ProgrammingError
from pyathena.model import AthenaCompression, AthenaFileFormat
Expand Down Expand Up @@ -86,7 +86,7 @@ def wrap_unload(
s3_staging_dir: str,
format_: str = AthenaFileFormat.FILE_FORMAT_PARQUET,
compression: str = AthenaCompression.COMPRESSION_SNAPPY,
):
) -> Tuple[str, Optional[str]]:
"""Wrap a SELECT query with UNLOAD statement for high-performance result retrieval.

Transforms SELECT or WITH queries into UNLOAD statements that export results
Expand Down
15 changes: 2 additions & 13 deletions pyathena/pandas/async_cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pyathena import ProgrammingError
from pyathena.async_cursor import AsyncCursor
from pyathena.common import CursorIterator
from pyathena.model import AthenaCompression, AthenaFileFormat, AthenaQueryExecution
from pyathena.model import AthenaQueryExecution
from pyathena.pandas.converter import (
DefaultPandasTypeConverter,
DefaultPandasUnloadTypeConverter,
Expand Down Expand Up @@ -159,18 +159,7 @@ def execute(
quoting: int = 1,
**kwargs,
) -> Tuple[str, "Future[Union[AthenaPandasResultSet, Any]]"]:
if self._unload:
s3_staging_dir = s3_staging_dir if s3_staging_dir else self._s3_staging_dir
if not s3_staging_dir:
raise ProgrammingError("If the unload option is used, s3_staging_dir is required.")
operation, unload_location = self._formatter.wrap_unload(
operation,
s3_staging_dir=s3_staging_dir,
format_=AthenaFileFormat.FILE_FORMAT_PARQUET,
compression=AthenaCompression.COMPRESSION_SNAPPY,
)
else:
unload_location = None
operation, unload_location = self._prepare_unload(operation, s3_staging_dir)
query_id = self._execute(
operation,
parameters=parameters,
Expand Down
15 changes: 2 additions & 13 deletions pyathena/pandas/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from pyathena.common import CursorIterator
from pyathena.error import OperationalError, ProgrammingError
from pyathena.model import AthenaCompression, AthenaFileFormat, AthenaQueryExecution
from pyathena.model import AthenaQueryExecution
from pyathena.pandas.converter import (
DefaultPandasTypeConverter,
DefaultPandasUnloadTypeConverter,
Expand Down Expand Up @@ -193,18 +193,7 @@ def execute(
>>> df = cursor.fetchall() # Returns pandas DataFrame
"""
self._reset_state()
if self._unload:
s3_staging_dir = s3_staging_dir if s3_staging_dir else self._s3_staging_dir
if not s3_staging_dir:
raise ProgrammingError("If the unload option is used, s3_staging_dir is required.")
operation, unload_location = self._formatter.wrap_unload(
operation,
s3_staging_dir=s3_staging_dir,
format_=AthenaFileFormat.FILE_FORMAT_PARQUET,
compression=AthenaCompression.COMPRESSION_SNAPPY,
)
else:
unload_location = None
operation, unload_location = self._prepare_unload(operation, s3_staging_dir)
self.query_id = self._execute(
operation,
parameters=parameters,
Expand Down
15 changes: 2 additions & 13 deletions pyathena/polars/async_cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pyathena import ProgrammingError
from pyathena.async_cursor import AsyncCursor
from pyathena.common import CursorIterator
from pyathena.model import AthenaCompression, AthenaFileFormat, AthenaQueryExecution
from pyathena.model import AthenaQueryExecution
from pyathena.polars.converter import (
DefaultPolarsTypeConverter,
DefaultPolarsUnloadTypeConverter,
Expand Down Expand Up @@ -221,18 +221,7 @@ def execute(
>>> result_set = future.result()
>>> df = result_set.as_polars() # Returns Polars DataFrame
"""
if self._unload:
s3_staging_dir = s3_staging_dir if s3_staging_dir else self._s3_staging_dir
if not s3_staging_dir:
raise ProgrammingError("If the unload option is used, s3_staging_dir is required.")
operation, unload_location = self._formatter.wrap_unload(
operation,
s3_staging_dir=s3_staging_dir,
format_=AthenaFileFormat.FILE_FORMAT_PARQUET,
compression=AthenaCompression.COMPRESSION_SNAPPY,
)
else:
unload_location = None
operation, unload_location = self._prepare_unload(operation, s3_staging_dir)
query_id = self._execute(
operation,
parameters=parameters,
Expand Down
15 changes: 2 additions & 13 deletions pyathena/polars/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from pyathena.common import CursorIterator
from pyathena.error import OperationalError, ProgrammingError
from pyathena.model import AthenaCompression, AthenaFileFormat, AthenaQueryExecution
from pyathena.model import AthenaQueryExecution
from pyathena.polars.converter import (
DefaultPolarsTypeConverter,
DefaultPolarsUnloadTypeConverter,
Expand Down Expand Up @@ -191,18 +191,7 @@ def execute(
>>> df = cursor.as_polars() # Returns Polars DataFrame
"""
self._reset_state()
if self._unload:
s3_staging_dir = s3_staging_dir if s3_staging_dir else self._s3_staging_dir
if not s3_staging_dir:
raise ProgrammingError("If the unload option is used, s3_staging_dir is required.")
operation, unload_location = self._formatter.wrap_unload(
operation,
s3_staging_dir=s3_staging_dir,
format_=AthenaFileFormat.FILE_FORMAT_PARQUET,
compression=AthenaCompression.COMPRESSION_SNAPPY,
)
else:
unload_location = None
operation, unload_location = self._prepare_unload(operation, s3_staging_dir)
self.query_id = self._execute(
operation,
parameters=parameters,
Expand Down