22"""SingleStoreDB export service."""
33from __future__ import annotations
44
5- import abc
6- import re
5+ import copy
6+ import json
77from typing import Any
88from typing import Dict
99from typing import List
1010from typing import Optional
11+ from typing import Union
1112
1213from .. import ManagementError
1314from .utils import vars_to_str
1415from .workspace import WorkspaceGroup
1516from .workspace import WorkspaceManager
1617
1718
class Link(object):
    """Generic storage base class.

    Each concrete link class sets ``scheme`` to the upper-case scheme name it
    handles. :meth:`from_config_and_creds` on this base class uses that
    attribute to pick the class to build.
    """

    scheme: str = 'unknown'

    def __str__(self) -> str:
        """Return string representation."""
        return vars_to_str(self)

    def __repr__(self) -> str:
        """Return string representation."""
        return str(self)

    # NOTE: this class does not use ``abc.ABCMeta``, so the decorator below
    # does not prevent instantiation; an un-overridden method simply raises
    # ``NotImplementedError`` when it is called.
    @abc.abstractmethod
    def to_storage_info(self) -> Dict[str, Any]:
        """Return a storage info dictionary."""
        raise NotImplementedError

    @classmethod
    def from_config_and_creds(
        cls,
        scheme: str,
        config: Dict[str, Any],
        credentials: Dict[str, Any],
        manager: 'WorkspaceManager',
    ) -> 'Link':
        """
        Build a link using the subclass registered for ``scheme``.

        Parameters
        ----------
        scheme : str
            Storage scheme; compared case-insensitively against the
            ``scheme`` attribute of the subclasses
        config : Dict[str, Any]
            Configuration values, passed through to the selected class
        credentials : Dict[str, Any]
            Credential values, passed through to the selected class
        manager : WorkspaceManager
            Manager passed through to the selected class

        Raises
        ------
        TypeError
            If no subclass declares the requested scheme

        Returns
        -------
        Link
            Whatever the selected class's ``from_config_and_creds`` returns

        """
        wanted = scheme.upper()

        # Search one generation at a time so that direct subclasses keep
        # precedence, while classes derived from another link class are
        # still reachable.
        generation = cls.__subclasses__()
        while generation:
            for candidate in generation:
                if candidate.scheme == wanted:
                    return candidate.from_config_and_creds(
                        scheme, config, credentials, manager,
                    )
            generation = [
                child
                for parent in generation
                for child in parent.__subclasses__()
            ]

        raise TypeError(f'No link class found for given information: {scheme} ')
52-
53-
class S3Link(Link):
    """S3 link.

    Stores a region and a storage base URL and exposes them through
    :meth:`to_storage_info` under the ``storageRegion`` and
    ``storageBaseURL`` keys.
    """

    scheme: str = 'S3'
    region: str
    storage_base_url: str

    def __init__(self, region: str, storage_base_url: str):
        self.region = region
        self.storage_base_url = storage_base_url
        # Set by ``from_config_and_creds``; stays None on direct construction
        self._manager: Optional[WorkspaceManager] = None

    def to_storage_info(self) -> Dict[str, Any]:
        """Return a storage info dictionary."""
        return dict(
            storageBaseURL=self.storage_base_url,
            storageRegion=self.region,
        )

    @classmethod
    def from_config_and_creds(
        cls,
        scheme: str,
        config: Dict[str, Any],
        credentials: Dict[str, Any],
        manager: 'WorkspaceManager',
    ) -> 'S3Link':
        """
        Build an S3 link from configuration and credential dictionaries.

        The two dictionaries are merged (credentials win on duplicate keys);
        ``region`` and ``endpoint_url`` are then read from the result.

        Parameters
        ----------
        scheme : str
            Storage scheme; must equal ``S3`` ignoring case
        config : Dict[str, Any]
            Configuration values
        credentials : Dict[str, Any]
            Credential values
        manager : WorkspaceManager
            Manager stored on the returned link

        Raises
        ------
        ValueError
            If the scheme does not match or a required value is
            missing / empty

        Returns
        -------
        S3Link

        """
        # Explicit raises instead of ``assert``: assertions are removed when
        # Python runs with -O, which would silently disable these checks.
        if scheme.upper() != cls.scheme:
            raise ValueError(
                f'scheme {scheme!r} does not match {cls.scheme!r}',
            )

        params: Dict[str, Any] = {**config, **credentials}

        for required in ('region', 'endpoint_url'):
            if not params.get(required):
                raise ValueError(f'{required} is required')

        out = cls(params['region'], params['endpoint_url'])
        out._manager = manager
        return out
92-
93-
class Catalog(object):
    """Generic catalog base class.

    Each concrete catalog class sets ``catalog_type`` and ``table_format``
    to the upper-case values it handles. :meth:`from_config_and_creds` on
    this base class uses that pair to pick the class to build.
    """

    catalog_type: str = 'UNKNOWN'
    table_format: str = 'UNKNOWN'

    def __str__(self) -> str:
        """Return string representation."""
        return vars_to_str(self)

    def __repr__(self) -> str:
        """Return string representation."""
        return str(self)

    @classmethod
    def from_config_and_creds(
        cls,
        config: Dict[str, Any],
        credentials: Dict[str, Any],
        manager: 'WorkspaceManager',
    ) -> 'Catalog':
        """
        Build a catalog using the subclass matching the configuration.

        Parameters
        ----------
        config : Dict[str, Any]
            Configuration values; ``type`` and ``table_format`` are read
            from it and compared case-insensitively against the subclasses
        credentials : Dict[str, Any]
            Credential values, passed through to the selected class
        manager : WorkspaceManager
            Manager passed through to the selected class

        Raises
        ------
        KeyError
            If ``type`` or ``table_format`` is absent from ``config``
        TypeError
            If no subclass declares the requested type / format pair

        Returns
        -------
        Catalog
            Whatever the selected class's ``from_config_and_creds`` returns

        """
        wanted = (config['type'].upper(), config['table_format'].upper())

        # Search one generation at a time so that direct subclasses keep
        # precedence, while classes derived from another catalog class are
        # still reachable.
        generation = cls.__subclasses__()
        while generation:
            for candidate in generation:
                if (candidate.catalog_type, candidate.table_format) == wanted:
                    return candidate.from_config_and_creds(
                        config, credentials, manager,
                    )
            generation = [
                child
                for parent in generation
                for child in parent.__subclasses__()
            ]

        raise TypeError(f'No catalog class found for given information: {config} ')

    # NOTE: this class does not use ``abc.ABCMeta``, so the decorator below
    # does not prevent instantiation; an un-overridden method simply raises
    # ``NotImplementedError`` when it is called.
    @abc.abstractmethod
    def to_catalog_info(self) -> Dict[str, Any]:
        """Return a catalog info dictionary."""
        raise NotImplementedError
133-
134-
class IcebergGlueCatalog(Catalog):
    """Catalog entry for the ``GLUE`` type with the ``ICEBERG`` format.

    Carries a region and a catalog ID, which :meth:`to_catalog_info` emits
    under the ``glueRegion`` and ``glueCatalogID`` keys.
    """

    table_format = 'ICEBERG'
    catalog_type = 'GLUE'

    region: str
    catalog_id: str

    def __init__(self, region: str, catalog_id: str):
        self.region = region
        self.catalog_id = catalog_id
        # Filled in by ``from_config_and_creds``; None otherwise
        self._manager: Optional[WorkspaceManager] = None

    @classmethod
    def from_config_and_creds(
        cls,
        config: Dict[str, Any],
        credentials: Dict[str, Any],
        manager: 'WorkspaceManager',
    ) -> 'IcebergGlueCatalog':
        """
        Create a catalog from configuration and credential dictionaries.

        Both dictionaries are merged, with credential entries overriding
        configuration entries of the same name; ``region`` and ``id`` are
        then looked up in the merged result.

        Parameters
        ----------
        config : Dict[str, Any]
            Configuration values
        credentials : Dict[str, Any]
            Credential values
        manager : WorkspaceManager
            Manager stored on the returned catalog

        Returns
        -------
        IcebergGlueCatalog

        """
        merged = {**config, **credentials}
        region = merged['region']
        catalog_id = merged['id']

        catalog = cls(region=region, catalog_id=catalog_id)
        catalog._manager = manager
        return catalog

    def to_catalog_info(self) -> Dict[str, Any]:
        """Return the catalog description as a dictionary."""
        return {
            'catalogType': self.catalog_type,
            'tableFormat': self.table_format,
            'glueRegion': self.region,
            'glueCatalogID': self.catalog_id,
        }
175-
176-
17719class ExportService (object ):
17820 """Export service."""
17921
18022 database : str
18123 table : str
182- catalog : Catalog
183- storage_link : Link
24+ catalog_info : Dict [ str , Any ]
25+ storage_info : Dict [ str , Any ]
18426 columns : Optional [List [str ]]
18527
18628 def __init__ (
18729 self ,
18830 workspace_group : WorkspaceGroup ,
18931 database : str ,
19032 table : str ,
191- catalog : Catalog ,
192- storage_link : Link ,
33+ catalog_info : Union [ str , Dict [ str , Any ]] ,
34+ storage_info : Union [ str , Dict [ str , Any ]] ,
19335 columns : Optional [List [str ]],
19436 ):
19537 #: Workspace group
@@ -205,10 +47,16 @@ def __init__(
20547 self .columns = columns
20648
20749 #: Catalog
208- self .catalog = catalog
50+ if isinstance (catalog_info , str ):
51+ self .catalog_info = json .loads (catalog_info )
52+ else :
53+ self .catalog_info = copy .copy (catalog_info )
20954
21055 #: Storage
211- self .storage_link = storage_link
56+ if isinstance (storage_info , str ):
57+ self .storage_info = json .loads (storage_info )
58+ else :
59+ self .storage_info = copy .copy (storage_info )
21260
21361 self ._manager : Optional [WorkspaceManager ] = workspace_group ._manager
21462
@@ -227,21 +75,12 @@ def create_cluster_identity(self) -> Dict[str, Any]:
22775 msg = 'No workspace manager is associated with this object.' ,
22876 )
22977
230- if not isinstance (self .catalog , IcebergGlueCatalog ):
231- raise TypeError ('Only Iceberg Glue catalog is supported at this time.' )
232-
233- if not isinstance (self .storage_link , S3Link ):
234- raise TypeError ('Only S3 links are supported at this time.' )
235-
23678 out = self ._manager ._post (
23779 f'workspaceGroups/{ self .workspace_group .id } /'
23880 'egress/createEgressClusterIdentity' ,
23981 json = dict (
240- storageBucketName = re .split (
241- r'/+' , self .storage_link .storage_base_url ,
242- )[1 ],
243- glueRegion = self .catalog .region ,
244- glueCatalogID = self .catalog .catalog_id ,
82+ catalogInfo = self .catalog_info ,
83+ storageInfo = self .storage_info ,
24584 ),
24685 )
24786
@@ -254,16 +93,13 @@ def start(self, tags: Optional[List[str]] = None) -> 'ExportStatus':
25493 msg = 'No workspace manager is associated with this object.' ,
25594 )
25695
257- if not isinstance (self .storage_link , S3Link ):
258- raise TypeError ('Only S3 links are supported at this time.' )
259-
26096 out = self ._manager ._post (
26197 f'workspaceGroups/{ self .workspace_group .id } /egress/startTableEgress' ,
26298 json = dict (
26399 databaseName = self .database ,
264100 tableName = self .table ,
265- storageInfo = self .storage_link . to_storage_info () ,
266- catalogInfo = self .catalog . to_catalog_info () ,
101+ storageInfo = self .storage_info ,
102+ catalogInfo = self .catalog_info ,
267103 ),
268104 )
269105
0 commit comments