Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions faculty/datasets/pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Copyright 2018-2021 Faculty Science Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pandas

from faculty.session import get_session
from faculty.context import get_context
from faculty.clients.object import ObjectClient
from faculty.datasets import transfer


def read_csv(project_path, project_id=None, *args, **kwargs):
"""Read a CSV file from a project's datasets as a pandas DataFrame.

Requires pandas to be installed.

Parameters
----------
project_path : str
The path of the CSV file in the project's datasets to read.
project_id : str, optional
The project to get the CSV file from. You need to have access to this
project for it to work. Defaults to the project set by
FACULTY_PROJECT_ID in your environment.
*args
Additional positional arguments to pass to ``pandas.read_csv``.
**kwargs
Additional keyword arguments to pass to ``pandas.read_csv``.
"""

project_id = project_id or get_context().project_id

session = get_session()
url = session.service_url(ObjectClient.SERVICE_NAME)
object_client = ObjectClient(url, session)

presigned_url = object_client.presign_download(project_id, project_path)

return pandas.read_csv(presigned_url, *args, **kwargs)


def to_csv(dataframe, project_path, project_id=None, *args, **kwargs):
"""Write a pandas DataFrame to a CSV file in a project's datasets.

Requires pandas to be installed.

Parameters
----------
dataframe : pandas.DataFrame
The DataFrame to write.
project_path : str
The path in the project's datasets to write to.
project_id : str, optional
The project to write the CSV file to. You need to have access to this
project for it to work. Defaults to the project set by
FACULTY_PROJECT_ID in your environment.
*args
Additional positional arguments to pass to ``pandas.DataFrame.to_csv``.
**kwargs
Additional keyword arguments to pass to ``pandas.DataFrame.to_csv``.
"""

project_id = project_id or get_context().project_id

session = get_session()
url = session.service_url(ObjectClient.SERVICE_NAME)
object_client = ObjectClient(url, session)

content = dataframe.to_csv(path_or_buf=None, *args, **kwargs)

transfer.upload(
object_client, project_id, project_path, content.encode("utf-8")
)