Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 38 additions & 6 deletions examples/dataframe/create_write_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
import pyarrow as pa # type: ignore
except Exception:
pa = None
try:
import pandas as pd # type: ignore
except Exception:
pd = None
from nisystemlink.clients.core import HttpConfiguration
from nisystemlink.clients.dataframe import DataFrameClient
from nisystemlink.clients.dataframe.models import (
Expand Down Expand Up @@ -40,6 +44,11 @@
)
)


print(f"Created table with ID: {table_id}")

print("Appending data to table...")

# Append via explicit AppendTableDataRequest (JSON)
frame_request = DataFrame(
data=[[str(i), str(random.random()), datetime.now().isoformat()] for i in range(3)]
Expand All @@ -55,6 +64,7 @@
client.append_table_data(table_id, frame_direct)

if pa is not None:
print("Appending data to table via Arrow RecordBatches...")
# Append via single RecordBatch (Arrow)
batch_single = pa.record_batch(
[
Expand All @@ -79,9 +89,31 @@
]
client.append_table_data(table_id, batch_list)

# Mark end_of_data for the table
# Supply `None` and `end_of_data=True`
client.append_table_data(table_id, None, end_of_data=True)
else:
# If pyarrow is not installed, flush via the JSON path
client.append_table_data(table_id, None, end_of_data=True)
if not (pa is None or pd is None):
    # Both optional packages are required for this step: pandas builds the
    # frame and pyarrow converts it into the RecordBatch that is appended.
    print("Appending data to table via Pandas DataFrame...")

    # Column values for the three rows appended through the Pandas route.
    column_values = {
        "ix": [11, 12, 13],
        "Float_Column": [0.6, 0.7, 0.8],
        "Timestamp_Column": [datetime.now() for _ in range(3)],
    }

    # Optional: dtypes used to coerce the frame's columns to the dataframe
    # table schema before conversion.
    column_dtypes = {
        "ix": "Int32",
        "Float_Column": "float32",
        "Timestamp_Column": "datetime64[ns]",
    }

    # Append via DataFrame (Pandas): build the frame and cast it in one step.
    df = pd.DataFrame(column_values).astype(column_dtypes)

    # Convert the Pandas DataFrame to an Arrow RecordBatch, then append it.
    record_batch = pa.RecordBatch.from_pandas(df)
    client.append_table_data(table_id, record_batch)

# Mark end_of_data for the table.
# Supply `None` in the data position together with `end_of_data=True`.
# Note: the "Finished" message is printed before this final call is issued.
print("Finished appending data.")
client.append_table_data(table_id, None, end_of_data=True)