Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dask-metrics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pip install .
* **`timestamp`**: the time, in seconds, since the start of metric collection.

### Optional
* **`tasks`**: the keys of the currently executing tasks
* **`total-mem`**: the memory used, in bytes, by each GPU on a worker
* **`mem-util`**: the memory utilization of each GPU on a worker as a percentage of total memory available.
* **`compute-util`**: the compute utilization of each GPU on a worker as a percentage of maximum compute ability.
Expand Down
4 changes: 2 additions & 2 deletions dask-metrics/conda.recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package:
name: dask-metrics
version: "2021.8.12"
version: "2021.8.20"

build:
number: 0
number: 2
noarch: python

source:
Expand Down
8 changes: 8 additions & 0 deletions dask-metrics/dask_metrics/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,14 @@ def mem_util(worker, handle):
def compute_util(worker, handle):
return pynvml.nvmlDeviceGetUtilizationRates(handle).gpu

@operation("tasks", per_device=False)
def tasks(worker, handles):
result = []
for key, state in worker.tasks.items():
if state.state == "executing":
result.append(key)
return ", ".join(result)

self.tracking_list = [o for o in operations if o["name"] in tracking]
self.tracking_list += custom
self.clear_metrics(clear_disk=True)
Expand Down
7 changes: 6 additions & 1 deletion dask-metrics/dask_metrics/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,18 @@ def _get_data(worker_file, job, metrics):
df = cudf.read_csv(worker_file)
section = df[df["job"] == job]

if len(section) == 0:
raise ValueError(
f"The requested job '{job}' was not found in the provided file"
)

sections = []
for metric in metrics:
if metric not in section:
raise ValueError(
f"The requested metric '{metric}' was not found in the provided file"
)

m = section[metric].str.split(pat=", ")
m = m.to_arrow().to_pylist()
m = cudf.DataFrame(m, dtype="int64")
Expand Down
2 changes: 1 addition & 1 deletion dask-metrics/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setup(
name="dask-metrics",
version="2021.8.12",
version="2021.8.20",
description="A tool for collecting metrics on distributed Dask clusters",
author="Travis Hester",
packages=find_packages(include=["dask_metrics"]),
Expand Down