Skip to content

Commit a54b678

Browse files
committed
Add real-time auto-refresh feature to dashboard
1 parent 6b3e4db commit a54b678

File tree

8 files changed

+224
-12
lines changed

8 files changed

+224
-12
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ tmp/
99
gpuhosts.db
1010
gpuview.log
1111
pypi.sh
12+
.DS_Store

README.md

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ GPU is an expensive resource, and deep learning practitioners have to monitor th
1313

1414
With `gpuview` one can monitor GPUs on the go, through a web browser. Moreover, **multiple GPU servers** can be registered into one `gpuview` dashboard and all stats are aggregated and accessible from one place.
1515

16+
The dashboard features **live auto-refresh** (every 3 seconds) and includes interactive tooltips, temperature-based color coding, and pause/resume controls for real-time GPU monitoring.
17+
1618
Dashboard view of nine GPUs across multiple servers:
1719

1820
![Screenshot of gpuview](https://github.com/fgaim/gpuview/blob/main/imgs/dash-1.png)
@@ -60,6 +62,17 @@ gpuview run --demo
6062

6163
This displays fake GPU statistics and is useful when developing on systems without NVIDIA GPUs or when showcasing the dashboard.
6264

65+
## API Endpoints
66+
67+
gpuview provides REST API endpoints for programmatic access:
68+
69+
* `GET /api/gpustat/self` - Returns GPU statistics for the main host
70+
* `GET /api/gpustat/all` - Returns aggregated GPU statistics for all registered hosts
71+
72+
**Legacy endpoints:**
73+
74+
* `GET /gpustat` - Returns GPU statistics for the local host (backward compatibility)
75+
6376
### Run as a Service
6477

6578
To permanently run `gpuview` it needs to be deployed as a background service.
@@ -87,7 +100,7 @@ gpuview -h
87100
* `--exclude-self` : Don't report to others but to self-dashboard
88101
* `--demo` : Run with fake data for testing purposes
89102
* `-d`, `--debug` : Run server in debug mode (for developers)
90-
* `add` : Add a GPU host to dashboard
103+
* `add` : Add a GPU host to the dashboard
91104
* `--url` : URL of host [IP:Port], e.g. X.X.X.X:9988
92105
* `--name` : Optional readable name for the host, e.g. Node101
93106
* `remove` : Remove a registered host from the dashboard
@@ -117,7 +130,7 @@ Remove a registered host as follows:
117130
gpuview remove --url <ip:port> --name <name>
118131
```
119132

120-
Display all registered hosts as follows:
133+
Display all registered hosts/nodes as follows:
121134

122135
```sh
123136
gpuview hosts

gpuview/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
The gpuview module.
33
"""
44

5-
version_info = (0, 5, 0)
5+
version_info = (1, 0, 0)
66
__version__ = ".".join(str(c) for c in version_info)
77

88

gpuview/app.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,13 @@ def index() -> str:
3535
return template("index", gpustats=gpustats, update_time=now)
3636

3737

38-
@app.route("/gpustat", methods=["GET"])
38+
@app.route("/gpustat", methods=["GET"]) # deprecated alias
39+
@app.route("/api/gpustat/self", methods=["GET"])
3940
def report_gpustat() -> str:
4041
"""
4142
Returns the gpustat of this host.
4243
See `exclude-self` option of `gpuview run`.
44+
Available at both /gpustat (legacy) and /api/gpustat/self (RESTful).
4345
"""
4446

4547
def _date_handler(obj: Any) -> str:
@@ -58,6 +60,27 @@ def _date_handler(obj: Any) -> str:
5860
return json.dumps(resp, default=_date_handler)
5961

6062

63+
@app.route("/api/gpustat/all", methods=["GET"])
def api_gpustat_all() -> str:
    """
    Serve the aggregated gpustats of all hosts as a JSON string.

    The payload comes from `demo.get_demo_gpustats()` when DEMO_MODE is set
    and from `core.all_gpustats()` otherwise. The dashboard frontend fetches
    this route for its live updates.
    """

    def _date_handler(obj: Any) -> str:
        # Fallback serializer for json.dumps: anything exposing isoformat()
        # is emitted as its ISO string, everything else is rejected.
        if not hasattr(obj, "isoformat"):
            raise TypeError(type(obj))
        return obj.isoformat()

    response.content_type = "application/json"
    resp = demo.get_demo_gpustats() if DEMO_MODE else core.all_gpustats()
    return json.dumps(resp, default=_date_handler)
82+
83+
6184
def main() -> None:
6285
parser = utils.arg_parser()
6386
args = parser.parse_args()

gpuview/views/index.tpl

Lines changed: 181 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
data-bs-placement="top"
3636
data-bs-html="true"
3737
title="<div class='text-start small'>
38-
<strong>Host:</strong> {{ gpustat.get('hostname', '-') }}<br>
38+
<strong>Node:</strong> {{ gpustat.get('hostname', '-') }}<br>
3939
<strong>Device:</strong> {{ gpu.get('name', '-') }}<br>
4040
<strong>Device ID:</strong> {{ gpu.get('index', '-') }}<br>
4141
<strong>Memory:</strong> {{ gpu.get('memory.used', '-') }}/{{ gpu.get('memory.total', '-') }} MB<br>
@@ -83,13 +83,13 @@
8383
<!-- GPU Stat Card-->
8484
<div class="card mb-3">
8585
<div class="card-header">
86-
<i class="fas fa-table"></i> All Hosts and GPUs</div>
86+
<i class="fas fa-table"></i> All Nodes and GPUs</div>
8787
<div class="card-body">
8888
<div class="table-responsive">
8989
<table class="table table-striped table-bordered" id="dataTable" width="100%">
9090
<thead>
9191
<tr>
92-
<th scope="col">Host</th>
92+
<th scope="col">Node</th>
9393
<th scope="col">Device</th>
9494
<th scope="col">Temp.</th>
9595
<th scope="col">Util.</th>
@@ -116,8 +116,37 @@
116116
</table>
117117
</div>
118118
</div>
119-
<div class="card-footer small text-muted">{{ update_time }}</div>
119+
<div class="card-footer small text-muted px-3 py-2">
120+
<div class="d-flex justify-content-between align-items-center">
121+
<div id="update-timestamp">{{ update_time }}</div>
122+
<div class="d-flex align-items-center gap-3">
123+
<span id="refresh-status">Auto-refresh: <span class="text-success">ON</span> (every 3s)</span>
124+
<a href="#" id="toggle-refresh" class="text-decoration-none small">
125+
<i class="fas fa-pause me-1"></i>Pause
126+
</a>
127+
</div>
128+
</div>
129+
</div>
130+
</div>
131+
132+
<div class="row mt-3">
133+
<div class="col-12">
134+
<div class="card bg-light">
135+
<div class="card-body p-3">
136+
<div class="d-flex align-items-center flex-wrap gap-3">
137+
<h6 class="card-title mb-0 me-2"><i class="fas fa-palette me-2"></i>GPU Temperature Legend:</h6>
138+
<div class="d-flex flex-wrap gap-2">
139+
<span class="badge rounded-pill bg-danger px-2 py-1">Hot (>75°C)</span>
140+
<span class="badge rounded-pill bg-warning px-2 py-1">Warm (50-75°C)</span>
141+
<span class="badge rounded-pill bg-success px-2 py-1">Normal (25-50°C)</span>
142+
<span class="badge rounded-pill bg-primary px-2 py-1">Cool (<25°C)</span>
143+
</div>
144+
</div>
145+
</div>
146+
</div>
147+
</div>
120148
</div>
149+
121150
<footer class="py-4 bg-dark">
122151
<div class="container">
123152
<div class="text-center">
@@ -135,14 +164,161 @@
135164
<script src="https://cdn.datatables.net/2.0.3/js/dataTables.min.js"></script>
136165
<script src="https://cdn.datatables.net/2.0.3/js/dataTables.bootstrap5.min.js"></script>
137166
<script>
167+
// State shared between the initializer below and the refresh functions.
let dataTable;               // DataTables instance bound to #dataTable
let refreshInterval;         // timer handle of the running auto-refresh
let isRefreshEnabled = true; // flipped by the Pause/Resume link

document.addEventListener('DOMContentLoaded', () => {
  dataTable = new DataTable('#dataTable', {
    // Every row gets the compact `small` class.
    createdRow: (row) => {
      row.classList.add('small');
    },
    columnDefs: [
      { className: 'text-start', targets: '_all' }
    ]
  });

  // Activate Bootstrap tooltips on the server-rendered elements.
  document.querySelectorAll('[data-bs-toggle="tooltip"]').forEach((tooltipTriggerEl) => {
    new bootstrap.Tooltip(tooltipTriggerEl);
  });

  const toggleButton = document.getElementById('toggle-refresh');
  const refreshStatus = document.getElementById('refresh-status');

  const PAUSE_LINK = '<i class="fas fa-pause me-1"></i>Pause';
  const RESUME_LINK = '<i class="fas fa-play me-1"></i>Resume';
  const STATUS_ON = 'Auto-refresh: <span class="text-success">ON</span> (every 3s)';
  const STATUS_PAUSED = 'Auto-refresh: <span class="text-danger">PAUSED</span>';

  // Pause/Resume link: stop or restart the polling timer and update the labels.
  toggleButton.addEventListener('click', (event) => {
    event.preventDefault();
    isRefreshEnabled = !isRefreshEnabled;

    if (!isRefreshEnabled) {
      clearInterval(refreshInterval);
      toggleButton.innerHTML = RESUME_LINK;
      refreshStatus.innerHTML = STATUS_PAUSED;
      return;
    }

    refreshInterval = setInterval(updateDashboard, 3000);
    toggleButton.innerHTML = PAUSE_LINK;
    refreshStatus.innerHTML = STATUS_ON;
  });

  // Auto-refresh every 3 seconds
  refreshInterval = setInterval(updateDashboard, 3000);
});
207+
208+
/**
 * Fetch the aggregated stats from /api/gpustat/all and redraw the cards,
 * the table and the "Updated at" footer text.
 *
 * Failures are best-effort: they are logged to the console and the page
 * keeps showing the previous data.
 */
async function updateDashboard() {
  try {
    const response = await fetch('/api/gpustat/all');
    if (!response.ok) {
      // Previously dropped silently; log it so a broken endpoint is visible.
      console.warn('Auto-refresh failed: HTTP ' + response.status);
      return;
    }

    const gpustats = await response.json();
    if (!Array.isArray(gpustats)) {
      // updateCards() empties the card container before iterating, so a
      // non-list payload must be rejected before any DOM is touched.
      console.warn('Auto-refresh failed: unexpected payload', gpustats);
      return;
    }

    updateCards(gpustats);
    updateTable(gpustats);

    const now = new Date();
    const timeString = now.toLocaleString() + ' ' + Intl.DateTimeFormat().resolvedOptions().timeZone;
    document.getElementById('update-timestamp').textContent = `Updated at ${timeString}`;
  } catch (error) {
    console.warn('Auto-refresh failed:', error);
  }
}
223+
224+
/**
 * Rebuild the GPU cards (one per GPU of every host) from fresh stats.
 *
 * Existing Bootstrap tooltips are disposed first, the card container is
 * emptied and refilled, then tooltips are created for the new cards.
 *
 * @param {Array<Object>} gpustats - per-host stats; each entry is read for
 *   `hostname` and a `gpus` array of per-GPU objects.
 */
function updateCards(gpustats) {
  // Escape text for interpolation into HTML markup or a quoted attribute.
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
    '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
  })[ch]);

  // Escaped display value. Only undefined / null / '' use the fallback, so a
  // legitimate 0 (GPU index 0, 0 % utilization, 0 users) is shown as "0".
  const show = (value, fallback = '-') =>
    escapeHtml(value === undefined || value === null || value === '' ? fallback : value);

  document.querySelectorAll('[data-bs-toggle="tooltip"]').forEach((element) => {
    const tooltip = bootstrap.Tooltip.getInstance(element);
    if (tooltip) {
      tooltip.dispose();
    }
  });

  // NOTE(review): this picks the first `.row` in the document and assumes it
  // is the card grid - confirm against the full template.
  const cardsContainer = document.querySelector('.row');
  cardsContainer.innerHTML = '';

  gpustats.forEach((gpustat) => {
    // A host entry without a `gpus` list contributes no cards instead of
    // aborting the redraw half-way through.
    (gpustat.gpus || []).forEach((gpu) => {
      const temp = gpu['temperature.gpu'];
      let flag = 'bg-primary';
      if (temp > 75) flag = 'bg-danger';
      else if (temp > 50) flag = 'bg-warning';
      else if (temp > 25) flag = 'bg-success';

      // Tooltip body: values are escaped once here, and the whole fragment is
      // escaped again below because it is carried inside the title attribute
      // and later rendered as HTML (data-bs-html="true").
      const tooltipHtml = `<div class='text-start small'>
        <strong>Node:</strong> ${show(gpustat.hostname)}<br>
        <strong>Device:</strong> ${show(gpu.name)}, ID: ${show(gpu.index)}<br>
        <strong>Memory:</strong> ${show(gpu['memory.used'])} / ${show(gpu['memory.total'])} MB<br>
        <strong>Power:</strong> ${show(gpu['power.draw'])} / ${show(gpu['enforced.power.limit'])} W<br>
        <strong>Temperature:</strong> ${show(gpu['temperature.gpu'])}°C<br>
        <strong>Utilization:</strong> ${show(gpu['utilization.gpu'])}%<br>
        <strong>Processes:</strong> ${show(gpu['user_processes'])}
      </div>`;

      const cardHtml = `
        <div class="col-xl-3 col-md-4 col-sm-6 mb-3">
          <div class="card text-white ${flag} h-100"
               data-bs-toggle="tooltip"
               data-bs-placement="top"
               data-bs-html="true"
               title="${escapeHtml(tooltipHtml)}">
            <div class="card-body">
              <div class="d-flex align-items-center">
                <div>
                  <i class="fas fa-server me-2"></i>
                  <b>${show(gpustat.hostname)}</b>
                </div>
              </div>
              <div class="mt-2">
                [${show(gpu.index, '')}] ${show(gpu.name)}
              </div>
            </div>
            <div class="card-footer text-white small">
              <div class="row g-1">
                <div class="col-3">
                  <i class="fas fa-temperature-three-quarters" aria-hidden="true"></i>
                  ${show(gpu['temperature.gpu'])}°C
                </div>
                <div class="col-3">
                  <i class="fas fa-memory" aria-hidden="true"></i>
                  ${show(gpu.memory)}%
                </div>
                <div class="col-3">
                  <i class="fas fa-cogs" aria-hidden="true"></i>
                  ${show(gpu['utilization.gpu'])}%
                </div>
                <div class="col-3">
                  <i class="fas fa-users" aria-hidden="true"></i>
                  ${show(gpu.users)}
                </div>
              </div>
            </div>
          </div>
        </div>
      `;
      cardsContainer.insertAdjacentHTML('beforeend', cardHtml);
    });
  });

  // Tooltips must be re-created because the elements above are brand new.
  document.querySelectorAll('[data-bs-toggle="tooltip"]').forEach((tooltipTriggerEl) => {
    new bootstrap.Tooltip(tooltipTriggerEl);
  });
}
302+
303+
/**
 * Replace the rows of the DataTables instance `dataTable` with fresh stats,
 * one row per GPU of every host.
 *
 * @param {Array<Object>} gpustats - per-host stats; each entry is read for
 *   `hostname` and a `gpus` array of per-GPU objects.
 */
function updateTable(gpustats) {
  // Escape text, since the cell strings below are handed to DataTables as
  // markup rather than plain text.
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
    '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
  })[ch]);

  // Escaped display value. Only undefined / null / '' use the fallback, so a
  // legitimate 0 (GPU index 0, 0 % utilization, 0 W) is shown as "0".
  const show = (value, fallback = '-') =>
    escapeHtml(value === undefined || value === null || value === '' ? fallback : value);

  // Build every row before touching the table, so a malformed payload cannot
  // leave it cleared but not refilled.
  const newData = [];
  gpustats.forEach((gpustat) => {
    (gpustat.gpus || []).forEach((gpu) => {
      newData.push([
        show(gpustat.hostname),
        `[${show(gpu.index, '')}] ${show(gpu.name)}`,
        `${show(gpu['temperature.gpu'])}°C`,
        `${show(gpu['utilization.gpu'])}%`,
        `${show(gpu.memory)}% (${show(gpu['memory.used'], '')}/${show(gpu['memory.total'])})`,
        `${show(gpu['power.draw'])} / ${show(gpu['enforced.power.limit'])}`,
        show(gpu['user_processes'])
      ]);
    });
  });

  dataTable.clear();
  dataTable.rows.add(newData);
  // draw(false) redraws without resetting the current paging position.
  dataTable.draw(false);
}
146322
</script>
147323
</div>
148324
</body>

imgs/dash-1.png

107 KB
Loading

imgs/dash-2.png

127 KB
Loading

todo.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
todo for `gpuview`
2-
-------------------
1+
# todo for `gpuview`
32

4-
- [ ] Make the dashboard live with partial asynchronous updates.
3+
- [x] Make the dashboard live with partial asynchronous updates.
54
- [x] Install as a `service daemon` and enable autostart at boot time.
65
- [ ] Add animated graphs to dashboard using D3.js.
76
- [ ] Logging to system journal.

0 commit comments

Comments
 (0)