From 564b83540f1d266c780bb2b205c2a3c915630f48 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Tue, 13 Jan 2026 18:15:33 +0000 Subject: [PATCH] DNI: drm/vc4: Skip input lines when doing a large downscale The TPZ filter wants ideally 3 lines of image to give good quality downscaling. More than that leads to excessive SDRAM bandwidth for no gain. If the downsample factor allows for it, reduce the programmed image height and increase the pitch to compensate. FIXME: This currently does not handle SAND or T-format images where we need to configure it slightly differently. Signed-off-by: Dave Stevenson --- drivers/gpu/drm/vc4/vc4_drv.h | 5 +++++ drivers/gpu/drm/vc4/vc4_plane.c | 39 +++++++++++++++++++++++++++------ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index a5f9f3d8ffed55..894bfe9f2ba83c 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -480,6 +480,11 @@ struct vc4_plane_state { */ bool is_yuv444_unity; + /* Skip lines on large downscales to avoid consuming too much SDRAM + * bandwidth + */ + unsigned int vdownsample; + /* Our allocation in LBM for temporary storage during scaling. */ unsigned int lbm_handle; diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 9e9946d9cf6d74..c6fdc21a4614b7 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -935,7 +935,7 @@ static void vc4_write_scaling_parameters(struct drm_plane_state *state, /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) { - vc4_write_tpz(vc4_state, vc4_state->src_h[channel], + vc4_write_tpz(vc4_state, vc4_state->src_h[channel] / vc4_state->vdownsample, vc4_state->crtc_h); vc4_dlist_write(vc4_state, 0xc0c0c0c0); } @@ -1361,6 +1361,17 @@ static int vc4_plane_mode_set(struct drm_plane *plane, return 0; } + if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ && + vc4_state->src_h[0] / vc4_state->crtc_h > 3) { + /* Downscaling by more than x3. Reduce the number of lines read + * to avoid exceeding SDRAM bandwidth. + */ + vc4_state->vdownsample = ((vc4_state->src_h[0] / + (vc4_state->crtc_h * 3)) >> 16) + 1; + } else { + vc4_state->vdownsample = 1; + } + width = vc4_state->src_w[0] >> 16; height = vc4_state->src_h[0] >> 16; @@ -1635,7 +1646,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) | vc4_hvs4_get_alpha_blend_mode(state) | VC4_SET_FIELD(width, SCALER_POS2_WIDTH) | - VC4_SET_FIELD(height, SCALER_POS2_HEIGHT)); + VC4_SET_FIELD(height / vc4_state->vdownsample, + SCALER_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); @@ -1689,7 +1701,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) | - VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT)); + VC4_SET_FIELD(height / vc4_state->vdownsample, + SCALER5_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); @@ -1713,14 +1726,15 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_dlist_write(vc4_state, 0xc0c0c0c0); /* Pitch word 0 */ - vc4_dlist_write(vc4_state, pitch[0]); + vc4_dlist_write(vc4_state, pitch[0] / vc4_state->vdownsample); /* Pitch word 1/2 */ for (i = 1; i < num_planes; i++) { if (hvs_format != HVS_PIXEL_FORMAT_H264 && hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) { vc4_dlist_write(vc4_state, - VC4_SET_FIELD(fb->pitches[i], + VC4_SET_FIELD(fb->pitches[i] / + vc4_state->vdownsample, SCALER_SRC_PITCH)); } else { vc4_dlist_write(vc4_state, pitch[1]); @@ -1938,6 +1952,17 @@ static int vc6_plane_mode_set(struct drm_plane *plane, width = vc4_state->src_w[0] >> 16; height = vc4_state->src_h[0] >> 16; + if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ && + vc4_state->src_h[0] / vc4_state->crtc_h > 3) { + /* Downscaling by more than x3. Reduce the number of lines read + * to avoid exceeding SDRAM bandwidth. + */ + vc4_state->vdownsample = ((vc4_state->src_h[0] / + (vc4_state->crtc_h * 3)) >> 16) + 1; + } else { + vc4_state->vdownsample = 1; + } + /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB * and 4:4:4, scl1 should be set to scl0 so both channels of * the scaler do the same thing. For YUV, the Y plane needs @@ -2152,7 +2177,7 @@ static int vc6_plane_mode_set(struct drm_plane *plane, /* Position Word 2: Source Image Size */ vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, - VC4_SET_FIELD(height - 1, + VC4_SET_FIELD((height / vc4_state->vdownsample) - 1, SCALER6_POS2_SRC_LINES) | VC4_SET_FIELD(width - 1, SCALER6_POS2_SRC_WIDTH)); @@ -2187,7 +2212,7 @@ static int vc6_plane_mode_set(struct drm_plane *plane, if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 && base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) { vc4_dlist_write(vc4_state, - VC4_SET_FIELD(fb->pitches[i], + VC4_SET_FIELD((fb->pitches[i] * vc4_state->vdownsample), SCALER6_PTR2_PITCH)); } else { vc4_dlist_write(vc4_state, pitch[i]);