From 9f32962c0fe4129a01bd0422cd1cda76a13c5ef4 Mon Sep 17 00:00:00 2001
From: John Cox <jc@kynesim.co.uk>
Date: Thu, 1 Apr 2021 16:20:58 +0100
Subject: [PATCH] media: rpivid: Improve values returned when setting
 output format

Guess a better value for the compressed bitstream buffer size

Signed-off-by: John Cox <jc@kynesim.co.uk>
---
 drivers/staging/media/rpivid/rpivid_h265.c  | 66 ++++-----------------
 drivers/staging/media/rpivid/rpivid_video.c | 61 +++++++++++++++++--
 drivers/staging/media/rpivid/rpivid_video.h |  4 ++
 3 files changed, 70 insertions(+), 61 deletions(-)

--- a/drivers/staging/media/rpivid/rpivid_h265.c
+++ b/drivers/staging/media/rpivid/rpivid_h265.c
@@ -18,6 +18,7 @@
 
 #include "rpivid.h"
 #include "rpivid_hw.h"
+#include "rpivid_video.h"
 
 #define DEBUG_TRACE_P1_CMD 0
 #define DEBUG_TRACE_EXECUTION 0
@@ -115,41 +116,9 @@ static int gptr_realloc_new(struct rpivi
 	return 0;
 }
 
-/* floor(log2(x)) */
-static unsigned int log2_size(size_t x)
-{
-	unsigned int n = 0;
-
-	if (x & ~0xffff) {
-		n += 16;
-		x >>= 16;
-	}
-	if (x & ~0xff) {
-		n += 8;
-		x >>= 8;
-	}
-	if (x & ~0xf) {
-		n += 4;
-		x >>= 4;
-	}
-	if (x & ~3) {
-		n += 2;
-		x >>= 2;
-	}
-	return (x & ~1) ? n + 1 : n;
-}
-
-static size_t round_up_size(const size_t x)
-{
-	/* Admit no size < 256 */
-	const unsigned int n = x < 256 ? 8 : log2_size(x) - 1;
-
-	return x >= (3 << n) ? 4 << n : (3 << n);
-}
-
 static size_t next_size(const size_t x)
 {
-	return round_up_size(x + 1);
+	return rpivid_round_up_size(x + 1);
 }
 
 #define NUM_SCALING_FACTORS 4064 /* Not a typo = 0xbe0 + 0x400 */
@@ -332,7 +301,7 @@ static int cmds_check_space(struct rpivi
 	if (de->cmd_len + n <= de->cmd_max)
 		return 0;
 
-	newmax = 2 << log2_size(de->cmd_len + n);
+	newmax = roundup_pow_of_two(de->cmd_len + n);
 
 	a = krealloc(de->cmd_fifo, newmax * sizeof(struct rpi_cmd),
 		     GFP_KERNEL);
@@ -1855,23 +1824,10 @@ static void rpivid_h265_setup(struct rpi
 		 * slice as we can use the src buf directly
 		 */
 		if (!s->frame_end && !de->bit_copy_gptr->ptr) {
-			const size_t wxh = s->sps.pic_width_in_luma_samples *
-				s->sps.pic_height_in_luma_samples;
 			size_t bits_alloc;
-
-			/* Annex A gives a min compression of 2 @ lvl 3.1
-			 * (wxh <= 983040) and min 4 thereafter but avoid
-			 * the odity of 983041 having a lower limit than
-			 * 983040.
-			 * Multiply by 3/2 for 4:2:0
-			 */
-			bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
-				wxh < 983040 * 2 ? 983040 * 3 / 4 :
-				wxh * 3 / 8;
-			/* Allow for bit depth */
-			bits_alloc += (bits_alloc *
-				       s->sps.bit_depth_luma_minus8) / 8;
-			bits_alloc = round_up_size(bits_alloc);
+			bits_alloc = rpivid_bit_buf_size(s->sps.pic_width_in_luma_samples,
+							 s->sps.pic_height_in_luma_samples,
+							 s->sps.bit_depth_luma_minus8);
 
 			if (gptr_alloc(dev, de->bit_copy_gptr,
 				       bits_alloc,
@@ -2454,17 +2410,15 @@ static int rpivid_h265_start(struct rpiv
 
 	// Finger in the air PU & Coeff alloc
 	// Will be realloced if too small
-	coeff_alloc = round_up_size(wxh);
-	pu_alloc = round_up_size(wxh / 4);
+	coeff_alloc = rpivid_round_up_size(wxh);
+	pu_alloc = rpivid_round_up_size(wxh / 4);
 	for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i) {
 		// Don't actually need a kernel mapping here
 		if (gptr_alloc(dev, ctx->pu_bufs + i, pu_alloc,
-			       DMA_ATTR_FORCE_CONTIGUOUS |
-					DMA_ATTR_NO_KERNEL_MAPPING))
+			       DMA_ATTR_NO_KERNEL_MAPPING))
 			goto fail;
 		if (gptr_alloc(dev, ctx->coeff_bufs + i, coeff_alloc,
-			       DMA_ATTR_FORCE_CONTIGUOUS |
-					DMA_ATTR_NO_KERNEL_MAPPING))
+			       DMA_ATTR_NO_KERNEL_MAPPING))
 			goto fail;
 	}
 	aux_q_init(ctx);
--- a/drivers/staging/media/rpivid/rpivid_video.c
+++ b/drivers/staging/media/rpivid/rpivid_video.c
@@ -42,18 +42,69 @@ static inline unsigned int constrain2x(u
 			(x > y * 2) ? y : x;
 }
 
+size_t rpivid_round_up_size(const size_t x)
+{
+	/* Admit no size < 256 */
+	const unsigned int n = x < 256 ? 8 : ilog2(x);
+
+	return x >= (3 << n) ? 4 << n : (3 << n);
+}
+
+size_t rpivid_bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8)
+{
+	const size_t wxh = w * h;
+	size_t bits_alloc;
+
+	/* Annex A gives a min compression of 2 @ lvl 3.1
+	 * (wxh <= 983040) and min 4 thereafter but avoid
+	 * the odity of 983041 having a lower limit than
+	 * 983040.
+	 * Multiply by 3/2 for 4:2:0
+	 */
+	bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
+		wxh < 983040 * 2 ? 983040 * 3 / 4 :
+		wxh * 3 / 8;
+	/* Allow for bit depth */
+	bits_alloc += (bits_alloc * bits_minus8) / 8;
+	return rpivid_round_up_size(bits_alloc);
+}
+
 int rpivid_prepare_src_format(struct v4l2_pix_format_mplane *pix_fmt)
 {
+	size_t size;
+	u32 w;
+	u32 h;
+
 	if (pix_fmt->pixelformat != V4L2_PIX_FMT_HEVC_SLICE)
 		return -EINVAL;
 
-	/* Zero bytes per line for encoded source. */
-	pix_fmt->plane_fmt[0].bytesperline = 0;
-	/* Choose some minimum size since this can't be 0 */
-	pix_fmt->plane_fmt[0].sizeimage = max_t(u32, SZ_1K,
-						pix_fmt->plane_fmt[0].sizeimage);
+	w = pix_fmt->width;
+	h = pix_fmt->height;
+	if (!w || !h) {
+		w = 1920;
+		h = 1080;
+	}
+	if (w > 4096)
+		w = 4096;
+	if (h > 4096)
+		h = 4096;
+
+	if (!pix_fmt->plane_fmt[0].sizeimage ||
+	    pix_fmt->plane_fmt[0].sizeimage > SZ_32M) {
+		/* Unspecified or way too big - pick max for size */
+		size = rpivid_bit_buf_size(w, h, 2);
+	}
+	/* Set a minimum */
+	size = max_t(u32, SZ_4K, pix_fmt->plane_fmt[0].sizeimage);
+
+	pix_fmt->width = w;
+	pix_fmt->height = h;
 	pix_fmt->num_planes = 1;
 	pix_fmt->field = V4L2_FIELD_NONE;
+	/* Zero bytes per line for encoded source. */
+	pix_fmt->plane_fmt[0].bytesperline = 0;
+	pix_fmt->plane_fmt[0].sizeimage = size;
+
 	return 0;
 }
 
--- a/drivers/staging/media/rpivid/rpivid_video.h
+++ b/drivers/staging/media/rpivid/rpivid_video.h
@@ -24,6 +24,10 @@ extern const struct v4l2_ioctl_ops rpivi
 
 int rpivid_queue_init(void *priv, struct vb2_queue *src_vq,
 		      struct vb2_queue *dst_vq);
+
+size_t rpivid_bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8);
+size_t rpivid_round_up_size(const size_t x);
+
 int rpivid_prepare_src_format(struct v4l2_pix_format_mplane *pix_fmt);
 int rpivid_prepare_dst_format(struct v4l2_pix_format_mplane *pix_fmt);
 
