// SPDX-License-Identifier: GPL-2.0
/*
 * Hantro VPU codec driver
 *
 * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
 *
 * The VPU JPEG encoder produces JPEG baseline sequential format.
 * The quantization coefficients are 8-bit values, complying with
 * the baseline specification. Therefore, it requires
 * luma and chroma quantization tables. The hardware does entropy
 * encoding using internal Huffman tables, as specified in the JPEG
 * specification.
 *
 * In other words, only the luma and chroma quantization tables are
 * required for the encoding operation.
 *
 * Quantization luma table values are written to registers
 * VEPU_swreg_0-VEPU_swreg_15, and chroma table values to
 * VEPU_swreg_16-VEPU_swreg_31. A special order is needed, neither
 * zigzag nor linear.
 */

25 #include <linux/unaligned.h>
26 #include <media/v4l2-mem2mem.h>
27 #include "hantro_jpeg.h"
29 #include "hantro_v4l2.h"
30 #include "hantro_hw.h"
31 #include "rockchip_vpu2_regs.h"
33 #define VEPU_JPEG_QUANT_TABLE_COUNT 16
35 static void rockchip_vpu2_set_src_img_ctrl(struct hantro_dev
*vpu
,
36 struct hantro_ctx
*ctx
)
38 u32 overfill_r
, overfill_b
;
42 * The format width and height are already macroblock aligned
43 * by .vidioc_s_fmt_vid_cap_mplane() callback. Destination
44 * format width and height can be further modified by
45 * .vidioc_s_selection(), and the width is 4-aligned.
47 overfill_r
= ctx
->src_fmt
.width
- ctx
->dst_fmt
.width
;
48 overfill_b
= ctx
->src_fmt
.height
- ctx
->dst_fmt
.height
;
50 reg
= VEPU_REG_IN_IMG_CTRL_ROW_LEN(ctx
->src_fmt
.width
);
51 vepu_write_relaxed(vpu
, reg
, VEPU_REG_INPUT_LUMA_INFO
);
53 reg
= VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r
/ 4) |
54 VEPU_REG_IN_IMG_CTRL_OVRFLB(overfill_b
);
56 * This register controls the input crop, as the offset
57 * from the right/bottom within the last macroblock. The offset from the
58 * right must be divided by 4 and so the crop must be aligned to 4 pixels
61 vepu_write_relaxed(vpu
, reg
, VEPU_REG_ENC_OVER_FILL_STRM_OFFSET
);
63 reg
= VEPU_REG_IN_IMG_CTRL_FMT(ctx
->vpu_src_fmt
->enc_fmt
);
64 vepu_write_relaxed(vpu
, reg
, VEPU_REG_ENC_CTRL1
);
67 static void rockchip_vpu2_jpeg_enc_set_buffers(struct hantro_dev
*vpu
,
68 struct hantro_ctx
*ctx
,
69 struct vb2_buffer
*src_buf
,
70 struct vb2_buffer
*dst_buf
)
72 struct v4l2_pix_format_mplane
*pix_fmt
= &ctx
->src_fmt
;
76 size_left
= vb2_plane_size(dst_buf
, 0) - ctx
->vpu_dst_fmt
->header_size
;
77 if (WARN_ON(vb2_plane_size(dst_buf
, 0) < ctx
->vpu_dst_fmt
->header_size
))
80 WARN_ON(pix_fmt
->num_planes
> 3);
82 vepu_write_relaxed(vpu
, vb2_dma_contig_plane_dma_addr(dst_buf
, 0) +
83 ctx
->vpu_dst_fmt
->header_size
,
84 VEPU_REG_ADDR_OUTPUT_STREAM
);
85 vepu_write_relaxed(vpu
, size_left
, VEPU_REG_STR_BUF_LIMIT
);
87 if (pix_fmt
->num_planes
== 1) {
88 src
[0] = vb2_dma_contig_plane_dma_addr(src_buf
, 0);
89 vepu_write_relaxed(vpu
, src
[0], VEPU_REG_ADDR_IN_PLANE_0
);
90 } else if (pix_fmt
->num_planes
== 2) {
91 src
[0] = vb2_dma_contig_plane_dma_addr(src_buf
, 0);
92 src
[1] = vb2_dma_contig_plane_dma_addr(src_buf
, 1);
93 vepu_write_relaxed(vpu
, src
[0], VEPU_REG_ADDR_IN_PLANE_0
);
94 vepu_write_relaxed(vpu
, src
[1], VEPU_REG_ADDR_IN_PLANE_1
);
96 src
[0] = vb2_dma_contig_plane_dma_addr(src_buf
, 0);
97 src
[1] = vb2_dma_contig_plane_dma_addr(src_buf
, 1);
98 src
[2] = vb2_dma_contig_plane_dma_addr(src_buf
, 2);
99 vepu_write_relaxed(vpu
, src
[0], VEPU_REG_ADDR_IN_PLANE_0
);
100 vepu_write_relaxed(vpu
, src
[1], VEPU_REG_ADDR_IN_PLANE_1
);
101 vepu_write_relaxed(vpu
, src
[2], VEPU_REG_ADDR_IN_PLANE_2
);
106 rockchip_vpu2_jpeg_enc_set_qtable(struct hantro_dev
*vpu
,
107 unsigned char *luma_qtable
,
108 unsigned char *chroma_qtable
)
111 __be32
*luma_qtable_p
;
112 __be32
*chroma_qtable_p
;
114 luma_qtable_p
= (__be32
*)luma_qtable
;
115 chroma_qtable_p
= (__be32
*)chroma_qtable
;
118 * Quantization table registers must be written in contiguous blocks.
119 * DO NOT collapse the below two "for" loops into one.
121 for (i
= 0; i
< VEPU_JPEG_QUANT_TABLE_COUNT
; i
++) {
122 reg
= get_unaligned_be32(&luma_qtable_p
[i
]);
123 vepu_write_relaxed(vpu
, reg
, VEPU_REG_JPEG_LUMA_QUAT(i
));
126 for (i
= 0; i
< VEPU_JPEG_QUANT_TABLE_COUNT
; i
++) {
127 reg
= get_unaligned_be32(&chroma_qtable_p
[i
]);
128 vepu_write_relaxed(vpu
, reg
, VEPU_REG_JPEG_CHROMA_QUAT(i
));
132 int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx
*ctx
)
134 struct hantro_dev
*vpu
= ctx
->dev
;
135 struct vb2_v4l2_buffer
*src_buf
, *dst_buf
;
136 struct hantro_jpeg_ctx jpeg_ctx
;
139 src_buf
= hantro_get_src_buf(ctx
);
140 dst_buf
= hantro_get_dst_buf(ctx
);
142 hantro_start_prepare_run(ctx
);
144 memset(&jpeg_ctx
, 0, sizeof(jpeg_ctx
));
145 jpeg_ctx
.buffer
= vb2_plane_vaddr(&dst_buf
->vb2_buf
, 0);
146 if (!jpeg_ctx
.buffer
)
149 jpeg_ctx
.width
= ctx
->dst_fmt
.width
;
150 jpeg_ctx
.height
= ctx
->dst_fmt
.height
;
151 jpeg_ctx
.quality
= ctx
->jpeg_quality
;
152 hantro_jpeg_header_assemble(&jpeg_ctx
);
154 /* Switch to JPEG encoder mode before writing registers */
155 vepu_write_relaxed(vpu
, VEPU_REG_ENCODE_FORMAT_JPEG
,
156 VEPU_REG_ENCODE_START
);
158 rockchip_vpu2_set_src_img_ctrl(vpu
, ctx
);
159 rockchip_vpu2_jpeg_enc_set_buffers(vpu
, ctx
, &src_buf
->vb2_buf
,
161 rockchip_vpu2_jpeg_enc_set_qtable(vpu
, jpeg_ctx
.hw_luma_qtable
,
162 jpeg_ctx
.hw_chroma_qtable
);
164 reg
= VEPU_REG_OUTPUT_SWAP32
165 | VEPU_REG_OUTPUT_SWAP16
166 | VEPU_REG_OUTPUT_SWAP8
167 | VEPU_REG_INPUT_SWAP8
168 | VEPU_REG_INPUT_SWAP16
169 | VEPU_REG_INPUT_SWAP32
;
170 /* Make sure that all registers are written at this point. */
171 vepu_write(vpu
, reg
, VEPU_REG_DATA_ENDIAN
);
173 reg
= VEPU_REG_AXI_CTRL_BURST_LEN(16);
174 vepu_write_relaxed(vpu
, reg
, VEPU_REG_AXI_CTRL
);
176 reg
= VEPU_REG_MB_WIDTH(MB_WIDTH(ctx
->src_fmt
.width
))
177 | VEPU_REG_MB_HEIGHT(MB_HEIGHT(ctx
->src_fmt
.height
))
178 | VEPU_REG_FRAME_TYPE_INTRA
179 | VEPU_REG_ENCODE_FORMAT_JPEG
180 | VEPU_REG_ENCODE_ENABLE
;
182 /* Kick the watchdog and start encoding */
183 hantro_end_prepare_run(ctx
);
184 vepu_write(vpu
, reg
, VEPU_REG_ENCODE_START
);
189 void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx
*ctx
)
191 struct hantro_dev
*vpu
= ctx
->dev
;
192 u32 bytesused
= vepu_read(vpu
, VEPU_REG_STR_BUF_LIMIT
) / 8;
193 struct vb2_v4l2_buffer
*dst_buf
= hantro_get_dst_buf(ctx
);
195 vb2_set_plane_payload(&dst_buf
->vb2_buf
, 0,
196 ctx
->vpu_dst_fmt
->header_size
+ bytesused
);