2 * Copyright (C) 2012 Red Hat
3 * based in parts on udlfb.c:
4 * Copyright (C) 2009 Roberto De Ioris <roberto@unbit.it>
5 * Copyright (C) 2009 Jaya Kumar <jayakumar.lkml@gmail.com>
6 * Copyright (C) 2009 Bernie Thompson <bernie@plugable.com>
8 * This file is subject to the terms and conditions of the GNU General Public
9 * License v2. See the file COPYING in the main directory of this archive for
13 #include <linux/module.h>
14 #include <linux/slab.h>
16 #include <linux/prefetch.h>
17 #include <asm/unaligned.h>
/*
 * Command sizing constants.
 * udl_compress_hline16() limits one command to MAX_CMD_PIXELS + 1 pixels.
 */
22 #define MAX_CMD_PIXELS 255
/*
 * RLX command: header size, minimum pixel payload, and their sum.
 * MIN_RLX_CMD_BYTES is the free space udl_compress_hline16() requires in the
 * command buffer before it starts another command.
 */
24 #define RLX_HEADER_BYTES 7
25 #define MIN_RLX_PIX_BYTES 4
26 #define MIN_RLX_CMD_BYTES (RLX_HEADER_BYTES + MIN_RLX_PIX_BYTES)
/*
 * Same three figures for RLE and RAW commands.
 * NOTE(review): neither group is referenced in the part of the file visible
 * here -- confirm whether they are still needed.
 */
28 #define RLE_HEADER_BYTES 6
29 #define MIN_RLE_PIX_BYTES 3
30 #define MIN_RLE_CMD_BYTES (RLE_HEADER_BYTES + MIN_RLE_PIX_BYTES)
32 #define RAW_HEADER_BYTES 6
33 #define MIN_RAW_PIX_BYTES 2
34 #define MIN_RAW_CMD_BYTES (RAW_HEADER_BYTES + MIN_RAW_PIX_BYTES)
37 * Trims identical data from front and back of line
38 * Sets new front buffer address and width
39 * And returns byte count of identical pixels
40 * Assumes CPU natural alignment (unsigned long)
41 * for back and front buffer ptrs and width
/*
 * udl_trim_hline() - compare a front-buffer line with a back-buffer line, one
 * unsigned long at a time, and narrow the front line to the words in between
 * the identical leading and trailing parts.
 *
 * @bback:       line in the back buffer, read as unsigned long words
 * @bfront:      in/out; line in the front buffer, rewritten to &front[start]
 * @width_bytes: in/out; line width in bytes, rewritten to
 *               (end - start) * sizeof(unsigned long)
 *
 * Returns identical * sizeof(unsigned long), where identical is
 * start + (width - end).
 *
 * NOTE(review): this listing is lossy -- the declarations of j, k, start and
 * end, and the bodies of both mismatch checks, are not visible here. How start
 * and end receive their values must be confirmed against the complete file.
 */
44 static int udl_trim_hline(const u8
*bback
, const u8
**bfront
, int *width_bytes
)
/* Both buffers are walked as arrays of unsigned long. */
47 const unsigned long *back
= (const unsigned long *) bback
;
48 const unsigned long *front
= (const unsigned long *) *bfront
;
/* Width in whole words; the integer division drops any remainder bytes. */
49 const int width
= *width_bytes
/ sizeof(unsigned long);
50 int identical
= width
;
54 prefetch((void *) front
);
55 prefetch((void *) back
);
/* Forward scan from the first word, testing each word pair for a mismatch. */
57 for (j
= 0; j
< width
; j
++) {
58 if (back
[j
] != front
[j
]) {
/* Backward scan from the last word; it stops before reaching index j. */
64 for (k
= width
- 1; k
> j
; k
--) {
65 if (back
[k
] != front
[k
]) {
/* Words before start plus words from end onward. */
71 identical
= start
+ (width
- end
);
/* Report the narrowed line back to the caller. */
72 *bfront
= (u8
*) &front
[start
];
73 *width_bytes
= (end
- start
) * sizeof(unsigned long);
/* Convert the word count back into bytes. */
75 return identical
* sizeof(unsigned long);
/*
 * pixel32_to_be16() - pack a 32-bit pixel into a 16-bit 5-6-5 value.
 * @pixel: 32-bit source pixel
 *
 * Keeps the top five bits of byte 0 (result bits 4..0), the top six bits of
 * byte 1 (result bits 10..5) and the top five bits of byte 2 (result bits
 * 15..11). Byte 3 of the source is ignored.
 *
 * Despite the name, no byte swapping happens here: the result is a plain
 * host-order 16-bit value.
 *
 * The return type is spelled uint16_t to match the uint32_t parameter; it is
 * the same underlying type as the kernel's u16.
 */
static inline uint16_t pixel32_to_be16(const uint32_t pixel)
{
	return (uint16_t)(((pixel >> 3) & 0x001f) |
			  ((pixel >> 5) & 0x07e0) |
			  ((pixel >> 8) & 0xf800));
}
/*
 * get_pixel_val16() - fetch one source pixel as a 16-bit value.
 * @pixel: address of the pixel in the source buffer
 * @bpp:   presumably chooses between the two loads below -- see the note
 *
 * Two loads are visible: a direct 16-bit read of *pixel, and a 32-bit read
 * that is passed through pixel32_to_be16().
 *
 * NOTE(review): the conditions guarding the two loads, the declaration of
 * pixel_val16 and the return statement are among the lines missing from this
 * listing. Presumably bpp == 2 takes the 16-bit load and bpp == 4 the 32-bit
 * one (udl_render_hline() insists that bpp is 2 or 4) -- confirm.
 * NOTE(review): both loads dereference a cast byte pointer, so they rely on
 * the caller passing a suitably aligned address -- confirm.
 */
86 static inline u16
get_pixel_val16(const uint8_t *pixel
, int bpp
)
/* 16-bit source pixel: used as stored. */
90 pixel_val16
= *(const uint16_t *)pixel
;
/* 32-bit source pixel: reduced to 16 bits by pixel32_to_be16(). */
92 pixel_val16
= pixel32_to_be16(*(const uint32_t *)pixel
);
97 * Render a command stream for an encoded horizontal line segment of pixels.
99 * A command buffer holds several commands.
100 * It always begins with a fresh command header
101 * (the protocol doesn't require this, but we enforce it to allow
102 * multiple buffers to be potentially encoded and sent in parallel).
103 * A single command encodes one contiguous horizontal line of pixels
105 * The function relies on the client to do all allocation, so that
106 * rendering can be done directly to output buffers (e.g. USB URBs).
107 * The function fills the supplied command buffer, providing information
108 * on where it left off, so the client may call in again with additional
109 * buffers if the line will take several buffers to complete.
111 * A single command can transmit a maximum of 256 pixels,
112 * regardless of the compression ratio (protocol design limit).
113 * To the hardware, 0 for a size byte means 256
115 * Rather than 256 pixel commands which are either rl or raw encoded,
116 * the rlx command simply assumes alternating raw and rl spans within one cmd.
117 * This has a slightly larger header overhead, but produces more even results.
118 * It also processes all data (read and write) in a single pass.
119 * Performance benchmarks of common cases show it having just slightly better
120 * compression than 256 pixel raw or rle commands, with similar CPU consumption.
121 * But very rl-friendly data will not compress quite as well.
/*
 * udl_compress_hline16() - encode pixels from one horizontal span into the
 * command buffer, emitting commands for as long as pixels and buffer space
 * remain.
 *
 * @pixel_start_ptr:    in/out; address of the next source pixel. On return it
 *                      holds the position where encoding stopped.
 * @pixel_end:          end of the source pixels; encoding stops at this address
 * @device_address_ptr: in/out; device address for the next command. Advanced
 *                      by 2 for every pixel encoded.
 * @command_buffer_ptr: in/out; current write position in the command buffer
 * @cmd_buffer_end:     end of the command buffer
 * @bpp:                size in bytes of one source pixel; used as the stride
 *                      through the source and handed to get_pixel_val16()
 *
 * Each command carries alternating raw and repeat spans. The count bytes are
 * reserved first and filled in once the span lengths are known.
 *
 * NOTE(review): this listing is lossy -- its embedded numbering skips
 * 143-145, 168-171 and 175-178, so statements such as the leading header
 * bytes and the pointer advances inside the two inner loops are not visible
 * here. Confirm against the complete file before relying on the details.
 */
123 static void udl_compress_hline16(
124 const u8
**pixel_start_ptr
,
125 const u8
*const pixel_end
,
126 uint32_t *device_address_ptr
,
127 uint8_t **command_buffer_ptr
,
128 const uint8_t *const cmd_buffer_end
, int bpp
)
/* Work on local copies; the caller's values are written back at the end. */
130 const u8
*pixel
= *pixel_start_ptr
;
131 uint32_t dev_addr
= *device_address_ptr
;
132 uint8_t *cmd
= *command_buffer_ptr
;
/*
 * One iteration per command: continue while source pixels remain and more
 * than MIN_RLX_CMD_BYTES of command buffer are still free.
 */
134 while ((pixel_end
> pixel
) &&
135 (cmd_buffer_end
- MIN_RLX_CMD_BYTES
> cmd
)) {
136 uint8_t *raw_pixels_count_byte
= NULL
;
137 uint8_t *cmd_pixels_count_byte
= NULL
;
138 const u8
*raw_pixel_start
= NULL
;
139 const u8
*cmd_pixel_start
, *cmd_pixel_end
= NULL
;
140 uint16_t pixel_val16
;
142 prefetchw((void *) cmd
); /* pull in one cache line at least */
/*
 * Device address, most significant byte first (three bytes).
 * NOTE(review): RLX_HEADER_BYTES is 7 but only five header bytes are visible
 * (three address bytes plus the two count bytes below); presumably the first
 * two are written in the lines missing just above -- confirm.
 */
146 *cmd
++ = (uint8_t) ((dev_addr
>> 16) & 0xFF);
147 *cmd
++ = (uint8_t) ((dev_addr
>> 8) & 0xFF);
148 *cmd
++ = (uint8_t) ((dev_addr
) & 0xFF);
/* Reserve the byte that will hold this command's total pixel count. */
150 cmd_pixels_count_byte
= cmd
++; /* we'll know this later */
151 cmd_pixel_start
= pixel
;
/* Reserve the byte that will hold the first raw span's pixel count. */
153 raw_pixels_count_byte
= cmd
++; /* we'll know this later */
154 raw_pixel_start
= pixel
;
/*
 * This command ends after the smallest of: MAX_CMD_PIXELS + 1 pixels, the
 * pixels left in the source, and half the bytes left in the command buffer.
 */
156 cmd_pixel_end
= pixel
+ (min(MAX_CMD_PIXELS
+ 1,
157 min((int)(pixel_end
- pixel
) / bpp
,
158 (int)(cmd_buffer_end
- cmd
) / 2))) * bpp
;
/*
 * NOTE(review): cmd_pixel_end - pixel is already a byte count, so multiplying
 * it by bpp asks for a longer prefetch range than the span about to be
 * encoded -- confirm whether that is intended.
 */
160 prefetch_range((void *) pixel
, (cmd_pixel_end
- pixel
) * bpp
);
161 pixel_val16
= get_pixel_val16(pixel
, bpp
);
/* One iteration per emitted pixel value, each possibly followed by a run. */
163 while (pixel
< cmd_pixel_end
) {
164 const u8
*const start
= pixel
;
165 const uint16_t repeating_pixel_val16
= pixel_val16
;
/* Store the 16-bit pixel value at cmd, most significant byte first. */
167 put_unaligned_be16(pixel_val16
, cmd
);
/* Read following pixels and compare each with the value just emitted. */
172 while (pixel
< cmd_pixel_end
) {
173 pixel_val16
= get_pixel_val16(pixel
, bpp
);
174 if (pixel_val16
!= repeating_pixel_val16
)
/* More than one pixel was consumed since start, i.e. the value repeated. */
179 if (unlikely(pixel
> start
+ bpp
)) {
180 /* go back and fill in raw pixel count */
181 *raw_pixels_count_byte
= (((start
-
182 raw_pixel_start
) / bpp
) + 1) & 0xFF;
184 /* immediately after raw data is repeat byte */
185 *cmd
++ = (((pixel
- start
) / bpp
) - 1) & 0xFF;
187 /* Then start another raw pixel span */
188 raw_pixel_start
= pixel
;
189 raw_pixels_count_byte
= cmd
++;
/* Pixels were emitted after the last repeat span: record their count. */
193 if (pixel
> raw_pixel_start
) {
194 /* finalize last RAW span */
195 *raw_pixels_count_byte
= ((pixel
-raw_pixel_start
) / bpp
) & 0xFF;
/* Fill in the command's total pixel count; & 0xFF turns 256 into 0. */
198 *cmd_pixels_count_byte
= ((pixel
- cmd_pixel_start
) / bpp
) & 0xFF;
/* Two bytes of device address per encoded pixel. */
199 dev_addr
+= ((pixel
- cmd_pixel_start
) / bpp
) * 2;
/* Too little room left for another command: pad out and mark the buffer full. */
202 if (cmd_buffer_end
<= MIN_RLX_CMD_BYTES
+ cmd
) {
203 /* Fill leftover bytes with no-ops */
204 if (cmd_buffer_end
> cmd
)
205 memset(cmd
, 0xAF, cmd_buffer_end
- cmd
);
206 cmd
= (uint8_t *) cmd_buffer_end
;
/* Hand the updated positions back to the caller. */
209 *command_buffer_ptr
= cmd
;
210 *pixel_start_ptr
= pixel
;
211 *device_address_ptr
= dev_addr
;
217 * There are 3 copies of every pixel: The front buffer that the fbdev
218 * client renders to, the actual framebuffer across the USB bus in hardware
219 * (that we can only write to, slowly, and can never read), and (optionally)
220 * our shadow copy that tracks what's been sent to that hardware buffer.
222 int udl_render_hline(struct drm_device
*dev
, int bpp
, struct urb
**urb_ptr
,
223 const char *front
, char **urb_buf_ptr
,
224 u32 byte_offset
, u32 device_byte_offset
,
226 int *ident_ptr
, int *sent_ptr
)
228 const u8
*line_start
, *line_end
, *next_pixel
;
229 u32 base16
= 0 + (device_byte_offset
/ bpp
) * 2;
230 struct urb
*urb
= *urb_ptr
;
231 u8
*cmd
= *urb_buf_ptr
;
232 u8
*cmd_end
= (u8
*) urb
->transfer_buffer
+ urb
->transfer_buffer_length
;
234 BUG_ON(!(bpp
== 2 || bpp
== 4));
236 line_start
= (u8
*) (front
+ byte_offset
);
237 next_pixel
= line_start
;
238 line_end
= next_pixel
+ byte_width
;
240 while (next_pixel
< line_end
) {
242 udl_compress_hline16(&next_pixel
,
244 (u8
**) &cmd
, (u8
*) cmd_end
, bpp
);
246 if (cmd
>= cmd_end
) {
247 int len
= cmd
- (u8
*) urb
->transfer_buffer
;
248 if (udl_submit_urb(dev
, urb
, len
))
249 return 1; /* lost pixels is set */
251 urb
= udl_get_urb(dev
);
253 return 1; /* lost_pixels is set */
255 cmd
= urb
->transfer_buffer
;
256 cmd_end
= &cmd
[urb
->transfer_buffer_length
];