avformat/mpeg: demux ivtv captions
[ffmpeg.git] / libavcodec / videotoolboxenc.c
blobda7b291b03ec1bcd84ab9033747d5b9daea62823
1 /*
2 * copyright (c) 2015 Rick Kern <kernrj@gmail.com>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <VideoToolbox/VideoToolbox.h>
22 #include <CoreVideo/CoreVideo.h>
23 #include <CoreMedia/CoreMedia.h>
24 #include <TargetConditionals.h>
25 #include <Availability.h>
26 #include "avcodec.h"
27 #include "libavutil/mem.h"
28 #include "libavutil/opt.h"
29 #include "libavutil/avassert.h"
30 #include "libavutil/avstring.h"
31 #include "libavcodec/avcodec.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/hwcontext_videotoolbox.h"
34 #include "codec_internal.h"
35 #include "internal.h"
36 #include <pthread.h>
37 #include "atsc_a53.h"
38 #include "encode.h"
39 #include "h264.h"
40 #include "h264_sei.h"
41 #include "hwconfig.h"
42 #include <dlfcn.h>
/*
 * Local fallbacks, compiled in only when the corresponding HAVE_* build
 * switch is off or the macro is not already defined.
 */
#if !HAVE_KCMVIDEOCODECTYPE_HEVC
enum { kCMVideoCodecType_HEVC = 'hvc1' };
#endif

#if !HAVE_KCMVIDEOCODECTYPE_HEVCWITHALPHA
enum { kCMVideoCodecType_HEVCWithAlpha = 'muxa' };
#endif

#if !HAVE_KCVPIXELFORMATTYPE_420YPCBCR10BIPLANARVIDEORANGE
enum {
    kCVPixelFormatType_420YpCbCr10BiPlanarFullRange  = 'xf20',
    kCVPixelFormatType_420YpCbCr10BiPlanarVideoRange = 'x420'
};
#endif

#if !defined(TARGET_CPU_ARM64)
#define TARGET_CPU_ARM64 0
#endif
61 typedef OSStatus (*getParameterSetAtIndex)(CMFormatDescriptionRef videoDesc,
62 size_t parameterSetIndex,
63 const uint8_t **parameterSetPointerOut,
64 size_t *parameterSetSizeOut,
65 size_t *parameterSetCountOut,
66 int *NALUnitHeaderLengthOut);
69 * Symbols that aren't available in MacOS 10.8 and iOS 8.0 need to be accessed
70 * from compat_keys, or it will cause compiler errors when compiling for older
71 * OS versions.
73 * For example, kVTCompressionPropertyKey_H264EntropyMode was added in
74 * MacOS 10.9. If this constant were used directly, a compiler would generate
75 * an error when it has access to the MacOS 10.8 headers, but does not have
76 * 10.9 headers.
78 * Runtime errors will still occur when unknown keys are set. A warning is
79 * logged and encoding continues where possible.
81 * When adding new symbols, they should be loaded/set in loadVTEncSymbols().
83 static struct{
84 CFStringRef kCVImageBufferColorPrimaries_ITU_R_2020;
85 CFStringRef kCVImageBufferTransferFunction_ITU_R_2020;
86 CFStringRef kCVImageBufferYCbCrMatrix_ITU_R_2020;
88 CFStringRef kVTCompressionPropertyKey_H264EntropyMode;
89 CFStringRef kVTH264EntropyMode_CAVLC;
90 CFStringRef kVTH264EntropyMode_CABAC;
92 CFStringRef kVTProfileLevel_H264_Baseline_4_0;
93 CFStringRef kVTProfileLevel_H264_Baseline_4_2;
94 CFStringRef kVTProfileLevel_H264_Baseline_5_0;
95 CFStringRef kVTProfileLevel_H264_Baseline_5_1;
96 CFStringRef kVTProfileLevel_H264_Baseline_5_2;
97 CFStringRef kVTProfileLevel_H264_Baseline_AutoLevel;
98 CFStringRef kVTProfileLevel_H264_Main_4_2;
99 CFStringRef kVTProfileLevel_H264_Main_5_1;
100 CFStringRef kVTProfileLevel_H264_Main_5_2;
101 CFStringRef kVTProfileLevel_H264_Main_AutoLevel;
102 CFStringRef kVTProfileLevel_H264_High_3_0;
103 CFStringRef kVTProfileLevel_H264_High_3_1;
104 CFStringRef kVTProfileLevel_H264_High_3_2;
105 CFStringRef kVTProfileLevel_H264_High_4_0;
106 CFStringRef kVTProfileLevel_H264_High_4_1;
107 CFStringRef kVTProfileLevel_H264_High_4_2;
108 CFStringRef kVTProfileLevel_H264_High_5_1;
109 CFStringRef kVTProfileLevel_H264_High_5_2;
110 CFStringRef kVTProfileLevel_H264_High_AutoLevel;
111 CFStringRef kVTProfileLevel_H264_Extended_5_0;
112 CFStringRef kVTProfileLevel_H264_Extended_AutoLevel;
113 CFStringRef kVTProfileLevel_H264_ConstrainedBaseline_AutoLevel;
114 CFStringRef kVTProfileLevel_H264_ConstrainedHigh_AutoLevel;
116 CFStringRef kVTProfileLevel_HEVC_Main_AutoLevel;
117 CFStringRef kVTProfileLevel_HEVC_Main10_AutoLevel;
119 CFStringRef kVTCompressionPropertyKey_RealTime;
120 CFStringRef kVTCompressionPropertyKey_TargetQualityForAlpha;
121 CFStringRef kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality;
122 CFStringRef kVTCompressionPropertyKey_ConstantBitRate;
123 CFStringRef kVTCompressionPropertyKey_EncoderID;
125 CFStringRef kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder;
126 CFStringRef kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder;
127 CFStringRef kVTVideoEncoderSpecification_EnableLowLatencyRateControl;
128 CFStringRef kVTCompressionPropertyKey_AllowOpenGOP;
129 CFStringRef kVTCompressionPropertyKey_MaximizePowerEfficiency;
130 CFStringRef kVTCompressionPropertyKey_ReferenceBufferCount;
131 CFStringRef kVTCompressionPropertyKey_MaxAllowedFrameQP;
132 CFStringRef kVTCompressionPropertyKey_MinAllowedFrameQP;
134 getParameterSetAtIndex CMVideoFormatDescriptionGetHEVCParameterSetAtIndex;
135 } compat_keys;
/*
 * Resolve the CFStringRef constant named `symbol` with dlsym() and store it in
 * compat_keys.symbol. When the lookup fails, a constant string built from
 * `defaultVal` is stored instead.
 */
#define GET_SYM(symbol, defaultVal)                                          \
do {                                                                         \
    CFStringRef *sym_addr = (CFStringRef *)dlsym(RTLD_DEFAULT, #symbol);     \
    compat_keys.symbol = sym_addr ? *sym_addr : CFSTR(defaultVal);           \
} while (0)
146 static pthread_once_t once_ctrl = PTHREAD_ONCE_INIT;
148 static void loadVTEncSymbols(void){
149 compat_keys.CMVideoFormatDescriptionGetHEVCParameterSetAtIndex =
150 (getParameterSetAtIndex)dlsym(
151 RTLD_DEFAULT,
152 "CMVideoFormatDescriptionGetHEVCParameterSetAtIndex"
155 GET_SYM(kCVImageBufferColorPrimaries_ITU_R_2020, "ITU_R_2020");
156 GET_SYM(kCVImageBufferTransferFunction_ITU_R_2020, "ITU_R_2020");
157 GET_SYM(kCVImageBufferYCbCrMatrix_ITU_R_2020, "ITU_R_2020");
159 GET_SYM(kVTCompressionPropertyKey_H264EntropyMode, "H264EntropyMode");
160 GET_SYM(kVTH264EntropyMode_CAVLC, "CAVLC");
161 GET_SYM(kVTH264EntropyMode_CABAC, "CABAC");
163 GET_SYM(kVTProfileLevel_H264_Baseline_4_0, "H264_Baseline_4_0");
164 GET_SYM(kVTProfileLevel_H264_Baseline_4_2, "H264_Baseline_4_2");
165 GET_SYM(kVTProfileLevel_H264_Baseline_5_0, "H264_Baseline_5_0");
166 GET_SYM(kVTProfileLevel_H264_Baseline_5_1, "H264_Baseline_5_1");
167 GET_SYM(kVTProfileLevel_H264_Baseline_5_2, "H264_Baseline_5_2");
168 GET_SYM(kVTProfileLevel_H264_Baseline_AutoLevel, "H264_Baseline_AutoLevel");
169 GET_SYM(kVTProfileLevel_H264_Main_4_2, "H264_Main_4_2");
170 GET_SYM(kVTProfileLevel_H264_Main_5_1, "H264_Main_5_1");
171 GET_SYM(kVTProfileLevel_H264_Main_5_2, "H264_Main_5_2");
172 GET_SYM(kVTProfileLevel_H264_Main_AutoLevel, "H264_Main_AutoLevel");
173 GET_SYM(kVTProfileLevel_H264_High_3_0, "H264_High_3_0");
174 GET_SYM(kVTProfileLevel_H264_High_3_1, "H264_High_3_1");
175 GET_SYM(kVTProfileLevel_H264_High_3_2, "H264_High_3_2");
176 GET_SYM(kVTProfileLevel_H264_High_4_0, "H264_High_4_0");
177 GET_SYM(kVTProfileLevel_H264_High_4_1, "H264_High_4_1");
178 GET_SYM(kVTProfileLevel_H264_High_4_2, "H264_High_4_2");
179 GET_SYM(kVTProfileLevel_H264_High_5_1, "H264_High_5_1");
180 GET_SYM(kVTProfileLevel_H264_High_5_2, "H264_High_5_2");
181 GET_SYM(kVTProfileLevel_H264_High_AutoLevel, "H264_High_AutoLevel");
182 GET_SYM(kVTProfileLevel_H264_Extended_5_0, "H264_Extended_5_0");
183 GET_SYM(kVTProfileLevel_H264_Extended_AutoLevel, "H264_Extended_AutoLevel");
184 GET_SYM(kVTProfileLevel_H264_ConstrainedBaseline_AutoLevel, "H264_ConstrainedBaseline_AutoLevel");
185 GET_SYM(kVTProfileLevel_H264_ConstrainedHigh_AutoLevel, "H264_ConstrainedHigh_AutoLevel");
187 GET_SYM(kVTProfileLevel_HEVC_Main_AutoLevel, "HEVC_Main_AutoLevel");
188 GET_SYM(kVTProfileLevel_HEVC_Main10_AutoLevel, "HEVC_Main10_AutoLevel");
190 GET_SYM(kVTCompressionPropertyKey_RealTime, "RealTime");
191 GET_SYM(kVTCompressionPropertyKey_TargetQualityForAlpha,
192 "TargetQualityForAlpha");
193 GET_SYM(kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality,
194 "PrioritizeEncodingSpeedOverQuality");
195 GET_SYM(kVTCompressionPropertyKey_ConstantBitRate, "ConstantBitRate");
196 GET_SYM(kVTCompressionPropertyKey_EncoderID, "EncoderID");
198 GET_SYM(kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
199 "EnableHardwareAcceleratedVideoEncoder");
200 GET_SYM(kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
201 "RequireHardwareAcceleratedVideoEncoder");
202 GET_SYM(kVTVideoEncoderSpecification_EnableLowLatencyRateControl,
203 "EnableLowLatencyRateControl");
204 GET_SYM(kVTCompressionPropertyKey_AllowOpenGOP, "AllowOpenGOP");
205 GET_SYM(kVTCompressionPropertyKey_MaximizePowerEfficiency,
206 "MaximizePowerEfficiency");
207 GET_SYM(kVTCompressionPropertyKey_ReferenceBufferCount,
208 "ReferenceBufferCount");
209 GET_SYM(kVTCompressionPropertyKey_MaxAllowedFrameQP, "MaxAllowedFrameQP");
210 GET_SYM(kVTCompressionPropertyKey_MinAllowedFrameQP, "MinAllowedFrameQP");
/* Profile value combining the High profile with the "constrained" flag. */
#define H264_PROFILE_CONSTRAINED_HIGH (AV_PROFILE_H264_HIGH | AV_PROFILE_H264_CONSTRAINED)

/* H.264 entropy coder selection; the first value means "not chosen". */
typedef enum VTH264Entropy {
    VT_ENTROPY_NOT_SET = 0,
    VT_CAVLC           = 1,
    VT_CABAC           = 2
} VTH264Entropy;

/* Four-byte start code written in front of every copied parameter set. */
static const uint8_t start_code[4] = { 0x00, 0x00, 0x00, 0x01 };
223 typedef struct ExtraSEI {
224 void *data;
225 size_t size;
226 } ExtraSEI;
228 typedef struct BufNode {
229 CMSampleBufferRef cm_buffer;
230 ExtraSEI sei;
231 AVBufferRef *frame_buf;
232 struct BufNode* next;
233 } BufNode;
235 typedef struct VTEncContext {
236 AVClass *class;
237 enum AVCodecID codec_id;
238 VTCompressionSessionRef session;
239 CFDictionaryRef supported_props;
240 CFStringRef ycbcr_matrix;
241 CFStringRef color_primaries;
242 CFStringRef transfer_function;
243 getParameterSetAtIndex get_param_set_func;
245 pthread_mutex_t lock;
246 pthread_cond_t cv_sample_sent;
248 int async_error;
250 BufNode *q_head;
251 BufNode *q_tail;
253 int64_t frame_ct_out;
254 int64_t frame_ct_in;
256 int64_t first_pts;
257 int64_t dts_delta;
259 int profile;
260 int level;
261 int entropy;
262 int realtime;
263 int frames_before;
264 int frames_after;
265 int constant_bit_rate;
267 int allow_sw;
268 int require_sw;
269 double alpha_quality;
270 int prio_speed;
272 bool flushing;
273 int has_b_frames;
274 bool warned_color_range;
276 /* can't be bool type since AVOption will access it as int */
277 int a53_cc;
279 int max_slice_bytes;
280 int power_efficient;
281 int max_ref_frames;
282 } VTEncContext;
284 static void vtenc_free_buf_node(BufNode *info)
286 if (!info)
287 return;
289 av_free(info->sei.data);
290 if (info->cm_buffer)
291 CFRelease(info->cm_buffer);
292 av_buffer_unref(&info->frame_buf);
293 av_free(info);
296 static int vt_dump_encoder(AVCodecContext *avctx)
298 VTEncContext *vtctx = avctx->priv_data;
299 CFStringRef encoder_id = NULL;
300 int status;
301 CFIndex length, max_size;
302 char *name;
304 status = VTSessionCopyProperty(vtctx->session,
305 compat_keys.kVTCompressionPropertyKey_EncoderID,
306 kCFAllocatorDefault,
307 &encoder_id);
308 // OK if not supported
309 if (status != noErr)
310 return 0;
312 length = CFStringGetLength(encoder_id);
313 max_size = CFStringGetMaximumSizeForEncoding(length, kCFStringEncodingUTF8);
314 name = av_malloc(max_size);
315 if (!name) {
316 CFRelease(encoder_id);
317 return AVERROR(ENOMEM);
320 CFStringGetCString(encoder_id,
321 name,
322 max_size,
323 kCFStringEncodingUTF8);
324 av_log(avctx, AV_LOG_DEBUG, "Init the encoder: %s\n", name);
325 av_freep(&name);
326 CFRelease(encoder_id);
328 return 0;
331 static int vtenc_populate_extradata(AVCodecContext *avctx,
332 CMVideoCodecType codec_type,
333 CFStringRef profile_level,
334 CFNumberRef gamma_level,
335 CFDictionaryRef enc_info,
336 CFDictionaryRef pixel_buffer_info);
339 * NULL-safe release of *refPtr, and sets value to NULL.
341 static void vt_release_num(CFNumberRef* refPtr){
342 if (!*refPtr) {
343 return;
346 CFRelease(*refPtr);
347 *refPtr = NULL;
350 static void set_async_error(VTEncContext *vtctx, int err)
352 BufNode *info;
354 pthread_mutex_lock(&vtctx->lock);
356 vtctx->async_error = err;
358 info = vtctx->q_head;
359 vtctx->q_head = vtctx->q_tail = NULL;
361 while (info) {
362 BufNode *next = info->next;
363 vtenc_free_buf_node(info);
364 info = next;
367 pthread_mutex_unlock(&vtctx->lock);
370 static void clear_frame_queue(VTEncContext *vtctx)
372 set_async_error(vtctx, 0);
375 static void vtenc_reset(VTEncContext *vtctx)
377 if (vtctx->session) {
378 CFRelease(vtctx->session);
379 vtctx->session = NULL;
382 if (vtctx->supported_props) {
383 CFRelease(vtctx->supported_props);
384 vtctx->supported_props = NULL;
387 if (vtctx->color_primaries) {
388 CFRelease(vtctx->color_primaries);
389 vtctx->color_primaries = NULL;
392 if (vtctx->transfer_function) {
393 CFRelease(vtctx->transfer_function);
394 vtctx->transfer_function = NULL;
397 if (vtctx->ycbcr_matrix) {
398 CFRelease(vtctx->ycbcr_matrix);
399 vtctx->ycbcr_matrix = NULL;
403 static int vtenc_q_pop(VTEncContext *vtctx, bool wait, CMSampleBufferRef *buf, ExtraSEI *sei)
405 BufNode *info;
407 pthread_mutex_lock(&vtctx->lock);
409 if (vtctx->async_error) {
410 pthread_mutex_unlock(&vtctx->lock);
411 return vtctx->async_error;
414 if (vtctx->flushing && vtctx->frame_ct_in == vtctx->frame_ct_out) {
415 *buf = NULL;
417 pthread_mutex_unlock(&vtctx->lock);
418 return 0;
421 while (!vtctx->q_head && !vtctx->async_error && wait && !vtctx->flushing) {
422 pthread_cond_wait(&vtctx->cv_sample_sent, &vtctx->lock);
425 if (!vtctx->q_head) {
426 pthread_mutex_unlock(&vtctx->lock);
427 *buf = NULL;
428 return 0;
431 info = vtctx->q_head;
432 vtctx->q_head = vtctx->q_head->next;
433 if (!vtctx->q_head) {
434 vtctx->q_tail = NULL;
437 vtctx->frame_ct_out++;
438 pthread_mutex_unlock(&vtctx->lock);
440 *buf = info->cm_buffer;
441 info->cm_buffer = NULL;
442 if (sei && *buf) {
443 *sei = info->sei;
444 info->sei = (ExtraSEI) {0};
446 vtenc_free_buf_node(info);
448 return 0;
451 static void vtenc_q_push(VTEncContext *vtctx, BufNode *info)
453 pthread_mutex_lock(&vtctx->lock);
455 if (!vtctx->q_head) {
456 vtctx->q_head = info;
457 } else {
458 vtctx->q_tail->next = info;
461 vtctx->q_tail = info;
463 pthread_cond_signal(&vtctx->cv_sample_sent);
464 pthread_mutex_unlock(&vtctx->lock);
467 static int count_nalus(size_t length_code_size,
468 CMSampleBufferRef sample_buffer,
469 int *count)
471 size_t offset = 0;
472 int status;
473 int nalu_ct = 0;
474 uint8_t size_buf[4];
475 size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
476 CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);
478 if (length_code_size > 4)
479 return AVERROR_INVALIDDATA;
481 while (offset < src_size) {
482 size_t curr_src_len;
483 size_t box_len = 0;
484 size_t i;
486 status = CMBlockBufferCopyDataBytes(block,
487 offset,
488 length_code_size,
489 size_buf);
491 if (status != kCMBlockBufferNoErr) {
492 return AVERROR_EXTERNAL;
495 for (i = 0; i < length_code_size; i++) {
496 box_len <<= 8;
497 box_len |= size_buf[i];
500 curr_src_len = box_len + length_code_size;
501 offset += curr_src_len;
503 nalu_ct++;
506 *count = nalu_ct;
507 return 0;
510 static CMVideoCodecType get_cm_codec_type(AVCodecContext *avctx,
511 int profile,
512 double alpha_quality)
514 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX ? avctx->sw_pix_fmt : avctx->pix_fmt);
515 switch (avctx->codec_id) {
516 case AV_CODEC_ID_H264: return kCMVideoCodecType_H264;
517 case AV_CODEC_ID_HEVC:
518 if (desc && (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpha_quality > 0.0) {
519 return kCMVideoCodecType_HEVCWithAlpha;
521 return kCMVideoCodecType_HEVC;
522 case AV_CODEC_ID_PRORES:
523 if (desc && (desc->flags & AV_PIX_FMT_FLAG_ALPHA))
524 avctx->bits_per_coded_sample = 32;
525 switch (profile) {
526 case AV_PROFILE_PRORES_PROXY:
527 return MKBETAG('a','p','c','o'); // kCMVideoCodecType_AppleProRes422Proxy
528 case AV_PROFILE_PRORES_LT:
529 return MKBETAG('a','p','c','s'); // kCMVideoCodecType_AppleProRes422LT
530 case AV_PROFILE_PRORES_STANDARD:
531 return MKBETAG('a','p','c','n'); // kCMVideoCodecType_AppleProRes422
532 case AV_PROFILE_PRORES_HQ:
533 return MKBETAG('a','p','c','h'); // kCMVideoCodecType_AppleProRes422HQ
534 case AV_PROFILE_PRORES_4444:
535 return MKBETAG('a','p','4','h'); // kCMVideoCodecType_AppleProRes4444
536 case AV_PROFILE_PRORES_XQ:
537 return MKBETAG('a','p','4','x'); // kCMVideoCodecType_AppleProRes4444XQ
539 default:
540 av_log(avctx, AV_LOG_ERROR, "Unknown profile ID: %d, using auto\n", profile);
541 case AV_PROFILE_UNKNOWN:
542 if (desc &&
543 ((desc->flags & AV_PIX_FMT_FLAG_ALPHA) ||
544 desc->log2_chroma_w == 0))
545 return MKBETAG('a','p','4','h'); // kCMVideoCodecType_AppleProRes4444
546 else
547 return MKBETAG('a','p','c','n'); // kCMVideoCodecType_AppleProRes422
549 default: return 0;
554 * Get the parameter sets from a CMSampleBufferRef.
555 * @param dst If *dst isn't NULL, the parameters are copied into existing
556 * memory. *dst_size must be set accordingly when *dst != NULL.
557 * If *dst is NULL, it will be allocated.
558 * In all cases, *dst_size is set to the number of bytes used starting
559 * at *dst.
561 static int get_params_size(
562 AVCodecContext *avctx,
563 CMVideoFormatDescriptionRef vid_fmt,
564 size_t *size)
566 VTEncContext *vtctx = avctx->priv_data;
567 size_t total_size = 0;
568 size_t ps_count;
569 int is_count_bad = 0;
570 size_t i;
571 int status;
572 status = vtctx->get_param_set_func(vid_fmt,
574 NULL,
575 NULL,
576 &ps_count,
577 NULL);
578 if (status) {
579 is_count_bad = 1;
580 ps_count = 0;
581 status = 0;
584 for (i = 0; i < ps_count || is_count_bad; i++) {
585 const uint8_t *ps;
586 size_t ps_size;
587 status = vtctx->get_param_set_func(vid_fmt,
589 &ps,
590 &ps_size,
591 NULL,
592 NULL);
593 if (status) {
595 * When ps_count is invalid, status != 0 ends the loop normally
596 * unless we didn't get any parameter sets.
598 if (i > 0 && is_count_bad) status = 0;
600 break;
603 total_size += ps_size + sizeof(start_code);
606 if (status) {
607 av_log(avctx, AV_LOG_ERROR, "Error getting parameter set sizes: %d\n", status);
608 return AVERROR_EXTERNAL;
611 *size = total_size;
612 return 0;
615 static int copy_param_sets(
616 AVCodecContext *avctx,
617 CMVideoFormatDescriptionRef vid_fmt,
618 uint8_t *dst,
619 size_t dst_size)
621 VTEncContext *vtctx = avctx->priv_data;
622 size_t ps_count;
623 int is_count_bad = 0;
624 int status;
625 size_t offset = 0;
626 size_t i;
628 status = vtctx->get_param_set_func(vid_fmt,
630 NULL,
631 NULL,
632 &ps_count,
633 NULL);
634 if (status) {
635 is_count_bad = 1;
636 ps_count = 0;
637 status = 0;
641 for (i = 0; i < ps_count || is_count_bad; i++) {
642 const uint8_t *ps;
643 size_t ps_size;
644 size_t next_offset;
646 status = vtctx->get_param_set_func(vid_fmt,
648 &ps,
649 &ps_size,
650 NULL,
651 NULL);
652 if (status) {
653 if (i > 0 && is_count_bad) status = 0;
655 break;
658 next_offset = offset + sizeof(start_code) + ps_size;
659 if (dst_size < next_offset) {
660 av_log(avctx, AV_LOG_ERROR, "Error: buffer too small for parameter sets.\n");
661 return AVERROR_BUFFER_TOO_SMALL;
664 memcpy(dst + offset, start_code, sizeof(start_code));
665 offset += sizeof(start_code);
667 memcpy(dst + offset, ps, ps_size);
668 offset = next_offset;
671 if (status) {
672 av_log(avctx, AV_LOG_ERROR, "Error getting parameter set data: %d\n", status);
673 return AVERROR_EXTERNAL;
676 return 0;
679 static int set_extradata(AVCodecContext *avctx, CMSampleBufferRef sample_buffer)
681 VTEncContext *vtctx = avctx->priv_data;
682 CMVideoFormatDescriptionRef vid_fmt;
683 size_t total_size;
684 int status;
686 vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
687 if (!vid_fmt) {
688 av_log(avctx, AV_LOG_ERROR, "No video format.\n");
689 return AVERROR_EXTERNAL;
692 if (vtctx->get_param_set_func) {
693 status = get_params_size(avctx, vid_fmt, &total_size);
694 if (status) {
695 av_log(avctx, AV_LOG_ERROR, "Could not get parameter sets.\n");
696 return status;
699 avctx->extradata = av_mallocz(total_size + AV_INPUT_BUFFER_PADDING_SIZE);
700 if (!avctx->extradata) {
701 return AVERROR(ENOMEM);
703 avctx->extradata_size = total_size;
705 status = copy_param_sets(avctx, vid_fmt, avctx->extradata, total_size);
707 if (status) {
708 av_log(avctx, AV_LOG_ERROR, "Could not copy param sets.\n");
709 return status;
711 } else {
712 CFDataRef data = CMFormatDescriptionGetExtension(vid_fmt, kCMFormatDescriptionExtension_VerbatimSampleDescription);
713 if (data && CFGetTypeID(data) == CFDataGetTypeID()) {
714 CFIndex size = CFDataGetLength(data);
716 avctx->extradata = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
717 if (!avctx->extradata)
718 return AVERROR(ENOMEM);
719 avctx->extradata_size = size;
721 CFDataGetBytes(data, CFRangeMake(0, size), avctx->extradata);
725 return 0;
728 static void vtenc_output_callback(
729 void *ctx,
730 void *sourceFrameCtx,
731 OSStatus status,
732 VTEncodeInfoFlags flags,
733 CMSampleBufferRef sample_buffer)
735 AVCodecContext *avctx = ctx;
736 VTEncContext *vtctx = avctx->priv_data;
737 BufNode *info = sourceFrameCtx;
739 av_buffer_unref(&info->frame_buf);
740 if (vtctx->async_error) {
741 vtenc_free_buf_node(info);
742 return;
745 if (status) {
746 vtenc_free_buf_node(info);
747 av_log(avctx, AV_LOG_ERROR, "Error encoding frame: %d\n", (int)status);
748 set_async_error(vtctx, AVERROR_EXTERNAL);
749 return;
752 if (!sample_buffer) {
753 return;
756 CFRetain(sample_buffer);
757 info->cm_buffer = sample_buffer;
759 if (!avctx->extradata && (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
760 int set_status = set_extradata(avctx, sample_buffer);
761 if (set_status) {
762 vtenc_free_buf_node(info);
763 set_async_error(vtctx, set_status);
764 return;
768 vtenc_q_push(vtctx, info);
771 static int get_length_code_size(
772 AVCodecContext *avctx,
773 CMSampleBufferRef sample_buffer,
774 size_t *size)
776 VTEncContext *vtctx = avctx->priv_data;
777 CMVideoFormatDescriptionRef vid_fmt;
778 int isize;
779 int status;
781 vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
782 if (!vid_fmt) {
783 av_log(avctx, AV_LOG_ERROR, "Error getting buffer format description.\n");
784 return AVERROR_EXTERNAL;
787 status = vtctx->get_param_set_func(vid_fmt,
789 NULL,
790 NULL,
791 NULL,
792 &isize);
793 if (status) {
794 av_log(avctx, AV_LOG_ERROR, "Error getting length code size: %d\n", status);
795 return AVERROR_EXTERNAL;
798 *size = isize;
799 return 0;
803 * Returns true on success.
805 * If profile_level_val is NULL and this method returns true, don't specify the
806 * profile/level to the encoder.
808 static bool get_vt_h264_profile_level(AVCodecContext *avctx,
809 CFStringRef *profile_level_val)
811 VTEncContext *vtctx = avctx->priv_data;
812 int profile = vtctx->profile;
814 if (profile == AV_PROFILE_UNKNOWN && vtctx->level) {
815 //Need to pick a profile if level is not auto-selected.
816 profile = vtctx->has_b_frames ? AV_PROFILE_H264_MAIN : AV_PROFILE_H264_BASELINE;
819 *profile_level_val = NULL;
821 switch (profile) {
822 case AV_PROFILE_UNKNOWN:
823 return true;
825 case AV_PROFILE_H264_BASELINE:
826 switch (vtctx->level) {
827 case 0: *profile_level_val =
828 compat_keys.kVTProfileLevel_H264_Baseline_AutoLevel; break;
829 case 13: *profile_level_val = kVTProfileLevel_H264_Baseline_1_3; break;
830 case 30: *profile_level_val = kVTProfileLevel_H264_Baseline_3_0; break;
831 case 31: *profile_level_val = kVTProfileLevel_H264_Baseline_3_1; break;
832 case 32: *profile_level_val = kVTProfileLevel_H264_Baseline_3_2; break;
833 case 40: *profile_level_val =
834 compat_keys.kVTProfileLevel_H264_Baseline_4_0; break;
835 case 41: *profile_level_val = kVTProfileLevel_H264_Baseline_4_1; break;
836 case 42: *profile_level_val =
837 compat_keys.kVTProfileLevel_H264_Baseline_4_2; break;
838 case 50: *profile_level_val =
839 compat_keys.kVTProfileLevel_H264_Baseline_5_0; break;
840 case 51: *profile_level_val =
841 compat_keys.kVTProfileLevel_H264_Baseline_5_1; break;
842 case 52: *profile_level_val =
843 compat_keys.kVTProfileLevel_H264_Baseline_5_2; break;
845 break;
847 case AV_PROFILE_H264_CONSTRAINED_BASELINE:
848 *profile_level_val = compat_keys.kVTProfileLevel_H264_ConstrainedBaseline_AutoLevel;
850 if (vtctx->level != 0) {
851 av_log(avctx,
852 AV_LOG_WARNING,
853 "Level is auto-selected when constrained-baseline "
854 "profile is used. The output may be encoded with a "
855 "different level.\n");
857 break;
859 case AV_PROFILE_H264_MAIN:
860 switch (vtctx->level) {
861 case 0: *profile_level_val =
862 compat_keys.kVTProfileLevel_H264_Main_AutoLevel; break;
863 case 30: *profile_level_val = kVTProfileLevel_H264_Main_3_0; break;
864 case 31: *profile_level_val = kVTProfileLevel_H264_Main_3_1; break;
865 case 32: *profile_level_val = kVTProfileLevel_H264_Main_3_2; break;
866 case 40: *profile_level_val = kVTProfileLevel_H264_Main_4_0; break;
867 case 41: *profile_level_val = kVTProfileLevel_H264_Main_4_1; break;
868 case 42: *profile_level_val =
869 compat_keys.kVTProfileLevel_H264_Main_4_2; break;
870 case 50: *profile_level_val = kVTProfileLevel_H264_Main_5_0; break;
871 case 51: *profile_level_val =
872 compat_keys.kVTProfileLevel_H264_Main_5_1; break;
873 case 52: *profile_level_val =
874 compat_keys.kVTProfileLevel_H264_Main_5_2; break;
876 break;
878 case H264_PROFILE_CONSTRAINED_HIGH:
879 *profile_level_val = compat_keys.kVTProfileLevel_H264_ConstrainedHigh_AutoLevel;
881 if (vtctx->level != 0) {
882 av_log(avctx,
883 AV_LOG_WARNING,
884 "Level is auto-selected when constrained-high profile "
885 "is used. The output may be encoded with a different "
886 "level.\n");
888 break;
890 case AV_PROFILE_H264_HIGH:
891 switch (vtctx->level) {
892 case 0: *profile_level_val =
893 compat_keys.kVTProfileLevel_H264_High_AutoLevel; break;
894 case 30: *profile_level_val =
895 compat_keys.kVTProfileLevel_H264_High_3_0; break;
896 case 31: *profile_level_val =
897 compat_keys.kVTProfileLevel_H264_High_3_1; break;
898 case 32: *profile_level_val =
899 compat_keys.kVTProfileLevel_H264_High_3_2; break;
900 case 40: *profile_level_val =
901 compat_keys.kVTProfileLevel_H264_High_4_0; break;
902 case 41: *profile_level_val =
903 compat_keys.kVTProfileLevel_H264_High_4_1; break;
904 case 42: *profile_level_val =
905 compat_keys.kVTProfileLevel_H264_High_4_2; break;
906 case 50: *profile_level_val = kVTProfileLevel_H264_High_5_0; break;
907 case 51: *profile_level_val =
908 compat_keys.kVTProfileLevel_H264_High_5_1; break;
909 case 52: *profile_level_val =
910 compat_keys.kVTProfileLevel_H264_High_5_2; break;
912 break;
913 case AV_PROFILE_H264_EXTENDED:
914 switch (vtctx->level) {
915 case 0: *profile_level_val =
916 compat_keys.kVTProfileLevel_H264_Extended_AutoLevel; break;
917 case 50: *profile_level_val =
918 compat_keys.kVTProfileLevel_H264_Extended_5_0; break;
920 break;
923 if (!*profile_level_val) {
924 av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
925 return false;
928 return true;
932 * Returns true on success.
934 * If profile_level_val is NULL and this method returns true, don't specify the
935 * profile/level to the encoder.
937 static bool get_vt_hevc_profile_level(AVCodecContext *avctx,
938 CFStringRef *profile_level_val)
940 VTEncContext *vtctx = avctx->priv_data;
941 int profile = vtctx->profile;
942 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(
943 avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX ? avctx->sw_pix_fmt
944 : avctx->pix_fmt);
945 int bit_depth = desc ? desc->comp[0].depth : 0;
947 *profile_level_val = NULL;
949 switch (profile) {
950 case AV_PROFILE_UNKNOWN:
951 // Set profile automatically if user don't specify
952 if (bit_depth == 10) {
953 *profile_level_val =
954 compat_keys.kVTProfileLevel_HEVC_Main10_AutoLevel;
955 break;
957 return true;
958 case AV_PROFILE_HEVC_MAIN:
959 if (bit_depth > 0 && bit_depth != 8)
960 av_log(avctx, AV_LOG_WARNING,
961 "main profile with %d bit input\n", bit_depth);
962 *profile_level_val =
963 compat_keys.kVTProfileLevel_HEVC_Main_AutoLevel;
964 break;
965 case AV_PROFILE_HEVC_MAIN_10:
966 if (bit_depth > 0 && bit_depth != 10) {
967 av_log(avctx, AV_LOG_ERROR,
968 "Invalid main10 profile with %d bit input\n", bit_depth);
969 return false;
971 *profile_level_val =
972 compat_keys.kVTProfileLevel_HEVC_Main10_AutoLevel;
973 break;
976 if (!*profile_level_val) {
977 av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
978 return false;
981 return true;
984 static int get_cv_pixel_format(AVCodecContext* avctx,
985 enum AVPixelFormat fmt,
986 enum AVColorRange range,
987 int* av_pixel_format,
988 int* range_guessed)
990 const char *range_name;
991 if (range_guessed) *range_guessed = range != AVCOL_RANGE_MPEG &&
992 range != AVCOL_RANGE_JPEG;
994 //MPEG range is used when no range is set
995 *av_pixel_format = av_map_videotoolbox_format_from_pixfmt2(fmt, range == AVCOL_RANGE_JPEG);
996 if (*av_pixel_format)
997 return 0;
999 range_name = av_color_range_name(range);
1000 av_log(avctx, AV_LOG_ERROR,
1001 "Could not get pixel format for color format '%s' range '%s'.\n",
1002 av_get_pix_fmt_name(fmt),
1003 range_name ? range_name : "Unknown");
1005 return AVERROR(EINVAL);
1008 static void add_color_attr(AVCodecContext *avctx, CFMutableDictionaryRef dict) {
1009 VTEncContext *vtctx = avctx->priv_data;
1011 if (vtctx->color_primaries) {
1012 CFDictionarySetValue(dict,
1013 kCVImageBufferColorPrimariesKey,
1014 vtctx->color_primaries);
1017 if (vtctx->transfer_function) {
1018 CFDictionarySetValue(dict,
1019 kCVImageBufferTransferFunctionKey,
1020 vtctx->transfer_function);
1023 if (vtctx->ycbcr_matrix) {
1024 CFDictionarySetValue(dict,
1025 kCVImageBufferYCbCrMatrixKey,
1026 vtctx->ycbcr_matrix);
1030 static int create_cv_pixel_buffer_info(AVCodecContext* avctx,
1031 CFMutableDictionaryRef* dict)
1033 CFNumberRef cv_color_format_num = NULL;
1034 CFNumberRef width_num = NULL;
1035 CFNumberRef height_num = NULL;
1036 CFMutableDictionaryRef pixel_buffer_info = NULL;
1037 int cv_color_format;
1038 int status = get_cv_pixel_format(avctx,
1039 avctx->pix_fmt,
1040 avctx->color_range,
1041 &cv_color_format,
1042 NULL);
1043 if (status) return status;
1045 pixel_buffer_info = CFDictionaryCreateMutable(
1046 kCFAllocatorDefault,
1048 &kCFCopyStringDictionaryKeyCallBacks,
1049 &kCFTypeDictionaryValueCallBacks);
1051 if (!pixel_buffer_info) goto pbinfo_nomem;
1053 cv_color_format_num = CFNumberCreate(kCFAllocatorDefault,
1054 kCFNumberSInt32Type,
1055 &cv_color_format);
1056 if (!cv_color_format_num) goto pbinfo_nomem;
1058 CFDictionarySetValue(pixel_buffer_info,
1059 kCVPixelBufferPixelFormatTypeKey,
1060 cv_color_format_num);
1061 vt_release_num(&cv_color_format_num);
1063 width_num = CFNumberCreate(kCFAllocatorDefault,
1064 kCFNumberSInt32Type,
1065 &avctx->width);
1066 if (!width_num) goto pbinfo_nomem;
1068 CFDictionarySetValue(pixel_buffer_info,
1069 kCVPixelBufferWidthKey,
1070 width_num);
1071 vt_release_num(&width_num);
1073 height_num = CFNumberCreate(kCFAllocatorDefault,
1074 kCFNumberSInt32Type,
1075 &avctx->height);
1076 if (!height_num) goto pbinfo_nomem;
1078 CFDictionarySetValue(pixel_buffer_info,
1079 kCVPixelBufferHeightKey,
1080 height_num);
1081 vt_release_num(&height_num);
1083 add_color_attr(avctx, pixel_buffer_info);
1085 *dict = pixel_buffer_info;
1086 return 0;
1088 pbinfo_nomem:
1089 vt_release_num(&cv_color_format_num);
1090 vt_release_num(&width_num);
1091 vt_release_num(&height_num);
1092 if (pixel_buffer_info) CFRelease(pixel_buffer_info);
1094 return AVERROR(ENOMEM);
1097 static int get_cv_gamma(AVCodecContext *avctx,
1098 CFNumberRef *gamma_level)
1100 enum AVColorTransferCharacteristic trc = avctx->color_trc;
1101 Float32 gamma = 0;
1102 *gamma_level = NULL;
1104 if (trc == AVCOL_TRC_GAMMA22)
1105 gamma = 2.2;
1106 else if (trc == AVCOL_TRC_GAMMA28)
1107 gamma = 2.8;
1109 if (gamma != 0)
1110 *gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
1111 return 0;
1114 // constant quality only on Macs with Apple Silicon
1115 static bool vtenc_qscale_enabled(void)
1117 return !TARGET_OS_IPHONE && TARGET_CPU_ARM64;
1120 static void set_encoder_property_or_log(AVCodecContext *avctx,
1121 CFStringRef key,
1122 const char *print_option_name,
1123 CFTypeRef value) {
1124 int status;
1125 VTEncContext *vtctx = avctx->priv_data;
1127 status = VTSessionSetProperty(vtctx->session, key, value);
1128 if (status == kVTPropertyNotSupportedErr) {
1129 av_log(avctx,
1130 AV_LOG_INFO,
1131 "This device does not support the %s option. Value ignored.\n",
1132 print_option_name);
1133 } else if (status != 0) {
1134 av_log(avctx,
1135 AV_LOG_ERROR,
1136 "Error setting %s: Error %d\n",
1137 print_option_name,
1138 status);
1142 static int set_encoder_int_property_or_log(AVCodecContext* avctx,
1143 CFStringRef key,
1144 const char* print_option_name,
1145 int value) {
1146 CFNumberRef value_cfnum = CFNumberCreate(kCFAllocatorDefault,
1147 kCFNumberIntType,
1148 &value);
1150 if (value_cfnum == NULL) {
1151 return AVERROR(ENOMEM);
1154 set_encoder_property_or_log(avctx, key, print_option_name, value_cfnum);
1156 CFRelease(value_cfnum);
1158 return 0;
1161 static int vtenc_create_encoder(AVCodecContext *avctx,
1162 CMVideoCodecType codec_type,
1163 CFStringRef profile_level,
1164 CFNumberRef gamma_level,
1165 CFDictionaryRef enc_info,
1166 CFDictionaryRef pixel_buffer_info,
1167 bool constant_bit_rate,
1168 VTCompressionSessionRef *session)
1170 VTEncContext *vtctx = avctx->priv_data;
1171 SInt32 bit_rate = avctx->bit_rate;
1172 SInt32 max_rate = avctx->rc_max_rate;
1173 Float32 quality = avctx->global_quality / FF_QP2LAMBDA;
1174 CFNumberRef bit_rate_num;
1175 CFNumberRef quality_num;
1176 CFNumberRef bytes_per_second;
1177 CFNumberRef one_second;
1178 CFArrayRef data_rate_limits;
1179 int64_t bytes_per_second_value = 0;
1180 int64_t one_second_value = 0;
1181 void *nums[2];
1183 int status = VTCompressionSessionCreate(kCFAllocatorDefault,
1184 avctx->width,
1185 avctx->height,
1186 codec_type,
1187 enc_info,
1188 pixel_buffer_info,
1189 kCFAllocatorDefault,
1190 vtenc_output_callback,
1191 avctx,
1192 session);
1194 if (status || !vtctx->session) {
1195 av_log(avctx, AV_LOG_ERROR, "Error: cannot create compression session: %d\n", status);
1197 #if !TARGET_OS_IPHONE
1198 if (!vtctx->allow_sw) {
1199 av_log(avctx, AV_LOG_ERROR, "Try -allow_sw 1. The hardware encoder may be busy, or not supported.\n");
1201 #endif
1203 return AVERROR_EXTERNAL;
1206 #if defined (MAC_OS_X_VERSION_10_13) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_13)
1207 if (__builtin_available(macOS 10.13, *)) {
1208 if (vtctx->supported_props) {
1209 CFRelease(vtctx->supported_props);
1210 vtctx->supported_props = NULL;
1212 status = VTCopySupportedPropertyDictionaryForEncoder(avctx->width,
1213 avctx->height,
1214 codec_type,
1215 enc_info,
1216 NULL,
1217 &vtctx->supported_props);
1219 if (status != noErr) {
1220 av_log(avctx, AV_LOG_ERROR,"Error retrieving the supported property dictionary err=%"PRId64"\n", (int64_t)status);
1221 return AVERROR_EXTERNAL;
1224 #endif
1226 status = vt_dump_encoder(avctx);
1227 if (status < 0)
1228 return status;
1230 if (avctx->flags & AV_CODEC_FLAG_QSCALE && !vtenc_qscale_enabled()) {
1231 av_log(avctx, AV_LOG_ERROR, "Error: -q:v qscale not available for encoder. Use -b:v bitrate instead.\n");
1232 return AVERROR_EXTERNAL;
1235 if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
1236 quality = quality >= 100 ? 1.0 : quality / 100;
1237 quality_num = CFNumberCreate(kCFAllocatorDefault,
1238 kCFNumberFloat32Type,
1239 &quality);
1240 if (!quality_num) return AVERROR(ENOMEM);
1242 status = VTSessionSetProperty(vtctx->session,
1243 kVTCompressionPropertyKey_Quality,
1244 quality_num);
1245 CFRelease(quality_num);
1246 } else if (avctx->codec_id != AV_CODEC_ID_PRORES) {
1247 bit_rate_num = CFNumberCreate(kCFAllocatorDefault,
1248 kCFNumberSInt32Type,
1249 &bit_rate);
1250 if (!bit_rate_num) return AVERROR(ENOMEM);
1252 if (constant_bit_rate) {
1253 status = VTSessionSetProperty(vtctx->session,
1254 compat_keys.kVTCompressionPropertyKey_ConstantBitRate,
1255 bit_rate_num);
1256 if (status == kVTPropertyNotSupportedErr) {
1257 av_log(avctx, AV_LOG_ERROR, "Error: -constant_bit_rate true is not supported by the encoder.\n");
1258 return AVERROR_EXTERNAL;
1260 } else {
1261 status = VTSessionSetProperty(vtctx->session,
1262 kVTCompressionPropertyKey_AverageBitRate,
1263 bit_rate_num);
1266 CFRelease(bit_rate_num);
1269 if (status) {
1270 av_log(avctx, AV_LOG_ERROR, "Error setting bitrate property: %d\n", status);
1271 return AVERROR_EXTERNAL;
1274 if (vtctx->prio_speed >= 0) {
1275 status = VTSessionSetProperty(vtctx->session,
1276 compat_keys.kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality,
1277 vtctx->prio_speed ? kCFBooleanTrue : kCFBooleanFalse);
1278 if (status) {
1279 av_log(avctx, AV_LOG_WARNING, "PrioritizeEncodingSpeedOverQuality property is not supported on this device. Ignoring.\n");
1283 if ((vtctx->codec_id == AV_CODEC_ID_H264 || vtctx->codec_id == AV_CODEC_ID_HEVC)
1284 && max_rate > 0) {
1285 bytes_per_second_value = max_rate >> 3;
1286 bytes_per_second = CFNumberCreate(kCFAllocatorDefault,
1287 kCFNumberSInt64Type,
1288 &bytes_per_second_value);
1289 if (!bytes_per_second) {
1290 return AVERROR(ENOMEM);
1292 one_second_value = 1;
1293 one_second = CFNumberCreate(kCFAllocatorDefault,
1294 kCFNumberSInt64Type,
1295 &one_second_value);
1296 if (!one_second) {
1297 CFRelease(bytes_per_second);
1298 return AVERROR(ENOMEM);
1300 nums[0] = (void *)bytes_per_second;
1301 nums[1] = (void *)one_second;
1302 data_rate_limits = CFArrayCreate(kCFAllocatorDefault,
1303 (const void **)nums,
1305 &kCFTypeArrayCallBacks);
1307 if (!data_rate_limits) {
1308 CFRelease(bytes_per_second);
1309 CFRelease(one_second);
1310 return AVERROR(ENOMEM);
1312 status = VTSessionSetProperty(vtctx->session,
1313 kVTCompressionPropertyKey_DataRateLimits,
1314 data_rate_limits);
1316 CFRelease(bytes_per_second);
1317 CFRelease(one_second);
1318 CFRelease(data_rate_limits);
1320 if (status) {
1321 av_log(avctx, AV_LOG_ERROR, "Error setting max bitrate property: %d\n", status);
1322 // kVTCompressionPropertyKey_DataRateLimits is available for HEVC
1323 // now but not on old release. There is no document about since
1324 // when. So ignore the error if it failed for hevc.
1325 if (vtctx->codec_id != AV_CODEC_ID_HEVC)
1326 return AVERROR_EXTERNAL;
1330 if (vtctx->codec_id == AV_CODEC_ID_HEVC) {
1331 if (avctx->pix_fmt == AV_PIX_FMT_BGRA && vtctx->alpha_quality > 0.0) {
1332 CFNumberRef alpha_quality_num = CFNumberCreate(kCFAllocatorDefault,
1333 kCFNumberDoubleType,
1334 &vtctx->alpha_quality);
1335 if (!alpha_quality_num) return AVERROR(ENOMEM);
1337 status = VTSessionSetProperty(vtctx->session,
1338 compat_keys.kVTCompressionPropertyKey_TargetQualityForAlpha,
1339 alpha_quality_num);
1340 CFRelease(alpha_quality_num);
1342 if (status) {
1343 av_log(avctx,
1344 AV_LOG_ERROR,
1345 "Error setting alpha quality: %d\n",
1346 status);
1351 if (profile_level) {
1352 status = VTSessionSetProperty(vtctx->session,
1353 kVTCompressionPropertyKey_ProfileLevel,
1354 profile_level);
1355 if (status) {
1356 av_log(avctx, AV_LOG_ERROR, "Error setting profile/level property: %d. Output will be encoded using a supported profile/level combination.\n", status);
1360 if (avctx->gop_size > 0 && avctx->codec_id != AV_CODEC_ID_PRORES) {
1361 CFNumberRef interval = CFNumberCreate(kCFAllocatorDefault,
1362 kCFNumberIntType,
1363 &avctx->gop_size);
1364 if (!interval) {
1365 return AVERROR(ENOMEM);
1368 status = VTSessionSetProperty(vtctx->session,
1369 kVTCompressionPropertyKey_MaxKeyFrameInterval,
1370 interval);
1371 CFRelease(interval);
1373 if (status) {
1374 av_log(avctx, AV_LOG_ERROR, "Error setting 'max key-frame interval' property: %d\n", status);
1375 return AVERROR_EXTERNAL;
1379 if (vtctx->frames_before) {
1380 status = VTSessionSetProperty(vtctx->session,
1381 kVTCompressionPropertyKey_MoreFramesBeforeStart,
1382 kCFBooleanTrue);
1384 if (status == kVTPropertyNotSupportedErr) {
1385 av_log(avctx, AV_LOG_WARNING, "frames_before property is not supported on this device. Ignoring.\n");
1386 } else if (status) {
1387 av_log(avctx, AV_LOG_ERROR, "Error setting frames_before property: %d\n", status);
1391 if (vtctx->frames_after) {
1392 status = VTSessionSetProperty(vtctx->session,
1393 kVTCompressionPropertyKey_MoreFramesAfterEnd,
1394 kCFBooleanTrue);
1396 if (status == kVTPropertyNotSupportedErr) {
1397 av_log(avctx, AV_LOG_WARNING, "frames_after property is not supported on this device. Ignoring.\n");
1398 } else if (status) {
1399 av_log(avctx, AV_LOG_ERROR, "Error setting frames_after property: %d\n", status);
1403 if (avctx->sample_aspect_ratio.num != 0) {
1404 CFNumberRef num;
1405 CFNumberRef den;
1406 CFMutableDictionaryRef par;
1407 AVRational *avpar = &avctx->sample_aspect_ratio;
1409 av_reduce(&avpar->num, &avpar->den,
1410 avpar->num, avpar->den,
1411 0xFFFFFFFF);
1413 num = CFNumberCreate(kCFAllocatorDefault,
1414 kCFNumberIntType,
1415 &avpar->num);
1417 den = CFNumberCreate(kCFAllocatorDefault,
1418 kCFNumberIntType,
1419 &avpar->den);
1423 par = CFDictionaryCreateMutable(kCFAllocatorDefault,
1425 &kCFCopyStringDictionaryKeyCallBacks,
1426 &kCFTypeDictionaryValueCallBacks);
1428 if (!par || !num || !den) {
1429 if (par) CFRelease(par);
1430 if (num) CFRelease(num);
1431 if (den) CFRelease(den);
1433 return AVERROR(ENOMEM);
1436 CFDictionarySetValue(
1437 par,
1438 kCMFormatDescriptionKey_PixelAspectRatioHorizontalSpacing,
1439 num);
1441 CFDictionarySetValue(
1442 par,
1443 kCMFormatDescriptionKey_PixelAspectRatioVerticalSpacing,
1444 den);
1446 status = VTSessionSetProperty(vtctx->session,
1447 kVTCompressionPropertyKey_PixelAspectRatio,
1448 par);
1450 CFRelease(par);
1451 CFRelease(num);
1452 CFRelease(den);
1454 if (status) {
1455 av_log(avctx,
1456 AV_LOG_ERROR,
1457 "Error setting pixel aspect ratio to %d:%d: %d.\n",
1458 avctx->sample_aspect_ratio.num,
1459 avctx->sample_aspect_ratio.den,
1460 status);
1462 return AVERROR_EXTERNAL;
1467 if (vtctx->transfer_function) {
1468 status = VTSessionSetProperty(vtctx->session,
1469 kVTCompressionPropertyKey_TransferFunction,
1470 vtctx->transfer_function);
1472 if (status) {
1473 av_log(avctx, AV_LOG_WARNING, "Could not set transfer function: %d\n", status);
1478 if (vtctx->ycbcr_matrix) {
1479 status = VTSessionSetProperty(vtctx->session,
1480 kVTCompressionPropertyKey_YCbCrMatrix,
1481 vtctx->ycbcr_matrix);
1483 if (status) {
1484 av_log(avctx, AV_LOG_WARNING, "Could not set ycbcr matrix: %d\n", status);
1489 if (vtctx->color_primaries) {
1490 status = VTSessionSetProperty(vtctx->session,
1491 kVTCompressionPropertyKey_ColorPrimaries,
1492 vtctx->color_primaries);
1494 if (status) {
1495 av_log(avctx, AV_LOG_WARNING, "Could not set color primaries: %d\n", status);
1499 if (gamma_level) {
1500 status = VTSessionSetProperty(vtctx->session,
1501 kCVImageBufferGammaLevelKey,
1502 gamma_level);
1504 if (status) {
1505 av_log(avctx, AV_LOG_WARNING, "Could not set gamma level: %d\n", status);
1509 if (!vtctx->has_b_frames && avctx->codec_id != AV_CODEC_ID_PRORES) {
1510 status = VTSessionSetProperty(vtctx->session,
1511 kVTCompressionPropertyKey_AllowFrameReordering,
1512 kCFBooleanFalse);
1514 if (status) {
1515 av_log(avctx, AV_LOG_ERROR, "Error setting 'allow frame reordering' property: %d\n", status);
1516 return AVERROR_EXTERNAL;
1520 if (vtctx->entropy != VT_ENTROPY_NOT_SET) {
1521 CFStringRef entropy = vtctx->entropy == VT_CABAC ?
1522 compat_keys.kVTH264EntropyMode_CABAC:
1523 compat_keys.kVTH264EntropyMode_CAVLC;
1525 status = VTSessionSetProperty(vtctx->session,
1526 compat_keys.kVTCompressionPropertyKey_H264EntropyMode,
1527 entropy);
1529 if (status) {
1530 av_log(avctx, AV_LOG_ERROR, "Error setting entropy property: %d\n", status);
1534 if (vtctx->realtime >= 0) {
1535 status = VTSessionSetProperty(vtctx->session,
1536 compat_keys.kVTCompressionPropertyKey_RealTime,
1537 vtctx->realtime ? kCFBooleanTrue : kCFBooleanFalse);
1539 if (status) {
1540 av_log(avctx, AV_LOG_ERROR, "Error setting realtime property: %d\n", status);
1544 if ((avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) != 0) {
1545 set_encoder_property_or_log(avctx,
1546 compat_keys.kVTCompressionPropertyKey_AllowOpenGOP,
1547 "AllowOpenGop",
1548 kCFBooleanFalse);
1551 if (avctx->qmin >= 0) {
1552 status = set_encoder_int_property_or_log(avctx,
1553 compat_keys.kVTCompressionPropertyKey_MinAllowedFrameQP,
1554 "qmin",
1555 avctx->qmin);
1557 if (status != 0) {
1558 return status;
1562 if (avctx->qmax >= 0) {
1563 status = set_encoder_int_property_or_log(avctx,
1564 compat_keys.kVTCompressionPropertyKey_MaxAllowedFrameQP,
1565 "qmax",
1566 avctx->qmax);
1568 if (status != 0) {
1569 return status;
1573 if (vtctx->max_slice_bytes >= 0 && avctx->codec_id == AV_CODEC_ID_H264) {
1574 status = set_encoder_int_property_or_log(avctx,
1575 kVTCompressionPropertyKey_MaxH264SliceBytes,
1576 "max_slice_bytes",
1577 vtctx->max_slice_bytes);
1579 if (status != 0) {
1580 return status;
1584 if (vtctx->power_efficient >= 0) {
1585 set_encoder_property_or_log(avctx,
1586 compat_keys.kVTCompressionPropertyKey_MaximizePowerEfficiency,
1587 "power_efficient",
1588 vtctx->power_efficient ? kCFBooleanTrue : kCFBooleanFalse);
1591 if (vtctx->max_ref_frames > 0) {
1592 status = set_encoder_int_property_or_log(avctx,
1593 compat_keys.kVTCompressionPropertyKey_ReferenceBufferCount,
1594 "max_ref_frames",
1595 vtctx->max_ref_frames);
1597 if (status != 0) {
1598 return status;
1602 status = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
1603 if (status) {
1604 av_log(avctx, AV_LOG_ERROR, "Error: cannot prepare encoder: %d\n", status);
1605 return AVERROR_EXTERNAL;
1608 return 0;
1611 static int vtenc_configure_encoder(AVCodecContext *avctx)
1613 CFMutableDictionaryRef enc_info;
1614 CFMutableDictionaryRef pixel_buffer_info = NULL;
1615 CMVideoCodecType codec_type;
1616 VTEncContext *vtctx = avctx->priv_data;
1617 CFStringRef profile_level = NULL;
1618 CFNumberRef gamma_level = NULL;
1619 int status;
1621 codec_type = get_cm_codec_type(avctx, vtctx->profile, vtctx->alpha_quality);
1622 if (!codec_type) {
1623 av_log(avctx, AV_LOG_ERROR, "Error: no mapping for AVCodecID %d\n", avctx->codec_id);
1624 return AVERROR(EINVAL);
1627 #if defined(MAC_OS_X_VERSION_10_9) && !TARGET_OS_IPHONE && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_9)
1628 if (avctx->codec_id == AV_CODEC_ID_PRORES) {
1629 if (__builtin_available(macOS 10.10, *)) {
1630 VTRegisterProfessionalVideoWorkflowVideoEncoders();
1633 #endif
1635 vtctx->codec_id = avctx->codec_id;
1637 if (vtctx->codec_id == AV_CODEC_ID_H264) {
1638 vtctx->get_param_set_func = CMVideoFormatDescriptionGetH264ParameterSetAtIndex;
1640 vtctx->has_b_frames = avctx->max_b_frames > 0;
1641 if(vtctx->has_b_frames && (0xFF & vtctx->profile) == AV_PROFILE_H264_BASELINE){
1642 av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
1643 vtctx->has_b_frames = 0;
1646 if (vtctx->entropy == VT_CABAC && (0xFF & vtctx->profile) == AV_PROFILE_H264_BASELINE) {
1647 av_log(avctx, AV_LOG_WARNING, "CABAC entropy requires 'main' or 'high' profile, but baseline was requested. Encode will not use CABAC entropy.\n");
1648 vtctx->entropy = VT_ENTROPY_NOT_SET;
1651 if (!get_vt_h264_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
1652 } else if (vtctx->codec_id == AV_CODEC_ID_HEVC) {
1653 vtctx->get_param_set_func = compat_keys.CMVideoFormatDescriptionGetHEVCParameterSetAtIndex;
1654 if (!vtctx->get_param_set_func) return AVERROR(EINVAL);
1655 if (!get_vt_hevc_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
1656 // HEVC has b-byramid
1657 vtctx->has_b_frames = avctx->max_b_frames > 0 ? 2 : 0;
1658 } else if (vtctx->codec_id == AV_CODEC_ID_PRORES) {
1659 avctx->codec_tag = av_bswap32(codec_type);
1662 enc_info = CFDictionaryCreateMutable(
1663 kCFAllocatorDefault,
1665 &kCFCopyStringDictionaryKeyCallBacks,
1666 &kCFTypeDictionaryValueCallBacks
1669 if (!enc_info) return AVERROR(ENOMEM);
1671 #if !TARGET_OS_IPHONE
1672 if(vtctx->require_sw) {
1673 CFDictionarySetValue(enc_info,
1674 compat_keys.kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
1675 kCFBooleanFalse);
1676 } else if (!vtctx->allow_sw) {
1677 CFDictionarySetValue(enc_info,
1678 compat_keys.kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
1679 kCFBooleanTrue);
1680 } else {
1681 CFDictionarySetValue(enc_info,
1682 compat_keys.kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
1683 kCFBooleanTrue);
1685 #endif
1687 // low-latency mode: eliminate frame reordering, follow a one-in-one-out encoding mode
1688 if ((avctx->flags & AV_CODEC_FLAG_LOW_DELAY) && avctx->codec_id == AV_CODEC_ID_H264) {
1689 CFDictionarySetValue(enc_info,
1690 compat_keys.kVTVideoEncoderSpecification_EnableLowLatencyRateControl,
1691 kCFBooleanTrue);
1694 if (avctx->pix_fmt != AV_PIX_FMT_VIDEOTOOLBOX) {
1695 status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info);
1696 if (status)
1697 goto init_cleanup;
1700 vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0;
1702 get_cv_gamma(avctx, &gamma_level);
1703 vtctx->transfer_function = av_map_videotoolbox_color_trc_from_av(avctx->color_trc);
1704 vtctx->ycbcr_matrix = av_map_videotoolbox_color_matrix_from_av(avctx->colorspace);
1705 vtctx->color_primaries = av_map_videotoolbox_color_primaries_from_av(avctx->color_primaries);
1708 if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1709 status = vtenc_populate_extradata(avctx,
1710 codec_type,
1711 profile_level,
1712 gamma_level,
1713 enc_info,
1714 pixel_buffer_info);
1715 if (status)
1716 goto init_cleanup;
1719 status = vtenc_create_encoder(avctx,
1720 codec_type,
1721 profile_level,
1722 gamma_level,
1723 enc_info,
1724 pixel_buffer_info,
1725 vtctx->constant_bit_rate,
1726 &vtctx->session);
1728 init_cleanup:
1729 if (gamma_level)
1730 CFRelease(gamma_level);
1732 if (pixel_buffer_info)
1733 CFRelease(pixel_buffer_info);
1735 CFRelease(enc_info);
1737 return status;
1740 static av_cold int vtenc_init(AVCodecContext *avctx)
1742 VTEncContext *vtctx = avctx->priv_data;
1743 CFBooleanRef has_b_frames_cfbool;
1744 int status;
1746 pthread_once(&once_ctrl, loadVTEncSymbols);
1748 pthread_mutex_init(&vtctx->lock, NULL);
1749 pthread_cond_init(&vtctx->cv_sample_sent, NULL);
1751 // It can happen when user set avctx->profile directly.
1752 if (vtctx->profile == AV_PROFILE_UNKNOWN)
1753 vtctx->profile = avctx->profile;
1754 status = vtenc_configure_encoder(avctx);
1755 if (status) return status;
1757 status = VTSessionCopyProperty(vtctx->session,
1758 kVTCompressionPropertyKey_AllowFrameReordering,
1759 kCFAllocatorDefault,
1760 &has_b_frames_cfbool);
1762 if (!status && has_b_frames_cfbool) {
1763 //Some devices don't output B-frames for main profile, even if requested.
1764 // HEVC has b-pyramid
1765 if (CFBooleanGetValue(has_b_frames_cfbool))
1766 vtctx->has_b_frames = avctx->codec_id == AV_CODEC_ID_HEVC ? 2 : 1;
1767 else
1768 vtctx->has_b_frames = 0;
1769 CFRelease(has_b_frames_cfbool);
1771 avctx->has_b_frames = vtctx->has_b_frames;
1773 return 0;
1776 static void vtenc_get_frame_info(CMSampleBufferRef buffer, bool *is_key_frame)
1778 CFArrayRef attachments;
1779 CFDictionaryRef attachment;
1780 CFBooleanRef not_sync;
1781 CFIndex len;
1783 attachments = CMSampleBufferGetSampleAttachmentsArray(buffer, false);
1784 len = !attachments ? 0 : CFArrayGetCount(attachments);
1786 if (!len) {
1787 *is_key_frame = true;
1788 return;
1791 attachment = CFArrayGetValueAtIndex(attachments, 0);
1793 if (CFDictionaryGetValueIfPresent(attachment,
1794 kCMSampleAttachmentKey_NotSync,
1795 (const void **)&not_sync))
1797 *is_key_frame = !CFBooleanGetValue(not_sync);
1798 } else {
1799 *is_key_frame = true;
1803 static int is_post_sei_nal_type(int nal_type){
1804 return nal_type != H264_NAL_SEI &&
1805 nal_type != H264_NAL_SPS &&
1806 nal_type != H264_NAL_PPS &&
1807 nal_type != H264_NAL_AUD;
1811 * Finds the sei message start/size of type find_sei_type.
1812 * If more than one of that type exists, the last one is returned.
1814 static int find_sei_end(AVCodecContext *avctx,
1815 uint8_t *nal_data,
1816 size_t nal_size,
1817 uint8_t **sei_end)
1819 int nal_type;
1820 size_t sei_payload_size = 0;
1821 uint8_t *nal_start = nal_data;
1822 *sei_end = NULL;
1824 if (!nal_size)
1825 return 0;
1827 nal_type = *nal_data & 0x1F;
1828 if (nal_type != H264_NAL_SEI)
1829 return 0;
1831 nal_data++;
1832 nal_size--;
1834 if (nal_data[nal_size - 1] == 0x80)
1835 nal_size--;
1837 while (nal_size > 0 && *nal_data > 0) {
1839 nal_data++;
1840 nal_size--;
1841 } while (nal_size > 0 && *nal_data == 0xFF);
1843 if (!nal_size) {
1844 av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing type.\n");
1845 return AVERROR_INVALIDDATA;
1849 sei_payload_size += *nal_data;
1850 nal_data++;
1851 nal_size--;
1852 } while (nal_size > 0 && *nal_data == 0xFF);
1854 if (nal_size < sei_payload_size) {
1855 av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing size.\n");
1856 return AVERROR_INVALIDDATA;
1859 nal_data += sei_payload_size;
1860 nal_size -= sei_payload_size;
1863 *sei_end = nal_data;
1865 return nal_data - nal_start + 1;
/**
 * Copies the data inserting emulation prevention bytes as needed.
 * Existing data in the destination can be taken into account by providing
 * dst with a dst_offset > 0: the up to two bytes preceding dst_offset seed
 * the zero-run detection.
 *
 * A 0x03 byte is inserted whenever two zero bytes are followed by a byte
 * with a value of 3 or less. Bytes that do not fit into dst are counted but
 * not written, so calling with dst == NULL and dst_size == 0 only measures
 * the required size.
 *
 * @param src        Bytes to copy.
 * @param src_size   Number of bytes in src.
 * @param dst        Destination buffer, may be NULL when dst_size is 0.
 * @param dst_offset Offset in dst at which writing starts; negative values
 *                   are treated as 0.
 * @param dst_size   Total size of dst in bytes.
 * @return The number of bytes copied on success. On failure, the negative of
 *         the number of bytes needed to copy src is returned.
 */
static int copy_emulation_prev(const uint8_t *src,
                               size_t         src_size,
                               uint8_t       *dst,
                               ssize_t        dst_offset,
                               size_t         dst_size)
{
    /* Indices are used throughout so that no pointer is ever formed from
     * NULL or moved past the end of dst. */
    const size_t start = dst_offset > 0 ? (size_t)dst_offset : 0;
    size_t pos = start;
    size_t i;
    int zeros = 0;
    int wrote_bytes;

    for (i = start > 2 ? start - 2 : 0; i < start && i < dst_size; i++) {
        if (!dst[i])
            zeros++;
        else
            zeros = 0;
    }

    for (i = 0; i < src_size; i++) {
        if (zeros == 2) {
            if (src[i] <= 3) {
                if (pos < dst_size)
                    dst[pos] = 3;
                pos++;
            }

            zeros = 0;
        }

        if (pos < dst_size)
            dst[pos] = src[i];
        pos++;

        if (!src[i])
            zeros++;
        else
            zeros = 0;
    }

    wrote_bytes = (int)(pos - start);

    if (pos > dst_size)
        return -wrote_bytes;

    return wrote_bytes;
}
1927 static int write_sei(const ExtraSEI *sei,
1928 int sei_type,
1929 uint8_t *dst,
1930 size_t dst_size)
1932 uint8_t *sei_start = dst;
1933 size_t remaining_sei_size = sei->size;
1934 size_t remaining_dst_size = dst_size;
1935 int header_bytes;
1936 int bytes_written;
1937 ssize_t offset;
1939 if (!remaining_dst_size)
1940 return AVERROR_BUFFER_TOO_SMALL;
1942 while (sei_type && remaining_dst_size != 0) {
1943 int sei_byte = sei_type > 255 ? 255 : sei_type;
1944 *dst = sei_byte;
1946 sei_type -= sei_byte;
1947 dst++;
1948 remaining_dst_size--;
1951 if (!dst_size)
1952 return AVERROR_BUFFER_TOO_SMALL;
1954 while (remaining_sei_size && remaining_dst_size != 0) {
1955 int size_byte = remaining_sei_size > 255 ? 255 : remaining_sei_size;
1956 *dst = size_byte;
1958 remaining_sei_size -= size_byte;
1959 dst++;
1960 remaining_dst_size--;
1963 if (remaining_dst_size < sei->size)
1964 return AVERROR_BUFFER_TOO_SMALL;
1966 header_bytes = dst - sei_start;
1968 offset = header_bytes;
1969 bytes_written = copy_emulation_prev(sei->data,
1970 sei->size,
1971 sei_start,
1972 offset,
1973 dst_size);
1974 if (bytes_written < 0)
1975 return AVERROR_BUFFER_TOO_SMALL;
1977 bytes_written += header_bytes;
1978 return bytes_written;
1982 * Copies NAL units and replaces length codes with
1983 * H.264 Annex B start codes. On failure, the contents of
1984 * dst_data may have been modified.
1986 * @param length_code_size Byte length of each length code
1987 * @param sample_buffer NAL units prefixed with length codes.
1988 * @param sei Optional A53 closed captions SEI data.
1989 * @param dst_data Must be zeroed before calling this function.
1990 * Contains the copied NAL units prefixed with
1991 * start codes when the function returns
1992 * successfully.
1993 * @param dst_size Length of dst_data
1994 * @return 0 on success
1995 * AVERROR_INVALIDDATA if length_code_size is invalid
1996 * AVERROR_BUFFER_TOO_SMALL if dst_data is too small
1997 * or if a length_code in src_data specifies data beyond
1998 * the end of its buffer.
2000 static int copy_replace_length_codes(
2001 AVCodecContext *avctx,
2002 size_t length_code_size,
2003 CMSampleBufferRef sample_buffer,
2004 ExtraSEI *sei,
2005 uint8_t *dst_data,
2006 size_t dst_size)
2008 size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
2009 size_t remaining_src_size = src_size;
2010 size_t remaining_dst_size = dst_size;
2011 size_t src_offset = 0;
2012 int wrote_sei = 0;
2013 int status;
2014 uint8_t size_buf[4];
2015 uint8_t nal_type;
2016 CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);
2018 if (length_code_size > 4) {
2019 return AVERROR_INVALIDDATA;
2022 while (remaining_src_size > 0) {
2023 size_t curr_src_len;
2024 size_t curr_dst_len;
2025 size_t box_len = 0;
2026 size_t i;
2028 uint8_t *dst_box;
2030 status = CMBlockBufferCopyDataBytes(block,
2031 src_offset,
2032 length_code_size,
2033 size_buf);
2034 if (status) {
2035 av_log(avctx, AV_LOG_ERROR, "Cannot copy length: %d\n", status);
2036 return AVERROR_EXTERNAL;
2039 status = CMBlockBufferCopyDataBytes(block,
2040 src_offset + length_code_size,
2042 &nal_type);
2044 if (status) {
2045 av_log(avctx, AV_LOG_ERROR, "Cannot copy type: %d\n", status);
2046 return AVERROR_EXTERNAL;
2049 nal_type &= 0x1F;
2051 for (i = 0; i < length_code_size; i++) {
2052 box_len <<= 8;
2053 box_len |= size_buf[i];
2056 if (sei && !wrote_sei && is_post_sei_nal_type(nal_type)) {
2057 //No SEI NAL unit - insert.
2058 int wrote_bytes;
2060 memcpy(dst_data, start_code, sizeof(start_code));
2061 dst_data += sizeof(start_code);
2062 remaining_dst_size -= sizeof(start_code);
2064 *dst_data = H264_NAL_SEI;
2065 dst_data++;
2066 remaining_dst_size--;
2068 wrote_bytes = write_sei(sei,
2069 SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35,
2070 dst_data,
2071 remaining_dst_size);
2073 if (wrote_bytes < 0)
2074 return wrote_bytes;
2076 remaining_dst_size -= wrote_bytes;
2077 dst_data += wrote_bytes;
2079 if (remaining_dst_size <= 0)
2080 return AVERROR_BUFFER_TOO_SMALL;
2082 *dst_data = 0x80;
2084 dst_data++;
2085 remaining_dst_size--;
2087 wrote_sei = 1;
2090 curr_src_len = box_len + length_code_size;
2091 curr_dst_len = box_len + sizeof(start_code);
2093 if (remaining_src_size < curr_src_len) {
2094 return AVERROR_BUFFER_TOO_SMALL;
2097 if (remaining_dst_size < curr_dst_len) {
2098 return AVERROR_BUFFER_TOO_SMALL;
2101 dst_box = dst_data + sizeof(start_code);
2103 memcpy(dst_data, start_code, sizeof(start_code));
2104 status = CMBlockBufferCopyDataBytes(block,
2105 src_offset + length_code_size,
2106 box_len,
2107 dst_box);
2109 if (status) {
2110 av_log(avctx, AV_LOG_ERROR, "Cannot copy data: %d\n", status);
2111 return AVERROR_EXTERNAL;
2114 if (sei && !wrote_sei && nal_type == H264_NAL_SEI) {
2115 //Found SEI NAL unit - append.
2116 int wrote_bytes;
2117 int old_sei_length;
2118 int extra_bytes;
2119 uint8_t *new_sei;
2120 old_sei_length = find_sei_end(avctx, dst_box, box_len, &new_sei);
2121 if (old_sei_length < 0)
2122 return status;
2124 wrote_bytes = write_sei(sei,
2125 SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35,
2126 new_sei,
2127 remaining_dst_size - old_sei_length);
2128 if (wrote_bytes < 0)
2129 return wrote_bytes;
2131 if (new_sei + wrote_bytes >= dst_data + remaining_dst_size)
2132 return AVERROR_BUFFER_TOO_SMALL;
2134 new_sei[wrote_bytes++] = 0x80;
2135 extra_bytes = wrote_bytes - (dst_box + box_len - new_sei);
2137 dst_data += extra_bytes;
2138 remaining_dst_size -= extra_bytes;
2140 wrote_sei = 1;
2143 src_offset += curr_src_len;
2144 dst_data += curr_dst_len;
2146 remaining_src_size -= curr_src_len;
2147 remaining_dst_size -= curr_dst_len;
2150 return 0;
2154 * Returns a sufficient number of bytes to contain the sei data.
2155 * It may be greater than the minimum required.
2157 static int get_sei_msg_bytes(const ExtraSEI* sei, int type){
2158 int copied_size;
2159 if (sei->size == 0)
2160 return 0;
2162 copied_size = -copy_emulation_prev(sei->data,
2163 sei->size,
2164 NULL,
2168 if ((sei->size % 255) == 0) //may result in an extra byte
2169 copied_size++;
2171 return copied_size + sei->size / 255 + 1 + type / 255 + 1;
2174 static int vtenc_cm_to_avpacket(
2175 AVCodecContext *avctx,
2176 CMSampleBufferRef sample_buffer,
2177 AVPacket *pkt,
2178 ExtraSEI *sei)
2180 VTEncContext *vtctx = avctx->priv_data;
2182 int status;
2183 bool is_key_frame;
2184 bool add_header;
2185 size_t length_code_size;
2186 size_t header_size = 0;
2187 size_t in_buf_size;
2188 size_t out_buf_size;
2189 size_t sei_nalu_size = 0;
2190 int64_t dts_delta;
2191 int64_t time_base_num;
2192 int nalu_count;
2193 CMTime pts;
2194 CMTime dts;
2195 CMVideoFormatDescriptionRef vid_fmt;
2197 vtenc_get_frame_info(sample_buffer, &is_key_frame);
2199 if (vtctx->get_param_set_func) {
2200 status = get_length_code_size(avctx, sample_buffer, &length_code_size);
2201 if (status) return status;
2203 add_header = is_key_frame && !(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
2205 if (add_header) {
2206 vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
2207 if (!vid_fmt) {
2208 av_log(avctx, AV_LOG_ERROR, "Cannot get format description.\n");
2209 return AVERROR_EXTERNAL;
2212 status = get_params_size(avctx, vid_fmt, &header_size);
2213 if (status) return status;
2216 status = count_nalus(length_code_size, sample_buffer, &nalu_count);
2217 if(status)
2218 return status;
2220 if (sei) {
2221 size_t msg_size = get_sei_msg_bytes(sei,
2222 SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35);
2224 sei_nalu_size = sizeof(start_code) + 1 + msg_size + 1;
2227 in_buf_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
2228 out_buf_size = header_size +
2229 in_buf_size +
2230 sei_nalu_size +
2231 nalu_count * ((int)sizeof(start_code) - (int)length_code_size);
2233 status = ff_get_encode_buffer(avctx, pkt, out_buf_size, 0);
2234 if (status < 0)
2235 return status;
2237 if (add_header) {
2238 status = copy_param_sets(avctx, vid_fmt, pkt->data, out_buf_size);
2239 if(status) return status;
2242 status = copy_replace_length_codes(
2243 avctx,
2244 length_code_size,
2245 sample_buffer,
2246 sei,
2247 pkt->data + header_size,
2248 pkt->size - header_size
2251 if (status) {
2252 av_log(avctx, AV_LOG_ERROR, "Error copying packet data: %d\n", status);
2253 return status;
2255 } else {
2256 size_t len;
2257 CMBlockBufferRef buf = CMSampleBufferGetDataBuffer(sample_buffer);
2258 if (!buf) {
2259 av_log(avctx, AV_LOG_ERROR, "Error getting block buffer\n");
2260 return AVERROR_EXTERNAL;
2263 len = CMBlockBufferGetDataLength(buf);
2265 status = ff_get_encode_buffer(avctx, pkt, len, 0);
2266 if (status < 0)
2267 return status;
2269 status = CMBlockBufferCopyDataBytes(buf, 0, len, pkt->data);
2270 if (status) {
2271 av_log(avctx, AV_LOG_ERROR, "Error copying packet data: %d\n", status);
2272 return AVERROR_EXTERNAL;
2276 if (is_key_frame) {
2277 pkt->flags |= AV_PKT_FLAG_KEY;
2280 pts = CMSampleBufferGetPresentationTimeStamp(sample_buffer);
2281 dts = CMSampleBufferGetDecodeTimeStamp (sample_buffer);
2283 if (CMTIME_IS_INVALID(dts)) {
2284 if (!vtctx->has_b_frames) {
2285 dts = pts;
2286 } else {
2287 av_log(avctx, AV_LOG_ERROR, "DTS is invalid.\n");
2288 return AVERROR_EXTERNAL;
2292 dts_delta = vtctx->dts_delta >= 0 ? vtctx->dts_delta : 0;
2293 time_base_num = avctx->time_base.num;
2294 pkt->pts = pts.value / time_base_num;
2295 pkt->dts = dts.value / time_base_num - dts_delta;
2297 return 0;
2301 * contiguous_buf_size is 0 if not contiguous, and the size of the buffer
2302 * containing all planes if so.
2304 static int get_cv_pixel_info(
2305 AVCodecContext *avctx,
2306 const AVFrame *frame,
2307 int *color,
2308 int *plane_count,
2309 size_t *widths,
2310 size_t *heights,
2311 size_t *strides,
2312 size_t *contiguous_buf_size)
2314 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
2315 VTEncContext *vtctx = avctx->priv_data;
2316 int av_format = frame->format;
2317 int av_color_range = avctx->color_range;
2318 int i;
2319 int range_guessed;
2320 int status;
2322 if (!desc)
2323 return AVERROR(EINVAL);
2325 status = get_cv_pixel_format(avctx, av_format, av_color_range, color, &range_guessed);
2326 if (status)
2327 return status;
2329 if (range_guessed) {
2330 if (!vtctx->warned_color_range) {
2331 vtctx->warned_color_range = true;
2332 av_log(avctx,
2333 AV_LOG_WARNING,
2334 "Color range not set for %s. Using MPEG range.\n",
2335 av_get_pix_fmt_name(av_format));
2339 *plane_count = av_pix_fmt_count_planes(avctx->pix_fmt);
2341 for (i = 0; i < desc->nb_components; i++) {
2342 int p = desc->comp[i].plane;
2343 bool hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA);
2344 bool isAlpha = hasAlpha && (p + 1 == *plane_count);
2345 bool isChroma = (p != 0) && !isAlpha;
2346 int shiftw = isChroma ? desc->log2_chroma_w : 0;
2347 int shifth = isChroma ? desc->log2_chroma_h : 0;
2348 widths[p] = (avctx->width + ((1 << shiftw) >> 1)) >> shiftw;
2349 heights[p] = (avctx->height + ((1 << shifth) >> 1)) >> shifth;
2350 strides[p] = frame->linesize[p];
2353 *contiguous_buf_size = 0;
2354 for (i = 0; i < *plane_count; i++) {
2355 if (i < *plane_count - 1 &&
2356 frame->data[i] + strides[i] * heights[i] != frame->data[i + 1]) {
2357 *contiguous_buf_size = 0;
2358 break;
2361 *contiguous_buf_size += strides[i] * heights[i];
2364 return 0;
2367 //Not used on OSX - frame is never copied.
2368 static int copy_avframe_to_pixel_buffer(AVCodecContext *avctx,
2369 const AVFrame *frame,
2370 CVPixelBufferRef cv_img,
2371 const size_t *plane_strides,
2372 const size_t *plane_rows)
2374 int i, j;
2375 size_t plane_count;
2376 int status;
2377 int rows;
2378 int src_stride;
2379 int dst_stride;
2380 uint8_t *src_addr;
2381 uint8_t *dst_addr;
2382 size_t copy_bytes;
2384 status = CVPixelBufferLockBaseAddress(cv_img, 0);
2385 if (status) {
2386 av_log(
2387 avctx,
2388 AV_LOG_ERROR,
2389 "Error: Could not lock base address of CVPixelBuffer: %d.\n",
2390 status
2394 if (CVPixelBufferIsPlanar(cv_img)) {
2395 plane_count = CVPixelBufferGetPlaneCount(cv_img);
2396 for (i = 0; frame->data[i]; i++) {
2397 if (i == plane_count) {
2398 CVPixelBufferUnlockBaseAddress(cv_img, 0);
2399 av_log(avctx,
2400 AV_LOG_ERROR,
2401 "Error: different number of planes in AVFrame and CVPixelBuffer.\n"
2404 return AVERROR_EXTERNAL;
2407 dst_addr = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(cv_img, i);
2408 src_addr = (uint8_t*)frame->data[i];
2409 dst_stride = CVPixelBufferGetBytesPerRowOfPlane(cv_img, i);
2410 src_stride = plane_strides[i];
2411 rows = plane_rows[i];
2413 if (dst_stride == src_stride) {
2414 memcpy(dst_addr, src_addr, src_stride * rows);
2415 } else {
2416 copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
2418 for (j = 0; j < rows; j++) {
2419 memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
2423 } else {
2424 if (frame->data[1]) {
2425 CVPixelBufferUnlockBaseAddress(cv_img, 0);
2426 av_log(avctx,
2427 AV_LOG_ERROR,
2428 "Error: different number of planes in AVFrame and non-planar CVPixelBuffer.\n"
2431 return AVERROR_EXTERNAL;
2434 dst_addr = (uint8_t*)CVPixelBufferGetBaseAddress(cv_img);
2435 src_addr = (uint8_t*)frame->data[0];
2436 dst_stride = CVPixelBufferGetBytesPerRow(cv_img);
2437 src_stride = plane_strides[0];
2438 rows = plane_rows[0];
2440 if (dst_stride == src_stride) {
2441 memcpy(dst_addr, src_addr, src_stride * rows);
2442 } else {
2443 copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
2445 for (j = 0; j < rows; j++) {
2446 memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
2451 status = CVPixelBufferUnlockBaseAddress(cv_img, 0);
2452 if (status) {
2453 av_log(avctx, AV_LOG_ERROR, "Error: Could not unlock CVPixelBuffer base address: %d.\n", status);
2454 return AVERROR_EXTERNAL;
2457 return 0;
2460 static int create_cv_pixel_buffer(AVCodecContext *avctx,
2461 const AVFrame *frame,
2462 CVPixelBufferRef *cv_img,
2463 BufNode *node)
2465 int plane_count;
2466 int color;
2467 size_t widths [AV_NUM_DATA_POINTERS];
2468 size_t heights[AV_NUM_DATA_POINTERS];
2469 size_t strides[AV_NUM_DATA_POINTERS];
2470 int status;
2471 size_t contiguous_buf_size;
2472 CVPixelBufferPoolRef pix_buf_pool;
2473 VTEncContext* vtctx = avctx->priv_data;
2475 if (avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX) {
2476 av_assert0(frame->format == AV_PIX_FMT_VIDEOTOOLBOX);
2478 *cv_img = (CVPixelBufferRef)frame->data[3];
2479 av_assert0(*cv_img);
2481 CFRetain(*cv_img);
2482 if (frame->buf[0]) {
2483 node->frame_buf = av_buffer_ref(frame->buf[0]);
2484 if (!node->frame_buf)
2485 return AVERROR(ENOMEM);
2488 return 0;
2491 memset(widths, 0, sizeof(widths));
2492 memset(heights, 0, sizeof(heights));
2493 memset(strides, 0, sizeof(strides));
2495 status = get_cv_pixel_info(
2496 avctx,
2497 frame,
2498 &color,
2499 &plane_count,
2500 widths,
2501 heights,
2502 strides,
2503 &contiguous_buf_size
2506 if (status) {
2507 av_log(
2508 avctx,
2509 AV_LOG_ERROR,
2510 "Error: Cannot convert format %d color_range %d: %d\n",
2511 frame->format,
2512 frame->color_range,
2513 status
2516 return status;
2519 pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
2520 if (!pix_buf_pool) {
2521 /* On iOS, the VT session is invalidated when the APP switches from
2522 * foreground to background and vice versa. Fetch the actual error code
2523 * of the VT session to detect that case and restart the VT session
2524 * accordingly. */
2525 OSStatus vtstatus;
2527 vtstatus = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
2528 if (vtstatus == kVTInvalidSessionErr) {
2529 vtenc_reset(vtctx);
2531 status = vtenc_configure_encoder(avctx);
2532 if (status == 0)
2533 pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
2535 if (!pix_buf_pool) {
2536 av_log(avctx, AV_LOG_ERROR, "Could not get pixel buffer pool.\n");
2537 return AVERROR_EXTERNAL;
2539 else
2540 av_log(avctx, AV_LOG_WARNING, "VT session restarted because of a "
2541 "kVTInvalidSessionErr error.\n");
2544 status = CVPixelBufferPoolCreatePixelBuffer(NULL,
2545 pix_buf_pool,
2546 cv_img);
2549 if (status) {
2550 av_log(avctx, AV_LOG_ERROR, "Could not create pixel buffer from pool: %d.\n", status);
2551 return AVERROR_EXTERNAL;
2554 status = copy_avframe_to_pixel_buffer(avctx, frame, *cv_img, strides, heights);
2555 if (status) {
2556 CFRelease(*cv_img);
2557 *cv_img = NULL;
2558 return status;
2561 return 0;
2564 static int create_encoder_dict_h264(const AVFrame *frame,
2565 CFDictionaryRef* dict_out)
2567 CFDictionaryRef dict = NULL;
2568 if (frame->pict_type == AV_PICTURE_TYPE_I) {
2569 const void *keys[] = { kVTEncodeFrameOptionKey_ForceKeyFrame };
2570 const void *vals[] = { kCFBooleanTrue };
2572 dict = CFDictionaryCreate(NULL, keys, vals, 1, NULL, NULL);
2573 if(!dict) return AVERROR(ENOMEM);
2576 *dict_out = dict;
2577 return 0;
2580 static int vtenc_send_frame(AVCodecContext *avctx,
2581 VTEncContext *vtctx,
2582 const AVFrame *frame)
2584 CMTime time;
2585 CFDictionaryRef frame_dict = NULL;
2586 CVPixelBufferRef cv_img = NULL;
2587 AVFrameSideData *side_data = NULL;
2588 BufNode *node = av_mallocz(sizeof(*node));
2589 int status;
2591 if (!node)
2592 return AVERROR(ENOMEM);
2594 status = create_cv_pixel_buffer(avctx, frame, &cv_img, node);
2595 if (status)
2596 goto out;
2598 status = create_encoder_dict_h264(frame, &frame_dict);
2599 if (status)
2600 goto out;
2602 #if CONFIG_ATSC_A53
2603 side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC);
2604 if (vtctx->a53_cc && side_data && side_data->size) {
2605 status = ff_alloc_a53_sei(frame, 0, &node->sei.data, &node->sei.size);
2606 if (status < 0) {
2607 goto out;
2610 #endif
2612 time = CMTimeMake(frame->pts * avctx->time_base.num, avctx->time_base.den);
2613 status = VTCompressionSessionEncodeFrame(
2614 vtctx->session,
2615 cv_img,
2616 time,
2617 kCMTimeInvalid,
2618 frame_dict,
2619 node,
2620 NULL
2623 if (status) {
2624 av_log(avctx, AV_LOG_ERROR, "Error: cannot encode frame: %d\n", status);
2625 status = AVERROR_EXTERNAL;
2626 // Not necessary, just in case new code put after here
2627 goto out;
2630 out:
2631 if (frame_dict)
2632 CFRelease(frame_dict);
2633 if (cv_img)
2634 CFRelease(cv_img);
2635 if (status)
2636 vtenc_free_buf_node(node);
2638 return status;
2641 static av_cold int vtenc_frame(
2642 AVCodecContext *avctx,
2643 AVPacket *pkt,
2644 const AVFrame *frame,
2645 int *got_packet)
2647 VTEncContext *vtctx = avctx->priv_data;
2648 bool get_frame;
2649 int status;
2650 CMSampleBufferRef buf = NULL;
2651 ExtraSEI sei = {0};
2653 if (frame) {
2654 status = vtenc_send_frame(avctx, vtctx, frame);
2656 if (status) {
2657 status = AVERROR_EXTERNAL;
2658 goto end_nopkt;
2661 if (vtctx->frame_ct_in == 0) {
2662 vtctx->first_pts = frame->pts;
2663 } else if(vtctx->frame_ct_in == vtctx->has_b_frames) {
2664 vtctx->dts_delta = frame->pts - vtctx->first_pts;
2667 vtctx->frame_ct_in++;
2668 } else if(!vtctx->flushing) {
2669 vtctx->flushing = true;
2671 status = VTCompressionSessionCompleteFrames(vtctx->session,
2672 kCMTimeIndefinite);
2674 if (status) {
2675 av_log(avctx, AV_LOG_ERROR, "Error flushing frames: %d\n", status);
2676 status = AVERROR_EXTERNAL;
2677 goto end_nopkt;
2681 *got_packet = 0;
2682 get_frame = vtctx->dts_delta >= 0 || !frame;
2683 if (!get_frame) {
2684 status = 0;
2685 goto end_nopkt;
2688 status = vtenc_q_pop(vtctx, !frame, &buf, &sei);
2689 if (status) goto end_nopkt;
2690 if (!buf) goto end_nopkt;
2692 status = vtenc_cm_to_avpacket(avctx, buf, pkt, sei.data ? &sei : NULL);
2693 av_free(sei.data);
2694 CFRelease(buf);
2695 if (status) goto end_nopkt;
2697 *got_packet = 1;
2698 return 0;
2700 end_nopkt:
2701 av_packet_unref(pkt);
2702 return status;
2705 static int vtenc_populate_extradata(AVCodecContext *avctx,
2706 CMVideoCodecType codec_type,
2707 CFStringRef profile_level,
2708 CFNumberRef gamma_level,
2709 CFDictionaryRef enc_info,
2710 CFDictionaryRef pixel_buffer_info)
2712 VTEncContext *vtctx = avctx->priv_data;
2713 int status;
2714 CVPixelBufferPoolRef pool = NULL;
2715 CVPixelBufferRef pix_buf = NULL;
2716 CMTime time;
2717 CMSampleBufferRef buf = NULL;
2718 BufNode *node = av_mallocz(sizeof(*node));
2720 if (!node)
2721 return AVERROR(ENOMEM);
2723 status = vtenc_create_encoder(avctx,
2724 codec_type,
2725 profile_level,
2726 gamma_level,
2727 enc_info,
2728 pixel_buffer_info,
2729 vtctx->constant_bit_rate,
2730 &vtctx->session);
2731 if (status)
2732 goto pe_cleanup;
2734 pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
2735 if(!pool){
2736 av_log(avctx, AV_LOG_ERROR, "Error getting pixel buffer pool.\n");
2737 status = AVERROR_EXTERNAL;
2738 goto pe_cleanup;
2741 status = CVPixelBufferPoolCreatePixelBuffer(NULL,
2742 pool,
2743 &pix_buf);
2745 if(status != kCVReturnSuccess){
2746 av_log(avctx, AV_LOG_ERROR, "Error creating frame from pool: %d\n", status);
2747 status = AVERROR_EXTERNAL;
2748 goto pe_cleanup;
2751 time = CMTimeMake(0, avctx->time_base.den);
2752 status = VTCompressionSessionEncodeFrame(vtctx->session,
2753 pix_buf,
2754 time,
2755 kCMTimeInvalid,
2756 NULL,
2757 node,
2758 NULL);
2760 if (status) {
2761 av_log(avctx,
2762 AV_LOG_ERROR,
2763 "Error sending frame for extradata: %d\n",
2764 status);
2765 status = AVERROR_EXTERNAL;
2766 goto pe_cleanup;
2768 node = NULL;
2770 //Populates extradata - output frames are flushed and param sets are available.
2771 status = VTCompressionSessionCompleteFrames(vtctx->session,
2772 kCMTimeIndefinite);
2774 if (status) {
2775 status = AVERROR_EXTERNAL;
2776 goto pe_cleanup;
2779 status = vtenc_q_pop(vtctx, 0, &buf, NULL);
2780 if (status) {
2781 av_log(avctx, AV_LOG_ERROR, "popping: %d\n", status);
2782 goto pe_cleanup;
2785 CFRelease(buf);
2789 pe_cleanup:
2790 CVPixelBufferRelease(pix_buf);
2792 if (status) {
2793 vtenc_reset(vtctx);
2794 } else if (vtctx->session) {
2795 CFRelease(vtctx->session);
2796 vtctx->session = NULL;
2799 vtctx->frame_ct_out = 0;
2801 av_assert0(status != 0 || (avctx->extradata && avctx->extradata_size > 0));
2802 if (!status)
2803 vtenc_free_buf_node(node);
2805 return status;
2808 static av_cold int vtenc_close(AVCodecContext *avctx)
2810 VTEncContext *vtctx = avctx->priv_data;
2812 if(!vtctx->session) {
2813 pthread_cond_destroy(&vtctx->cv_sample_sent);
2814 pthread_mutex_destroy(&vtctx->lock);
2815 return 0;
2818 VTCompressionSessionCompleteFrames(vtctx->session,
2819 kCMTimeIndefinite);
2820 clear_frame_queue(vtctx);
2821 pthread_cond_destroy(&vtctx->cv_sample_sent);
2822 pthread_mutex_destroy(&vtctx->lock);
2824 vtenc_reset(vtctx);
2826 return 0;
2829 static const enum AVPixelFormat avc_pix_fmts[] = {
2830 AV_PIX_FMT_VIDEOTOOLBOX,
2831 AV_PIX_FMT_NV12,
2832 AV_PIX_FMT_YUV420P,
2833 AV_PIX_FMT_NONE
2836 static const enum AVPixelFormat hevc_pix_fmts[] = {
2837 AV_PIX_FMT_VIDEOTOOLBOX,
2838 AV_PIX_FMT_NV12,
2839 AV_PIX_FMT_YUV420P,
2840 AV_PIX_FMT_BGRA,
2841 AV_PIX_FMT_P010LE,
2842 AV_PIX_FMT_NONE
2845 static const enum AVPixelFormat prores_pix_fmts[] = {
2846 AV_PIX_FMT_VIDEOTOOLBOX,
2847 AV_PIX_FMT_YUV420P,
2848 #ifdef kCFCoreFoundationVersionNumber10_7
2849 AV_PIX_FMT_NV12,
2850 AV_PIX_FMT_AYUV64,
2851 #endif
2852 AV_PIX_FMT_UYVY422,
2853 #if HAVE_KCVPIXELFORMATTYPE_420YPCBCR10BIPLANARVIDEORANGE
2854 AV_PIX_FMT_P010,
2855 #endif
2856 #if HAVE_KCVPIXELFORMATTYPE_422YPCBCR8BIPLANARVIDEORANGE
2857 AV_PIX_FMT_NV16,
2858 #endif
2859 #if HAVE_KCVPIXELFORMATTYPE_422YPCBCR10BIPLANARVIDEORANGE
2860 AV_PIX_FMT_P210,
2861 #endif
2862 #if HAVE_KCVPIXELFORMATTYPE_422YPCBCR16BIPLANARVIDEORANGE
2863 AV_PIX_FMT_P216,
2864 #endif
2865 #if HAVE_KCVPIXELFORMATTYPE_444YPCBCR8BIPLANARVIDEORANGE
2866 AV_PIX_FMT_NV24,
2867 #endif
2868 #if HAVE_KCVPIXELFORMATTYPE_444YPCBCR10BIPLANARVIDEORANGE
2869 AV_PIX_FMT_P410,
2870 #endif
2871 #if HAVE_KCVPIXELFORMATTYPE_444YPCBCR16BIPLANARVIDEORANGE
2872 AV_PIX_FMT_P416,
2873 #endif
2874 AV_PIX_FMT_BGRA,
2875 AV_PIX_FMT_NONE
/* Option flags used by every entry in the tables below. Parenthesized so the
 * expansion stays a single operand wherever it is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)

/* Option entries shared by the option tables of all encoders in this file.
 * OFFSET() must be defined at the point where this macro is expanded. */
#define COMMON_OPTIONS \
    { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL, \
        { .i64 = 0 }, 0, 1, VE }, \
    { "require_sw", "Require software encoding", OFFSET(require_sw), AV_OPT_TYPE_BOOL, \
        { .i64 = 0 }, 0, 1, VE }, \
    { "realtime", "Hint that encoding should happen in real-time if not faster (e.g. capturing from camera).", \
        OFFSET(realtime), AV_OPT_TYPE_BOOL, { .i64 = 0 }, -1, 1, VE }, \
    { "frames_before", "Other frames will come before the frames in this session. This helps smooth concatenation issues.", \
        OFFSET(frames_before), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, \
    { "frames_after", "Other frames will come after the frames in this session. This helps smooth concatenation issues.", \
        OFFSET(frames_after), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, \
    { "prio_speed", "prioritize encoding speed", OFFSET(prio_speed), AV_OPT_TYPE_BOOL, \
        { .i64 = -1 }, -1, 1, VE }, \
    { "power_efficient", "Set to 1 to enable more power-efficient encoding if supported.", \
        OFFSET(power_efficient), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, \
    { "max_ref_frames", \
        "Sets the maximum number of reference frames. This only has an effect when the value is less than the maximum allowed by the profile/level.", \
        OFFSET(max_ref_frames), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
2898 static const AVCodecHWConfigInternal *const vt_encode_hw_configs[] = {
2899 HW_CONFIG_ENCODER_FRAMES(VIDEOTOOLBOX, VIDEOTOOLBOX),
2900 NULL,
2903 #define OFFSET(x) offsetof(VTEncContext, x)
2904 static const AVOption h264_options[] = {
2905 { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = AV_PROFILE_UNKNOWN }, AV_PROFILE_UNKNOWN, INT_MAX, VE, .unit = "profile" },
2906 { "baseline", "Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_H264_BASELINE }, INT_MIN, INT_MAX, VE, .unit = "profile" },
2907 { "constrained_baseline", "Constrained Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_H264_CONSTRAINED_BASELINE }, INT_MIN, INT_MAX, VE, .unit = "profile" },
2908 { "main", "Main Profile", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_H264_MAIN }, INT_MIN, INT_MAX, VE, .unit = "profile" },
2909 { "high", "High Profile", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_H264_HIGH }, INT_MIN, INT_MAX, VE, .unit = "profile" },
2910 { "constrained_high", "Constrained High Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROFILE_CONSTRAINED_HIGH }, INT_MIN, INT_MAX, VE, .unit = "profile" },
2911 { "extended", "Extend Profile", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_H264_EXTENDED }, INT_MIN, INT_MAX, VE, .unit = "profile" },
2913 { "level", "Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, VE, .unit = "level" },
2914 { "1.3", "Level 1.3, only available with Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, INT_MIN, INT_MAX, VE, .unit = "level" },
2915 { "3.0", "Level 3.0", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, INT_MIN, INT_MAX, VE, .unit = "level" },
2916 { "3.1", "Level 3.1", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, INT_MIN, INT_MAX, VE, .unit = "level" },
2917 { "3.2", "Level 3.2", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, INT_MIN, INT_MAX, VE, .unit = "level" },
2918 { "4.0", "Level 4.0", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, INT_MIN, INT_MAX, VE, .unit = "level" },
2919 { "4.1", "Level 4.1", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, INT_MIN, INT_MAX, VE, .unit = "level" },
2920 { "4.2", "Level 4.2", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, INT_MIN, INT_MAX, VE, .unit = "level" },
2921 { "5.0", "Level 5.0", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, INT_MIN, INT_MAX, VE, .unit = "level" },
2922 { "5.1", "Level 5.1", 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, INT_MIN, INT_MAX, VE, .unit = "level" },
2923 { "5.2", "Level 5.2", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, INT_MIN, INT_MAX, VE, .unit = "level" },
2925 { "coder", "Entropy coding", OFFSET(entropy), AV_OPT_TYPE_INT, { .i64 = VT_ENTROPY_NOT_SET }, VT_ENTROPY_NOT_SET, VT_CABAC, VE, .unit = "coder" },
2926 { "cavlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, .unit = "coder" },
2927 { "vlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, .unit = "coder" },
2928 { "cabac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, .unit = "coder" },
2929 { "ac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, .unit = "coder" },
2931 { "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, VE },
2933 { "constant_bit_rate", "Require constant bit rate (macOS 13 or newer)", OFFSET(constant_bit_rate), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2934 { "max_slice_bytes", "Set the maximum number of bytes in an H.264 slice.", OFFSET(max_slice_bytes), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
2935 COMMON_OPTIONS
2936 { NULL },
2939 static const FFCodecDefault vt_defaults[] = {
2940 {"b", "0"},
2941 {"qmin", "-1"},
2942 {"qmax", "-1"},
2943 {NULL},
2946 static const AVClass h264_videotoolbox_class = {
2947 .class_name = "h264_videotoolbox",
2948 .item_name = av_default_item_name,
2949 .option = h264_options,
2950 .version = LIBAVUTIL_VERSION_INT,
2953 const FFCodec ff_h264_videotoolbox_encoder = {
2954 .p.name = "h264_videotoolbox",
2955 CODEC_LONG_NAME("VideoToolbox H.264 Encoder"),
2956 .p.type = AVMEDIA_TYPE_VIDEO,
2957 .p.id = AV_CODEC_ID_H264,
2958 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
2959 .priv_data_size = sizeof(VTEncContext),
2960 .p.pix_fmts = avc_pix_fmts,
2961 .defaults = vt_defaults,
2962 .init = vtenc_init,
2963 FF_CODEC_ENCODE_CB(vtenc_frame),
2964 .close = vtenc_close,
2965 .p.priv_class = &h264_videotoolbox_class,
2966 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
2967 .hw_configs = vt_encode_hw_configs,
2970 static const AVOption hevc_options[] = {
2971 { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = AV_PROFILE_UNKNOWN }, AV_PROFILE_UNKNOWN, INT_MAX, VE, .unit = "profile" },
2972 { "main", "Main Profile", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_HEVC_MAIN }, INT_MIN, INT_MAX, VE, .unit = "profile" },
2973 { "main10", "Main10 Profile", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_HEVC_MAIN_10 }, INT_MIN, INT_MAX, VE, .unit = "profile" },
2975 { "alpha_quality", "Compression quality for the alpha channel", OFFSET(alpha_quality), AV_OPT_TYPE_DOUBLE, { .dbl = 0.0 }, 0.0, 1.0, VE },
2977 { "constant_bit_rate", "Require constant bit rate (macOS 13 or newer)", OFFSET(constant_bit_rate), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2979 COMMON_OPTIONS
2980 { NULL },
2983 static const AVClass hevc_videotoolbox_class = {
2984 .class_name = "hevc_videotoolbox",
2985 .item_name = av_default_item_name,
2986 .option = hevc_options,
2987 .version = LIBAVUTIL_VERSION_INT,
2990 const FFCodec ff_hevc_videotoolbox_encoder = {
2991 .p.name = "hevc_videotoolbox",
2992 CODEC_LONG_NAME("VideoToolbox H.265 Encoder"),
2993 .p.type = AVMEDIA_TYPE_VIDEO,
2994 .p.id = AV_CODEC_ID_HEVC,
2995 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
2996 AV_CODEC_CAP_HARDWARE,
2997 .priv_data_size = sizeof(VTEncContext),
2998 .p.pix_fmts = hevc_pix_fmts,
2999 .defaults = vt_defaults,
3000 .color_ranges = AVCOL_RANGE_MPEG | AVCOL_RANGE_JPEG,
3001 .init = vtenc_init,
3002 FF_CODEC_ENCODE_CB(vtenc_frame),
3003 .close = vtenc_close,
3004 .p.priv_class = &hevc_videotoolbox_class,
3005 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
3006 .p.wrapper_name = "videotoolbox",
3007 .hw_configs = vt_encode_hw_configs,
3010 static const AVOption prores_options[] = {
3011 { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = AV_PROFILE_UNKNOWN }, AV_PROFILE_UNKNOWN, AV_PROFILE_PRORES_XQ, VE, .unit = "profile" },
3012 { "auto", "Automatically determine based on input format", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_UNKNOWN }, INT_MIN, INT_MAX, VE, .unit = "profile" },
3013 { "proxy", "ProRes 422 Proxy", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_PRORES_PROXY }, INT_MIN, INT_MAX, VE, .unit = "profile" },
3014 { "lt", "ProRes 422 LT", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_PRORES_LT }, INT_MIN, INT_MAX, VE, .unit = "profile" },
3015 { "standard", "ProRes 422", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_PRORES_STANDARD }, INT_MIN, INT_MAX, VE, .unit = "profile" },
3016 { "hq", "ProRes 422 HQ", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_PRORES_HQ }, INT_MIN, INT_MAX, VE, .unit = "profile" },
3017 { "4444", "ProRes 4444", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_PRORES_4444 }, INT_MIN, INT_MAX, VE, .unit = "profile" },
3018 { "xq", "ProRes 4444 XQ", 0, AV_OPT_TYPE_CONST, { .i64 = AV_PROFILE_PRORES_XQ }, INT_MIN, INT_MAX, VE, .unit = "profile" },
3020 COMMON_OPTIONS
3021 { NULL },
3024 static const AVClass prores_videotoolbox_class = {
3025 .class_name = "prores_videotoolbox",
3026 .item_name = av_default_item_name,
3027 .option = prores_options,
3028 .version = LIBAVUTIL_VERSION_INT,
3031 const FFCodec ff_prores_videotoolbox_encoder = {
3032 .p.name = "prores_videotoolbox",
3033 CODEC_LONG_NAME("VideoToolbox ProRes Encoder"),
3034 .p.type = AVMEDIA_TYPE_VIDEO,
3035 .p.id = AV_CODEC_ID_PRORES,
3036 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3037 AV_CODEC_CAP_HARDWARE,
3038 .priv_data_size = sizeof(VTEncContext),
3039 .p.pix_fmts = prores_pix_fmts,
3040 .defaults = vt_defaults,
3041 .color_ranges = AVCOL_RANGE_MPEG | AVCOL_RANGE_JPEG,
3042 .init = vtenc_init,
3043 FF_CODEC_ENCODE_CB(vtenc_frame),
3044 .close = vtenc_close,
3045 .p.priv_class = &prores_videotoolbox_class,
3046 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
3047 .p.wrapper_name = "videotoolbox",
3048 .hw_configs = vt_encode_hw_configs,