/*
 * Copyright (c) 2018 Sergey Lavrushkin
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Filter implementing image super-resolution using deep convolutional networks.
 * https://arxiv.org/abs/1501.00092
 * https://arxiv.org/abs/1609.05158
 */
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libswscale/swscale.h"

#include "dnn_filter_common.h"
#include "filters.h"
#include "video.h"
36 typedef struct SRContext
{
40 struct SwsContext
*sws_uv_scale
;
42 struct SwsContext
*sws_pre_scale
;
45 #define OFFSET(x) offsetof(SRContext, x)
46 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
47 static const AVOption sr_options
[] = {
48 { "dnn_backend", "DNN backend used for model execution", OFFSET(dnnctx
.backend_type
), AV_OPT_TYPE_INT
, { .i64
= 1 }, 0, 1, FLAGS
, .unit
= "backend" },
49 #if (CONFIG_LIBTENSORFLOW == 1)
50 { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST
, { .i64
= 1 }, 0, 0, FLAGS
, .unit
= "backend" },
52 { "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor
), AV_OPT_TYPE_INT
, { .i64
= 2 }, 2, 4, FLAGS
},
56 AVFILTER_DNN_DEFINE_CLASS(sr
, DNN_TF
);
58 static av_cold
int init(AVFilterContext
*context
)
60 SRContext
*sr_context
= context
->priv
;
61 return ff_dnn_init(&sr_context
->dnnctx
, DFT_PROCESS_FRAME
, context
);
64 static const enum AVPixelFormat pixel_formats
[] = {
65 AV_PIX_FMT_YUV420P
, AV_PIX_FMT_YUV422P
, AV_PIX_FMT_YUV444P
,
66 AV_PIX_FMT_YUV410P
, AV_PIX_FMT_YUV411P
, AV_PIX_FMT_GRAY8
,
70 static int config_output(AVFilterLink
*outlink
)
72 AVFilterContext
*context
= outlink
->src
;
73 SRContext
*ctx
= context
->priv
;
75 AVFilterLink
*inlink
= context
->inputs
[0];
76 int out_width
, out_height
;
78 // have a try run in case that the dnn model resize the frame
79 result
= ff_dnn_get_output(&ctx
->dnnctx
, inlink
->w
, inlink
->h
, &out_width
, &out_height
);
81 av_log(ctx
, AV_LOG_ERROR
, "could not get output from the model\n");
85 if (inlink
->w
!= out_width
|| inlink
->h
!= out_height
) {
87 outlink
->w
= out_width
;
88 outlink
->h
= out_height
;
89 if (inlink
->format
!= AV_PIX_FMT_GRAY8
){
90 const AVPixFmtDescriptor
*desc
= av_pix_fmt_desc_get(inlink
->format
);
91 int sws_src_h
= AV_CEIL_RSHIFT(inlink
->h
, desc
->log2_chroma_h
);
92 int sws_src_w
= AV_CEIL_RSHIFT(inlink
->w
, desc
->log2_chroma_w
);
93 int sws_dst_h
= AV_CEIL_RSHIFT(outlink
->h
, desc
->log2_chroma_h
);
94 int sws_dst_w
= AV_CEIL_RSHIFT(outlink
->w
, desc
->log2_chroma_w
);
95 ctx
->sws_uv_scale
= sws_getContext(sws_src_w
, sws_src_h
, AV_PIX_FMT_GRAY8
,
96 sws_dst_w
, sws_dst_h
, AV_PIX_FMT_GRAY8
,
97 SWS_BICUBIC
, NULL
, NULL
, NULL
);
98 ctx
->sws_uv_height
= sws_src_h
;
102 outlink
->w
= out_width
* ctx
->scale_factor
;
103 outlink
->h
= out_height
* ctx
->scale_factor
;
104 ctx
->sws_pre_scale
= sws_getContext(inlink
->w
, inlink
->h
, inlink
->format
,
105 outlink
->w
, outlink
->h
, outlink
->format
,
106 SWS_BICUBIC
, NULL
, NULL
, NULL
);
112 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*in
)
114 DNNAsyncStatusType async_state
= 0;
115 AVFilterContext
*context
= inlink
->dst
;
116 SRContext
*ctx
= context
->priv
;
117 AVFilterLink
*outlink
= context
->outputs
[0];
118 AVFrame
*out
= ff_get_video_buffer(outlink
, outlink
->w
, outlink
->h
);
122 av_log(context
, AV_LOG_ERROR
, "could not allocate memory for output frame\n");
124 return AVERROR(ENOMEM
);
126 av_frame_copy_props(out
, in
);
128 if (ctx
->sws_pre_scale
) {
129 sws_scale(ctx
->sws_pre_scale
,
130 (const uint8_t **)in
->data
, in
->linesize
, 0, in
->height
,
131 out
->data
, out
->linesize
);
132 dnn_result
= ff_dnn_execute_model(&ctx
->dnnctx
, out
, out
);
134 dnn_result
= ff_dnn_execute_model(&ctx
->dnnctx
, in
, out
);
137 if (dnn_result
!= 0){
138 av_log(ctx
, AV_LOG_ERROR
, "failed to execute loaded model\n");
145 async_state
= ff_dnn_get_result(&ctx
->dnnctx
, &in
, &out
);
146 } while (async_state
== DAST_NOT_READY
);
148 if (async_state
!= DAST_SUCCESS
)
149 return AVERROR(EINVAL
);
151 if (ctx
->sws_uv_scale
) {
152 sws_scale(ctx
->sws_uv_scale
, (const uint8_t **)(in
->data
+ 1), in
->linesize
+ 1,
153 0, ctx
->sws_uv_height
, out
->data
+ 1, out
->linesize
+ 1);
154 sws_scale(ctx
->sws_uv_scale
, (const uint8_t **)(in
->data
+ 2), in
->linesize
+ 2,
155 0, ctx
->sws_uv_height
, out
->data
+ 2, out
->linesize
+ 2);
160 return ff_filter_frame(outlink
, out
);
163 static av_cold
void uninit(AVFilterContext
*context
)
165 SRContext
*sr_context
= context
->priv
;
167 ff_dnn_uninit(&sr_context
->dnnctx
);
168 sws_freeContext(sr_context
->sws_uv_scale
);
169 sws_freeContext(sr_context
->sws_pre_scale
);
172 static const AVFilterPad sr_inputs
[] = {
175 .type
= AVMEDIA_TYPE_VIDEO
,
176 .filter_frame
= filter_frame
,
180 static const AVFilterPad sr_outputs
[] = {
183 .config_props
= config_output
,
184 .type
= AVMEDIA_TYPE_VIDEO
,
188 const FFFilter ff_vf_sr
= {
190 .p
.description
= NULL_IF_CONFIG_SMALL("Apply DNN-based image super resolution to the input."),
191 .p
.priv_class
= &sr_class
,
192 .priv_size
= sizeof(SRContext
),
193 .preinit
= ff_dnn_filter_init_child_class
,
196 FILTER_INPUTS(sr_inputs
),
197 FILTER_OUTPUTS(sr_outputs
),
198 FILTER_PIXFMTS_ARRAY(pixel_formats
),