From 2fc2b57b363e49b2dd6e443e51202a416a95ed1d Mon Sep 17 00:00:00 2001
From: Stefan-Gabriel Muscalu <stefan.gabriel.muscalu@gmail.com>
Date: Thu, 16 Apr 2020 18:23:28 +0300
Subject: [PATCH 01/10] add(vf_zoom): sub-pixel rendering

---
 libavfilter/vf_zoom.c | 143 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 135 insertions(+), 8 deletions(-)

diff --git a/libavfilter/vf_zoom.c b/libavfilter/vf_zoom.c
index 4d5fe7c2b93..2c36c109761 100644
--- a/libavfilter/vf_zoom.c
+++ b/libavfilter/vf_zoom.c
@@ -88,6 +88,10 @@ typedef struct ZoomContext {
     int hsub, vsub;
 } ZoomContext;
 
+#define SUBPIXEL_LUT_RESOLUTION 1000
+int  subpixel_LUT_inited = 0;
+char subpixel_LUT[256][256][SUBPIXEL_LUT_RESOLUTION];
+
 enum {
     FAST_BILINEAR   = SWS_FAST_BILINEAR,
     BILINEAR        = SWS_BILINEAR,
@@ -133,6 +137,21 @@ AVFILTER_DEFINE_CLASS(zoom);
 
 static av_cold int init(AVFilterContext *ctx)
 {
+
+    if(!subpixel_LUT_inited){
+        subpixel_LUT_inited = 1;
+
+        for(int i = 0; i < 256; i++) {
+            for(int j = 0; j < 256; j++) {
+                for(int k = 0; k < SUBPIXEL_LUT_RESOLUTION; k++) {
+                    subpixel_LUT[i][j][k] = i *      k / (float)SUBPIXEL_LUT_RESOLUTION +
+                                            j * (1 - k / (float)SUBPIXEL_LUT_RESOLUTION);
+                }
+            }
+        }
+
+    }
+
     return 0;
 }
 
@@ -296,6 +315,74 @@ static inline int normalize_xy(double d, int chroma_sub)
   return (int)d & ~((1 << chroma_sub) - 1);
 }
 
+static inline float decimal_part(float d){
+    return d - (int64_t)d;
+}
+
+static inline uint8_t *pointer_at(FFDrawContext *draw, uint8_t *data[], int linesize[],
+                           int plane, int x, int y)
+{
+    return data[plane] +
+           (y >> draw->vsub[plane]) * linesize[plane] +
+           (x >> draw->hsub[plane]) * draw->pixelstep[plane];
+}
+
+static void ff_copy_rectangle_subpixel(FFDrawContext *draw,
+                                uint8_t *dst[], int dst_linesize[],
+                                uint8_t *src[], int src_linesize[],
+                                int dst_x, int dst_y, int src_x, int src_y,
+                                int w, int h, float sub_x, float sub_y)
+{
+
+    int plane, y, x, wp, hp;
+    int plane_step, copy_w, plane_depth, pixel_step;
+    int start_x_src;
+    uint8_t *p, *q;
+
+    float inverted_sub_x = 1 - sub_x;
+
+    for (plane = 0; plane < draw->nb_planes; plane++) {
+        start_x_src = (src_x >> draw->hsub[plane]) * draw->pixelstep[plane];
+
+        p = pointer_at(draw, src, src_linesize, plane, src_x, src_y);
+        q = pointer_at(draw, dst, dst_linesize, plane, dst_x, dst_y);
+        wp = AV_CEIL_RSHIFT(w, draw->hsub[plane]) * draw->pixelstep[plane];
+        hp = AV_CEIL_RSHIFT(h, draw->vsub[plane]);
+
+        plane_step = draw->desc->comp[plane].step;
+        plane_depth = draw->desc->comp[plane].depth;
+        pixel_step = plane_step / (plane_depth / 8);
+        copy_w = wp / (plane_depth / 8);
+
+        for (y = 0; y < hp; y++) {
+
+            for(x = plane_step; x < copy_w; x ++) {
+                // x = (sin(t*PI/180) + 1)/2
+                // todo: optimize
+                // fixme: when feeding yuv444p10be it looks ok, yuv444p10le looks like it's backwards but it deosn't make sense
+                if (plane_depth == 8) {
+                    ((uint8_t*)q)[x - pixel_step] = ((uint8_t*)p)[x] * sub_x + ((uint8_t*)p)[x - pixel_step] * inverted_sub_x;
+                } else {
+                    ((uint16_t*)q)[x - pixel_step] = ((uint16_t*)p)[x] * sub_x + ((uint16_t*)p)[x - pixel_step] * inverted_sub_x;
+                }
+            }
+
+            // fill in the last column of pixels
+            for(x = copy_w - plane_step; x < copy_w; x++){
+                if (plane_depth == 8) {
+                    ((uint8_t*)q)[x] = ((uint8_t*)p)[x];
+                } else {
+                    ((uint16_t*)q)[x] = ((uint16_t*)p)[x];
+                }
+            }
+
+//            memcpy(q, p, wp);
+            p += src_linesize[plane];
+            q += dst_linesize[plane];
+        }
+    }
+}
+
 static int zoom_out(ZoomContext *zoom, AVFrame *in, AVFrame *out, AVFilterLink *outlink)
 {
     av_log(zoom, AV_LOG_DEBUG, "zoom out\n");
@@ -536,10 +623,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         av_log(zoom, AV_LOG_WARNING, "x position %.2f is out of range of [0-1]\n", zoom->x);
         zoom->x = av_clipd_c(zoom->x, 0, 1);
     }
-		if(zoom->y < 0 || zoom->y > 1){
-				av_log(zoom, AV_LOG_WARNING, "y position %.2f is out of range of [0-1]\n", zoom->y);
-        zoom->y = av_clipd_c(zoom->y, 0, 1);
-		}
+	if(zoom->y < 0 || zoom->y > 1){
+		av_log(zoom, AV_LOG_WARNING, "y position %.2f is out of range of [0-1]\n", zoom->y);
+		zoom->y = av_clipd_c(zoom->y, 0, 1);
+	}
     // copy in the background
     ff_fill_rectangle(&zoom->dc, &zoom->fillcolor,
                       out->data, out->linesize,
@@ -550,13 +637,53 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     if(zoom_val == 1) {
         // it's 1, just copy
         // quite an expensive noop :D
-        ff_copy_rectangle2(&zoom->dc,
+
+        int from_x = av_clip_c(in_w * zoom->x - out_w / 2.0, 0, in_w - out_w);
+        int from_y = av_clip_c(in_h * zoom->y - out_h / 2.0, 0, in_h - out_h);
+        float sub_x = decimal_part(av_clipf_c(in_w * zoom->x - out_w / 2.0, 0.0, in_w - out_w));
+        float sub_y = decimal_part(av_clipf_c(in_h * zoom->y - out_h / 2.0, 0.0, in_h - out_h));
+
+        ff_copy_rectangle_subpixel(&zoom->dc,
                            out->data, out->linesize,
                            in->data, in->linesize,
                            0, 0,
-                           av_clip_c(in_w * zoom->x - out_w / 2.0, 0, in_w - out_w),
-                           av_clip_c(in_h * zoom->y - out_h / 2.0, 0, in_h - out_h),
-                           out_w, out_h);
+                           from_x,
+                           from_y,
+                           out_w, out_h, sub_x, sub_y);
+
+
+//        printf("%d",subpixel_LUT[0][0][0]);
+//        float subpixel_x = av_clipf_c(in_w * zoom->x - out_w / 2.0, 0.0, in_w - out_w);
+//        float subpixel_y = av_clipf_c(in_h * zoom->y - out_h / 2.0, 0.0, in_h - out_h);
+//        subpixel_x -= (int)subpixel_x;
+//        subpixel_y -= (int)subpixel_y;
+//
+//
+//        printf("zoom->desc->nb_components %d %s\n", zoom->desc->nb_components, zoom->desc->name);
+//        printf("out->linesize %d %d %d\n", out->linesize[0], out->linesize[1], out->linesize[2]);
+//
+//        printf("subpixel_x %.3f subpixel_y %.3f\n", subpixel_x, subpixel_y);
+//
+//        for (int c = 0; c < zoom->desc->nb_components; c++){
+//                for (int y = 0; y < out_h; y++){
+//
+//                        int line_start = y * out->linesize[c];
+//
+//
+//
+//                        for(int x = 1; x < out_w; x++){
+//
+////                                printf("y %d x %d c %d loc %d linesize %d\n",y,x,c,line_start + x - 1, out->linesize[c]);
+//
+//                                out->data[c][line_start + x - 1] =
+//                                        out->data[c][line_start + x - 1] * (1-subpixel_x) +
+//                                        out->data[c][line_start + x + 0] * (subpixel_x);
+//                        }
+//
+//
+//                }
+//        }
+
     } else if (zoom_val <= 0) {
         // if it's 0 or lower do nothing
         // noop

From 390c9d853d7a647705ca57fbbade356157baba3f Mon Sep 17 00:00:00 2001
From: Stefan-Gabriel Muscalu <stefan.gabriel.muscalu@gmail.com>
Date: Thu, 23 Apr 2020 19:50:04 +0300
Subject: [PATCH 02/10] update(vf_zoom): implement subpixel rendering

Implement subpixel rendering with support for pixel formats that use
8-bit and 16-bit components.
---
 libavfilter/vf_zoom.c | 255 ++++++++++++++++++++++++++++--------------
 1 file changed, 173 insertions(+), 82 deletions(-)

diff --git a/libavfilter/vf_zoom.c b/libavfilter/vf_zoom.c
index 2c36c109761..3344ae5fda3 100644
--- a/libavfilter/vf_zoom.c
+++ b/libavfilter/vf_zoom.c
@@ -157,7 +157,58 @@ static av_cold int init(AVFilterContext *ctx)
 
 static int query_formats(AVFilterContext *ctx)
 {
-    return ff_set_common_formats(ctx, ff_draw_supported_pixel_formats(0));
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_RGB24,
+        AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_NV12,
+        AV_PIX_FMT_NV21,
+        AV_PIX_FMT_ARGB,
+        AV_PIX_FMT_RGBA,
+        AV_PIX_FMT_ABGR,
+        AV_PIX_FMT_BGRA,
+        AV_PIX_FMT_GRAY16LE,
+        AV_PIX_FMT_YUV440P,
+        AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUVA420P,
+        AV_PIX_FMT_YUV420P16LE,
+        AV_PIX_FMT_YUV422P16LE,
+        AV_PIX_FMT_YUV444P16LE,
+        AV_PIX_FMT_YA8,
+        AV_PIX_FMT_GBRP,
+        AV_PIX_FMT_GBRP16LE,
+        AV_PIX_FMT_YUVA422P,
+        AV_PIX_FMT_YUVA444P,
+        AV_PIX_FMT_YUVA420P16LE,
+        AV_PIX_FMT_YUVA422P16LE,
+        AV_PIX_FMT_YUVA444P16LE,
+        AV_PIX_FMT_NV16,
+        AV_PIX_FMT_YA16LE,
+        AV_PIX_FMT_GBRAP,
+        AV_PIX_FMT_GBRAP16LE,
+        AV_PIX_FMT_0RGB,
+        AV_PIX_FMT_RGB0,
+        AV_PIX_FMT_0BGR,
+        AV_PIX_FMT_BGR0,
+        AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_NV24,
+        AV_PIX_FMT_NV42,
+
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
 }
 static int config_props(AVFilterLink *inlink)
 {
@@ -327,59 +378,103 @@ static inline uint8_t *pointer_at(FFDrawContext *draw, uint8_t *data[], int line
            (x >> draw->hsub[plane]) * draw->pixelstep[plane];
 }
 
+
+#define intra_field_calc_8  ((uint8_t*)q)[x - pixel_step] = subpixel_LUT[ ((uint8_t*)p)[x] ]                    \
+                                                                        [ ((uint8_t*)p)[x - pixel_step] ]       \
+                                                                        [ subpix_x_bucket ]
+
+#define intra_field_calc_16 ((uint16_t*)q)[x - pixel_step] = ((uint16_t*)p)[x] * sub_x + ((uint16_t*)p)[x - pixel_step] * inverted_sub_x
+#define intra_field_calc_16_disabled ((uint16_t*)q)[x - pixel_step] = ((uint16_t*)p)[x - pixel_step]
+
+#define intra_field_copy_8  ((uint8_t*)q)[x] = ((uint8_t*)p)[x]
+#define intra_field_copy_16 ((uint16_t*)q)[x] = ((uint16_t*)p)[x]
+
+#define inter_field_calc_8  ((uint8_t*)q_)[x] = subpixel_LUT[ ((uint8_t*)q)[x] ] \
+                                                            [ ((uint8_t*)q_)[x] ] \
+                                                            [ subpix_y_bucket ]
+
+#define inter_field_calc_16 ((uint16_t*)q_)[x] = ((uint16_t*)q)[x] * sub_y + ((uint16_t*)q_)[x] * inverted_sub_y
+#define inter_field_calc_16_disabled {}while(0)
+
+#define ff_copy_rectangle_subpixel_mapping(intra_calc, intra_copy, inter_calc) ({ \
+        for (y = 0; y < hp; y++) {                                                                      \
+\
+            for(x = pixel_step; x < copy_w; x ++) {                                          \
+                intra_calc;                                                                             \
+            }                                                                                           \
+\
+            /* fill in the last column of pixels */                                                     \
+            /* this should set the last pixel as the one before * inverted_sub_x + current one * sub_x */\
+            /* as it is right now, it generates a 1px pop-in effect on the last column */               \
+            for(x = copy_w - pixel_step; x < copy_w; x++){                                              \
+                intra_copy;                                                                             \
+            }                                                                                           \
+\
+            if(y > 0) {                                                                                 \
+                p_ = p - src_linesize[plane];                                                           \
+                q_ = q - dst_linesize[plane];                                                           \
+\
+                for(x = 0; x < copy_w; x ++){                                                           \
+                    inter_calc;                                                                         \
+                }                                                                                       \
+            }                                                                                           \
+\
+            p += src_linesize[plane];                                                                   \
+            q += dst_linesize[plane];                                                                   \
+        }                                                                                               \
+        /* todo: interpolate last row too like interpolating last column */                             \
+})
+
 static void ff_copy_rectangle_subpixel(FFDrawContext *draw,
-                                uint8_t *dst[], int dst_linesize[],
-                                uint8_t *src[], int src_linesize[],
-                                int dst_x, int dst_y, int src_x, int src_y,
-                                int w, int h, float sub_x, float sub_y)
+                                       uint8_t *dst[], int dst_linesize[],
+                                       uint8_t *src[], int src_linesize[],
+                                       int dst_x, int dst_y,
+                                       int src_x, int src_y,
+                                       int w, int h,
+                                       float original_sub_x, float original_sub_y)
 {
 
     int plane, y, x, wp, hp;
     int plane_step, copy_w, plane_depth, pixel_step;
-    int start_x_src;
-    uint8_t *p, *q;
+    uint8_t *p, *q, *p_, *q_;
+    uint8_t vsub, hsub;
 
-    float inverted_sub_x = 1 - sub_x;
+    float sub_x, sub_y;
 
-    for (plane = 0; plane < draw->nb_planes; plane++) {
-        start_x_src = (src_x >> draw->hsub[plane]) * draw->pixelstep[plane];
+    float inverted_sub_x;
+    float inverted_sub_y;
+
+    uint16_t subpix_x_bucket = (sub_x * SUBPIXEL_LUT_RESOLUTION);
+    uint16_t subpix_y_bucket = (sub_y * SUBPIXEL_LUT_RESOLUTION);
 
+    for (plane = 0; plane < draw->nb_planes; plane++) {
         p = pointer_at(draw, src, src_linesize, plane, src_x, src_y);
         q = pointer_at(draw, dst, dst_linesize, plane, dst_x, dst_y);
-        wp = AV_CEIL_RSHIFT(w, draw->hsub[plane]) * draw->pixelstep[plane];
-        hp = AV_CEIL_RSHIFT(h, draw->vsub[plane]);
+        vsub = draw->vsub[plane];
+        hsub = draw->hsub[plane];
+        wp = AV_CEIL_RSHIFT(w, hsub) * draw->pixelstep[plane];
+        hp = AV_CEIL_RSHIFT(h, vsub);
 
         plane_step = draw->desc->comp[plane].step;
         plane_depth = draw->desc->comp[plane].depth;
         pixel_step = plane_step / (plane_depth / 8);
         copy_w = wp / (plane_depth / 8);
 
-        for (y = 0; y < hp; y++) {
+        sub_x = decimal_part((src_x + original_sub_x) / (1 << vsub));
+        sub_y = decimal_part((src_y + original_sub_y) / (1 << hsub));
 
-            for(x = plane_step; x < copy_w; x ++) {
-                // x = (sin(t*PI/180) + 1)/2
-                // todo: optimize
-                // fixme: when feeding yuv444p10be it looks ok, yuv444p10le looks like it's backwards but it deosn't make sense
-                if (plane_depth == 8) {
-                    ((uint8_t*)q)[x - pixel_step] = ((uint8_t*)p)[x] * sub_x + ((uint8_t*)p)[x - pixel_step] * inverted_sub_x;
-                } else {
-                    ((uint16_t*)q)[x - pixel_step] = ((uint16_t*)p)[x] * sub_x + ((uint16_t*)p)[x - pixel_step] * inverted_sub_x;
-                }
-            }
+        inverted_sub_x = 1 - sub_x;
+        inverted_sub_y = 1 - sub_y;
 
-            // fill in the last column of pixels
-            for(x = copy_w - plane_step; x < copy_w; x++){
-                if (plane_depth == 8) {
-                    ((uint8_t*)q)[x] = ((uint8_t*)p)[x];
-                } else {
-                    ((uint16_t*)q)[x] = ((uint16_t*)p)[x];
-                }
-            }
+        subpix_x_bucket = (sub_x * SUBPIXEL_LUT_RESOLUTION);
+        subpix_y_bucket = (sub_y * SUBPIXEL_LUT_RESOLUTION);
 
-//            memcpy(q, p, wp);
-            p += src_linesize[plane];
-            q += dst_linesize[plane];
+        if(plane_depth == 8) {
+            ff_copy_rectangle_subpixel_mapping(intra_field_calc_8,  intra_field_copy_8,  inter_field_calc_8);
+        }else{
+            ff_copy_rectangle_subpixel_mapping(intra_field_calc_16, intra_field_copy_16, inter_field_calc_16 );
         }
+
     }
 }
 
@@ -516,26 +611,50 @@ static int zoom_in (ZoomContext *zoom, AVFrame *in, AVFrame *out, AVFilterLink *
     av_log(zoom, AV_LOG_DEBUG, "dx: %d dy: %d\n", dx, dy);
 
 
-    int px[4], py[4];
-    uint8_t *input[4];
+//    int px[4], py[4];
+//    uint8_t *input[4];
 
-    uint8_t chroma_w = zoom->desc->log2_chroma_w;
-    uint8_t chroma_h = zoom->desc->log2_chroma_h;
-    av_log(zoom, AV_LOG_DEBUG, "chroma_w: %d chroma_h: %d\n", chroma_w, chroma_h);
-    av_log(zoom, AV_LOG_DEBUG, "l[0]: %d l[1]: %d l[2]: %d l[3]: %d \n", in->linesize[0], in->linesize[1],in->linesize[2],in->linesize[3]);
-    av_log(zoom, AV_LOG_DEBUG, "planes: %d\n", zoom->nb_planes);
-    av_log(zoom, AV_LOG_DEBUG, "components: %d\n", zoom->nb_components);
+//    uint8_t chroma_w = zoom->desc->log2_chroma_w;
+//    uint8_t chroma_h = zoom->desc->log2_chroma_h;
+//    av_log(zoom, AV_LOG_DEBUG, "chroma_w: %d chroma_h: %d\n", chroma_w, chroma_h);
+//    av_log(zoom, AV_LOG_DEBUG, "l[0]: %d l[1]: %d l[2]: %d l[3]: %d \n", in->linesize[0], in->linesize[1],in->linesize[2],in->linesize[3]);
+//    av_log(zoom, AV_LOG_DEBUG, "planes: %d\n", zoom->nb_planes);
+//    av_log(zoom, AV_LOG_DEBUG, "components: %d\n", zoom->nb_components);
 
     // cutoff top left
-    px[1] = px[2] = AV_CEIL_RSHIFT(dx, chroma_w);
+//    px[1] = px[2] = AV_CEIL_RSHIFT(dx, chroma_w);
     //                    support for yuv*, rgb*, etc... (any components & planes)
-    px[0] = px[3] = dx * (1.0 * zoom->nb_components / zoom->nb_planes);
+//    px[0] = px[3] = dx * (1.0 * zoom->nb_components / zoom->nb_planes);
+
+//    py[1] = py[2] = AV_CEIL_RSHIFT(dy, chroma_h);
+//    py[0] = py[3] = dy;
+
+    AVFrame* small_crop = alloc_frame(in->format, in_w, in_h);
+    if (!small_crop) {
+        ret = AVERROR(ENOMEM);
+        goto error;
+    }
+
+    // this is the old way, just need to branch out if we want
+    // the fast path or not
+//    for (int k = 0; in->data[k]; k++)
+//        input[k] = in->data[k] + py[k] * in->linesize[k] + px[k];
 
-    py[1] = py[2] = AV_CEIL_RSHIFT(dy, chroma_h);
-    py[0] = py[3] = dy;
+    // this is the new way
 
-    for (int k = 0; in->data[k]; k++)
-        input[k] = in->data[k] + py[k] * in->linesize[k] + px[k];
+    const float bound_pix_x = FFMIN(FFMAX(pix_x, 0), FFMAX(in->width - in_w, 0));
+    const float bound_pix_y = FFMIN(FFMAX(pix_y, 0), FFMAX(in->height - in_h, 0));
+
+    const float subpix_x = decimal_part(bound_pix_x);
+    const float subpix_y = decimal_part(bound_pix_y);
+//
+    ff_copy_rectangle_subpixel(&zoom->dc,
+                               small_crop->data, small_crop->linesize,
+                               in->data, in->linesize,
+                               0, 0,
+                               bound_pix_x, bound_pix_y,
+                               in_w, in_h,
+                               subpix_x, subpix_y);
 
     // stretching bottom right
     av_opt_set_int(zoom->sws, "srcw", in_w, 0);
@@ -550,8 +669,13 @@ static int zoom_in (ZoomContext *zoom, AVFrame *in, AVFrame *out, AVFilterLink *
     if ((ret = sws_init_context(zoom->sws, NULL, NULL)) < 0)
         goto error;
 
-    sws_scale(zoom->sws, (const uint8_t *const *)&input, in->linesize, 0, in_h, out->data, out->linesize);
+//    sws_scale(zoom->sws, (const uint8_t *const *)&input, in->linesize, 0, in_h, out->data, out->linesize);
+    sws_scale(zoom->sws,
+              small_crop->data, small_crop->linesize,
+              0, in_h,
+              out->data, out->linesize);
 
+    av_frame_free(&small_crop);
     sws_freeContext(zoom->sws);
     zoom->sws = NULL;
 
@@ -651,39 +775,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                            from_y,
                            out_w, out_h, sub_x, sub_y);
 
-
-//        printf("%d",subpixel_LUT[0][0][0]);
-//        float subpixel_x = av_clipf_c(in_w * zoom->x - out_w / 2.0, 0.0, in_w - out_w);
-//        float subpixel_y = av_clipf_c(in_h * zoom->y - out_h / 2.0, 0.0, in_h - out_h);
-//        subpixel_x -= (int)subpixel_x;
-//        subpixel_y -= (int)subpixel_y;
-//
-//
-//        printf("zoom->desc->nb_components %d %s\n", zoom->desc->nb_components, zoom->desc->name);
-//        printf("out->linesize %d %d %d\n", out->linesize[0], out->linesize[1], out->linesize[2]);
-//
-//        printf("subpixel_x %.3f subpixel_y %.3f\n", subpixel_x, subpixel_y);
-//
-//        for (int c = 0; c < zoom->desc->nb_components; c++){
-//                for (int y = 0; y < out_h; y++){
-//
-//                        int line_start = y * out->linesize[c];
-//
-//
-//
-//                        for(int x = 1; x < out_w; x++){
-//
-////                                printf("y %d x %d c %d loc %d linesize %d\n",y,x,c,line_start + x - 1, out->linesize[c]);
-//
-//                                out->data[c][line_start + x - 1] =
-//                                        out->data[c][line_start + x - 1] * (1-subpixel_x) +
-//                                        out->data[c][line_start + x + 0] * (subpixel_x);
-//                        }
-//
-//
-//                }
-//        }
-
     } else if (zoom_val <= 0) {
         // if it's 0 or lower do nothing
         // noop

From baf6d403339c2c2ebe347d56ba53b034b97de759 Mon Sep 17 00:00:00 2001
From: Stefan-Gabriel Muscalu <stefan.gabriel.muscalu@gmail.com>
Date: Tue, 19 May 2020 16:21:16 +0300
Subject: [PATCH 03/10] vf_zoom: plan for subpixel fixing

---
 libavfilter/vf_zoom.c | 71 ++++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 38 deletions(-)

diff --git a/libavfilter/vf_zoom.c b/libavfilter/vf_zoom.c
index 3344ae5fda3..eb036499767 100644
--- a/libavfilter/vf_zoom.c
+++ b/libavfilter/vf_zoom.c
@@ -338,6 +338,7 @@ static int config_output(AVFilterLink *outlink)
     if(outlink->h <= 0){
       outlink->h = 2;
     }
+
     return 0;
 }
 
@@ -378,8 +379,8 @@ static inline uint8_t *pointer_at(FFDrawContext *draw, uint8_t *data[], int line
            (x >> draw->hsub[plane]) * draw->pixelstep[plane];
 }
 
-
-#define intra_field_calc_8  ((uint8_t*)q)[x - pixel_step] = subpixel_LUT[ ((uint8_t*)p)[x] ]                    \
+#define intra_field_calc_8      ((uint8_t*)q)[x - pixel_step] = ((uint8_t*)p)[x] * sub_x + ((uint8_t*)p)[x - pixel_step] * inverted_sub_x
+#define intra_field_calc_8_opt  ((uint8_t*)q)[x - pixel_step] = subpixel_LUT[ ((uint8_t*)p)[x] ]                    \
                                                                         [ ((uint8_t*)p)[x - pixel_step] ]       \
                                                                         [ subpix_x_bucket ]
 
@@ -389,7 +390,8 @@ static inline uint8_t *pointer_at(FFDrawContext *draw, uint8_t *data[], int line
 #define intra_field_copy_8  ((uint8_t*)q)[x] = ((uint8_t*)p)[x]
 #define intra_field_copy_16 ((uint16_t*)q)[x] = ((uint16_t*)p)[x]
 
-#define inter_field_calc_8  ((uint8_t*)q_)[x] = subpixel_LUT[ ((uint8_t*)q)[x] ] \
+#define inter_field_calc_8      ((uint8_t*)q_)[x] = ((uint8_t*)q)[x] * sub_y + ((uint8_t*)q_)[x] * inverted_sub_y
+#define inter_field_calc_8_opt  ((uint8_t*)q_)[x] = subpixel_LUT[ ((uint8_t*)q)[x] ] \
                                                             [ ((uint8_t*)q_)[x] ] \
                                                             [ subpix_y_bucket ]
 
@@ -470,7 +472,7 @@ static void ff_copy_rectangle_subpixel(FFDrawContext *draw,
         subpix_y_bucket = (sub_y * SUBPIXEL_LUT_RESOLUTION);
 
         if(plane_depth == 8) {
-            ff_copy_rectangle_subpixel_mapping(intra_field_calc_8,  intra_field_copy_8,  inter_field_calc_8);
+            ff_copy_rectangle_subpixel_mapping(intra_field_calc_8_opt,  intra_field_copy_8,  inter_field_calc_8_opt);
         }else{
             ff_copy_rectangle_subpixel_mapping(intra_field_calc_16, intra_field_copy_16, inter_field_calc_16 );
         }
@@ -569,17 +571,22 @@ static int zoom_in (ZoomContext *zoom, AVFrame *in, AVFrame *out, AVFilterLink *
 
     const double zoom_val = zoom->zoom;
 
-          int in_w  = in->width / zoom_val;
-          int in_h  = in->height / zoom_val;
+        float in_w_f = in->width / zoom_val;
+        float in_h_f = in->height / zoom_val;
+
+          int in_w  = in_w_f;
+          int in_h  = in_h_f;
     const int in_f  = in->format;
 
-    const double originalAspectRatio = 1.0 * in_w / in_h;
+    const double originalAspectRatio = 1.0 * in_w_f / in_h_f;
     const double aspectRatio = zoom->outAspectRatio;
 
     if(originalAspectRatio < aspectRatio){
-      in_h = round(in_h * (originalAspectRatio / aspectRatio));
+      in_h_f = in_h_f * (originalAspectRatio / aspectRatio);
+      in_h = round(in_h_f);
     }else{
-      in_w = round(in_w * (aspectRatio / originalAspectRatio));
+      in_w_f = in_w_f * (aspectRatio / originalAspectRatio);
+      in_w = round(in_w_f);
     }
 
     const int out_w = out->width;
@@ -596,8 +603,8 @@ static int zoom_in (ZoomContext *zoom, AVFrame *in, AVFrame *out, AVFilterLink *
     av_log(zoom, AV_LOG_DEBUG, "in_w: %d in_h: %d\n", in_w, in_h);
     av_log(zoom, AV_LOG_DEBUG, "out_w: %d out_h: %d\n", out_w, out_h);
 
-    const double pix_x = in->width * x - in_w / 2.0;
-    const double pix_y = in->height * y - in_h / 2.0;
+    const double pix_x = in->width * x - in_w_f / 2.0;
+    const double pix_y = in->height * y - in_h_f / 2.0;
 
     const int dx = normalize_xy(
         FFMIN(FFMAX(pix_x, 0), FFMAX(in->width - in_w, 0)),
@@ -610,37 +617,23 @@ static int zoom_in (ZoomContext *zoom, AVFrame *in, AVFrame *out, AVFilterLink *
     av_log(zoom, AV_LOG_DEBUG, "pix_x: %.3f pix_y: %.3f\n", pix_x, pix_y);
     av_log(zoom, AV_LOG_DEBUG, "dx: %d dy: %d\n", dx, dy);
 
-
-//    int px[4], py[4];
-//    uint8_t *input[4];
-
-//    uint8_t chroma_w = zoom->desc->log2_chroma_w;
-//    uint8_t chroma_h = zoom->desc->log2_chroma_h;
-//    av_log(zoom, AV_LOG_DEBUG, "chroma_w: %d chroma_h: %d\n", chroma_w, chroma_h);
-//    av_log(zoom, AV_LOG_DEBUG, "l[0]: %d l[1]: %d l[2]: %d l[3]: %d \n", in->linesize[0], in->linesize[1],in->linesize[2],in->linesize[3]);
-//    av_log(zoom, AV_LOG_DEBUG, "planes: %d\n", zoom->nb_planes);
-//    av_log(zoom, AV_LOG_DEBUG, "components: %d\n", zoom->nb_components);
-
-    // cutoff top left
-//    px[1] = px[2] = AV_CEIL_RSHIFT(dx, chroma_w);
-    //                    support for yuv*, rgb*, etc... (any components & planes)
-//    px[0] = px[3] = dx * (1.0 * zoom->nb_components / zoom->nb_planes);
-
-//    py[1] = py[2] = AV_CEIL_RSHIFT(dy, chroma_h);
-//    py[0] = py[3] = dy;
-
     AVFrame* small_crop = alloc_frame(in->format, in_w, in_h);
     if (!small_crop) {
         ret = AVERROR(ENOMEM);
         goto error;
     }
 
-    // this is the old way, just need to branch out if we want
-    // the fast path or not
-//    for (int k = 0; in->data[k]; k++)
-//        input[k] = in->data[k] + py[k] * in->linesize[k] + px[k];
-
-    // this is the new way
+    // fixme: there is an issue here when zooming in/out slowly
+    // the size changes and due to the scaling up being to the same values, the top/left jitters
+    //
+    // battle plan:
+    // 1. find the crop area (x, y, w and h according to new AR)
+    // 2. expand it by vsub/hsub px top/bottom/left/right where possible
+    // 3. crop with expanded area
+    // 4. scale up by zoom amount (use old zoom_in code from https://github.com/findie/FFmpeg/blob/190eaf3c027d1a343ac2b357d66a308191a3b448/libavfilter/vf_zoom.c#L417-L469)
+    // 5. calculate the new padding size (vsub/hsub * zoom)
+    // 6. use ff_copy_rectangle_subpixel to get the subpixel copy of the actual window
+    // 7. use that as final image
 
     const float bound_pix_x = FFMIN(FFMAX(pix_x, 0), FFMAX(in->width - in_w, 0));
     const float bound_pix_y = FFMIN(FFMAX(pix_y, 0), FFMAX(in->height - in_h, 0));
@@ -648,13 +641,15 @@ static int zoom_in (ZoomContext *zoom, AVFrame *in, AVFrame *out, AVFilterLink *
     const float subpix_x = decimal_part(bound_pix_x);
     const float subpix_y = decimal_part(bound_pix_y);
 //
+    printf("%.3f %.3f %.5f | %.3f x %.3f | %d x %d \n", bound_pix_x, bound_pix_y, zoom->zoom, in_w_f, in_h_f, in_w, in_h);
+
     ff_copy_rectangle_subpixel(&zoom->dc,
                                small_crop->data, small_crop->linesize,
                                in->data, in->linesize,
                                0, 0,
                                bound_pix_x, bound_pix_y,
-                               in_w, in_h,
-                               subpix_x, subpix_y);
+                               in_w, in_h);//,
+//                               subpix_x, subpix_y);
 
     // stretching bottom right
     av_opt_set_int(zoom->sws, "srcw", in_w, 0);

From 348abaff0a7b45c861d3342f86191245d718be0b Mon Sep 17 00:00:00 2001
From: Stefan-Gabriel Muscalu <stefan.gabriel.muscalu@gmail.com>
Date: Fri, 3 Jul 2020 17:49:34 +0300
Subject: [PATCH 04/10] update(vf_zoom): reworked subpixel rendering to use
 projections

Removed support for the packed RGB pixel formats (RGB24, BGR24, ARGB,
RGBA, ABGR, BGRA, 0RGB, RGB0, 0BGR, BGR0).
Known issue: support for 16-bit formats is currently broken.
---
 libavfilter/vf_zoom.c | 318 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 274 insertions(+), 44 deletions(-)

diff --git a/libavfilter/vf_zoom.c b/libavfilter/vf_zoom.c
index eb036499767..9528ba7ed2b 100644
--- a/libavfilter/vf_zoom.c
+++ b/libavfilter/vf_zoom.c
@@ -159,8 +159,6 @@ static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat pix_fmts[] = {
         AV_PIX_FMT_YUV420P,
-        AV_PIX_FMT_RGB24,
-        AV_PIX_FMT_BGR24,
         AV_PIX_FMT_YUV422P,
         AV_PIX_FMT_YUV444P,
         AV_PIX_FMT_YUV410P,
@@ -171,10 +169,6 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_YUVJ444P,
         AV_PIX_FMT_NV12,
         AV_PIX_FMT_NV21,
-        AV_PIX_FMT_ARGB,
-        AV_PIX_FMT_RGBA,
-        AV_PIX_FMT_ABGR,
-        AV_PIX_FMT_BGRA,
         AV_PIX_FMT_GRAY16LE,
         AV_PIX_FMT_YUV440P,
         AV_PIX_FMT_YUVJ440P,
@@ -194,10 +188,6 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_YA16LE,
         AV_PIX_FMT_GBRAP,
         AV_PIX_FMT_GBRAP16LE,
-        AV_PIX_FMT_0RGB,
-        AV_PIX_FMT_RGB0,
-        AV_PIX_FMT_0BGR,
-        AV_PIX_FMT_BGR0,
         AV_PIX_FMT_YUVJ411P,
         AV_PIX_FMT_NV24,
         AV_PIX_FMT_NV42,
@@ -371,7 +361,7 @@ static inline float decimal_part(float d){
     return d - (int64_t)d;
 }
 
-static inline uint8_t *pointer_at(FFDrawContext *draw, uint8_t *data[], int linesize[],
+static inline uint8_t *pointer_at(const FFDrawContext *draw, uint8_t *data[], int linesize[],
                            int plane, int x, int y)
 {
     return data[plane] +
@@ -379,6 +369,244 @@ static inline uint8_t *pointer_at(FFDrawContext *draw, uint8_t *data[], int line
            (x >> draw->hsub[plane]) * draw->pixelstep[plane];
 }
 
+
+// this function takes x/y already scaled to the chrome sub
+// supports only planar formats
+static inline uint8_t sample8_bilinear_at(uint8_t *data,
+                                          int linesize, int pixelstep,
+                                          float x, float y,
+                                          int w, int h,
+                                          int8_t oob_value
+                                          )
+{
+    // Bilinear sample of one 8-bit plane at (x, y), both in pixels of that plane.
+    // Positions outside [0, w - 1] x [0, h - 1] return oob_value instead.
+    int ix, iy, step_x, step_y;
+    float fracx, fracy, ifracx, ifracy;
+    float lin0, lin1;
+    uint8_t *a11, *a12, *a21, *a22;
+
+    // negated test so that NaN coordinates are rejected as well
+    if(!(x >= 0 && y >= 0 && x <= w - 1 && y <= h - 1)){
+        return oob_value;
+    }
+
+    // truncate only after the range check: out-of-range float -> int is undefined
+    ix = x;
+    iy = y;
+    fracx = x - ix;
+    fracy = y - iy;
+    ifracx = 1.0f - fracx;
+    ifracy = 1.0f - fracy;
+
+    // on the last column/row the neighbour has weight 0, so reuse the current
+    // sample instead of reading past the end of the row/plane
+    step_x = ix + 1 < w ? pixelstep : 0;
+    step_y = iy + 1 < h ? linesize : 0;
+
+    a11 = data + iy * linesize + ix * pixelstep; // top left
+    a12 = a11 + step_x;                          // top right
+    a21 = a11 + step_y;                          // bottom left
+    a22 = a21 + step_x;                          // bottom right
+    lin0 = ifracx * (*a11) + fracx * (*a12);     // top interp
+    lin1 = ifracx * (*a21) + fracx * (*a22);     // bottom interp
+    return ifracy * lin0 + fracy * lin1;
+}
+
+typedef struct float2 {
+    float x, y;
+} float2;
+
+static inline float2 scale_coords_pxout_to_pxin(float2 pix_out, float2 dim_out, float ZOOM, float2 dim_in, float2 PAN) {
+    // Maps an output pixel position (pix_out) to the input position (pix_in) it is sampled from.
+    float2 pix_in;
+    // Both branches divide by ZOOM; nothing here guards against ZOOM == 0.
+    if (ZOOM < 1) {
+        //                                               canvas offset   obj scaled center offset   scaled px location
+        // px_out                                      = dim_out * PAN - dim_in / 2 * ZOOM        + px_in * ZOOM
+
+        // -dim_out * PAN + px_out                     = (-dim_in/2 + px_in) * ZOOM
+
+        // (-dim_out * PAN + px_out) / ZOOM            = -dim_in/2 + px_in
+
+        // (-dim_out * PAN + px_out) / ZOOM + dim_in/2 = px_in
+
+        pix_in.x = (-dim_out.x * PAN.x + pix_out.x) / ZOOM + dim_in.x / 2;
+        pix_in.y = (-dim_out.y * PAN.y + pix_out.y) / ZOOM + dim_in.y / 2;
+    }
+    // zoom >= 1
+    else {
+        // PAN scales dim_in here: the output centre (dim_out / 2) samples dim_in * PAN
+        pix_in.x = (pix_out.x - dim_out.x / 2.0f) / ZOOM + dim_in.x * PAN.x;
+        pix_in.y = (pix_out.y - dim_out.y / 2.0f) / ZOOM + dim_in.y * PAN.y;
+    }
+
+    return pix_in;
+}
+
+
+static inline float2 scale_coords_find_PAN(float2 pix_in, float2 pix_out, float2 dim_out, float ZOOM, float2 dim_in) {
+    float2 PAN;
+    // Inverse of scale_coords_pxout_to_pxin: solves for the PAN at which pix_out samples pix_in.
+    if(ZOOM < 1){
+        // taken from scale_coords_pxout_to_pxin
+        // pix_in                                                = (-dim_out * PAN + pix_out) / ZOOM + dim_in / 2;
+        // pix_in - dim_in / 2                                   = (-dim_out * PAN + pix_out) / ZOOM
+        // (pix_in - dim_in / 2) * ZOOM                          = -dim_out * PAN + pix_out
+        // (pix_in - dim_in / 2) * ZOOM - pix_out                = -dim_out * PAN
+        // ((pix_in - dim_in / 2) * ZOOM - pix_out) / (-dim_out) = PAN
+
+        PAN.x = ((pix_in.x - dim_in.x / 2.0f) * ZOOM - pix_out.x) / (-dim_out.x);
+        PAN.y = ((pix_in.y - dim_in.y / 2.0f) * ZOOM - pix_out.y) / (-dim_out.y);
+    }
+    // zoom >= 1
+    else {
+        // taken from scale_coords_pxout_to_pxin
+        // pix_in                                           = (pix_out - dim_out/2) / ZOOM + dim_in * PAN
+        // pix_in - (pix_out - dim_out/2) / ZOOM            = dim_in * PAN
+        // (pix_in - (pix_out - dim_out/2) / ZOOM) / dim_in = PAN
+        // this branch divides by both ZOOM and dim_in
+        PAN.x = (pix_in.x - (pix_out.x - dim_out.x / 2.0f) / ZOOM) / dim_in.x;
+        PAN.y = (pix_in.y - (pix_out.y - dim_out.y / 2.0f) / ZOOM) / dim_in.y;
+    }
+    return PAN;
+}
+
+static inline float clampf(float val, float min, float max) { // limits val to the range [min, max]
+    if(val < min) return min; // lower bound is tested first, so it wins when min > max
+    if(val > max) return max;
+    return val;
+}
+
+static inline float2 clamp_pan_inbounds(float2 PAN, float2 dim_out, float ZOOM, float2 dim_in) {
+    // Restricts PAN to the interval between the two limit PAN values computed below.
+    float2 adjusted_dim_in = {dim_in.x * ZOOM, dim_in.y * ZOOM};
+    // top_left: PAN at which output pixel (-1, -1) samples input pixel (0, 0)
+    float2 top_left = scale_coords_find_PAN((float2){0.0f, 0.0f}, (float2){-1.0f, -1.0f}, dim_out, ZOOM, dim_in);
+    // bottom_right: PAN at which output pixel (dim_out + ZOOM) samples input pixel dim_in
+    float2 bottom_right = scale_coords_find_PAN((float2){ dim_in.x + 0.0f,  dim_in.y + 0.0f},
+                                                (float2){dim_out.x + ZOOM, dim_out.y + ZOOM},
+                                                dim_out, ZOOM, dim_in);
+
+    float2 CLAMPED_PAN = {0.0f, 0.0f};
+
+    if(ZOOM < 1) {
+        // scaled input fits inside the output on both axes
+        if(adjusted_dim_in.x <= dim_out.x && adjusted_dim_in.y <= dim_out.y) {
+            CLAMPED_PAN.x = clampf(PAN.x, top_left.x, bottom_right.x);
+            CLAMPED_PAN.y = clampf(PAN.y, top_left.y, bottom_right.y);
+        }
+            // if it doesn't fit
+        else {
+            // at least one axis overflows the output; each axis is clamped on its own
+            CLAMPED_PAN.x = adjusted_dim_in.x > dim_out.x ?
+                            // doesn't fit on W
+                            FFMAX(FFMIN(1 - PAN.x, top_left.x), bottom_right.x):
+                            // fits on W
+                            FFMAX(FFMIN(PAN.x, bottom_right.x), top_left.x);
+            CLAMPED_PAN.y = adjusted_dim_in.y > dim_out.y ?
+                            // doesn't fit on H
+                            FFMAX(FFMIN(1 - PAN.y, top_left.y), bottom_right.y):
+                            // fits on H
+                            FFMAX(FFMIN(PAN.y, bottom_right.y), top_left.y);
+
+        }
+    } else {
+
+        CLAMPED_PAN.x = clampf(PAN.x, top_left.x, bottom_right.x);
+        CLAMPED_PAN.y = clampf(PAN.y, top_left.y, bottom_right.y);
+    }
+
+    return CLAMPED_PAN;
+}
+
+
+static void apply_zoom_plane(float ZOOM, float2 PAN, int plane,
+                             int in_pix_step,
+                             int out_pix_step,
+                             uint8_t *in, int linesize_in,
+                             uint8_t *out, int linesize_out,
+                             float2 dim_in,
+                             float2 dim_out) {
+    int x, y;
+
+    int h = dim_out.y;
+    int w = dim_out.x;
+
+    for(y = 0; y < h; y++){
+        for(x = 0; x < w; x++){
+            float2 src_location = scale_coords_pxout_to_pxin(
+                (float2){(float)x, (float)y},
+                dim_out,
+                ZOOM,
+                dim_in,
+                PAN
+            );
+            int8_t value = sample8_bilinear_at(
+                in,
+                linesize_in,
+                in_pix_step,
+                src_location.x, src_location.y,
+                dim_in.x, dim_in.y,
+                255 // out of bounds value
+            );
+
+            int8_t *dst_pixel = out +
+                                y * linesize_out +
+                                x * out_pix_step;
+
+            (*dst_pixel) = value;
+        }
+    }
+
+
+}
+
+
+static int apply_zoom(ZoomContext *s, AVFrame *in, AVFrame *out){
+    int x, y, plane;
+
+    int out_w = out->width;
+    int out_h = out->height;
+    int in_w = in->width;
+    int in_h = in->height;
+
+    int hsub = s->hsub;
+    int vsub = s->vsub;
+    const FFDrawContext *draw = &s->dc;
+    const struct AVPixFmtDescriptor *desc = draw->desc;
+    // Resamples each plane of in into out, using the zoom and pan stored in s.
+    // Planes 1 and 2 use dimensions shifted by hsub/vsub; all other planes use the full frame size.
+    float2 dim_in_full  = {(float) in_w, (float) in_h};
+    float2 dim_out_full = {(float)out_w, (float)out_h};
+
+    float2 dim_in_chroma  = {(float)( in_w >> hsub), (float)( in_h >> vsub)};
+    float2 dim_out_chroma = {(float)(out_w >> hsub), (float)(out_h >> vsub)};
+
+    const float  ZOOM           = s->zoom;
+    const float2 UNCLAMPED_PAN  = {s->x, s->y};
+          float2 PAN            = clamp_pan_inbounds(UNCLAMPED_PAN, dim_out_full, ZOOM, dim_in_full);
+    // debug output goes through av_log so that nothing is written to stdout
+    av_log(s, AV_LOG_DEBUG, "ZOOM %.3f\n", ZOOM);
+    av_log(s, AV_LOG_DEBUG, "UNCLAMPED_PAN x %.3f y %.3f\n", UNCLAMPED_PAN.x, UNCLAMPED_PAN.y);
+    av_log(s, AV_LOG_DEBUG, "PAN x %.3f y %.3f\n", PAN.x, PAN.y);
+    // NOTE(review): the bound is nb_components but the index selects data[]/linesize[] planes - confirm they match for every accepted format
+    for(plane = 0; plane < desc->nb_components; plane++){
+        float2 dim_in  = plane == 1 || plane == 2 ? dim_in_chroma  : dim_in_full;
+        float2 dim_out = plane == 1 || plane == 2 ? dim_out_chroma : dim_out_full;
+
+        apply_zoom_plane(ZOOM, PAN, plane,
+                         desc->comp[plane].step, // in pix step
+                         desc->comp[plane].step, // out pix step
+                         in->data[plane], in->linesize[plane],
+                         out->data[plane], out->linesize[plane],
+                         dim_in,
+                         dim_out);
+    }
+
+    return 0;
+}
+
 #define intra_field_calc_8      ((uint8_t*)q)[x - pixel_step] = ((uint8_t*)p)[x] * sub_x + ((uint8_t*)p)[x - pixel_step] * inverted_sub_x
 #define intra_field_calc_8_opt  ((uint8_t*)q)[x - pixel_step] = subpixel_LUT[ ((uint8_t*)p)[x] ]                    \
                                                                         [ ((uint8_t*)p)[x - pixel_step] ]       \
@@ -648,8 +876,8 @@ static int zoom_in (ZoomContext *zoom, AVFrame *in, AVFrame *out, AVFilterLink *
                                in->data, in->linesize,
                                0, 0,
                                bound_pix_x, bound_pix_y,
-                               in_w, in_h);//,
-//                               subpix_x, subpix_y);
+                               in_w, in_h,
+                               subpix_x, subpix_y);
 
     // stretching bottom right
     av_opt_set_int(zoom->sws, "srcw", in_w, 0);
@@ -752,38 +980,40 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                       0, 0,
                       out_w, out_h);
 
+	apply_zoom(zoom, in, out);
+
     // scale
-    if(zoom_val == 1) {
-        // it's 1, just copy
-        // quite an expensive noop :D
-
-        int from_x = av_clip_c(in_w * zoom->x - out_w / 2.0, 0, in_w - out_w);
-        int from_y = av_clip_c(in_h * zoom->y - out_h / 2.0, 0, in_h - out_h);
-        float sub_x = decimal_part(av_clipf_c(in_w * zoom->x - out_w / 2.0, 0.0, in_w - out_w));
-        float sub_y = decimal_part(av_clipf_c(in_h * zoom->y - out_h / 2.0, 0.0, in_h - out_h));
-
-        ff_copy_rectangle_subpixel(&zoom->dc,
-                           out->data, out->linesize,
-                           in->data, in->linesize,
-                           0, 0,
-                           from_x,
-                           from_y,
-                           out_w, out_h, sub_x, sub_y);
-
-    } else if (zoom_val <= 0) {
-        // if it's 0 or lower do nothing
-        // noop
-    } else if (zoom_val < 1) {
-        // zoom in (0, 1)
-        ret = zoom_out(zoom, in, out, outlink);
-        if(ret)
-            goto error;
-    } else if (zoom_val > 1){
-        // zoom in (1, +ing)
-        ret = zoom_in(zoom, in, out, outlink);
-        if(ret)
-            goto error;
-    }
+//    if(zoom_val == 1) {
+//        // it's 1, just copy
+//        // quite an expensive noop :D
+//
+//        int from_x = av_clip_c(in_w * zoom->x - out_w / 2.0, 0, in_w - out_w);
+//        int from_y = av_clip_c(in_h * zoom->y - out_h / 2.0, 0, in_h - out_h);
+//        float sub_x = decimal_part(av_clipf_c(in_w * zoom->x - out_w / 2.0, 0.0, in_w - out_w));
+//        float sub_y = decimal_part(av_clipf_c(in_h * zoom->y - out_h / 2.0, 0.0, in_h - out_h));
+//
+//        ff_copy_rectangle_subpixel(&zoom->dc,
+//                           out->data, out->linesize,
+//                           in->data, in->linesize,
+//                           0, 0,
+//                           from_x,
+//                           from_y,
+//                           out_w, out_h, sub_x, sub_y);
+//
+//    } else if (zoom_val <= 0) {
+//        // if it's 0 or lower do nothing
+//        // noop
+//    } else if (zoom_val < 1) {
+//        // zoom in (0, 1)
+//        ret = zoom_out(zoom, in, out, outlink);
+//        if(ret)
+//            goto error;
+//    } else if (zoom_val > 1){
+//        // zoom in (1, +ing)
+//        ret = zoom_in(zoom, in, out, outlink);
+//        if(ret)
+//            goto error;
+//    }
 
     av_frame_free(&in);
     return ff_filter_frame(outlink, out);

From 43103ba559e4f25ac7783227858e31ba4c79c26a Mon Sep 17 00:00:00 2001
From: Stefan-Gabriel Muscalu <stefan.gabriel.muscalu@gmail.com>
Date: Fri, 3 Jul 2020 19:16:08 +0300
Subject: [PATCH 05/10] update(vf_zoom): reintroduced support for 16bit color &
 fill color

---
 libavfilter/vf_zoom.c | 110 +++++++++++++++++++++++++++++++++---------
 1 file changed, 88 insertions(+), 22 deletions(-)

diff --git a/libavfilter/vf_zoom.c b/libavfilter/vf_zoom.c
index 9528ba7ed2b..d3fcfef4593 100644
--- a/libavfilter/vf_zoom.c
+++ b/libavfilter/vf_zoom.c
@@ -370,7 +370,7 @@ static inline uint8_t *pointer_at(const FFDrawContext *draw, uint8_t *data[], in
 }
 
 
-// this function takes x/y already scaled to the chrome sub
+// this function takes x/y already scaled to the chroma sub
 // supports only planar formats
 static inline uint8_t sample8_bilinear_at(uint8_t *data,
                                           int linesize, int pixelstep,
@@ -413,6 +413,51 @@ static inline uint8_t sample8_bilinear_at(uint8_t *data,
     return ifracy * lin0 + fracy * lin1;
 }
 
+
+// this function takes x/y already scaled to the chroma sub
+// supports only planar formats
+static inline uint16_t sample16_bilinear_at(uint8_t *data,
+                                            int linesize, int pixelstep,
+                                            float x, float y,
+                                            int w, int h,
+                                            int16_t oob_value
+                                            )
+{
+    // Bilinear sample of one 16-bit plane at (x, y); linesize and pixelstep are byte offsets.
+    // Positions outside [0, w - 1] x [0, h - 1] return oob_value instead.
+    int ix, iy, step_x, step_y;
+    float fracx, fracy, ifracx, ifracy;
+    float lin0, lin1;
+    uint8_t *a11, *a12, *a21, *a22;
+
+    // negated test so that NaN coordinates are rejected as well
+    if(!(x >= 0 && y >= 0 && x <= w - 1 && y <= h - 1)){
+        return oob_value;
+    }
+
+    // truncate only after the range check: out-of-range float -> int is undefined
+    ix = x;
+    iy = y;
+    fracx = x - ix;
+    fracy = y - iy;
+    ifracx = 1.0f - fracx;
+    ifracy = 1.0f - fracy;
+
+    // on the last column/row the neighbour has weight 0, so reuse the current
+    // sample instead of reading past the end of the row/plane
+    step_x = ix + 1 < w ? pixelstep : 0;
+    step_y = iy + 1 < h ? linesize : 0;
+
+    a11 = data + iy * linesize + ix * pixelstep; // top left
+    a12 = a11 + step_x;                          // top right
+    a21 = a11 + step_y;                          // bottom left
+    a22 = a21 + step_x;                          // bottom right
+    lin0 = ifracx * (*((uint16_t*)a11)) + fracx * (*((uint16_t*)a12)); // top interp
+    lin1 = ifracx * (*((uint16_t*)a21)) + fracx * (*((uint16_t*)a22)); // bottom interp
+    return ifracy * lin0 + fracy * lin1;
+}
+
+
 typedef struct float2 {
     float x, y;
 } float2;
@@ -522,12 +567,13 @@ static inline float2 clamp_pan_inbounds(float2 PAN, float2 dim_out, float ZOOM,
 
 
 static void apply_zoom_plane(float ZOOM, float2 PAN, int plane,
-                             int in_pix_step,
-                             int out_pix_step,
+                             int pix_step,
+                             int pix_depth,
                              uint8_t *in, int linesize_in,
                              uint8_t *out, int linesize_out,
                              float2 dim_in,
-                             float2 dim_out) {
+                             float2 dim_out,
+                             FFDrawColor *fillcolor) {
     int x, y;
 
     int h = dim_out.y;
@@ -542,20 +588,39 @@ static void apply_zoom_plane(float ZOOM, float2 PAN, int plane,
                 dim_in,
                 PAN
             );
-            int8_t value = sample8_bilinear_at(
-                in,
-                linesize_in,
-                in_pix_step,
-                src_location.x, src_location.y,
-                dim_in.x, dim_in.y,
-                255 // out of bounds value
-            );
-
-            int8_t *dst_pixel = out +
-                                y * linesize_out +
-                                x * out_pix_step;
 
-            (*dst_pixel) = value;
+            if (pix_depth == 8) {
+                uint8_t value = sample8_bilinear_at(
+                    in,
+                    linesize_in,
+                    pix_step,
+                    src_location.x, src_location.y,
+                    dim_in.x, dim_in.y,
+                    fillcolor->comp[plane].u8[0] // out of bounds value
+                );
+                // unsigned types: the sampler returns uint8_t and out is a uint8_t pointer
+                uint8_t *dst_pixel = out +
+                                     y * linesize_out +
+                                     x * pix_step;
+
+                (*dst_pixel) = value;
+            }
+            else { // every depth other than 8 is stored as a 16-bit sample
+                uint16_t value = sample16_bilinear_at(
+                                    in,
+                                    linesize_in,
+                                    pix_step,
+                                    src_location.x, src_location.y,
+                                    dim_in.x, dim_in.y,
+                                    fillcolor->comp[plane].u16[0] // out of bounds value
+                                );
+                // out is a byte pointer, so the 16-bit store needs an explicit cast
+                uint16_t *dst_pixel = (uint16_t *)(out +
+                                                   y * linesize_out +
+                                                   x * pix_step);
+
+                (*dst_pixel) = value;
+            }
         }
     }
 
@@ -563,7 +628,7 @@ static void apply_zoom_plane(float ZOOM, float2 PAN, int plane,
 }
 
 
-static int apply_zoom(ZoomContext *s, AVFrame *in, AVFrame *out){
+static int apply_zoom(ZoomContext *s, AVFrame *in, AVFrame *out, FFDrawColor *fillcolor){
     int x, y, plane;
 
     int out_w = out->width;
@@ -596,12 +661,13 @@ static int apply_zoom(ZoomContext *s, AVFrame *in, AVFrame *out){
         float2 dim_out = plane == 1 || plane == 2 ? dim_out_chroma : dim_out_full;
 
         apply_zoom_plane(ZOOM, PAN, plane,
-                         desc->comp[plane].step, // in pix step
-                         desc->comp[plane].step, // out pix step
+                         desc->comp[plane].step,
+                         desc->comp[plane].depth,
                          in->data[plane], in->linesize[plane],
                          out->data[plane], out->linesize[plane],
                          dim_in,
-                         dim_out);
+                         dim_out,
+                         fillcolor);
     }
 
     return 0;
@@ -980,7 +1046,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                       0, 0,
                       out_w, out_h);
 
-	apply_zoom(zoom, in, out);
+	apply_zoom(zoom, in, out, &zoom->fillcolor);
 
     // scale
 //    if(zoom_val == 1) {

From 51160a1eb5c149a0f1a189d02d093d2b64a8862a Mon Sep 17 00:00:00 2001
From: Stefan-Gabriel Muscalu <stefan.gabriel.muscalu@gmail.com>
Date: Fri, 3 Jul 2020 19:27:31 +0300
Subject: [PATCH 06/10] update(vf_zoom): pruned old and dead code

---
 libavfilter/vf_zoom.c | 378 +-----------------------------------------
 1 file changed, 3 insertions(+), 375 deletions(-)

diff --git a/libavfilter/vf_zoom.c b/libavfilter/vf_zoom.c
index d3fcfef4593..5702f49095c 100644
--- a/libavfilter/vf_zoom.c
+++ b/libavfilter/vf_zoom.c
@@ -88,10 +88,6 @@ typedef struct ZoomContext {
     int hsub, vsub;
 } ZoomContext;
 
-#define SUBPIXEL_LUT_RESOLUTION 1000
-int  subpixel_LUT_inited = 0;
-char subpixel_LUT[256][256][SUBPIXEL_LUT_RESOLUTION];
-
 enum {
     FAST_BILINEAR   = SWS_FAST_BILINEAR,
     BILINEAR        = SWS_BILINEAR,
@@ -137,21 +133,6 @@ AVFILTER_DEFINE_CLASS(zoom);
 
 static av_cold int init(AVFilterContext *ctx)
 {
-
-    if(!subpixel_LUT_inited){
-        subpixel_LUT_inited = 1;
-
-        for(int i = 0; i < 256; i++) {
-            for(int j = 0; j < 256; j++) {
-                for(int k = 0; k < SUBPIXEL_LUT_RESOLUTION; k++) {
-                    subpixel_LUT[i][j][k] = i *      k / (float)SUBPIXEL_LUT_RESOLUTION +
-                                            j * (1 - k / (float)SUBPIXEL_LUT_RESOLUTION);
-                }
-            }
-        }
-
-    }
-
     return 0;
 }
 
@@ -350,26 +331,6 @@ static AVFrame* alloc_frame(enum AVPixelFormat pixfmt, int w, int h)
     return frame;
 }
 
-static inline int normalize_xy(double d, int chroma_sub)
-{
-  if (isnan(d))
-    return INT_MAX;
-  return (int)d & ~((1 << chroma_sub) - 1);
-}
-
-static inline float decimal_part(float d){
-    return d - (int64_t)d;
-}
-
-static inline uint8_t *pointer_at(const FFDrawContext *draw, uint8_t *data[], int linesize[],
-                           int plane, int x, int y)
-{
-    return data[plane] +
-           (y >> draw->vsub[plane]) * linesize[plane] +
-           (x >> draw->hsub[plane]) * draw->pixelstep[plane];
-}
-
-
 // this function takes x/y already scaled to the chroma sub
 // supports only planar formats
 static inline uint8_t sample8_bilinear_at(uint8_t *data,
@@ -579,8 +540,9 @@ static void apply_zoom_plane(float ZOOM, float2 PAN, int plane,
     int h = dim_out.y;
     int w = dim_out.x;
 
-    for(y = 0; y < h; y++){
-        for(x = 0; x < w; x++){
+    for(y = 0; y < h; y++) {
+        for(x = 0; x < w; x++) {
+
             float2 src_location = scale_coords_pxout_to_pxin(
                 (float2){(float)x, (float)y},
                 dim_out,
@@ -673,307 +635,6 @@ static int apply_zoom(ZoomContext *s, AVFrame *in, AVFrame *out, FFDrawColor *fi
     return 0;
 }
 
-#define intra_field_calc_8      ((uint8_t*)q)[x - pixel_step] = ((uint8_t*)p)[x] * sub_x + ((uint8_t*)p)[x - pixel_step] * inverted_sub_x
-#define intra_field_calc_8_opt  ((uint8_t*)q)[x - pixel_step] = subpixel_LUT[ ((uint8_t*)p)[x] ]                    \
-                                                                        [ ((uint8_t*)p)[x - pixel_step] ]       \
-                                                                        [ subpix_x_bucket ]
-
-#define intra_field_calc_16 ((uint16_t*)q)[x - pixel_step] = ((uint16_t*)p)[x] * sub_x + ((uint16_t*)p)[x - pixel_step] * inverted_sub_x
-#define intra_field_calc_16_disabled ((uint16_t*)q)[x - pixel_step] = ((uint16_t*)p)[x - pixel_step]
-
-#define intra_field_copy_8  ((uint8_t*)q)[x] = ((uint8_t*)p)[x]
-#define intra_field_copy_16 ((uint16_t*)q)[x] = ((uint16_t*)p)[x]
-
-#define inter_field_calc_8      ((uint8_t*)q_)[x] = ((uint8_t*)q)[x] * sub_y + ((uint8_t*)q_)[x] * inverted_sub_y
-#define inter_field_calc_8_opt  ((uint8_t*)q_)[x] = subpixel_LUT[ ((uint8_t*)q)[x] ] \
-                                                            [ ((uint8_t*)q_)[x] ] \
-                                                            [ subpix_y_bucket ]
-
-#define inter_field_calc_16 ((uint16_t*)q_)[x] = ((uint16_t*)q)[x] * sub_y + ((uint16_t*)q_)[x] * inverted_sub_y
-#define inter_field_calc_16_disabled {}while(0)
-
-#define ff_copy_rectangle_subpixel_mapping(intra_calc, intra_copy, inter_calc) ({ \
-        for (y = 0; y < hp; y++) {                                                                      \
-\
-            for(x = pixel_step; x < copy_w; x ++) {                                          \
-                intra_calc;                                                                             \
-            }                                                                                           \
-\
-            /* fill in the last column of pixels */                                                     \
-            /* this should set the last pixel as the one before * inverted_sub_x + current one * sub_x */\
-            /* as it is right now, it generates a 1px pop-in effect on the last column */               \
-            for(x = copy_w - pixel_step; x < copy_w; x++){                                              \
-                intra_copy;                                                                             \
-            }                                                                                           \
-\
-            if(y > 0) {                                                                                 \
-                p_ = p - src_linesize[plane];                                                           \
-                q_ = q - dst_linesize[plane];                                                           \
-\
-                for(x = 0; x < copy_w; x ++){                                                           \
-                    inter_calc;                                                                         \
-                }                                                                                       \
-            }                                                                                           \
-\
-            p += src_linesize[plane];                                                                   \
-            q += dst_linesize[plane];                                                                   \
-        }                                                                                               \
-        /* todo: interpolate last row too like interpolating last column */                             \
-})
-
-static void ff_copy_rectangle_subpixel(FFDrawContext *draw,
-                                       uint8_t *dst[], int dst_linesize[],
-                                       uint8_t *src[], int src_linesize[],
-                                       int dst_x, int dst_y,
-                                       int src_x, int src_y,
-                                       int w, int h,
-                                       float original_sub_x, float original_sub_y)
-{
-
-    int plane, y, x, wp, hp;
-    int plane_step, copy_w, plane_depth, pixel_step;
-    uint8_t *p, *q, *p_, *q_;
-    uint8_t vsub, hsub;
-
-    float sub_x, sub_y;
-
-    float inverted_sub_x;
-    float inverted_sub_y;
-
-    uint16_t subpix_x_bucket = (sub_x * SUBPIXEL_LUT_RESOLUTION);
-    uint16_t subpix_y_bucket = (sub_y * SUBPIXEL_LUT_RESOLUTION);
-
-    for (plane = 0; plane < draw->nb_planes; plane++) {
-        p = pointer_at(draw, src, src_linesize, plane, src_x, src_y);
-        q = pointer_at(draw, dst, dst_linesize, plane, dst_x, dst_y);
-        vsub = draw->vsub[plane];
-        hsub = draw->hsub[plane];
-        wp = AV_CEIL_RSHIFT(w, hsub) * draw->pixelstep[plane];
-        hp = AV_CEIL_RSHIFT(h, vsub);
-
-        plane_step = draw->desc->comp[plane].step;
-        plane_depth = draw->desc->comp[plane].depth;
-        pixel_step = plane_step / (plane_depth / 8);
-        copy_w = wp / (plane_depth / 8);
-
-        sub_x = decimal_part((src_x + original_sub_x) / (1 << vsub));
-        sub_y = decimal_part((src_y + original_sub_y) / (1 << hsub));
-
-        inverted_sub_x = 1 - sub_x;
-        inverted_sub_y = 1 - sub_y;
-
-        subpix_x_bucket = (sub_x * SUBPIXEL_LUT_RESOLUTION);
-        subpix_y_bucket = (sub_y * SUBPIXEL_LUT_RESOLUTION);
-
-        if(plane_depth == 8) {
-            ff_copy_rectangle_subpixel_mapping(intra_field_calc_8_opt,  intra_field_copy_8,  inter_field_calc_8_opt);
-        }else{
-            ff_copy_rectangle_subpixel_mapping(intra_field_calc_16, intra_field_copy_16, inter_field_calc_16 );
-        }
-
-    }
-}
-
-static int zoom_out(ZoomContext *zoom, AVFrame *in, AVFrame *out, AVFilterLink *outlink)
-{
-    av_log(zoom, AV_LOG_DEBUG, "zoom out\n");
-
-    int ret = 0;
-    zoom->sws = sws_alloc_context();
-    if (!zoom->sws) {
-        ret = AVERROR(ENOMEM);
-        goto error;
-    }
-
-    const double zoom_val = zoom->zoom;
-
-    const int in_w  = in->width;
-    const int in_h  = in->height;
-    const int in_f  = in->format;
-
-          int out_w = in->width * zoom_val;
-          int out_h = in->height * zoom_val;
-    const int out_f = outlink->format;
-
-    const int fout_w = out->width;
-    const int fout_h = out->height;
-
-
-    const double originalAspectRatio = 1.0 * in_w / in_h;
-    const double aspectRatio = zoom->outAspectRatio;
-
-    const double x  = zoom->x;
-    const double y  = zoom->y;
-
-    if(out_h <= 0 || out_w <= 0)
-        goto bypass;
-
-    // todo there's surely a way to implement this without a temp frame
-    AVFrame* temp_frame = alloc_frame(out_f, out_w, out_h);
-    av_log(zoom, AV_LOG_DEBUG, "zoom: %.6f y: %.3f\n", zoom->zoom);
-    av_log(zoom, AV_LOG_DEBUG, "scaling: %dx%d -> %dx%d\n", in_w, in_h, out_w, out_h);
-
-    av_opt_set_int(zoom->sws, "srcw", in_w, 0);
-    av_opt_set_int(zoom->sws, "srch", in_h, 0);
-    av_opt_set_int(zoom->sws, "src_format", in_f, 0);
-    av_opt_set_int(zoom->sws, "dstw", out_w, 0);
-    av_opt_set_int(zoom->sws, "dsth", out_h, 0);
-    av_opt_set_int(zoom->sws, "dst_format", out_f, 0);
-
-    if(zoom->interpolation)
-        av_opt_set_int(zoom->sws, "sws_flags", zoom->interpolation, 0);
-
-    if ((ret = sws_init_context(zoom->sws, NULL, NULL)) < 0)
-        goto error;
-
-    sws_scale(zoom->sws, (const uint8_t *const *)&in->data, in->linesize, 0, in_h, temp_frame->data, temp_frame->linesize);
-
-    sws_freeContext(zoom->sws);
-    zoom->sws = NULL;
-
-    av_log(zoom, AV_LOG_DEBUG, "x: %.3f y: %.3f\n", x, y);
-    const int dx = FFMIN(FFMAX(fout_w * x - out_w/2, 0), FFMAX(fout_w - out_w, 0));
-    const int dy = FFMIN(FFMAX(fout_h * y - out_h/2, 0), FFMAX(fout_h - out_h, 0));
-    av_log(zoom, AV_LOG_DEBUG, "dx: %d dy: %d\n", dx, dy);
-    av_log(zoom, AV_LOG_DEBUG, "in_w: %d in_h: %d\n", in_w, in_h);
-
-    ff_copy_rectangle2(&zoom->dc,
-                       out->data, out->linesize,
-                       temp_frame->data, temp_frame->linesize,
-                       dx, dy, 0, 0,
-                       FFMIN(out_w, fout_w - dx),
-                       FFMIN(out_h, fout_h - dy));
-
-    av_frame_free(&temp_frame);
-
-error:
-    return ret;
-bypass:
-    return 0;
-}
-
-static int zoom_in (ZoomContext *zoom, AVFrame *in, AVFrame *out, AVFilterLink *outlink)
-{
-    av_log(zoom, AV_LOG_DEBUG, "zoom in\n");
-
-    int ret = 0;
-    zoom->sws = sws_alloc_context();
-    if (!zoom->sws) {
-        ret = AVERROR(ENOMEM);
-        goto error;
-    }
-
-    const double zoom_val = zoom->zoom;
-
-        float in_w_f = in->width / zoom_val;
-        float in_h_f = in->height / zoom_val;
-
-          int in_w  = in_w_f;
-          int in_h  = in_h_f;
-    const int in_f  = in->format;
-
-    const double originalAspectRatio = 1.0 * in_w_f / in_h_f;
-    const double aspectRatio = zoom->outAspectRatio;
-
-    if(originalAspectRatio < aspectRatio){
-      in_h_f = in_h_f * (originalAspectRatio / aspectRatio);
-      in_h = round(in_h_f);
-    }else{
-      in_w_f = in_w_f * (aspectRatio / originalAspectRatio);
-      in_w = round(in_w_f);
-    }
-
-    const int out_w = out->width;
-    const int out_h = out->height;
-    const int out_f = outlink->format;
-
-    const double x  = zoom->x;
-    const double y  = zoom->y;
-
-    if(out_h <= 0 || out_w <= 0)
-        goto bypass;
-
-    av_log(zoom, AV_LOG_DEBUG, "original in_w: %d in_h: %d\n", in->width, in->height);
-    av_log(zoom, AV_LOG_DEBUG, "in_w: %d in_h: %d\n", in_w, in_h);
-    av_log(zoom, AV_LOG_DEBUG, "out_w: %d out_h: %d\n", out_w, out_h);
-
-    const double pix_x = in->width * x - in_w_f / 2.0;
-    const double pix_y = in->height * y - in_h_f / 2.0;
-
-    const int dx = normalize_xy(
-        FFMIN(FFMAX(pix_x, 0), FFMAX(in->width - in_w, 0)),
-        zoom->hsub);
-    const int dy = normalize_xy(
-        FFMIN(FFMAX(pix_y, 0), FFMAX(in->height - in_h, 0)),
-        zoom->vsub);
-
-    av_log(zoom, AV_LOG_DEBUG, "x: %0.3f y: %0.3f\n", x, y);
-    av_log(zoom, AV_LOG_DEBUG, "pix_x: %.3f pix_y: %.3f\n", pix_x, pix_y);
-    av_log(zoom, AV_LOG_DEBUG, "dx: %d dy: %d\n", dx, dy);
-
-    AVFrame* small_crop = alloc_frame(in->format, in_w, in_h);
-    if (!small_crop) {
-        ret = AVERROR(ENOMEM);
-        goto error;
-    }
-
-    // fixme: there is an issue here when zooming in/out slowly
-    // the size changes and due to the scaling up being to the same values, the top/left jitters
-    //
-    // battle plan:
-    // 1. find the crop area (x, y, w and h according to new AR)
-    // 2. expand it by vsub/hsub px top/bottom/left/right where possible
-    // 3. crop with expanded area
-    // 4. scale up by zoom amount (use old zoom_in code from https://github.com/findie/FFmpeg/blob/190eaf3c027d1a343ac2b357d66a308191a3b448/libavfilter/vf_zoom.c#L417-L469)
-    // 5. calculate the new padding size (vsub/hsub * zoom)
-    // 6. use ff_copy_rectangle_subpixel to get the subpixel copy of the actual window
-    // 7. use that as final image
-
-    const float bound_pix_x = FFMIN(FFMAX(pix_x, 0), FFMAX(in->width - in_w, 0));
-    const float bound_pix_y = FFMIN(FFMAX(pix_y, 0), FFMAX(in->height - in_h, 0));
-
-    const float subpix_x = decimal_part(bound_pix_x);
-    const float subpix_y = decimal_part(bound_pix_y);
-//
-    printf("%.3f %.3f %.5f | %.3f x %.3f | %d x %d \n", bound_pix_x, bound_pix_y, zoom->zoom, in_w_f, in_h_f, in_w, in_h);
-
-    ff_copy_rectangle_subpixel(&zoom->dc,
-                               small_crop->data, small_crop->linesize,
-                               in->data, in->linesize,
-                               0, 0,
-                               bound_pix_x, bound_pix_y,
-                               in_w, in_h,
-                               subpix_x, subpix_y);
-
-    // stretching bottom right
-    av_opt_set_int(zoom->sws, "srcw", in_w, 0);
-    av_opt_set_int(zoom->sws, "srch", in_h, 0);
-    av_opt_set_int(zoom->sws, "src_format", in_f, 0);
-    av_opt_set_int(zoom->sws, "dstw", out_w, 0);
-    av_opt_set_int(zoom->sws, "dsth", out_h, 0);
-    av_opt_set_int(zoom->sws, "dst_format", out_f, 0);
-    if(zoom->interpolation)
-        av_opt_set_int(zoom->sws, "sws_flags", zoom->interpolation, 0);
-
-    if ((ret = sws_init_context(zoom->sws, NULL, NULL)) < 0)
-        goto error;
-
-//    sws_scale(zoom->sws, (const uint8_t *const *)&input, in->linesize, 0, in_h, out->data, out->linesize);
-    sws_scale(zoom->sws,
-              small_crop->data, small_crop->linesize,
-              0, in_h,
-              out->data, out->linesize);
-
-    av_frame_free(&small_crop);
-    sws_freeContext(zoom->sws);
-    zoom->sws = NULL;
-
-    error:
-    return ret;
-    bypass:
-    return 0;
-}
-
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
@@ -1048,39 +709,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
 	apply_zoom(zoom, in, out, &zoom->fillcolor);
 
-    // scale
-//    if(zoom_val == 1) {
-//        // it's 1, just copy
-//        // quite an expensive noop :D
-//
-//        int from_x = av_clip_c(in_w * zoom->x - out_w / 2.0, 0, in_w - out_w);
-//        int from_y = av_clip_c(in_h * zoom->y - out_h / 2.0, 0, in_h - out_h);
-//        float sub_x = decimal_part(av_clipf_c(in_w * zoom->x - out_w / 2.0, 0.0, in_w - out_w));
-//        float sub_y = decimal_part(av_clipf_c(in_h * zoom->y - out_h / 2.0, 0.0, in_h - out_h));
-//
-//        ff_copy_rectangle_subpixel(&zoom->dc,
-//                           out->data, out->linesize,
-//                           in->data, in->linesize,
-//                           0, 0,
-//                           from_x,
-//                           from_y,
-//                           out_w, out_h, sub_x, sub_y);
-//
-//    } else if (zoom_val <= 0) {
-//        // if it's 0 or lower do nothing
-//        // noop
-//    } else if (zoom_val < 1) {
-//        // zoom in (0, 1)
-//        ret = zoom_out(zoom, in, out, outlink);
-//        if(ret)
-//            goto error;
-//    } else if (zoom_val > 1){
-//        // zoom in (1, +ing)
-//        ret = zoom_in(zoom, in, out, outlink);
-//        if(ret)
-//            goto error;
-//    }
-
     av_frame_free(&in);
     return ff_filter_frame(outlink, out);
 

From 40e96669003eb1b399ebd669d01360c94135807f Mon Sep 17 00:00:00 2001
From: Stefan-Gabriel Muscalu <stefan.gabriel.muscalu@gmail.com>
Date: Fri, 3 Jul 2020 19:47:36 +0300
Subject: [PATCH 07/10] fix(vf_zoom): removed unsupported formats

---
 libavfilter/vf_zoom.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/libavfilter/vf_zoom.c b/libavfilter/vf_zoom.c
index 5702f49095c..5fb43dbf92b 100644
--- a/libavfilter/vf_zoom.c
+++ b/libavfilter/vf_zoom.c
@@ -148,8 +148,6 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_YUVJ420P,
         AV_PIX_FMT_YUVJ422P,
         AV_PIX_FMT_YUVJ444P,
-        AV_PIX_FMT_NV12,
-        AV_PIX_FMT_NV21,
         AV_PIX_FMT_GRAY16LE,
         AV_PIX_FMT_YUV440P,
         AV_PIX_FMT_YUVJ440P,
@@ -165,13 +163,29 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_YUVA420P16LE,
         AV_PIX_FMT_YUVA422P16LE,
         AV_PIX_FMT_YUVA444P16LE,
-        AV_PIX_FMT_NV16,
         AV_PIX_FMT_YA16LE,
         AV_PIX_FMT_GBRAP,
         AV_PIX_FMT_GBRAP16LE,
         AV_PIX_FMT_YUVJ411P,
-        AV_PIX_FMT_NV24,
-        AV_PIX_FMT_NV42,
+
+        // unsupported: planar Y but packed U & V
+        // AV_PIX_FMT_NV12,
+        // AV_PIX_FMT_NV21,
+        // AV_PIX_FMT_NV16,
+        // AV_PIX_FMT_NV24,
+        // AV_PIX_FMT_NV42,
+
+        // unsupported: packed RGB & variants
+        // AV_PIX_FMT_RGB24,
+        // AV_PIX_FMT_BGR24,
+        // AV_PIX_FMT_ARGB,
+        // AV_PIX_FMT_RGBA,
+        // AV_PIX_FMT_ABGR,
+        // AV_PIX_FMT_BGRA,
+        // AV_PIX_FMT_0RGB,
+        // AV_PIX_FMT_RGB0,
+        // AV_PIX_FMT_0BGR,
+        // AV_PIX_FMT_BGR0,
 
         AV_PIX_FMT_NONE
     };

From b9ddacef4e5ed845b6be45b33b0443e72e21a420 Mon Sep 17 00:00:00 2001
From: Stefan-Gabriel Muscalu <stefan.gabriel.muscalu@gmail.com>
Date: Fri, 3 Jul 2020 19:53:50 +0300
Subject: [PATCH 08/10] update(vf_zoom): route debug output through av_log instead of printf

---
 libavfilter/vf_zoom.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavfilter/vf_zoom.c b/libavfilter/vf_zoom.c
index 5fb43dbf92b..bd0a46ec336 100644
--- a/libavfilter/vf_zoom.c
+++ b/libavfilter/vf_zoom.c
@@ -628,9 +628,9 @@ static int apply_zoom(ZoomContext *s, AVFrame *in, AVFrame *out, FFDrawColor *fi
     const float2 UNCLAMPED_PAN  = {s->x, s->y};
           float2 PAN            = clamp_pan_inbounds(UNCLAMPED_PAN, dim_out_full, ZOOM, dim_in_full);
 
-    printf("ZOOM %.3f\n", ZOOM);
-    printf("UNCLAMPED_PAN x %.3f y %.3f\n", UNCLAMPED_PAN.x, UNCLAMPED_PAN.y);
-    printf("PAN x %.3f y %.3f\n", PAN.x, PAN.y);
+    av_log(s, AV_LOG_DEBUG, "ZOOM %.3f\n", ZOOM);
+    av_log(s, AV_LOG_DEBUG, "UNCLAMPED_PAN x %.3f y %.3f\n", UNCLAMPED_PAN.x, UNCLAMPED_PAN.y);
+    av_log(s, AV_LOG_DEBUG, "PAN x %.3f y %.3f\n", PAN.x, PAN.y);
 
     for(plane = 0; plane < desc->nb_components; plane++){
         float2 dim_in  = plane == 1 || plane == 2 ? dim_in_chroma  : dim_in_full;

From 10485096fb730d1d85693ff073da2b7477ecf69a Mon Sep 17 00:00:00 2001
From: Stefan-Gabriel Muscalu <stefan.gabriel.muscalu@gmail.com>
Date: Fri, 3 Jul 2020 21:02:53 +0300
Subject: [PATCH 09/10] update(vf_zoom): some optimizations added, but it's
 still too slow

---
 libavfilter/vf_zoom.c | 103 ++++++++++++++++++++++++++++++------------
 1 file changed, 75 insertions(+), 28 deletions(-)

diff --git a/libavfilter/vf_zoom.c b/libavfilter/vf_zoom.c
index bd0a46ec336..82f223022a8 100644
--- a/libavfilter/vf_zoom.c
+++ b/libavfilter/vf_zoom.c
@@ -131,8 +131,26 @@ static const AVOption zoom_options[] = {
 
 AVFILTER_DEFINE_CLASS(zoom);
 
+#define SUBPIXEL_LUT_RESOLUTION 100
+static int     subpixel_LUT_inited = 0;
+static uint8_t subpixel_LUT[256][256][SUBPIXEL_LUT_RESOLUTION];
+
 static av_cold int init(AVFilterContext *ctx)
 {
+    if(!subpixel_LUT_inited){
+        subpixel_LUT_inited = 1;
+        // subpixel_LUT[a][b][k] = a * (1 - k/RES) + b * (k/RES): k == 0 yields a, like the float lerp it replaces in sample8_bilinear_at
+        for(int i = 0; i < 256; i++) {
+            for(int j = 0; j < 256; j++) {
+                for(int k = 0; k < SUBPIXEL_LUT_RESOLUTION; k++) {
+                    subpixel_LUT[i][j][k] = i * (1 - (k / (float)SUBPIXEL_LUT_RESOLUTION)) +
+                                            j * (     k / (float)SUBPIXEL_LUT_RESOLUTION);
+                }
+            }
+        }
+
+    }
+
     return 0;
 }
 
@@ -358,9 +376,12 @@ static inline uint8_t sample8_bilinear_at(uint8_t *data,
     int iy = y;
     float fracx = x - ix;
     float fracy = y - iy;
-    float ifracx = 1.0f - fracx;
-    float ifracy = 1.0f - fracy;
-    float lin0, lin1;
+    //float ifracx = 1.0f - fracx;
+    //float ifracy = 1.0f - fracy;
+    //float lin0, lin1;
+
+    int _fractx = fracx * SUBPIXEL_LUT_RESOLUTION;
+    int _fracty = fracy * SUBPIXEL_LUT_RESOLUTION;
 
     // check if requested value is out of bounds
     if(x < 0 || y < 0 || x > w - 1 || y > h - 1){
@@ -371,6 +392,7 @@ static inline uint8_t sample8_bilinear_at(uint8_t *data,
 
     // top left
     uint8_t *a11 = row_y + ix * pixelstep;
+
     // top right = top left + 1px
     uint8_t *a12 = a11 + pixelstep;
 
@@ -379,13 +401,18 @@ static inline uint8_t sample8_bilinear_at(uint8_t *data,
     // bottom right = bottom left + 1px
     uint8_t *a22 = a21 + pixelstep;
 
+    uint8_t l0 = subpixel_LUT[(*a11)][(*a12)][_fractx];
+    uint8_t l1 = subpixel_LUT[(*a21)][(*a22)][_fractx];
+
+    return subpixel_LUT[l0][l1][_fracty];
+
     // top interp
-    lin0 = ifracx * (*a11) + fracx * (*a12);
+    //lin0 = ifracx * (*a11) + fracx * (*a12);
     // bottom interp
-    lin1 = ifracx * (*a21) + fracx * (*a22);
+    //lin1 = ifracx * (*a21) + fracx * (*a22);
 
     // vertical interp
-    return ifracy * lin0 + fracy * lin1;
+    //return ifracy * lin0 + fracy * lin1;
 }
 
 
@@ -549,35 +576,60 @@ static void apply_zoom_plane(float ZOOM, float2 PAN, int plane,
                              float2 dim_in,
                              float2 dim_out,
                              FFDrawColor *fillcolor) {
-    int x, y;
+    float x, y;
+    int _x, _y;
 
     int h = dim_out.y;
     int w = dim_out.x;
 
-    for(y = 0; y < h; y++) {
-        for(x = 0; x < w; x++) {
-
-            float2 src_location = scale_coords_pxout_to_pxin(
-                (float2){(float)x, (float)y},
-                dim_out,
-                ZOOM,
-                dim_in,
-                PAN
-            );
+    // calculate origin, origin + (0, 1), origin + (1, 0)
+    float2 src_location_00 = scale_coords_pxout_to_pxin(
+        (float2){0.0f, 0.0f},
+        dim_out,
+        ZOOM,
+        dim_in,
+        PAN
+    );
+    float2 src_location_01 = scale_coords_pxout_to_pxin(
+        (float2){0.0f, 1.0f},
+        dim_out,
+        ZOOM,
+        dim_in,
+        PAN
+    );
+    float2 src_location_10 = scale_coords_pxout_to_pxin(
+        (float2){1.0f, 0.0f},
+        dim_out,
+        ZOOM,
+        dim_in,
+        PAN
+    );
+
+    // calculate deltas on X and Y
+    // to use to loop over frame space
+    // we can do this since out transformation is a simple liniar translation
+    float source_x = src_location_00.x;
+    float delta_x = src_location_10.x - src_location_00.x;
+
+    float source_y = src_location_00.y;
+    float delta_y = src_location_01.y - src_location_00.y;
+
+    for(y = source_y, _y = 0; _y < h; y+=delta_y, _y++) {
+        for(x = source_x, _x = 0; _x < w; x+=delta_x, _x++) {
 
             if (pix_depth == 8) {
                 int8_t value = sample8_bilinear_at(
                     in,
                     linesize_in,
                     pix_step,
-                    src_location.x, src_location.y,
+                    x, y,
                     dim_in.x, dim_in.y,
                     fillcolor->comp[plane].u8[0] // out of bounds value
                 );
 
                 int8_t *dst_pixel = out +
-                                    y * linesize_out +
-                                    x * pix_step;
+                                    _y * linesize_out +
+                                    _x * pix_step;
 
                 (*dst_pixel) = value;
             }
@@ -586,14 +638,14 @@ static void apply_zoom_plane(float ZOOM, float2 PAN, int plane,
                                     in,
                                     linesize_in,
                                     pix_step,
-                                    src_location.x, src_location.y,
+                                    x, y,
                                     dim_in.x, dim_in.y,
                                     fillcolor->comp[plane].u16[0] // out of bounds value
                                 );
 
                 int16_t *dst_pixel = out +
-                                     y * linesize_out +
-                                     x * pix_step;
+                                     _y * linesize_out +
+                                     _x * pix_step;
 
                 (*dst_pixel) = value;
             }
@@ -715,11 +767,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 		av_log(zoom, AV_LOG_WARNING, "y position %.2f is out of range of [0-1]\n", zoom->y);
 		zoom->y = av_clipd_c(zoom->y, 0, 1);
 	}
-    // copy in the background
-    ff_fill_rectangle(&zoom->dc, &zoom->fillcolor,
-                      out->data, out->linesize,
-                      0, 0,
-                      out_w, out_h);
 
 	apply_zoom(zoom, in, out, &zoom->fillcolor);
 

From 0bf2455d27eec3321021bb6e0c014a7d8c81ec57 Mon Sep 17 00:00:00 2001
From: Stefan-Gabriel Muscalu <stefan.gabriel.muscalu@gmail.com>
Date: Mon, 6 Jul 2020 15:30:48 +0300
Subject: [PATCH 10/10] update(vf_zoom): added threaded execution

---
 libavfilter/vf_zoom.c | 135 +++++++++++++++++++++++++-----------------
 1 file changed, 81 insertions(+), 54 deletions(-)

diff --git a/libavfilter/vf_zoom.c b/libavfilter/vf_zoom.c
index 82f223022a8..25311022308 100644
--- a/libavfilter/vf_zoom.c
+++ b/libavfilter/vf_zoom.c
@@ -567,44 +567,61 @@ static inline float2 clamp_pan_inbounds(float2 PAN, float2 dim_out, float ZOOM,
     return CLAMPED_PAN;
 }
 
+typedef struct ZoomThreadData {
+    float2 PAN;
+    float2 dim_in;
+    float2 dim_out;
+    // base pointers of the input / output plane being processed
+    uint8_t* in;
+    uint8_t* out;
+
+    FFDrawColor *fillcolor;
+
+    float ZOOM;
+    int plane;
+    int linesize_in;
+    int linesize_out;
+    int pix_step;
+    int pix_depth;
+} ZoomThreadData;
+
+// slice worker (avfilter_action_func): fills output rows [slice_start, slice_end) of one plane, always returns 0
+static int apply_zoom_plane_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) {
+    ZoomThreadData *td = arg;
 
-static void apply_zoom_plane(float ZOOM, float2 PAN, int plane,
-                             int pix_step,
-                             int pix_depth,
-                             uint8_t *in, int linesize_in,
-                             uint8_t *out, int linesize_out,
-                             float2 dim_in,
-                             float2 dim_out,
-                             FFDrawColor *fillcolor) {
     float x, y;
     int _x, _y;
 
-    int h = dim_out.y;
-    int w = dim_out.x;
+    int h = td->dim_out.y;
+    int w = td->dim_out.x;
+
+    const int slice_start = (h * jobnr) / nb_jobs;
+    const int slice_end = (h * (jobnr + 1)) / nb_jobs;
 
     // calculate origin, origin + (0, 1), origin + (1, 0)
     float2 src_location_00 = scale_coords_pxout_to_pxin(
         (float2){0.0f, 0.0f},
-        dim_out,
-        ZOOM,
-        dim_in,
-        PAN
+        td->dim_out,
+        td->ZOOM,
+        td->dim_in,
+        td->PAN
     );
     float2 src_location_01 = scale_coords_pxout_to_pxin(
         (float2){0.0f, 1.0f},
-        dim_out,
-        ZOOM,
-        dim_in,
-        PAN
+        td->dim_out,
+        td->ZOOM,
+        td->dim_in,
+        td->PAN
     );
     float2 src_location_10 = scale_coords_pxout_to_pxin(
         (float2){1.0f, 0.0f},
-        dim_out,
-        ZOOM,
-        dim_in,
-        PAN
+        td->dim_out,
+        td->ZOOM,
+        td->dim_in,
+        td->PAN
     );
 
+
     // calculate deltas on X and Y
     // to use to loop over frame space
     // we can do this since out transformation is a simple liniar translation
@@ -614,38 +631,38 @@ static void apply_zoom_plane(float ZOOM, float2 PAN, int plane,
     float source_y = src_location_00.y;
     float delta_y = src_location_01.y - src_location_00.y;
 
-    for(y = source_y, _y = 0; _y < h; y+=delta_y, _y++) {
+    for(y = source_y + slice_start * delta_y, _y = slice_start; _y < slice_end; y += delta_y, _y++) {
         for(x = source_x, _x = 0; _x < w; x+=delta_x, _x++) {
 
-            if (pix_depth == 8) {
+            if (td->pix_depth == 8) {
                 int8_t value = sample8_bilinear_at(
-                    in,
-                    linesize_in,
-                    pix_step,
+                    td->in,
+                    td->linesize_in,
+                    td->pix_step,
                     x, y,
-                    dim_in.x, dim_in.y,
-                    fillcolor->comp[plane].u8[0] // out of bounds value
+                    td->dim_in.x, td->dim_in.y,
+                    td->fillcolor->comp[td->plane].u8[0] // out of bounds value
                 );
 
-                int8_t *dst_pixel = out +
-                                    _y * linesize_out +
-                                    _x * pix_step;
+                uint8_t *dst_pixel = td->out +
+                                     _y * td->linesize_out +
+                                     _x * td->pix_step;
 
                 (*dst_pixel) = value;
             }
             else {
                 int16_t value = sample16_bilinear_at(
-                                    in,
-                                    linesize_in,
-                                    pix_step,
-                                    x, y,
-                                    dim_in.x, dim_in.y,
-                                    fillcolor->comp[plane].u16[0] // out of bounds value
-                                );
-
-                int16_t *dst_pixel = out +
-                                     _y * linesize_out +
-                                     _x * pix_step;
+                    td->in,
+                    td->linesize_in,
+                    td->pix_step,
+                    x, y,
+                    td->dim_in.x, td->dim_in.y,
+                    td->fillcolor->comp[td->plane].u16[0] // out of bounds value
+                );
+
+                uint16_t *dst_pixel = (uint16_t *)(td->out +
+                                      _y * td->linesize_out +
+                                      _x * td->pix_step);
 
                 (*dst_pixel) = value;
             }
@@ -656,7 +673,8 @@ static void apply_zoom_plane(float ZOOM, float2 PAN, int plane,
+    return 0;
 }
 
 
-static int apply_zoom(ZoomContext *s, AVFrame *in, AVFrame *out, FFDrawColor *fillcolor){
+static int apply_zoom(ZoomContext *s, AVFilterLink *link, AVFrame *in, AVFrame *out, FFDrawColor *fillcolor){
     int x, y, plane;
 
     int out_w = out->width;
@@ -684,18 +702,27 @@ static int apply_zoom(ZoomContext *s, AVFrame *in, AVFrame *out, FFDrawColor *fi
     av_log(s, AV_LOG_DEBUG, "UNCLAMPED_PAN x %.3f y %.3f\n", UNCLAMPED_PAN.x, UNCLAMPED_PAN.y);
     av_log(s, AV_LOG_DEBUG, "PAN x %.3f y %.3f\n", PAN.x, PAN.y);
 
+    ZoomThreadData td;
+    td.PAN = PAN;
+    td.fillcolor = fillcolor;
+    td.ZOOM = ZOOM;
+
     for(plane = 0; plane < desc->nb_components; plane++){
         float2 dim_in  = plane == 1 || plane == 2 ? dim_in_chroma  : dim_in_full;
         float2 dim_out = plane == 1 || plane == 2 ? dim_out_chroma : dim_out_full;
 
-        apply_zoom_plane(ZOOM, PAN, plane,
-                         desc->comp[plane].step,
-                         desc->comp[plane].depth,
-                         in->data[plane], in->linesize[plane],
-                         out->data[plane], out->linesize[plane],
-                         dim_in,
-                         dim_out,
-                         fillcolor);
+        td.dim_in = dim_in;
+        td.dim_out = dim_out;
+        td.in = in->data[plane];
+        td.out = out->data[plane];
+        td.plane = plane;
+        td.linesize_in = in->linesize[plane];
+        td.linesize_out = out->linesize[plane];
+        td.pix_step = desc->comp[plane].step;
+        td.pix_depth = desc->comp[plane].depth;
+
+        link->dst->internal->execute(link->dst, apply_zoom_plane_slice, &td, NULL, FFMIN(out_h, ff_filter_get_nb_threads(link->dst)));
+
     }
 
     return 0;
@@ -768,7 +795,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 		zoom->y = av_clipd_c(zoom->y, 0, 1);
 	}
 
-	apply_zoom(zoom, in, out, &zoom->fillcolor);
+	apply_zoom(zoom, inlink, in, out, &zoom->fillcolor);
 
     av_frame_free(&in);
     return ff_filter_frame(outlink, out);
@@ -819,5 +846,5 @@ AVFilter ff_vf_zoom = {
     .inputs        = zoom_inputs,
     .outputs       = zoom_outputs,
     .priv_class    = &zoom_class,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };