/*
 * Upper limit associated with the feather size N. The DO_FEATHER_* variants
 * defined in this file cover odd N from 3 to 29.
 * NOTE(review): the comparison that uses this constant is not shown here;
 * presumably N values at or above it take a different path - confirm.
 */
2 #define DO_FEATHER_MAX_N 30
/*
 * DO_FEATHER_3(type, int_type, max, N) - feather variant used for N = 3.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2 and the SC0 scratch array.
 *   max, N    not referenced in the lines of this macro shown here.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint16_t) and
 * (uint16_t, uint32_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - SC0 is a zero-filled new[] array sized for 2 int_type slots per
 *     column position; SC[0] and SC[1] are added to the running sum.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (8 + SC[1] + tmp2) >> 4 is stored at out_row[j - 2 * offset]:
 *     a division by 16 with round-half-up.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): offset, i, j, in_row, out_row and SC are used but not
 * declared in the lines shown, and no delete[] for SC0 appears - confirm.
 */
4 #define DO_FEATHER_3(type, int_type, max, N); \
7 int frame_w = input->get_w(); \
8 int frame_h = input->get_h(); \
9 int start_in = start_out - offset; \
10 int end_in = end_out + offset; \
11 type **in_rows = (type**)input->get_rows(); \
12 type **out_rows = (type**)output->get_rows(); \
14 int_type tmp1, tmp2; \
16 int_type *SC0 = new int_type[(frame_w + offset * 2) * 2]; \
17 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 2); \
18 for (i = start_in; i < end_in; i++) \
22 in_row = in_rows[0]; \
23 else if (i >= frame_h) \
24 in_row = in_rows[frame_h - 1]; \
26 in_row = in_rows[i]; \
30 if (i >= start_out + offset && i<= end_out + offset) \
31 out_row = out_rows[i - offset]; \
35 for (j = 0; j < frame_w + offset * 2; j++) \
39 else if (j >= frame_w + offset) \
40 tmp1 = in_row[frame_w - 1]; \
42 tmp1 = in_row[j - offset]; \
47 tmp2 = SC[0] + tmp1; \
49 if (j >= offset * 2) \
50 if (out_row) out_row[j - offset * 2] = (8 + SC[1] + tmp2) >> 4; \
/*
 * DO_FEATHER_5(type, int_type, max, N) - feather variant used for N = 5.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR3 and the SC0 array.
 *   max, N    not referenced in the lines of this macro shown here.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint16_t) and
 * (uint16_t, uint32_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR3 are reset to 0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - SC0 is a zero-filled new[] array sized for 4 int_type slots per
 *     column position; SC[0]..SC[3] are added to the running sum, which
 *     alternates between tmp1 and tmp2.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (128 + SC[3] + tmp2) >> 8 is stored at out_row[j - 2 * offset]:
 *     a division by 256 with round-half-up.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): offset, i, j, in_row, out_row and SC are used but not
 * declared in the lines shown; the stores into SR0..SR3 and SC[] and a
 * delete[] for SC0 are not shown either - confirm.
 */
58 #define DO_FEATHER_5(type, int_type, max, N); \
61 int frame_w = input->get_w(); \
62 int frame_h = input->get_h(); \
63 int start_in = start_out - offset; \
64 int end_in = end_out + offset; \
65 type **in_rows = (type**)input->get_rows(); \
66 type **out_rows = (type**)output->get_rows(); \
68 int_type tmp1, tmp2; \
69 int_type SR0, SR1, SR2, SR3; \
70 int_type *SC0 = new int_type[(frame_w + offset * 2) * 4]; \
71 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 4); \
72 for (i = start_in; i < end_in; i++) \
76 in_row = in_rows[0]; \
77 else if (i >= frame_h) \
78 in_row = in_rows[frame_h - 1]; \
80 in_row = in_rows[i]; \
81 SR0 = SR1 = SR2 = SR3 = 0; \
84 if (i >= start_out + offset && i<= end_out + offset) \
85 out_row = out_rows[i - offset]; \
89 for (j = 0; j < frame_w + offset * 2; j++) \
93 else if (j >= frame_w + offset) \
94 tmp1 = in_row[frame_w - 1]; \
96 tmp1 = in_row[j - offset]; \
105 tmp2 = SC[0] + tmp1; \
107 tmp1 = SC[1] + tmp2; \
109 tmp2 = SC[2] + tmp1; \
111 if (j >= offset * 2) \
112 if (out_row) out_row[j - offset * 2] = (128 + SC[3] + tmp2) >> 8; \
/*
 * DO_FEATHER_7(type, int_type, max, N) - feather variant used for N = 7.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR5 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint32_t) and
 * (uint16_t, uint32_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR5 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - SC0 is a zero-filled new[] array sized for 6 int_type slots per
 *     column position; SC[0]..SC[5] are added to the running sum, which
 *     alternates between tmp1 and tmp2.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (2048 + SC[5] + tmp2) >> 12 is stored at out_row[j - 2 * offset]:
 *     a division by 4096 with round-half-up.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the stores into SR0..SR5 and SC[], the stepping of SC and a
 * delete[] for SC0 are not shown either - confirm.
 */
120 #define DO_FEATHER_7(type, int_type, max, N); \
122 int offset = N / 2; \
123 int frame_w = input->get_w(); \
124 int frame_h = input->get_h(); \
125 int start_in = start_out - offset; \
126 int end_in = end_out + offset; \
127 type **in_rows = (type**)input->get_rows(); \
128 type **out_rows = (type**)output->get_rows(); \
130 int_type tmp1, tmp2; \
131 int_type SR0, SR1, SR2, SR3, SR4, SR5; \
132 int_type *SC0 = new int_type[(frame_w + offset * 2) * 6]; \
133 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 6); \
134 for (i = start_in; i < end_in; i++) \
138 in_row = in_rows[0]; \
139 else if (i >= frame_h) \
140 in_row = in_rows[frame_h - 1]; \
142 in_row = in_rows[i]; \
143 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = 0; \
146 if (i >= start_out + offset && i<= end_out + offset) \
147 out_row = out_rows[i - offset]; \
150 int_type *SC = SC0; \
151 for (j = 0; j < frame_w + offset * 2; j++) \
155 else if (j >= frame_w + offset) \
156 tmp1 = in_row[frame_w - 1]; \
158 tmp1 = in_row[j - offset]; \
171 tmp2 = SC[0] + tmp1; \
173 tmp1 = SC[1] + tmp2; \
175 tmp2 = SC[2] + tmp1; \
177 tmp1 = SC[3] + tmp2; \
179 tmp2 = SC[4] + tmp1; \
181 if (j >= offset * 2) \
182 if (out_row) out_row[j - offset * 2] = (2048 + SC[5] + tmp2) >> 12; \
/*
 * DO_FEATHER_9(type, int_type, max, N) - feather variant used for N = 9.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR7 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint32_t) and
 * (uint16_t, uint32_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR7 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - SC0 is a zero-filled new[] array sized for 8 int_type slots per
 *     column position; SC[0]..SC[7] are added to the running sum, which
 *     alternates between tmp1 and tmp2.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (32768 + SC[7] + tmp2) >> 16 is stored at out_row[j - 2 * offset]:
 *     a division by 2^16 with round-half-up.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the stores into SR0..SR7 and SC[], the stepping of SC and a
 * delete[] for SC0 are not shown either - confirm.
 */
190 #define DO_FEATHER_9(type, int_type, max, N); \
192 int offset = N / 2; \
193 int frame_w = input->get_w(); \
194 int frame_h = input->get_h(); \
195 int start_in = start_out - offset; \
196 int end_in = end_out + offset; \
197 type **in_rows = (type**)input->get_rows(); \
198 type **out_rows = (type**)output->get_rows(); \
200 int_type tmp1, tmp2; \
201 int_type SR0, SR1, SR2, SR3, SR4, SR5, SR6, SR7; \
202 int_type *SC0 = new int_type[(frame_w + offset * 2) * 8]; \
203 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 8); \
204 for (i = start_in; i < end_in; i++) \
208 in_row = in_rows[0]; \
209 else if (i >= frame_h) \
210 in_row = in_rows[frame_h - 1]; \
212 in_row = in_rows[i]; \
213 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = SR6 = SR7 = 0; \
216 if (i >= start_out + offset && i<= end_out + offset) \
217 out_row = out_rows[i - offset]; \
220 int_type *SC = SC0; \
221 for (j = 0; j < frame_w + offset * 2; j++) \
225 else if (j >= frame_w + offset) \
226 tmp1 = in_row[frame_w - 1]; \
228 tmp1 = in_row[j - offset]; \
245 tmp2 = SC[0] + tmp1; \
247 tmp1 = SC[1] + tmp2; \
249 tmp2 = SC[2] + tmp1; \
251 tmp1 = SC[3] + tmp2; \
253 tmp2 = SC[4] + tmp1; \
255 tmp1 = SC[5] + tmp2; \
257 tmp2 = SC[6] + tmp1; \
259 if (j >= offset * 2) \
260 if (out_row) out_row[j - offset * 2] = (32768 + SC[7] + tmp2) >> 16; \
/*
 * DO_FEATHER_11(type, int_type, max, N) - feather variant used for N = 11.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR9 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint32_t) and
 * (uint16_t, uint64_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR9 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - SC0 is a zero-filled new[] array sized for 10 int_type slots per
 *     column position; SC[0]..SC[9] are added to the running sum, which
 *     alternates between tmp1 and tmp2.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (524288 + SC[9] + tmp2) >> 20 is stored at out_row[j - 2 * offset]:
 *     a division by 2^20 with round-half-up.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the stores into SR0..SR9 and SC[], the stepping of SC and a
 * delete[] for SC0 are not shown either - confirm.
 */
268 #define DO_FEATHER_11(type, int_type, max, N); \
270 int offset = N / 2; \
271 int frame_w = input->get_w(); \
272 int frame_h = input->get_h(); \
273 int start_in = start_out - offset; \
274 int end_in = end_out + offset; \
275 type **in_rows = (type**)input->get_rows(); \
276 type **out_rows = (type**)output->get_rows(); \
278 int_type tmp1, tmp2; \
279 int_type SR0, SR1, SR2, SR3, SR4, SR5, SR6, SR7, SR8, SR9; \
280 int_type *SC0 = new int_type[(frame_w + offset * 2) * 10]; \
281 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 10); \
282 for (i = start_in; i < end_in; i++) \
286 in_row = in_rows[0]; \
287 else if (i >= frame_h) \
288 in_row = in_rows[frame_h - 1]; \
290 in_row = in_rows[i]; \
291 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = SR6 = SR7 = SR8 = SR9 = 0; \
294 if (i >= start_out + offset && i<= end_out + offset) \
295 out_row = out_rows[i - offset]; \
298 int_type *SC = SC0; \
299 for (j = 0; j < frame_w + offset * 2; j++) \
303 else if (j >= frame_w + offset) \
304 tmp1 = in_row[frame_w - 1]; \
306 tmp1 = in_row[j - offset]; \
327 tmp2 = SC[0] + tmp1; \
329 tmp1 = SC[1] + tmp2; \
331 tmp2 = SC[2] + tmp1; \
333 tmp1 = SC[3] + tmp2; \
335 tmp2 = SC[4] + tmp1; \
337 tmp1 = SC[5] + tmp2; \
339 tmp2 = SC[6] + tmp1; \
341 tmp1 = SC[7] + tmp2; \
343 tmp2 = SC[8] + tmp1; \
345 if (j >= offset * 2) \
346 if (out_row) out_row[j - offset * 2] = (524288 + SC[9] + tmp2) >> 20; \
/*
 * DO_FEATHER_13(type, int_type, max, N) - feather variant used for N = 13.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR11 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint32_t) and
 * (uint16_t, uint64_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR11 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - The running sum alternates between tmp1 and tmp2: the SR terms are
 *     added first (SR10 and SR11 are the ones visible here), then
 *     SC[0]..SC[11].
 *   - SC0 is a zero-filled new[] array sized for 12 int_type slots per
 *     column position.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (8388608 + SC[11] + tmp2) >> 24 is stored at out_row[j - 2 * offset]:
 *     a division by 2^24 with round-half-up.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the SR0..SR9 additions, the stores into SR and SC[], the
 * stepping of SC and a delete[] for SC0 are not shown either - confirm.
 */
354 #define DO_FEATHER_13(type, int_type, max, N); \
356 int offset = N / 2; \
357 int frame_w = input->get_w(); \
358 int frame_h = input->get_h(); \
359 int start_in = start_out - offset; \
360 int end_in = end_out + offset; \
361 type **in_rows = (type**)input->get_rows(); \
362 type **out_rows = (type**)output->get_rows(); \
364 int_type tmp1, tmp2; \
365 int_type SR0, SR1, SR2, SR3, SR4, SR5, SR6, SR7, SR8, SR9, SR10, SR11; \
366 int_type *SC0 = new int_type[(frame_w + offset * 2) * 12]; \
367 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 12); \
368 for (i = start_in; i < end_in; i++) \
372 in_row = in_rows[0]; \
373 else if (i >= frame_h) \
374 in_row = in_rows[frame_h - 1]; \
376 in_row = in_rows[i]; \
377 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = SR6 = SR7 = SR8 = SR9 = SR10 = SR11 = 0; \
380 if (i >= start_out + offset && i<= end_out + offset) \
381 out_row = out_rows[i - offset]; \
384 int_type *SC = SC0; \
385 for (j = 0; j < frame_w + offset * 2; j++) \
389 else if (j >= frame_w + offset) \
390 tmp1 = in_row[frame_w - 1]; \
392 tmp1 = in_row[j - offset]; \
413 tmp2 = SR10 + tmp1; \
415 tmp1 = SR11 + tmp2; \
417 tmp2 = SC[0] + tmp1; \
419 tmp1 = SC[1] + tmp2; \
421 tmp2 = SC[2] + tmp1; \
423 tmp1 = SC[3] + tmp2; \
425 tmp2 = SC[4] + tmp1; \
427 tmp1 = SC[5] + tmp2; \
429 tmp2 = SC[6] + tmp1; \
431 tmp1 = SC[7] + tmp2; \
433 tmp2 = SC[8] + tmp1; \
435 tmp1 = SC[9] + tmp2; \
437 tmp2 = SC[10] + tmp1; \
439 if (j >= offset * 2) \
440 if (out_row) out_row[j - offset * 2] = (8388608 + SC[11] + tmp2) >> 24; \
/*
 * DO_FEATHER_15(type, int_type, max, N) - feather variant used for N = 15.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR13 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint64_t) and
 * (uint16_t, uint64_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR13 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - The running sum alternates between tmp1 and tmp2: the SR terms are
 *     added first (SR10..SR13 are the ones visible here), then
 *     SC[0]..SC[13].
 *   - SC0 is a zero-filled new[] array sized for 14 int_type slots per
 *     column position.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (2^27 + SC[13] + tmp2) >> 28 is stored at out_row[j - 2 * offset]:
 *     a division by 2^28 with round-half-up. The rounding constant is
 *     written as ((uint64_t)1 << 27), so the sum and the shift are carried
 *     out in 64-bit unsigned arithmetic.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the SR0..SR9 additions, the stores into SR and SC[], the
 * stepping of SC and a delete[] for SC0 are not shown either - confirm.
 */
448 #define DO_FEATHER_15(type, int_type, max, N); \
450 int offset = N / 2; \
451 int frame_w = input->get_w(); \
452 int frame_h = input->get_h(); \
453 int start_in = start_out - offset; \
454 int end_in = end_out + offset; \
455 type **in_rows = (type**)input->get_rows(); \
456 type **out_rows = (type**)output->get_rows(); \
458 int_type tmp1, tmp2; \
459 int_type SR0, SR1, SR2, SR3, SR4, SR5, SR6, SR7, SR8, SR9, SR10, SR11, SR12, SR13; \
460 int_type *SC0 = new int_type[(frame_w + offset * 2) * 14]; \
461 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 14); \
462 for (i = start_in; i < end_in; i++) \
466 in_row = in_rows[0]; \
467 else if (i >= frame_h) \
468 in_row = in_rows[frame_h - 1]; \
470 in_row = in_rows[i]; \
471 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = SR6 = SR7 = SR8 = SR9 = SR10 = SR11 = SR12 = SR13 = 0; \
474 if (i >= start_out + offset && i<= end_out + offset) \
475 out_row = out_rows[i - offset]; \
478 int_type *SC = SC0; \
479 for (j = 0; j < frame_w + offset * 2; j++) \
483 else if (j >= frame_w + offset) \
484 tmp1 = in_row[frame_w - 1]; \
486 tmp1 = in_row[j - offset]; \
507 tmp2 = SR10 + tmp1; \
509 tmp1 = SR11 + tmp2; \
511 tmp2 = SR12 + tmp1; \
513 tmp1 = SR13 + tmp2; \
515 tmp2 = SC[0] + tmp1; \
517 tmp1 = SC[1] + tmp2; \
519 tmp2 = SC[2] + tmp1; \
521 tmp1 = SC[3] + tmp2; \
523 tmp2 = SC[4] + tmp1; \
525 tmp1 = SC[5] + tmp2; \
527 tmp2 = SC[6] + tmp1; \
529 tmp1 = SC[7] + tmp2; \
531 tmp2 = SC[8] + tmp1; \
533 tmp1 = SC[9] + tmp2; \
535 tmp2 = SC[10] + tmp1; \
537 tmp1 = SC[11] + tmp2; \
539 tmp2 = SC[12] + tmp1; \
541 if (j >= offset * 2) \
542 if (out_row) out_row[j - offset * 2] = (((uint64_t)1 <<27) + SC[13] + tmp2) >> 28; \
/*
 * DO_FEATHER_17(type, int_type, max, N) - feather variant used for N = 17.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR15 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint64_t) and
 * (uint16_t, uint64_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR15 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - The running sum alternates between tmp1 and tmp2: the SR terms are
 *     added first (SR10..SR15 are the ones visible here), then
 *     SC[0]..SC[15].
 *   - SC0 is a zero-filled new[] array sized for 16 int_type slots per
 *     column position.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (2^31 + SC[15] + tmp2) >> 32 is stored at out_row[j - 2 * offset]:
 *     a division by 2^32 with round-half-up. The rounding constant is
 *     written as ((uint64_t)1 << 31), so the sum and the shift are carried
 *     out in 64-bit unsigned arithmetic.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the SR0..SR9 additions, the stores into SR and SC[], the
 * stepping of SC and a delete[] for SC0 are not shown either - confirm.
 */
550 #define DO_FEATHER_17(type, int_type, max, N); \
552 int offset = N / 2; \
553 int frame_w = input->get_w(); \
554 int frame_h = input->get_h(); \
555 int start_in = start_out - offset; \
556 int end_in = end_out + offset; \
557 type **in_rows = (type**)input->get_rows(); \
558 type **out_rows = (type**)output->get_rows(); \
560 int_type tmp1, tmp2; \
561 int_type SR0, SR1, SR2, SR3, SR4, SR5, SR6, SR7, SR8, SR9, SR10, SR11, SR12, SR13, SR14, SR15; \
562 int_type *SC0 = new int_type[(frame_w + offset * 2) * 16]; \
563 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 16); \
564 for (i = start_in; i < end_in; i++) \
568 in_row = in_rows[0]; \
569 else if (i >= frame_h) \
570 in_row = in_rows[frame_h - 1]; \
572 in_row = in_rows[i]; \
573 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = SR6 = SR7 = SR8 = SR9 = SR10 = SR11 = SR12 = SR13 = SR14 = SR15 = 0; \
576 if (i >= start_out + offset && i<= end_out + offset) \
577 out_row = out_rows[i - offset]; \
580 int_type *SC = SC0; \
581 for (j = 0; j < frame_w + offset * 2; j++) \
585 else if (j >= frame_w + offset) \
586 tmp1 = in_row[frame_w - 1]; \
588 tmp1 = in_row[j - offset]; \
609 tmp2 = SR10 + tmp1; \
611 tmp1 = SR11 + tmp2; \
613 tmp2 = SR12 + tmp1; \
615 tmp1 = SR13 + tmp2; \
617 tmp2 = SR14 + tmp1; \
619 tmp1 = SR15 + tmp2; \
621 tmp2 = SC[0] + tmp1; \
623 tmp1 = SC[1] + tmp2; \
625 tmp2 = SC[2] + tmp1; \
627 tmp1 = SC[3] + tmp2; \
629 tmp2 = SC[4] + tmp1; \
631 tmp1 = SC[5] + tmp2; \
633 tmp2 = SC[6] + tmp1; \
635 tmp1 = SC[7] + tmp2; \
637 tmp2 = SC[8] + tmp1; \
639 tmp1 = SC[9] + tmp2; \
641 tmp2 = SC[10] + tmp1; \
643 tmp1 = SC[11] + tmp2; \
645 tmp2 = SC[12] + tmp1; \
647 tmp1 = SC[13] + tmp2; \
649 tmp2 = SC[14] + tmp1; \
651 if (j >= offset * 2) \
652 if (out_row) out_row[j - offset * 2] = (((uint64_t)1 <<31) + SC[15] + tmp2) >> 32; \
/*
 * DO_FEATHER_19(type, int_type, max, N) - feather variant used for N = 19.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR17 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint64_t) and
 * (uint16_t, uint64_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR17 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - The running sum alternates between tmp1 and tmp2: the SR terms are
 *     added first (SR10..SR17 are the ones visible here), then
 *     SC[0]..SC[17].
 *   - SC0 is a zero-filled new[] array sized for 18 int_type slots per
 *     column position.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (2^35 + SC[17] + tmp2) >> 36 is stored at out_row[j - 2 * offset]:
 *     a division by 2^36 with round-half-up. The rounding constant is
 *     written as ((uint64_t)1 << 35), so the sum and the shift are carried
 *     out in 64-bit unsigned arithmetic.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the SR0..SR9 additions, the stores into SR and SC[], the
 * stepping of SC and a delete[] for SC0 are not shown either - confirm.
 */
660 #define DO_FEATHER_19(type, int_type, max, N); \
662 int offset = N / 2; \
663 int frame_w = input->get_w(); \
664 int frame_h = input->get_h(); \
665 int start_in = start_out - offset; \
666 int end_in = end_out + offset; \
667 type **in_rows = (type**)input->get_rows(); \
668 type **out_rows = (type**)output->get_rows(); \
670 int_type tmp1, tmp2; \
671 int_type SR0, SR1, SR2, SR3, SR4, SR5, SR6, SR7, SR8, SR9, SR10, SR11, SR12, SR13, SR14, SR15, SR16, SR17; \
672 int_type *SC0 = new int_type[(frame_w + offset * 2) * 18]; \
673 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 18); \
674 for (i = start_in; i < end_in; i++) \
678 in_row = in_rows[0]; \
679 else if (i >= frame_h) \
680 in_row = in_rows[frame_h - 1]; \
682 in_row = in_rows[i]; \
683 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = SR6 = SR7 = SR8 = SR9 = SR10 = SR11 = SR12 = SR13 = SR14 = SR15 = SR16 = SR17 = 0; \
686 if (i >= start_out + offset && i<= end_out + offset) \
687 out_row = out_rows[i - offset]; \
690 int_type *SC = SC0; \
691 for (j = 0; j < frame_w + offset * 2; j++) \
695 else if (j >= frame_w + offset) \
696 tmp1 = in_row[frame_w - 1]; \
698 tmp1 = in_row[j - offset]; \
719 tmp2 = SR10 + tmp1; \
721 tmp1 = SR11 + tmp2; \
723 tmp2 = SR12 + tmp1; \
725 tmp1 = SR13 + tmp2; \
727 tmp2 = SR14 + tmp1; \
729 tmp1 = SR15 + tmp2; \
731 tmp2 = SR16 + tmp1; \
733 tmp1 = SR17 + tmp2; \
735 tmp2 = SC[0] + tmp1; \
737 tmp1 = SC[1] + tmp2; \
739 tmp2 = SC[2] + tmp1; \
741 tmp1 = SC[3] + tmp2; \
743 tmp2 = SC[4] + tmp1; \
745 tmp1 = SC[5] + tmp2; \
747 tmp2 = SC[6] + tmp1; \
749 tmp1 = SC[7] + tmp2; \
751 tmp2 = SC[8] + tmp1; \
753 tmp1 = SC[9] + tmp2; \
755 tmp2 = SC[10] + tmp1; \
757 tmp1 = SC[11] + tmp2; \
759 tmp2 = SC[12] + tmp1; \
761 tmp1 = SC[13] + tmp2; \
763 tmp2 = SC[14] + tmp1; \
765 tmp1 = SC[15] + tmp2; \
767 tmp2 = SC[16] + tmp1; \
769 if (j >= offset * 2) \
770 if (out_row) out_row[j - offset * 2] = (((uint64_t)1 <<35) + SC[17] + tmp2) >> 36; \
/*
 * DO_FEATHER_21(type, int_type, max, N) - feather variant used for N = 21.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR19 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint64_t) and
 * (uint16_t, uint64_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR19 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - The running sum alternates between tmp1 and tmp2: the SR terms are
 *     added first (SR10..SR19 are the ones visible here), then
 *     SC[0]..SC[19].
 *   - SC0 is a zero-filled new[] array sized for 20 int_type slots per
 *     column position.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (2^39 + SC[19] + tmp2) >> 40 is stored at out_row[j - 2 * offset]:
 *     a division by 2^40 with round-half-up. The rounding constant is
 *     written as ((uint64_t)1 << 39), so the sum and the shift are carried
 *     out in 64-bit unsigned arithmetic.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the SR0..SR9 additions, the stores into SR and SC[], the
 * stepping of SC and a delete[] for SC0 are not shown either - confirm.
 */
778 #define DO_FEATHER_21(type, int_type, max, N); \
780 int offset = N / 2; \
781 int frame_w = input->get_w(); \
782 int frame_h = input->get_h(); \
783 int start_in = start_out - offset; \
784 int end_in = end_out + offset; \
785 type **in_rows = (type**)input->get_rows(); \
786 type **out_rows = (type**)output->get_rows(); \
788 int_type tmp1, tmp2; \
789 int_type SR0, SR1, SR2, SR3, SR4, SR5, SR6, SR7, SR8, SR9, SR10, SR11, SR12, SR13, SR14, SR15, SR16, SR17, SR18, SR19; \
790 int_type *SC0 = new int_type[(frame_w + offset * 2) * 20]; \
791 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 20); \
792 for (i = start_in; i < end_in; i++) \
796 in_row = in_rows[0]; \
797 else if (i >= frame_h) \
798 in_row = in_rows[frame_h - 1]; \
800 in_row = in_rows[i]; \
801 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = SR6 = SR7 = SR8 = SR9 = SR10 = SR11 = SR12 = SR13 = SR14 = SR15 = SR16 = SR17 = SR18 = SR19 = 0; \
804 if (i >= start_out + offset && i<= end_out + offset) \
805 out_row = out_rows[i - offset]; \
808 int_type *SC = SC0; \
809 for (j = 0; j < frame_w + offset * 2; j++) \
813 else if (j >= frame_w + offset) \
814 tmp1 = in_row[frame_w - 1]; \
816 tmp1 = in_row[j - offset]; \
837 tmp2 = SR10 + tmp1; \
839 tmp1 = SR11 + tmp2; \
841 tmp2 = SR12 + tmp1; \
843 tmp1 = SR13 + tmp2; \
845 tmp2 = SR14 + tmp1; \
847 tmp1 = SR15 + tmp2; \
849 tmp2 = SR16 + tmp1; \
851 tmp1 = SR17 + tmp2; \
853 tmp2 = SR18 + tmp1; \
855 tmp1 = SR19 + tmp2; \
857 tmp2 = SC[0] + tmp1; \
859 tmp1 = SC[1] + tmp2; \
861 tmp2 = SC[2] + tmp1; \
863 tmp1 = SC[3] + tmp2; \
865 tmp2 = SC[4] + tmp1; \
867 tmp1 = SC[5] + tmp2; \
869 tmp2 = SC[6] + tmp1; \
871 tmp1 = SC[7] + tmp2; \
873 tmp2 = SC[8] + tmp1; \
875 tmp1 = SC[9] + tmp2; \
877 tmp2 = SC[10] + tmp1; \
879 tmp1 = SC[11] + tmp2; \
881 tmp2 = SC[12] + tmp1; \
883 tmp1 = SC[13] + tmp2; \
885 tmp2 = SC[14] + tmp1; \
887 tmp1 = SC[15] + tmp2; \
889 tmp2 = SC[16] + tmp1; \
891 tmp1 = SC[17] + tmp2; \
893 tmp2 = SC[18] + tmp1; \
895 if (j >= offset * 2) \
896 if (out_row) out_row[j - offset * 2] = (((uint64_t)1 <<39) + SC[19] + tmp2) >> 40; \
/*
 * DO_FEATHER_23(type, int_type, max, N) - feather variant used for N = 23.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR21 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint64_t) and
 * (uint16_t, uint64_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR21 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - The running sum alternates between tmp1 and tmp2: the SR terms are
 *     added first (SR10..SR21 are the ones visible here), then
 *     SC[0]..SC[21].
 *   - SC0 is a zero-filled new[] array sized for 22 int_type slots per
 *     column position.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (2^43 + SC[21] + tmp2) >> 44 is stored at out_row[j - 2 * offset]:
 *     a division by 2^44 with round-half-up. The rounding constant is
 *     written as ((uint64_t)1 << 43), so the sum and the shift are carried
 *     out in 64-bit unsigned arithmetic.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the SR0..SR9 additions, the stores into SR and SC[], the
 * stepping of SC and a delete[] for SC0 are not shown either - confirm.
 */
904 #define DO_FEATHER_23(type, int_type, max, N); \
906 int offset = N / 2; \
907 int frame_w = input->get_w(); \
908 int frame_h = input->get_h(); \
909 int start_in = start_out - offset; \
910 int end_in = end_out + offset; \
911 type **in_rows = (type**)input->get_rows(); \
912 type **out_rows = (type**)output->get_rows(); \
914 int_type tmp1, tmp2; \
915 int_type SR0, SR1, SR2, SR3, SR4, SR5, SR6, SR7, SR8, SR9, SR10, SR11, SR12, SR13, SR14, SR15, SR16, SR17, SR18, SR19, SR20, SR21; \
916 int_type *SC0 = new int_type[(frame_w + offset * 2) * 22]; \
917 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 22); \
918 for (i = start_in; i < end_in; i++) \
922 in_row = in_rows[0]; \
923 else if (i >= frame_h) \
924 in_row = in_rows[frame_h - 1]; \
926 in_row = in_rows[i]; \
927 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = SR6 = SR7 = SR8 = SR9 = SR10 = SR11 = SR12 = SR13 = SR14 = SR15 = SR16 = SR17 = SR18 = SR19 = SR20 = SR21 = 0; \
930 if (i >= start_out + offset && i<= end_out + offset) \
931 out_row = out_rows[i - offset]; \
934 int_type *SC = SC0; \
935 for (j = 0; j < frame_w + offset * 2; j++) \
939 else if (j >= frame_w + offset) \
940 tmp1 = in_row[frame_w - 1]; \
942 tmp1 = in_row[j - offset]; \
963 tmp2 = SR10 + tmp1; \
965 tmp1 = SR11 + tmp2; \
967 tmp2 = SR12 + tmp1; \
969 tmp1 = SR13 + tmp2; \
971 tmp2 = SR14 + tmp1; \
973 tmp1 = SR15 + tmp2; \
975 tmp2 = SR16 + tmp1; \
977 tmp1 = SR17 + tmp2; \
979 tmp2 = SR18 + tmp1; \
981 tmp1 = SR19 + tmp2; \
983 tmp2 = SR20 + tmp1; \
985 tmp1 = SR21 + tmp2; \
987 tmp2 = SC[0] + tmp1; \
989 tmp1 = SC[1] + tmp2; \
991 tmp2 = SC[2] + tmp1; \
993 tmp1 = SC[3] + tmp2; \
995 tmp2 = SC[4] + tmp1; \
997 tmp1 = SC[5] + tmp2; \
999 tmp2 = SC[6] + tmp1; \
1001 tmp1 = SC[7] + tmp2; \
1003 tmp2 = SC[8] + tmp1; \
1005 tmp1 = SC[9] + tmp2; \
1007 tmp2 = SC[10] + tmp1; \
1009 tmp1 = SC[11] + tmp2; \
1011 tmp2 = SC[12] + tmp1; \
1013 tmp1 = SC[13] + tmp2; \
1015 tmp2 = SC[14] + tmp1; \
1017 tmp1 = SC[15] + tmp2; \
1019 tmp2 = SC[16] + tmp1; \
1021 tmp1 = SC[17] + tmp2; \
1023 tmp2 = SC[18] + tmp1; \
1025 tmp1 = SC[19] + tmp2; \
1027 tmp2 = SC[20] + tmp1; \
1029 if (j >= offset * 2) \
1030 if (out_row) out_row[j - offset * 2] = (((uint64_t)1 <<43) + SC[21] + tmp2) >> 44; \
/*
 * DO_FEATHER_25(type, int_type, max, N) - feather variant used for N = 25.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR23 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * DO_FEATHER_N in this file expands it with (unsigned char, uint64_t) and
 * (uint16_t, uint64_t).
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR23 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - The running sum alternates between tmp1 and tmp2: SR0..SR23 are
 *     added first, then SC[0]..SC[23].
 *   - SC0 is a zero-filled new[] array sized for 24 int_type slots per
 *     column position.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (2^47 + SC[23] + tmp2) >> 48 is stored at out_row[j - 2 * offset]:
 *     a division by 2^48 with round-half-up. The rounding constant is
 *     written as ((uint64_t)1 << 47), so the sum and the shift are carried
 *     out in 64-bit unsigned arithmetic.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the stores into SR and SC[], the stepping of SC and a
 * delete[] for SC0 are not shown either - confirm.
 */
1038 #define DO_FEATHER_25(type, int_type, max, N); \
1040 int offset = N / 2; \
1041 int frame_w = input->get_w(); \
1042 int frame_h = input->get_h(); \
1043 int start_in = start_out - offset; \
1044 int end_in = end_out + offset; \
1045 type **in_rows = (type**)input->get_rows(); \
1046 type **out_rows = (type**)output->get_rows(); \
1048 int_type tmp1, tmp2; \
1049 int_type SR0, SR1, SR2, SR3, SR4, SR5, SR6, SR7, SR8, SR9, SR10, SR11, SR12, SR13, SR14, SR15, SR16, SR17, SR18, SR19, SR20, SR21, SR22, SR23; \
1050 int_type *SC0 = new int_type[(frame_w + offset * 2) * 24]; \
1051 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 24); \
1052 for (i = start_in; i < end_in; i++) \
1056 in_row = in_rows[0]; \
1057 else if (i >= frame_h) \
1058 in_row = in_rows[frame_h - 1]; \
1060 in_row = in_rows[i]; \
1061 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = SR6 = SR7 = SR8 = SR9 = SR10 = SR11 = SR12 = SR13 = SR14 = SR15 = SR16 = SR17 = SR18 = SR19 = SR20 = SR21 = SR22 = SR23 = 0; \
1064 if (i >= start_out + offset && i<= end_out + offset) \
1065 out_row = out_rows[i - offset]; \
1068 int_type *SC = SC0; \
1069 for (j = 0; j < frame_w + offset * 2; j++) \
1073 else if (j >= frame_w + offset) \
1074 tmp1 = in_row[frame_w - 1]; \
1076 tmp1 = in_row[j - offset]; \
1077 tmp2 = SR0 + tmp1; \
1079 tmp1 = SR1 + tmp2; \
1081 tmp2 = SR2 + tmp1; \
1083 tmp1 = SR3 + tmp2; \
1085 tmp2 = SR4 + tmp1; \
1087 tmp1 = SR5 + tmp2; \
1089 tmp2 = SR6 + tmp1; \
1091 tmp1 = SR7 + tmp2; \
1093 tmp2 = SR8 + tmp1; \
1095 tmp1 = SR9 + tmp2; \
1097 tmp2 = SR10 + tmp1; \
1099 tmp1 = SR11 + tmp2; \
1101 tmp2 = SR12 + tmp1; \
1103 tmp1 = SR13 + tmp2; \
1105 tmp2 = SR14 + tmp1; \
1107 tmp1 = SR15 + tmp2; \
1109 tmp2 = SR16 + tmp1; \
1111 tmp1 = SR17 + tmp2; \
1113 tmp2 = SR18 + tmp1; \
1115 tmp1 = SR19 + tmp2; \
1117 tmp2 = SR20 + tmp1; \
1119 tmp1 = SR21 + tmp2; \
1121 tmp2 = SR22 + tmp1; \
1123 tmp1 = SR23 + tmp2; \
1125 tmp2 = SC[0] + tmp1; \
1127 tmp1 = SC[1] + tmp2; \
1129 tmp2 = SC[2] + tmp1; \
1131 tmp1 = SC[3] + tmp2; \
1133 tmp2 = SC[4] + tmp1; \
1135 tmp1 = SC[5] + tmp2; \
1137 tmp2 = SC[6] + tmp1; \
1139 tmp1 = SC[7] + tmp2; \
1141 tmp2 = SC[8] + tmp1; \
1143 tmp1 = SC[9] + tmp2; \
1145 tmp2 = SC[10] + tmp1; \
1147 tmp1 = SC[11] + tmp2; \
1149 tmp2 = SC[12] + tmp1; \
1151 tmp1 = SC[13] + tmp2; \
1153 tmp2 = SC[14] + tmp1; \
1155 tmp1 = SC[15] + tmp2; \
1157 tmp2 = SC[16] + tmp1; \
1159 tmp1 = SC[17] + tmp2; \
1161 tmp2 = SC[18] + tmp1; \
1163 tmp1 = SC[19] + tmp2; \
1165 tmp2 = SC[20] + tmp1; \
1167 tmp1 = SC[21] + tmp2; \
1169 tmp2 = SC[22] + tmp1; \
1171 if (j >= offset * 2) \
1172 if (out_row) out_row[j - offset * 2] = (((uint64_t)1 <<47) + SC[23] + tmp2) >> 48; \
/*
 * DO_FEATHER_27(type, int_type, max, N) - feather variant used for N = 27.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR25 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * The lines of DO_FEATHER_N shown in this file expand it with
 * (unsigned char, uint64_t); no other expansion appears in those lines.
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR25 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - The running sum alternates between tmp1 and tmp2: SR0..SR25 are
 *     added first, then SC[0]..SC[25].
 *   - SC0 is a zero-filled new[] array sized for 26 int_type slots per
 *     column position.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (2^51 + SC[25] + tmp2) >> 52 is stored at out_row[j - 2 * offset]:
 *     a division by 2^52 with round-half-up. The rounding constant is
 *     written as ((uint64_t)1 << 51), so the sum and the shift are carried
 *     out in 64-bit unsigned arithmetic.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the stores into SR and SC[], the stepping of SC and a
 * delete[] for SC0 are not shown either - confirm.
 */
1180 #define DO_FEATHER_27(type, int_type, max, N); \
1182 int offset = N / 2; \
1183 int frame_w = input->get_w(); \
1184 int frame_h = input->get_h(); \
1185 int start_in = start_out - offset; \
1186 int end_in = end_out + offset; \
1187 type **in_rows = (type**)input->get_rows(); \
1188 type **out_rows = (type**)output->get_rows(); \
1190 int_type tmp1, tmp2; \
1191 int_type SR0, SR1, SR2, SR3, SR4, SR5, SR6, SR7, SR8, SR9, SR10, SR11, SR12, SR13, SR14, SR15, SR16, SR17, SR18, SR19, SR20, SR21, SR22, SR23, SR24, SR25; \
1192 int_type *SC0 = new int_type[(frame_w + offset * 2) * 26]; \
1193 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 26); \
1194 for (i = start_in; i < end_in; i++) \
1198 in_row = in_rows[0]; \
1199 else if (i >= frame_h) \
1200 in_row = in_rows[frame_h - 1]; \
1202 in_row = in_rows[i]; \
1203 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = SR6 = SR7 = SR8 = SR9 = SR10 = SR11 = SR12 = SR13 = SR14 = SR15 = SR16 = SR17 = SR18 = SR19 = SR20 = SR21 = SR22 = SR23 = SR24 = SR25 = 0; \
1206 if (i >= start_out + offset && i<= end_out + offset) \
1207 out_row = out_rows[i - offset]; \
1210 int_type *SC = SC0; \
1211 for (j = 0; j < frame_w + offset * 2; j++) \
1215 else if (j >= frame_w + offset) \
1216 tmp1 = in_row[frame_w - 1]; \
1218 tmp1 = in_row[j - offset]; \
1219 tmp2 = SR0 + tmp1; \
1221 tmp1 = SR1 + tmp2; \
1223 tmp2 = SR2 + tmp1; \
1225 tmp1 = SR3 + tmp2; \
1227 tmp2 = SR4 + tmp1; \
1229 tmp1 = SR5 + tmp2; \
1231 tmp2 = SR6 + tmp1; \
1233 tmp1 = SR7 + tmp2; \
1235 tmp2 = SR8 + tmp1; \
1237 tmp1 = SR9 + tmp2; \
1239 tmp2 = SR10 + tmp1; \
1241 tmp1 = SR11 + tmp2; \
1243 tmp2 = SR12 + tmp1; \
1245 tmp1 = SR13 + tmp2; \
1247 tmp2 = SR14 + tmp1; \
1249 tmp1 = SR15 + tmp2; \
1251 tmp2 = SR16 + tmp1; \
1253 tmp1 = SR17 + tmp2; \
1255 tmp2 = SR18 + tmp1; \
1257 tmp1 = SR19 + tmp2; \
1259 tmp2 = SR20 + tmp1; \
1261 tmp1 = SR21 + tmp2; \
1263 tmp2 = SR22 + tmp1; \
1265 tmp1 = SR23 + tmp2; \
1267 tmp2 = SR24 + tmp1; \
1269 tmp1 = SR25 + tmp2; \
1271 tmp2 = SC[0] + tmp1; \
1273 tmp1 = SC[1] + tmp2; \
1275 tmp2 = SC[2] + tmp1; \
1277 tmp1 = SC[3] + tmp2; \
1279 tmp2 = SC[4] + tmp1; \
1281 tmp1 = SC[5] + tmp2; \
1283 tmp2 = SC[6] + tmp1; \
1285 tmp1 = SC[7] + tmp2; \
1287 tmp2 = SC[8] + tmp1; \
1289 tmp1 = SC[9] + tmp2; \
1291 tmp2 = SC[10] + tmp1; \
1293 tmp1 = SC[11] + tmp2; \
1295 tmp2 = SC[12] + tmp1; \
1297 tmp1 = SC[13] + tmp2; \
1299 tmp2 = SC[14] + tmp1; \
1301 tmp1 = SC[15] + tmp2; \
1303 tmp2 = SC[16] + tmp1; \
1305 tmp1 = SC[17] + tmp2; \
1307 tmp2 = SC[18] + tmp1; \
1309 tmp1 = SC[19] + tmp2; \
1311 tmp2 = SC[20] + tmp1; \
1313 tmp1 = SC[21] + tmp2; \
1315 tmp2 = SC[22] + tmp1; \
1317 tmp1 = SC[23] + tmp2; \
1319 tmp2 = SC[24] + tmp1; \
1321 if (j >= offset * 2) \
1322 if (out_row) out_row[j - offset * 2] = (((uint64_t)1 <<51) + SC[25] + tmp2) >> 52; \
/*
 * DO_FEATHER_29(type, int_type, max, N) - feather variant used for N = 29.
 *
 * Parameters
 *   type      sample type; the row pointers returned by get_rows() are cast
 *             to type**.
 *   int_type  accumulator type of tmp1, tmp2, SR0..SR27 and the SC0 array.
 *   max       not referenced in the lines of this macro shown here.
 *   N         feather size; offset = N / 2 is the border added on each side.
 *
 * Names taken from the expansion site: input, output, start_out, end_out.
 * The lines of DO_FEATHER_N shown in this file expand it with
 * (unsigned char, uint64_t); no other expansion appears in those lines.
 *
 * What the visible code does
 *   - Walks rows i in [start_out - offset, end_out + offset). Rows with
 *     i >= frame_h read the last input row; in_rows[0] is chosen by a
 *     branch whose condition is not shown (presumably i < 0).
 *   - SR0..SR27 are reset to 0 and SC is rewound to SC0 for every row.
 *   - Walks frame_w + 2 * offset column positions j. Positions with
 *     j >= frame_w + offset read in_row[frame_w - 1]; otherwise
 *     in_row[j - offset] is read (a preceding branch is not shown).
 *   - The running sum alternates between tmp1 and tmp2: SR0..SR27 are
 *     added first, then SC[0]..SC[27].
 *   - SC0 is a zero-filled new[] array sized for 28 int_type slots per
 *     column position.
 *   - out_row becomes out_rows[i - offset] once i >= start_out + offset.
 *     The "i <= end_out + offset" half of that test is already implied by
 *     the loop bound i < end_in.
 *   - For j >= 2 * offset, and only if out_row is non-null,
 *     (2^55 + SC[27] + tmp2) >> 56 is stored at out_row[j - 2 * offset]:
 *     a division by 2^56 with round-half-up. The rounding constant is
 *     written as ((uint64_t)1 << 55), so the sum and the shift are carried
 *     out in 64-bit unsigned arithmetic.
 *
 * NOTE(review): the ';' after the parameter list belongs to the replacement
 * text, so every expansion starts with an empty statement.
 * NOTE(review): i, j, in_row and out_row are used but not declared in the
 * lines shown; the stores into SR and SC[], the stepping of SC and a
 * delete[] for SC0 are not shown either - confirm.
 */
1330 #define DO_FEATHER_29(type, int_type, max, N); \
1332 int offset = N / 2; \
1333 int frame_w = input->get_w(); \
1334 int frame_h = input->get_h(); \
1335 int start_in = start_out - offset; \
1336 int end_in = end_out + offset; \
1337 type **in_rows = (type**)input->get_rows(); \
1338 type **out_rows = (type**)output->get_rows(); \
1340 int_type tmp1, tmp2; \
1341 int_type SR0, SR1, SR2, SR3, SR4, SR5, SR6, SR7, SR8, SR9, SR10, SR11, SR12, SR13, SR14, SR15, SR16, SR17, SR18, SR19, SR20, SR21, SR22, SR23, SR24, SR25, SR26, SR27; \
1342 int_type *SC0 = new int_type[(frame_w + offset * 2) * 28]; \
1343 memset(SC0, 0, sizeof(int_type) * (frame_w + offset * 2) * 28); \
1344 for (i = start_in; i < end_in; i++) \
1348 in_row = in_rows[0]; \
1349 else if (i >= frame_h) \
1350 in_row = in_rows[frame_h - 1]; \
1352 in_row = in_rows[i]; \
1353 SR0 = SR1 = SR2 = SR3 = SR4 = SR5 = SR6 = SR7 = SR8 = SR9 = SR10 = SR11 = SR12 = SR13 = SR14 = SR15 = SR16 = SR17 = SR18 = SR19 = SR20 = SR21 = SR22 = SR23 = SR24 = SR25 = SR26 = SR27 = 0; \
1356 if (i >= start_out + offset && i<= end_out + offset) \
1357 out_row = out_rows[i - offset]; \
1360 int_type *SC = SC0; \
1361 for (j = 0; j < frame_w + offset * 2; j++) \
1365 else if (j >= frame_w + offset) \
1366 tmp1 = in_row[frame_w - 1]; \
1368 tmp1 = in_row[j - offset]; \
1369 tmp2 = SR0 + tmp1; \
1371 tmp1 = SR1 + tmp2; \
1373 tmp2 = SR2 + tmp1; \
1375 tmp1 = SR3 + tmp2; \
1377 tmp2 = SR4 + tmp1; \
1379 tmp1 = SR5 + tmp2; \
1381 tmp2 = SR6 + tmp1; \
1383 tmp1 = SR7 + tmp2; \
1385 tmp2 = SR8 + tmp1; \
1387 tmp1 = SR9 + tmp2; \
1389 tmp2 = SR10 + tmp1; \
1391 tmp1 = SR11 + tmp2; \
1393 tmp2 = SR12 + tmp1; \
1395 tmp1 = SR13 + tmp2; \
1397 tmp2 = SR14 + tmp1; \
1399 tmp1 = SR15 + tmp2; \
1401 tmp2 = SR16 + tmp1; \
1403 tmp1 = SR17 + tmp2; \
1405 tmp2 = SR18 + tmp1; \
1407 tmp1 = SR19 + tmp2; \
1409 tmp2 = SR20 + tmp1; \
1411 tmp1 = SR21 + tmp2; \
1413 tmp2 = SR22 + tmp1; \
1415 tmp1 = SR23 + tmp2; \
1417 tmp2 = SR24 + tmp1; \
1419 tmp1 = SR25 + tmp2; \
1421 tmp2 = SR26 + tmp1; \
1423 tmp1 = SR27 + tmp2; \
1425 tmp2 = SC[0] + tmp1; \
1427 tmp1 = SC[1] + tmp2; \
1429 tmp2 = SC[2] + tmp1; \
1431 tmp1 = SC[3] + tmp2; \
1433 tmp2 = SC[4] + tmp1; \
1435 tmp1 = SC[5] + tmp2; \
1437 tmp2 = SC[6] + tmp1; \
1439 tmp1 = SC[7] + tmp2; \
1441 tmp2 = SC[8] + tmp1; \
1443 tmp1 = SC[9] + tmp2; \
1445 tmp2 = SC[10] + tmp1; \
1447 tmp1 = SC[11] + tmp2; \
1449 tmp2 = SC[12] + tmp1; \
1451 tmp1 = SC[13] + tmp2; \
1453 tmp2 = SC[14] + tmp1; \
1455 tmp1 = SC[15] + tmp2; \
1457 tmp2 = SC[16] + tmp1; \
1459 tmp1 = SC[17] + tmp2; \
1461 tmp2 = SC[18] + tmp1; \
1463 tmp1 = SC[19] + tmp2; \
1465 tmp2 = SC[20] + tmp1; \
1467 tmp1 = SC[21] + tmp2; \
1469 tmp2 = SC[22] + tmp1; \
1471 tmp1 = SC[23] + tmp2; \
1473 tmp2 = SC[24] + tmp1; \
1475 tmp1 = SC[25] + tmp2; \
1477 tmp2 = SC[26] + tmp1; \
1479 if (j >= offset * 2) \
1480 if (out_row) out_row[j - offset * 2] = (((uint64_t)1 <<55) + SC[27] + tmp2) >> 56; \
1489 /* THIS WORKS ONLY FOR ODD N >= 3 */
1490 #define DO_FEATHER_N(type, int_type, max, N) \
1492 switch(input->get_color_model()) \
1498 DO_FEATHER_3(unsigned char, uint16_t, max, 3); \
1502 DO_FEATHER_5(unsigned char, uint16_t, max, 5); \
1506 DO_FEATHER_7(unsigned char, uint32_t, max, 7); \
1510 DO_FEATHER_9(unsigned char, uint32_t, max, 9); \
1514 DO_FEATHER_11(unsigned char, uint32_t, max, 11); \
1518 DO_FEATHER_13(unsigned char, uint32_t, max, 13); \
1522 DO_FEATHER_15(unsigned char, uint64_t, max, 15); \
1526 DO_FEATHER_17(unsigned char, uint64_t, max, 17); \
1530 DO_FEATHER_19(unsigned char, uint64_t, max, 19); \
1534 DO_FEATHER_21(unsigned char, uint64_t, max, 21); \
1538 DO_FEATHER_23(unsigned char, uint64_t, max, 23); \
1542 DO_FEATHER_25(unsigned char, uint64_t, max, 25); \
1546 DO_FEATHER_27(unsigned char, uint64_t, max, 27); \
1550 DO_FEATHER_29(unsigned char, uint64_t, max, 29); \
1560 DO_FEATHER_3(uint16_t, uint32_t, max, 3); \
1564 DO_FEATHER_5(uint16_t, uint32_t, max, 5); \
1568 DO_FEATHER_7(uint16_t, uint32_t, max, 7); \
1572 DO_FEATHER_9(uint16_t, uint32_t, max, 9); \
1576 DO_FEATHER_11(uint16_t, uint64_t, max, 11); \
1580 DO_FEATHER_13(uint16_t, uint64_t, max, 13); \
1584 DO_FEATHER_15(uint16_t, uint64_t, max, 15); \
1588 DO_FEATHER_17(uint16_t, uint64_t, max, 17); \
1592 DO_FEATHER_19(uint16_t, uint64_t, max, 19); \
1596 DO_FEATHER_21(uint16_t, uint64_t, max, 21); \
1600 DO_FEATHER_23(uint16_t, uint64_t, max, 23); \
1604 DO_FEATHER_25(uint16_t, uint64_t, max, 25); \