00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "avcodec.h"
00022 #include "dsputil.h"
00023 #include "snow.h"
00024
00025 #include "rangecoder.h"
00026
00027 #include "mpegvideo.h"
00028
00029 #undef NDEBUG
00030 #include <assert.h>
00031
/**
 * 256-entry lookup table with three output levels (-1, 0, +1).
 * Indices 0, 1 and 255 give 0, indices 2..127 give +1, indices 128..254 give -1.
 * NOTE(review): presumably indexed with a difference masked to 8 bits
 * (two's complement) -- confirm at the use sites.
 */
static const int8_t quant3[256]={
     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
};
/**
 * 256-entry lookup table with three output levels (-1, 0, +1).
 * Only index 0 gives 0; indices 1..127 give +1 and indices 128..255 give -1.
 * NOTE(review): presumably indexed with a value masked to 8 bits -- confirm.
 */
static const int8_t quant3b[256]={
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};
/**
 * 256-entry lookup table with three output levels (-1, 0, +1).
 * Indices 0 and 1 give 0; from index 2 on, even indices give +1 and odd
 * indices give -1.
 */
static const int8_t quant3bA[256]={
     0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
     1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
};
/**
 * 256-entry lookup table with five output levels (-2..+2).
 * Index 0 gives 0, 1..3 give +1, 4..127 give +2, 128..252 give -2 and
 * 253..255 give -1.
 * NOTE(review): presumably indexed with a value masked to 8 bits -- confirm.
 */
static const int8_t quant5[256]={
     0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
};
/**
 * 256-entry lookup table with seven output levels (-3..+3).
 * The lower half of the index range holds the non-negative levels, the upper
 * half (128..255) the negative ones, with the magnitude shrinking towards
 * index 255.
 * NOTE(review): presumably indexed with a value masked to 8 bits -- confirm.
 */
static const int8_t quant7[256]={
     0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table with nine output levels (-4..+4).
 * The lower half of the index range holds the non-negative levels, the upper
 * half (128..255) the negative ones, with the magnitude shrinking towards
 * index 255.
 * NOTE(review): presumably indexed with a value masked to 8 bits -- confirm.
 */
static const int8_t quant9[256]={
     0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
    -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
};
/**
 * 256-entry lookup table with eleven output levels (-5..+5).
 * The lower half of the index range holds the non-negative levels, the upper
 * half (128..255) the negative ones, with the magnitude shrinking towards
 * index 255.
 * NOTE(review): presumably indexed with a value masked to 8 bits -- confirm.
 */
static const int8_t quant11[256]={
     0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
    -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
};
/**
 * 256-entry lookup table with thirteen output levels (-6..+6).
 * The lower half of the index range holds the non-negative levels, the upper
 * half (128..255) the negative ones, with the magnitude shrinking towards
 * index 255.
 * NOTE(review): presumably indexed with a value masked to 8 bits -- confirm.
 */
static const int8_t quant13[256]={
     0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
     4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
     5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
     6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
    -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
    -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
};
00176
/*
 * Weight tables obmc32 (1024 entries, written as 32 rows of 32) and obmc16
 * (256 entries, 16 rows of 16) in three alternative shapes. Exactly one shape
 * is compiled: with "#elif 1" below that is the "64*linear" one; the cubic and
 * cos shapes are kept but disabled.
 * NOTE(review): the name suggests overlapped block motion compensation
 * windows -- confirm at the use sites.
 */
#if 0 //64*cubic
static const uint8_t obmc32[1024]={
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
    0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
    0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
    0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
    0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
    0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
    0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
    0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
    0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
    0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
    0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
    0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
    0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
    0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
    1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
    1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
    0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
    0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
    0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
    0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
    0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
    0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
    0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
    0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
    0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
    0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
    0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
    0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
    0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
    0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const uint8_t obmc16[256]={
    0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
    0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
    0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
    0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
    0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
    0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
    4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
    4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
    4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
    4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
    0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
    0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
    0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
    0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
    0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
    0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
};
#elif 1 // 64*linear
static const uint8_t obmc32[1024]={
    0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
    0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
    0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
    0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
    4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
    4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
    4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
    4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
    4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
    4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
    4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
    4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
    8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
    8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
    8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
    8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
    8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
    8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
    8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
    8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
    4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
    4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
    4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
    4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
    4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
    4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
    4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
    4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
    0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
    0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
    0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
    0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
};
static const uint8_t obmc16[256]={
    0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
    4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
    4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
    8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
    8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
    12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
    12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
    16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
    16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
    12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
    12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
    8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
    8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
    4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
    4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
    0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
};
#else //64*cos
static const uint8_t obmc32[1024]={
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
    0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
    0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
    0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
    0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
    0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
    0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
    0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
    0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
    0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
    0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
    0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
    0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
    1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
    1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
    0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
    0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
    0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
    0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
    0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
    0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
    0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
    0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
    0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
    0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
    0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
    0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
    0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const uint8_t obmc16[256]={
    0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
    0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
    0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
    0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
    0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
    4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
    4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
    0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
    0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
    4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
    4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
    0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
    0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
    0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
    0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
    0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
};
#endif
00343
00344
/**
 * 64-entry weight table written as 8 rows of 8; symmetric in both directions.
 * Smaller sibling of obmc32/obmc16 and the third entry of obmc_tab.
 */
static const uint8_t obmc8[64]={
     4, 12, 20, 28, 28, 20, 12,  4,
    12, 36, 60, 84, 84, 60, 36, 12,
    20, 60,100,140,140,100, 60, 20,
    28, 84,140,196,196,140, 84, 28,
    28, 84,140,196,196,140, 84, 28,
    20, 60,100,140,140,100, 60, 20,
    12, 36, 60, 84, 84, 60, 36, 12,
     4, 12, 20, 28, 28, 20, 12,  4,
};
00356
00357
/**
 * 16-entry weight table written as 4 rows of 4; symmetric in both directions.
 * Smallest of the obmc tables and the last entry of obmc_tab.
 */
static const uint8_t obmc4[16]={
    16, 48, 48, 16,
    48,144,144, 48,
    48,144,144, 48,
    16, 48, 48, 16,
};
00365
00366 static const uint8_t *obmc_tab[4]={
00367 obmc32, obmc16, obmc8, obmc4
00368 };
00369
00370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
00371
/* Bit flags stored in BlockNode.type. */
#define BLOCK_INTRA 1
#define BLOCK_OPT   2

/**
 * Per-block record: a 16-bit vector (mx, my), a reference index, three
 * colour bytes, a flag byte and a level byte.
 */
typedef struct BlockNode{
    int16_t mx;        ///< horizontal vector component
    int16_t my;        ///< vertical vector component
    uint8_t ref;       ///< NOTE(review): presumably the reference frame index -- confirm
    uint8_t color[3];  ///< one byte per colour component
    uint8_t type;      ///< combination of the BLOCK_* flags above
    uint8_t level;     ///< NOTE(review): presumably the block-tree depth -- confirm
}BlockNode;
00384
00385 static const BlockNode null_block= {
00386 .color= {128,128,128},
00387 .mx= 0,
00388 .my= 0,
00389 .ref= 0,
00390 .type= 0,
00391 .level= 0,
00392 };
00393
/* Size constants. MB_SIZE is derived from its base-2 logarithm, so it is
 * always a power of two (16 with the value below). */
#define LOG2_MB_SIZE        4
#define MB_SIZE             (1<<LOG2_MB_SIZE)
#define ENCODER_EXTRA_BITS  4
/* Plane.hcoeff and Plane.last_hcoeff hold HTAPS_MAX/2 coefficients each. */
#define HTAPS_MAX           8
00398
/**
 * Pair of a signed 16-bit position and an unsigned 16-bit coefficient.
 * SubBand.x_coeff points at entries of this type.
 */
typedef struct x_and_coeff{
    int16_t  x;      ///< position
    uint16_t coeff;  ///< coefficient value stored for that position
} x_and_coeff;
00403
00404 typedef struct SubBand{
00405 int level;
00406 int stride;
00407 int width;
00408 int height;
00409 int qlog;
00410 DWTELEM *buf;
00411 IDWTELEM *ibuf;
00412 int buf_x_offset;
00413 int buf_y_offset;
00414 int stride_line;
00415 x_and_coeff * x_coeff;
00416 struct SubBand *parent;
00417 uint8_t state[ 7 + 512][32];
00418 }SubBand;
00419
00420 typedef struct Plane{
00421 int width;
00422 int height;
00423 SubBand band[MAX_DECOMPOSITIONS][4];
00424
00425 int htaps;
00426 int8_t hcoeff[HTAPS_MAX/2];
00427 int diag_mc;
00428 int fast_mc;
00429
00430 int last_htaps;
00431 int8_t last_hcoeff[HTAPS_MAX/2];
00432 int last_diag_mc;
00433 }Plane;
00434
00435 typedef struct SnowContext{
00436
00437
00438 AVCodecContext *avctx;
00439 RangeCoder c;
00440 DSPContext dsp;
00441 AVFrame new_picture;
00442 AVFrame input_picture;
00443 AVFrame current_picture;
00444 AVFrame last_picture[MAX_REF_FRAMES];
00445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
00446 AVFrame mconly_picture;
00447
00448 uint8_t header_state[32];
00449 uint8_t block_state[128 + 32*128];
00450 int keyframe;
00451 int always_reset;
00452 int version;
00453 int spatial_decomposition_type;
00454 int last_spatial_decomposition_type;
00455 int temporal_decomposition_type;
00456 int spatial_decomposition_count;
00457 int last_spatial_decomposition_count;
00458 int temporal_decomposition_count;
00459 int max_ref_frames;
00460 int ref_frames;
00461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
00462 uint32_t *ref_scores[MAX_REF_FRAMES];
00463 DWTELEM *spatial_dwt_buffer;
00464 IDWTELEM *spatial_idwt_buffer;
00465 int colorspace_type;
00466 int chroma_h_shift;
00467 int chroma_v_shift;
00468 int spatial_scalability;
00469 int qlog;
00470 int last_qlog;
00471 int lambda;
00472 int lambda2;
00473 int pass1_rc;
00474 int mv_scale;
00475 int last_mv_scale;
00476 int qbias;
00477 int last_qbias;
00478 #define QBIAS_SHIFT 3
00479 int b_width;
00480 int b_height;
00481 int block_max_depth;
00482 int last_block_max_depth;
00483 Plane plane[MAX_PLANES];
00484 BlockNode *block;
00485 #define ME_CACHE_SIZE 1024
00486 int me_cache[ME_CACHE_SIZE];
00487 int me_cache_generation;
00488 slice_buffer sb;
00489
00490 MpegEncContext m;
00491 }SnowContext;
00492
00493 typedef struct {
00494 IDWTELEM *b0;
00495 IDWTELEM *b1;
00496 IDWTELEM *b2;
00497 IDWTELEM *b3;
00498 int y;
00499 } dwt_compose_t;
00500
/*
 * Return the buffer bound to line_num, binding one through
 * slice_buffer_load_line() when the line has none yet.
 * Both arguments are evaluated more than once, so they must be free of side
 * effects.
 */
#define slice_buffer_get_line(slice_buf, line_num) \
    ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] \
                                 : slice_buffer_load_line((slice_buf), (line_num)))
00502
00503
00504 static void iterative_me(SnowContext *s);
00505
00506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
00507 {
00508 int i;
00509
00510 buf->base_buffer = base_buffer;
00511 buf->line_count = line_count;
00512 buf->line_width = line_width;
00513 buf->data_count = max_allocated_lines;
00514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
00515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
00516
00517 for (i = 0; i < max_allocated_lines; i++)
00518 {
00519 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
00520 }
00521
00522 buf->data_stack_top = max_allocated_lines - 1;
00523 }
00524
00525 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
00526 {
00527 int offset;
00528 IDWTELEM * buffer;
00529
00530
00531
00532 assert(buf->data_stack_top >= 0);
00533
00534 if (buf->line[line])
00535 return buf->line[line];
00536
00537 offset = buf->line_width * line;
00538 buffer = buf->data_stack[buf->data_stack_top];
00539 buf->data_stack_top--;
00540 buf->line[line] = buffer;
00541
00542
00543
00544 return buffer;
00545 }
00546
00547 static void slice_buffer_release(slice_buffer * buf, int line)
00548 {
00549 int offset;
00550 IDWTELEM * buffer;
00551
00552 assert(line >= 0 && line < buf->line_count);
00553 assert(buf->line[line]);
00554
00555 offset = buf->line_width * line;
00556 buffer = buf->line[line];
00557 buf->data_stack_top++;
00558 buf->data_stack[buf->data_stack_top] = buffer;
00559 buf->line[line] = NULL;
00560
00561
00562 }
00563
00564 static void slice_buffer_flush(slice_buffer * buf)
00565 {
00566 int i;
00567 for (i = 0; i < buf->line_count; i++)
00568 {
00569 if (buf->line[i])
00570 {
00571
00572 slice_buffer_release(buf, i);
00573 }
00574 }
00575 }
00576
00577 static void slice_buffer_destroy(slice_buffer * buf)
00578 {
00579 int i;
00580 slice_buffer_flush(buf);
00581
00582 for (i = buf->data_count - 1; i >= 0; i--)
00583 {
00584 av_freep(&buf->data_stack[i]);
00585 }
00586 av_freep(&buf->data_stack);
00587 av_freep(&buf->line);
00588 }
00589
00590 #ifdef __sgi
00591
00592 #undef qexp
00593 #endif
00594 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
00595 static uint8_t qexp[QROOT];
00596
/**
 * Fold an index into the closed range [0, m] by reflecting it at both ends
 * (the end samples themselves are not repeated).
 *
 * @param v index, may lie outside the range on either side
 * @param m largest valid index
 * @return the reflected index; v itself when it is already within [0, m]
 */
static inline int mirror(int v, int m){
    /* With m == 0 the only valid index is 0. The loop below would never
     * terminate for v != 0 because v would only keep flipping its sign. */
    if(m == 0)
        return 0;

    while((unsigned)v > (unsigned)m){
        v= -v;
        if(v < 0) v += 2*m;
    }
    return v;
}
00604
00605 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
00606 int i;
00607
00608 if(v){
00609 const int a= FFABS(v);
00610 const int e= av_log2(a);
00611 #if 1
00612 const int el= FFMIN(e, 10);
00613 put_rac(c, state+0, 0);
00614
00615 for(i=0; i<el; i++){
00616 put_rac(c, state+1+i, 1);
00617 }
00618 for(; i<e; i++){
00619 put_rac(c, state+1+9, 1);
00620 }
00621 put_rac(c, state+1+FFMIN(i,9), 0);
00622
00623 for(i=e-1; i>=el; i--){
00624 put_rac(c, state+22+9, (a>>i)&1);
00625 }
00626 for(; i>=0; i--){
00627 put_rac(c, state+22+i, (a>>i)&1);
00628 }
00629
00630 if(is_signed)
00631 put_rac(c, state+11 + el, v < 0);
00632 #else
00633
00634 put_rac(c, state+0, 0);
00635 if(e<=9){
00636 for(i=0; i<e; i++){
00637 put_rac(c, state+1+i, 1);
00638 }
00639 put_rac(c, state+1+i, 0);
00640
00641 for(i=e-1; i>=0; i--){
00642 put_rac(c, state+22+i, (a>>i)&1);
00643 }
00644
00645 if(is_signed)
00646 put_rac(c, state+11 + e, v < 0);
00647 }else{
00648 for(i=0; i<e; i++){
00649 put_rac(c, state+1+FFMIN(i,9), 1);
00650 }
00651 put_rac(c, state+1+FFMIN(i,9), 0);
00652
00653 for(i=e-1; i>=0; i--){
00654 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1);
00655 }
00656
00657 if(is_signed)
00658 put_rac(c, state+11 + FFMIN(e,10), v < 0);
00659 }
00660 #endif
00661 }else{
00662 put_rac(c, state+0, 1);
00663 }
00664 }
00665
00666 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
00667 if(get_rac(c, state+0))
00668 return 0;
00669 else{
00670 int i, e, a;
00671 e= 0;
00672 while(get_rac(c, state+1 + FFMIN(e,9))){
00673 e++;
00674 }
00675
00676 a= 1;
00677 for(i=e-1; i>=0; i--){
00678 a += a + get_rac(c, state+22 + FFMIN(i,9));
00679 }
00680
00681 if(is_signed && get_rac(c, state+11 + FFMIN(e,10)))
00682 return -a;
00683 else
00684 return a;
00685 }
00686 }
00687
00688 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
00689 int i;
00690 int r= log2>=0 ? 1<<log2 : 1;
00691
00692 assert(v>=0);
00693 assert(log2>=-4);
00694
00695 while(v >= r){
00696 put_rac(c, state+4+log2, 1);
00697 v -= r;
00698 log2++;
00699 if(log2>0) r+=r;
00700 }
00701 put_rac(c, state+4+log2, 0);
00702
00703 for(i=log2-1; i>=0; i--){
00704 put_rac(c, state+31-i, (v>>i)&1);
00705 }
00706 }
00707
00708 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
00709 int i;
00710 int r= log2>=0 ? 1<<log2 : 1;
00711 int v=0;
00712
00713 assert(log2>=-4);
00714
00715 while(get_rac(c, state+4+log2)){
00716 v+= r;
00717 log2++;
00718 if(log2>0) r+=r;
00719 }
00720
00721 for(i=log2-1; i>=0; i--){
00722 v+= get_rac(c, state+31-i)<<i;
00723 }
00724
00725 return v;
00726 }
00727
00728 static av_always_inline void
00729 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00730 int dst_step, int src_step, int ref_step,
00731 int width, int mul, int add, int shift,
00732 int highpass, int inverse){
00733 const int mirror_left= !highpass;
00734 const int mirror_right= (width&1) ^ highpass;
00735 const int w= (width>>1) - 1 + (highpass & width);
00736 int i;
00737
00738 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00739 if(mirror_left){
00740 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00741 dst += dst_step;
00742 src += src_step;
00743 }
00744
00745 for(i=0; i<w; i++){
00746 dst[i*dst_step] =
00747 LIFT(src[i*src_step],
00748 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00749 inverse);
00750 }
00751
00752 if(mirror_right){
00753 dst[w*dst_step] =
00754 LIFT(src[w*src_step],
00755 ((mul*2*ref[w*ref_step]+add)>>shift),
00756 inverse);
00757 }
00758 }
00759
00760 static av_always_inline void
00761 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00762 int dst_step, int src_step, int ref_step,
00763 int width, int mul, int add, int shift,
00764 int highpass, int inverse){
00765 const int mirror_left= !highpass;
00766 const int mirror_right= (width&1) ^ highpass;
00767 const int w= (width>>1) - 1 + (highpass & width);
00768 int i;
00769
00770 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00771 if(mirror_left){
00772 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00773 dst += dst_step;
00774 src += src_step;
00775 }
00776
00777 for(i=0; i<w; i++){
00778 dst[i*dst_step] =
00779 LIFT(src[i*src_step],
00780 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00781 inverse);
00782 }
00783
00784 if(mirror_right){
00785 dst[w*dst_step] =
00786 LIFT(src[w*src_step],
00787 ((mul*2*ref[w*ref_step]+add)>>shift),
00788 inverse);
00789 }
00790 }
00791
00792 #ifndef liftS
00793 static av_always_inline void
00794 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00795 int dst_step, int src_step, int ref_step,
00796 int width, int mul, int add, int shift,
00797 int highpass, int inverse){
00798 const int mirror_left= !highpass;
00799 const int mirror_right= (width&1) ^ highpass;
00800 const int w= (width>>1) - 1 + (highpass & width);
00801 int i;
00802
00803 assert(shift == 4);
00804 #define LIFTS(src, ref, inv) \
00805 ((inv) ? \
00806 (src) + (((ref) + 4*(src))>>shift): \
00807 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00808 if(mirror_left){
00809 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00810 dst += dst_step;
00811 src += src_step;
00812 }
00813
00814 for(i=0; i<w; i++){
00815 dst[i*dst_step] =
00816 LIFTS(src[i*src_step],
00817 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00818 inverse);
00819 }
00820
00821 if(mirror_right){
00822 dst[w*dst_step] =
00823 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00824 }
00825 }
00826 static av_always_inline void
00827 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00828 int dst_step, int src_step, int ref_step,
00829 int width, int mul, int add, int shift,
00830 int highpass, int inverse){
00831 const int mirror_left= !highpass;
00832 const int mirror_right= (width&1) ^ highpass;
00833 const int w= (width>>1) - 1 + (highpass & width);
00834 int i;
00835
00836 assert(shift == 4);
00837 #define LIFTS(src, ref, inv) \
00838 ((inv) ? \
00839 (src) + (((ref) + 4*(src))>>shift): \
00840 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00841 if(mirror_left){
00842 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00843 dst += dst_step;
00844 src += src_step;
00845 }
00846
00847 for(i=0; i<w; i++){
00848 dst[i*dst_step] =
00849 LIFTS(src[i*src_step],
00850 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00851 inverse);
00852 }
00853
00854 if(mirror_right){
00855 dst[w*dst_step] =
00856 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00857 }
00858 }
00859 #endif
00860
00861 static void horizontal_decompose53i(DWTELEM *b, int width){
00862 DWTELEM temp[width];
00863 const int width2= width>>1;
00864 int x;
00865 const int w2= (width+1)>>1;
00866
00867 for(x=0; x<width2; x++){
00868 temp[x ]= b[2*x ];
00869 temp[x+w2]= b[2*x + 1];
00870 }
00871 if(width&1)
00872 temp[x ]= b[2*x ];
00873 #if 0
00874 {
00875 int A1,A2,A3,A4;
00876 A2= temp[1 ];
00877 A4= temp[0 ];
00878 A1= temp[0+width2];
00879 A1 -= (A2 + A4)>>1;
00880 A4 += (A1 + 1)>>1;
00881 b[0+width2] = A1;
00882 b[0 ] = A4;
00883 for(x=1; x+1<width2; x+=2){
00884 A3= temp[x+width2];
00885 A4= temp[x+1 ];
00886 A3 -= (A2 + A4)>>1;
00887 A2 += (A1 + A3 + 2)>>2;
00888 b[x+width2] = A3;
00889 b[x ] = A2;
00890
00891 A1= temp[x+1+width2];
00892 A2= temp[x+2 ];
00893 A1 -= (A2 + A4)>>1;
00894 A4 += (A1 + A3 + 2)>>2;
00895 b[x+1+width2] = A1;
00896 b[x+1 ] = A4;
00897 }
00898 A3= temp[width-1];
00899 A3 -= A2;
00900 A2 += (A1 + A3 + 2)>>2;
00901 b[width -1] = A3;
00902 b[width2-1] = A2;
00903 }
00904 #else
00905 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
00906 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
00907 #endif
00908 }
00909
00910 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00911 int i;
00912
00913 for(i=0; i<width; i++){
00914 b1[i] -= (b0[i] + b2[i])>>1;
00915 }
00916 }
00917
00918 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00919 int i;
00920
00921 for(i=0; i<width; i++){
00922 b1[i] += (b0[i] + b2[i] + 2)>>2;
00923 }
00924 }
00925
00926 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
00927 int y;
00928 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
00929 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
00930
00931 for(y=-2; y<height; y+=2){
00932 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
00933 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
00934
00935 {START_TIMER
00936 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
00937 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
00938 STOP_TIMER("horizontal_decompose53i")}
00939
00940 {START_TIMER
00941 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
00942 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
00943 STOP_TIMER("vertical_decompose53i*")}
00944
00945 b0=b2;
00946 b1=b3;
00947 }
00948 }
00949
00950 static void horizontal_decompose97i(DWTELEM *b, int width){
00951 DWTELEM temp[width];
00952 const int w2= (width+1)>>1;
00953
00954 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
00955 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
00956 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
00957 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
00958 }
00959
00960
00961 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00962 int i;
00963
00964 for(i=0; i<width; i++){
00965 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
00966 }
00967 }
00968
00969 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00970 int i;
00971
00972 for(i=0; i<width; i++){
00973 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
00974 }
00975 }
00976
00977 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00978 int i;
00979
00980 for(i=0; i<width; i++){
00981 #ifdef liftS
00982 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
00983 #else
00984 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
00985 #endif
00986 }
00987 }
00988
00989 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00990 int i;
00991
00992 for(i=0; i<width; i++){
00993 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
00994 }
00995 }
00996
00997 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
00998 int y;
00999 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
01000 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
01001 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
01002 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
01003
01004 for(y=-4; y<height; y+=2){
01005 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
01006 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
01007
01008 {START_TIMER
01009 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
01010 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
01011 if(width>400){
01012 STOP_TIMER("horizontal_decompose97i")
01013 }}
01014
01015 {START_TIMER
01016 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
01017 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
01018 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
01019 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
01020
01021 if(width>400){
01022 STOP_TIMER("vertical_decompose97i")
01023 }}
01024
01025 b0=b2;
01026 b1=b3;
01027 b2=b4;
01028 b3=b5;
01029 }
01030 }
01031
01032 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01033 int level;
01034
01035 for(level=0; level<decomposition_count; level++){
01036 switch(type){
01037 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
01038 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
01039 }
01040 }
01041 }
01042
01043 static void horizontal_compose53i(IDWTELEM *b, int width){
01044 IDWTELEM temp[width];
01045 const int width2= width>>1;
01046 const int w2= (width+1)>>1;
01047 int x;
01048
01049 #if 0
01050 int A1,A2,A3,A4;
01051 A2= temp[1 ];
01052 A4= temp[0 ];
01053 A1= temp[0+width2];
01054 A1 -= (A2 + A4)>>1;
01055 A4 += (A1 + 1)>>1;
01056 b[0+width2] = A1;
01057 b[0 ] = A4;
01058 for(x=1; x+1<width2; x+=2){
01059 A3= temp[x+width2];
01060 A4= temp[x+1 ];
01061 A3 -= (A2 + A4)>>1;
01062 A2 += (A1 + A3 + 2)>>2;
01063 b[x+width2] = A3;
01064 b[x ] = A2;
01065
01066 A1= temp[x+1+width2];
01067 A2= temp[x+2 ];
01068 A1 -= (A2 + A4)>>1;
01069 A4 += (A1 + A3 + 2)>>2;
01070 b[x+1+width2] = A1;
01071 b[x+1 ] = A4;
01072 }
01073 A3= temp[width-1];
01074 A3 -= A2;
01075 A2 += (A1 + A3 + 2)>>2;
01076 b[width -1] = A3;
01077 b[width2-1] = A2;
01078 #else
01079 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
01080 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
01081 #endif
01082 for(x=0; x<width2; x++){
01083 b[2*x ]= temp[x ];
01084 b[2*x + 1]= temp[x+w2];
01085 }
01086 if(width&1)
01087 b[2*x ]= temp[x ];
01088 }
01089
01090 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01091 int i;
01092
01093 for(i=0; i<width; i++){
01094 b1[i] += (b0[i] + b2[i])>>1;
01095 }
01096 }
01097
01098 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01099 int i;
01100
01101 for(i=0; i<width; i++){
01102 b1[i] -= (b0[i] + b2[i] + 2)>>2;
01103 }
01104 }
01105
01106 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
01107 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
01108 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
01109 cs->y = -1;
01110 }
01111
01112 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
01113 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
01114 cs->b1 = buffer + mirror(-1 , height-1)*stride;
01115 cs->y = -1;
01116 }
01117
01118 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
01119 int y= cs->y;
01120
01121 IDWTELEM *b0= cs->b0;
01122 IDWTELEM *b1= cs->b1;
01123 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
01124 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
01125
01126 {START_TIMER
01127 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01128 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01129 STOP_TIMER("vertical_compose53i*")}
01130
01131 {START_TIMER
01132 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01133 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01134 STOP_TIMER("horizontal_compose53i")}
01135
01136 cs->b0 = b2;
01137 cs->b1 = b3;
01138 cs->y += 2;
01139 }
01140
01141 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
01142 int y= cs->y;
01143 IDWTELEM *b0= cs->b0;
01144 IDWTELEM *b1= cs->b1;
01145 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
01146 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
01147
01148 {START_TIMER
01149 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01150 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01151 STOP_TIMER("vertical_compose53i*")}
01152
01153 {START_TIMER
01154 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01155 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01156 STOP_TIMER("horizontal_compose53i")}
01157
01158 cs->b0 = b2;
01159 cs->b1 = b3;
01160 cs->y += 2;
01161 }
01162
01163 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
01164 dwt_compose_t cs;
01165 spatial_compose53i_init(&cs, buffer, height, stride);
01166 while(cs.y <= height)
01167 spatial_compose53i_dy(&cs, buffer, width, height, stride);
01168 }
01169
01170
01171 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
01172 IDWTELEM temp[width];
01173 const int w2= (width+1)>>1;
01174
01175 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
01176 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
01177 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
01178 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
01179 }
01180
01181 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01182 int i;
01183
01184 for(i=0; i<width; i++){
01185 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01186 }
01187 }
01188
01189 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01190 int i;
01191
01192 for(i=0; i<width; i++){
01193 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
01194 }
01195 }
01196
01197 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01198 int i;
01199
01200 for(i=0; i<width; i++){
01201 #ifdef liftS
01202 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
01203 #else
01204 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
01205 #endif
01206 }
01207 }
01208
01209 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01210 int i;
01211
01212 for(i=0; i<width; i++){
01213 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
01214 }
01215 }
01216
01217 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
01218 int i;
01219
01220 for(i=0; i<width; i++){
01221 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
01222 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
01223 #ifdef liftS
01224 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
01225 #else
01226 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
01227 #endif
01228 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01229 }
01230 }
01231
01232 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
01233 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
01234 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
01235 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
01236 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
01237 cs->y = -3;
01238 }
01239
01240 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
01241 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
01242 cs->b1 = buffer + mirror(-3 , height-1)*stride;
01243 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
01244 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
01245 cs->y = -3;
01246 }
01247
01248 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
01249 int y = cs->y;
01250
01251 IDWTELEM *b0= cs->b0;
01252 IDWTELEM *b1= cs->b1;
01253 IDWTELEM *b2= cs->b2;
01254 IDWTELEM *b3= cs->b3;
01255 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
01256 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
01257
01258 {START_TIMER
01259 if(y>0 && y+4<height){
01260 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
01261 }else{
01262 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01263 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01264 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01265 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01266 }
01267 if(width>400){
01268 STOP_TIMER("vertical_compose97i")}}
01269
01270 {START_TIMER
01271 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
01272 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
01273 if(width>400 && y+0<(unsigned)height){
01274 STOP_TIMER("horizontal_compose97i")}}
01275
01276 cs->b0=b2;
01277 cs->b1=b3;
01278 cs->b2=b4;
01279 cs->b3=b5;
01280 cs->y += 2;
01281 }
01282
01283 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
01284 int y = cs->y;
01285 IDWTELEM *b0= cs->b0;
01286 IDWTELEM *b1= cs->b1;
01287 IDWTELEM *b2= cs->b2;
01288 IDWTELEM *b3= cs->b3;
01289 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
01290 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
01291
01292 {START_TIMER
01293 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01294 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01295 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01296 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01297 if(width>400){
01298 STOP_TIMER("vertical_compose97i")}}
01299
01300 {START_TIMER
01301 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
01302 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
01303 if(width>400 && b0 <= b2){
01304 STOP_TIMER("horizontal_compose97i")}}
01305
01306 cs->b0=b2;
01307 cs->b1=b3;
01308 cs->b2=b4;
01309 cs->b3=b5;
01310 cs->y += 2;
01311 }
01312
01313 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
01314 dwt_compose_t cs;
01315 spatial_compose97i_init(&cs, buffer, height, stride);
01316 while(cs.y <= height)
01317 spatial_compose97i_dy(&cs, buffer, width, height, stride);
01318 }
01319
01320 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
01321 int level;
01322 for(level=decomposition_count-1; level>=0; level--){
01323 switch(type){
01324 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01325 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01326 }
01327 }
01328 }
01329
01330 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01331 int level;
01332 for(level=decomposition_count-1; level>=0; level--){
01333 switch(type){
01334 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
01335 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
01336 }
01337 }
01338 }
01339
01340 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
01341 const int support = type==1 ? 3 : 5;
01342 int level;
01343 if(type==2) return;
01344
01345 for(level=decomposition_count-1; level>=0; level--){
01346 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01347 switch(type){
01348 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01349 break;
01350 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01351 break;
01352 }
01353 }
01354 }
01355 }
01356
01357 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
01358 const int support = type==1 ? 3 : 5;
01359 int level;
01360 if(type==2) return;
01361
01362 for(level=decomposition_count-1; level>=0; level--){
01363 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01364 switch(type){
01365 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01366 break;
01367 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01368 break;
01369 }
01370 }
01371 }
01372 }
01373
01374 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01375 dwt_compose_t cs[MAX_DECOMPOSITIONS];
01376 int y;
01377 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
01378 for(y=0; y<height; y+=4)
01379 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
01380 }
01381
01382 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01383 const int w= b->width;
01384 const int h= b->height;
01385 int x, y;
01386
01387 if(1){
01388 int run=0;
01389 int runs[w*h];
01390 int run_index=0;
01391 int max_index;
01392
01393 for(y=0; y<h; y++){
01394 for(x=0; x<w; x++){
01395 int v, p=0;
01396 int l=0, lt=0, t=0, rt=0;
01397 v= src[x + y*stride];
01398
01399 if(y){
01400 t= src[x + (y-1)*stride];
01401 if(x){
01402 lt= src[x - 1 + (y-1)*stride];
01403 }
01404 if(x + 1 < w){
01405 rt= src[x + 1 + (y-1)*stride];
01406 }
01407 }
01408 if(x){
01409 l= src[x - 1 + y*stride];
01410
01411
01412
01413
01414 }
01415 if(parent){
01416 int px= x>>1;
01417 int py= y>>1;
01418 if(px<b->parent->width && py<b->parent->height)
01419 p= parent[px + py*2*stride];
01420 }
01421 if(!(l|lt|t|rt|p)){
01422 if(v){
01423 runs[run_index++]= run;
01424 run=0;
01425 }else{
01426 run++;
01427 }
01428 }
01429 }
01430 }
01431 max_index= run_index;
01432 runs[run_index++]= run;
01433 run_index=0;
01434 run= runs[run_index++];
01435
01436 put_symbol2(&s->c, b->state[30], max_index, 0);
01437 if(run_index <= max_index)
01438 put_symbol2(&s->c, b->state[1], run, 3);
01439
01440 for(y=0; y<h; y++){
01441 if(s->c.bytestream_end - s->c.bytestream < w*40){
01442 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
01443 return -1;
01444 }
01445 for(x=0; x<w; x++){
01446 int v, p=0;
01447 int l=0, lt=0, t=0, rt=0;
01448 v= src[x + y*stride];
01449
01450 if(y){
01451 t= src[x + (y-1)*stride];
01452 if(x){
01453 lt= src[x - 1 + (y-1)*stride];
01454 }
01455 if(x + 1 < w){
01456 rt= src[x + 1 + (y-1)*stride];
01457 }
01458 }
01459 if(x){
01460 l= src[x - 1 + y*stride];
01461
01462
01463
01464
01465 }
01466 if(parent){
01467 int px= x>>1;
01468 int py= y>>1;
01469 if(px<b->parent->width && py<b->parent->height)
01470 p= parent[px + py*2*stride];
01471 }
01472 if(l|lt|t|rt|p){
01473 int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01474
01475 put_rac(&s->c, &b->state[0][context], !!v);
01476 }else{
01477 if(!run){
01478 run= runs[run_index++];
01479
01480 if(run_index <= max_index)
01481 put_symbol2(&s->c, b->state[1], run, 3);
01482 assert(v);
01483 }else{
01484 run--;
01485 assert(!v);
01486 }
01487 }
01488 if(v){
01489 int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01490 int l2= 2*FFABS(l) + (l<0);
01491 int t2= 2*FFABS(t) + (t<0);
01492
01493 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
01494 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
01495 }
01496 }
01497 }
01498 }
01499 return 0;
01500 }
01501
01502 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01503
01504
01505 return encode_subband_c0run(s, b, src, parent, stride, orientation);
01506
01507 }
01508
01509 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
01510 const int w= b->width;
01511 const int h= b->height;
01512 int x,y;
01513
01514 if(1){
01515 int run, runs;
01516 x_and_coeff *xc= b->x_coeff;
01517 x_and_coeff *prev_xc= NULL;
01518 x_and_coeff *prev2_xc= xc;
01519 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
01520 x_and_coeff *prev_parent_xc= parent_xc;
01521
01522 runs= get_symbol2(&s->c, b->state[30], 0);
01523 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
01524 else run= INT_MAX;
01525
01526 for(y=0; y<h; y++){
01527 int v=0;
01528 int lt=0, t=0, rt=0;
01529
01530 if(y && prev_xc->x == 0){
01531 rt= prev_xc->coeff;
01532 }
01533 for(x=0; x<w; x++){
01534 int p=0;
01535 const int l= v;
01536
01537 lt= t; t= rt;
01538
01539 if(y){
01540 if(prev_xc->x <= x)
01541 prev_xc++;
01542 if(prev_xc->x == x + 1)
01543 rt= prev_xc->coeff;
01544 else
01545 rt=0;
01546 }
01547 if(parent_xc){
01548 if(x>>1 > parent_xc->x){
01549 parent_xc++;
01550 }
01551 if(x>>1 == parent_xc->x){
01552 p= parent_xc->coeff;
01553 }
01554 }
01555 if(l|lt|t|rt|p){
01556 int context= av_log2(3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
01557
01558 v=get_rac(&s->c, &b->state[0][context]);
01559 if(v){
01560 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
01561 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
01562
01563 xc->x=x;
01564 (xc++)->coeff= v;
01565 }
01566 }else{
01567 if(!run){
01568 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
01569 else run= INT_MAX;
01570 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
01571 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
01572
01573 xc->x=x;
01574 (xc++)->coeff= v;
01575 }else{
01576 int max_run;
01577 run--;
01578 v=0;
01579
01580 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
01581 else max_run= FFMIN(run, w-x-1);
01582 if(parent_xc)
01583 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
01584 x+= max_run;
01585 run-= max_run;
01586 }
01587 }
01588 }
01589 (xc++)->x= w+1;
01590 prev_xc= prev2_xc;
01591 prev2_xc= xc;
01592
01593 if(parent_xc){
01594 if(y&1){
01595 while(parent_xc->x != parent->width+1)
01596 parent_xc++;
01597 parent_xc++;
01598 prev_parent_xc= parent_xc;
01599 }else{
01600 parent_xc= prev_parent_xc;
01601 }
01602 }
01603 }
01604
01605 (xc++)->x= w+1;
01606 }
01607 }
01608
01609 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
01610 const int w= b->width;
01611 int y;
01612 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
01613 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
01614 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
01615 int new_index = 0;
01616
01617 START_TIMER
01618
01619 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
01620 qadd= 0;
01621 qmul= 1<<QEXPSHIFT;
01622 }
01623
01624
01625 if (start_y != 0)
01626 new_index = save_state[0];
01627
01628
01629 for(y=start_y; y<h; y++){
01630 int x = 0;
01631 int v;
01632 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
01633 memset(line, 0, b->width*sizeof(IDWTELEM));
01634 v = b->x_coeff[new_index].coeff;
01635 x = b->x_coeff[new_index++].x;
01636 while(x < w)
01637 {
01638 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
01639 register int u= -(v&1);
01640 line[x] = (t^u) - u;
01641
01642 v = b->x_coeff[new_index].coeff;
01643 x = b->x_coeff[new_index++].x;
01644 }
01645 }
01646 if(w > 200 && start_y != 0){
01647 STOP_TIMER("decode_subband")
01648 }
01649
01650
01651 save_state[0] = new_index;
01652
01653 return;
01654 }
01655
01656 static void reset_contexts(SnowContext *s){
01657 int plane_index, level, orientation;
01658
01659 for(plane_index=0; plane_index<3; plane_index++){
01660 for(level=0; level<MAX_DECOMPOSITIONS; level++){
01661 for(orientation=level ? 1:0; orientation<4; orientation++){
01662 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
01663 }
01664 }
01665 }
01666 memset(s->header_state, MID_STATE, sizeof(s->header_state));
01667 memset(s->block_state, MID_STATE, sizeof(s->block_state));
01668 }
01669
01670 static int alloc_blocks(SnowContext *s){
01671 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
01672 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
01673
01674 s->b_width = w;
01675 s->b_height= h;
01676
01677 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
01678 return 0;
01679 }
01680
01681 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
01682 uint8_t *bytestream= d->bytestream;
01683 uint8_t *bytestream_start= d->bytestream_start;
01684 *d= *s;
01685 d->bytestream= bytestream;
01686 d->bytestream_start= bytestream_start;
01687 }
01688
01689
/**
 * Sum of all pixels in a w x w square.
 *
 * @param pix       top-left pixel of the square
 * @param line_size distance between rows, in bytes
 * @param w         edge length of the square
 * @return the sum of the w*w pixel values
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *p = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += p[col];
    }
    return total;
}
01704
01705
01706 static int pix_norm1(uint8_t * pix, int line_size, int w)
01707 {
01708 int s, i, j;
01709 uint32_t *sq = ff_squareTbl + 256;
01710
01711 s = 0;
01712 for (i = 0; i < w; i++) {
01713 for (j = 0; j < w; j ++) {
01714 s += sq[pix[0]];
01715 pix ++;
01716 }
01717 pix += line_size - w;
01718 }
01719 return s;
01720 }
01721
01722 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
01723 const int w= s->b_width << s->block_max_depth;
01724 const int rem_depth= s->block_max_depth - level;
01725 const int index= (x + y*w) << rem_depth;
01726 const int block_w= 1<<rem_depth;
01727 BlockNode block;
01728 int i,j;
01729
01730 block.color[0]= l;
01731 block.color[1]= cb;
01732 block.color[2]= cr;
01733 block.mx= mx;
01734 block.my= my;
01735 block.ref= ref;
01736 block.type= type;
01737 block.level= level;
01738
01739 for(j=0; j<block_w; j++){
01740 for(i=0; i<block_w; i++){
01741 s->block[index + i + j*w]= block;
01742 }
01743 }
01744 }
01745
01746 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
01747 const int offset[3]= {
01748 y*c-> stride + x,
01749 ((y*c->uvstride + x)>>1),
01750 ((y*c->uvstride + x)>>1),
01751 };
01752 int i;
01753 for(i=0; i<3; i++){
01754 c->src[0][i]= src [i];
01755 c->ref[0][i]= ref [i] + offset[i];
01756 }
01757 assert(!ref_index);
01758 }
01759
01760 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
01761 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
01762 if(s->ref_frames == 1){
01763 *mx = mid_pred(left->mx, top->mx, tr->mx);
01764 *my = mid_pred(left->my, top->my, tr->my);
01765 }else{
01766 const int *scale = scale_mv_ref[ref];
01767 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
01768 (top ->mx * scale[top ->ref] + 128) >>8,
01769 (tr ->mx * scale[tr ->ref] + 128) >>8);
01770 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
01771 (top ->my * scale[top ->ref] + 128) >>8,
01772 (tr ->my * scale[tr ->ref] + 128) >>8);
01773 }
01774 }
01775
01776
/* Named slots of the local predictor array P[10][2] that encode_q_branch()
 * fills and hands to ff_epzs_motion_search(); element [0] is the x component
 * and [1] the y component. P_MV1 and FLAG_QPEL are not referenced in the code
 * visible here -- presumably they are consumed by the shared motion estimation
 * code; confirm before removing. */
01777 #define P_LEFT P[1]
01778 #define P_TOP P[2]
01779 #define P_TOPRIGHT P[3]
01780 #define P_MEDIAN P[4]
01781 #define P_MV1 P[9]
01782 #define FLAG_QPEL 1 //must be 1
01783
/**
 * Decides how to code the block (x, y) of quadtree level `level`, writes the
 * decision to the range coder s->c and records it in s->block via set_blocks().
 *
 * Three alternatives are compared by score (distortion plus the
 * lambda2-weighted number of bits each one adds to the coder):
 *  - inter:  motion search over every reference frame, best one trial-encoded
 *            into p_buffer with a private copy of the coder (pc / p_state);
 *  - intra:  the block replaced by its mean colour per plane, trial-encoded
 *            into i_buffer (ic / i_state);
 *  - split:  only when level != s->block_max_depth; the four child blocks are
 *            coded recursively straight into s->c.
 * If the split wins, its output is already in place. Otherwise the bytes of
 * the winning trial encoding are copied over whatever was written since entry
 * and the coder and block_state are replaced by the trial copies.
 *
 * On keyframes nothing is searched or written: the block is stored as intra
 * with the left neighbour's colours and 0 is returned.
 *
 * @return score of the alternative that was kept
 */
01784 static int encode_q_branch(SnowContext *s, int level, int x, int y){
01785 uint8_t p_buffer[1024];
01786 uint8_t i_buffer[1024];
01787 uint8_t p_state[sizeof(s->block_state)];
01788 uint8_t i_state[sizeof(s->block_state)];
01789 RangeCoder pc, ic;
/* Output position on entry; the winning trial encoding is copied back here. */
01790 uint8_t *pbbak= s->c.bytestream;
01791 uint8_t *pbbak_start= s->c.bytestream_start;
01792 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
01793 const int w= s->b_width << s->block_max_depth;
01794 const int h= s->b_height << s->block_max_depth;
01795 const int rem_depth= s->block_max_depth - level;
01796 const int index= (x + y*w) << rem_depth;
01797 const int block_w= 1<<(LOG2_MB_SIZE - level);
01798 int trx= (x+1)<<rem_depth;
01799 int try= (y+1)<<rem_depth;
/* Neighbouring nodes; null_block stands in for neighbours outside the grid. */
01800 const BlockNode *left = x ? &s->block[index-1] : &null_block;
01801 const BlockNode *top = y ? &s->block[index-w] : &null_block;
01802 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
01803 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
01804 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
01805 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
/* Colour predictors are simply the left neighbour's colours. */
01806 int pl = left->color[0];
01807 int pcb= left->color[1];
01808 int pcr= left->color[2];
01809 int pmx, pmy;
01810 int mx=0, my=0;
01811 int l,cr,cb;
01812 const int stride= s->current_picture.linesize[0];
01813 const int uvstride= s->current_picture.linesize[1];
01814 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
01815 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
01816 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
01817 int P[10][2];
01818 int16_t last_mv[3][2];
01819 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL);
01820 const int shift= 1+qpel;
01821 MotionEstContext *c= &s->m.me;
/* Context indices selecting which adaptive states are used for each symbol. */
01822 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
01823 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
01824 int my_context= av_log2(2*FFABS(left->my - top->my));
01825 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
01826 int ref, best_ref, ref_score, ref_mx, ref_my;
01827
01828 assert(sizeof(s->block_state) >= 256);
/* Keyframes carry no block information: store a default intra block and stop. */
01829 if(s->keyframe){
01830 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
01831 return 0;
01832 }
01833
01834
01835
/* Candidate predictors for the motion search, taken from the neighbours. */
01836 P_LEFT[0]= left->mx;
01837 P_LEFT[1]= left->my;
01838 P_TOP [0]= top->mx;
01839 P_TOP [1]= top->my;
01840 P_TOPRIGHT[0]= tr->mx;
01841 P_TOPRIGHT[1]= tr->my;
01842
/* Vectors currently stored for this node and its right/bottom neighbours
 * (left over from earlier coding), passed to the search as extra candidates. */
01843 last_mv[0][0]= s->block[index].mx;
01844 last_mv[0][1]= s->block[index].my;
01845 last_mv[1][0]= right->mx;
01846 last_mv[1][1]= right->my;
01847 last_mv[2][0]= bottom->mx;
01848 last_mv[2][1]= bottom->my;
01849
01850 s->m.mb_stride=2;
01851 s->m.mb_x=
01852 s->m.mb_y= 0;
01853 c->skip= 0;
01854
01855 assert(c-> stride == stride);
01856 assert(c->uvstride == uvstride);
01857
01858 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
01859 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
01860 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
01861 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
01862
/* Search window relative to this block: it may reach 16-2 units beyond each
 * edge of the area covered by the block grid. */
01863 c->xmin = - x*block_w - 16+2;
01864 c->ymin = - y*block_w - 16+2;
01865 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
01866 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
01867
/* Clip the predictors to the window; the window limits are scaled by `shift`
 * to the resolution of the stored vectors. */
01868 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
01869 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
01870 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
01871 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
01872 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
01873 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
01874 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
01875
01876 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
01877 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
01878
/* In the first block row only the left predictor is used. */
01879 if (!y) {
01880 c->pred_x= P_LEFT[0];
01881 c->pred_y= P_LEFT[1];
01882 } else {
01883 c->pred_x = P_MEDIAN[0];
01884 c->pred_y = P_MEDIAN[1];
01885 }
01886
/* Search every reference frame and keep the one with the lowest score. */
01887 score= INT_MAX;
01888 best_ref= 0;
01889 for(ref=0; ref<s->ref_frames; ref++){
01890 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
01891
01892 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, 0, last_mv,
01893 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
01894
01895 assert(ref_mx >= c->xmin);
01896 assert(ref_mx <= c->xmax);
01897 assert(ref_my >= c->ymin);
01898 assert(ref_my <= c->ymax);
01899
/* The score returned by the sub-sample refinement is discarded; only the
 * refined vector is kept and re-scored by ff_get_mb_score(). */
01900 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
01901 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
/* Penalise higher reference indices. */
01902 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
01903 if(s->ref_mvs[ref]){
01904 s->ref_mvs[ref][index][0]= ref_mx;
01905 s->ref_mvs[ref][index][1]= ref_my;
01906 s->ref_scores[ref][index]= ref_score;
01907 }
01908 if(score > ref_score){
01909 score= ref_score;
01910 best_ref= ref;
01911 mx= ref_mx;
01912 my= ref_my;
01913 }
01914 }
01915
01916
01917
/* Trial-encode the inter alternative with a copy of the coder that writes to
 * p_buffer; base_bits is the reference point for counting the bits it adds. */
01918 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
01919 pc= s->c;
01920 pc.bytestream_start=
01921 pc.bytestream= p_buffer;
01922 memcpy(p_state, s->block_state, sizeof(s->block_state));
01923
01924 if(level!=s->block_max_depth)
01925 put_rac(&pc, &p_state[4 + s_context], 1);
01926 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
01927 if(s->ref_frames > 1)
01928 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
01929 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
01930 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
01931 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
01932 p_len= pc.bytestream - pc.bytestream_start;
01933 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
01934
/* Intra alternative: l, cb, cr are the rounded mean sample values of the
 * block in each plane. The luma distortion is pix_norm1 - 2*l*sum + l*l*N. */
01935 block_s= block_w*block_w;
01936 sum = pix_sum(current_data[0], stride, block_w);
01937 l= (sum + block_s/2)/block_s;
01938 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
01939
01940 block_s= block_w*block_w>>2;
01941 sum = pix_sum(current_data[1], uvstride, block_w>>1);
01942 cb= (sum + block_s/2)/block_s;
01943
01944 sum = pix_sum(current_data[2], uvstride, block_w>>1);
01945 cr= (sum + block_s/2)/block_s;
01946
01947
/* Trial-encode the intra alternative into i_buffer. */
01948 ic= s->c;
01949 ic.bytestream_start=
01950 ic.bytestream= i_buffer;
01951 memcpy(i_state, s->block_state, sizeof(s->block_state));
01952 if(level!=s->block_max_depth)
01953 put_rac(&ic, &i_state[4 + s_context], 1);
01954 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
01955 put_symbol(&ic, &i_state[32], l-pl , 1);
01956 put_symbol(&ic, &i_state[64], cb-pcb, 1);
01957 put_symbol(&ic, &i_state[96], cr-pcr, 1);
01958 i_len= ic.bytestream - ic.bytestream_start;
01959 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
01960
01961
01962 assert(iscore < 255*255*256 + s->lambda2*10);
01963 assert(iscore >= 0);
01964 assert(l>=0 && l<=255);
01965 assert(pl>=0 && pl<=255);
01966
/* Top-level blocks also feed the scene change statistic. */
01967 if(level==0){
01968 int varc= iscore >> 8;
01969 int vard= score >> 8;
01970 if (vard <= 64 || vard < varc)
01971 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
01972 else
01973 c->scene_change_score+= s->m.qscale;
01974 }
01975
/* Split alternative: written directly to the real coder. It is kept only if
 * it beats both trial encodings; otherwise it is overwritten below. */
01976 if(level!=s->block_max_depth){
01977 put_rac(&s->c, &s->block_state[4 + s_context], 0);
01978 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
01979 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
01980 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
01981 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
01982 score2+= s->lambda2>>FF_LAMBDA_SHIFT;
01983
01984 if(score2 < score && score2 < iscore)
01985 return score2;
01986 }
01987
/* Commit the cheaper trial encoding: copy its bytes to the position saved on
 * entry and adopt its coder and context states. */
01988 if(iscore < score){
/* Intra blocks still store a (predicted) vector for later predictions. */
01989 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
01990 memcpy(pbbak, i_buffer, i_len);
01991 s->c= ic;
01992 s->c.bytestream_start= pbbak_start;
01993 s->c.bytestream= pbbak + i_len;
01994 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
01995 memcpy(s->block_state, i_state, sizeof(s->block_state));
01996 return iscore;
01997 }else{
01998 memcpy(pbbak, p_buffer, p_len);
01999 s->c= pc;
02000 s->c.bytestream_start= pbbak_start;
02001 s->c.bytestream= pbbak + p_len;
02002 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
02003 memcpy(s->block_state, p_state, sizeof(s->block_state));
02004 return score;
02005 }
02006 }
02007
02008 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
02009 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
02010 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
02011 }else{
02012 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
02013 }
02014 }
02015
02016 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
02017 const int w= s->b_width << s->block_max_depth;
02018 const int rem_depth= s->block_max_depth - level;
02019 const int index= (x + y*w) << rem_depth;
02020 int trx= (x+1)<<rem_depth;
02021 BlockNode *b= &s->block[index];
02022 const BlockNode *left = x ? &s->block[index-1] : &null_block;
02023 const BlockNode *top = y ? &s->block[index-w] : &null_block;
02024 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
02025 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
02026 int pl = left->color[0];
02027 int pcb= left->color[1];
02028 int pcr= left->color[2];
02029 int pmx, pmy;
02030 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
02031 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
02032 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
02033 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
02034
02035 if(s->keyframe){
02036 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
02037 return;
02038 }
02039
02040 if(level!=s->block_max_depth){
02041 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
02042 put_rac(&s->c, &s->block_state[4 + s_context], 1);
02043 }else{
02044 put_rac(&s->c, &s->block_state[4 + s_context], 0);
02045 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
02046 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
02047 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
02048 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
02049 return;
02050 }
02051 }
02052 if(b->type & BLOCK_INTRA){
02053 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
02054 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
02055 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
02056 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
02057 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
02058 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
02059 }else{
02060 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
02061 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
02062 if(s->ref_frames > 1)
02063 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
02064 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
02065 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
02066 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
02067 }
02068 }
02069
02070 static void decode_q_branch(SnowContext *s, int level, int x, int y){
02071 const int w= s->b_width << s->block_max_depth;
02072 const int rem_depth= s->block_max_depth - level;
02073 const int index= (x + y*w) << rem_depth;
02074 int trx= (x+1)<<rem_depth;
02075 const BlockNode *left = x ? &s->block[index-1] : &null_block;
02076 const BlockNode *top = y ? &s->block[index-w] : &null_block;
02077 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
02078 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl;
02079 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
02080
02081 if(s->keyframe){
02082 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
02083 return;
02084 }
02085
02086 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
02087 int type, mx, my;
02088 int l = left->color[0];
02089 int cb= left->color[1];
02090 int cr= left->color[2];
02091 int ref = 0;
02092 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
02093 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
02094 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
02095
02096 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
02097
02098 if(type){
02099 pred_mv(s, &mx, &my, 0, left, top, tr);
02100 l += get_symbol(&s->c, &s->block_state[32], 1);
02101 cb+= get_symbol(&s->c, &s->block_state[64], 1);
02102 cr+= get_symbol(&s->c, &s->block_state[96], 1);
02103 }else{
02104 if(s->ref_frames > 1)
02105 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
02106 pred_mv(s, &mx, &my, ref, left, top, tr);
02107 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
02108 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
02109 }
02110 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
02111 }else{
02112 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
02113 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
02114 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
02115 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
02116 }
02117 }
02118
02119 static void encode_blocks(SnowContext *s, int search){
02120 int x, y;
02121 int w= s->b_width;
02122 int h= s->b_height;
02123
02124 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
02125 iterative_me(s);
02126
02127 for(y=0; y<h; y++){
02128 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){
02129 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
02130 return;
02131 }
02132 for(x=0; x<w; x++){
02133 if(s->avctx->me_method == ME_ITER || !search)
02134 encode_q_branch2(s, 0, x, y);
02135 else
02136 encode_q_branch (s, 0, x, y);
02137 }
02138 }
02139 }
02140
02141 static void decode_blocks(SnowContext *s){
02142 int x, y;
02143 int w= s->b_width;
02144 int h= s->b_height;
02145
02146 for(y=0; y<h; y++){
02147 for(x=0; x<w; x++){
02148 decode_q_branch(s, 0, x, y);
02149 }
02150 }
02151 }
02152
/**
 * Motion-compensates one b_w x b_h block at the sub-sample position (dx, dy),
 * both in 1/16 sample units and below 16 (asserted).
 *
 * Up to three half-sample planes are built from src with a separable filter:
 * horizontal (tmp2t[0]), vertical (tmp2t[1]) and both (tmp2t[2]). The result
 * is then either a bilinear blend of four neighbouring planes or a weighted
 * blend of two planes chosen by the brane[] table.
 *
 * @param p      plane parameters (fast_mc, diag_mc, hcoeff); when NULL or
 *               when p->fast_mc is set the fixed taps 20, -5, 1 are used
 * @param dst    output block, rows `stride` bytes apart
 * @param src    input; must point HTAPS_MAX/2-1 rows and columns before the
 *               block position, as the pointer is advanced by that amount
 *               before being used as the integer-position plane
 * @param tmp    not referenced in this function
 * @param stride row distance used for src, dst and the temporary planes
 */
02153 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
/* Weight (out of 8) of the first plane in the two-plane blend, indexed by
 * (dx&7) + 8*(dy&7). */
02154 const static uint8_t weight[64]={
02155 8,7,6,5,4,3,2,1,
02156 7,7,0,0,0,0,0,1,
02157 6,0,6,0,0,0,2,0,
02158 5,0,0,5,0,3,0,0,
02159 4,0,0,0,4,0,0,0,
02160 3,0,0,5,0,3,0,0,
02161 2,0,6,0,0,0,2,0,
02162 1,7,0,0,0,0,0,1,
02163 };
02164
/* Indexed by dx + 16*dy: the high nibble (l) and low nibble (r) are indices
 * into hpel[] of the two planes to blend. 0xcc yields index 12 for both,
 * for which needs[] is 15, i.e. the four-plane bilinear path. */
02165 const static uint8_t brane[256]={
02166 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
02167 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
02168 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
02169 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
02170 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
02171 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
02172 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
02173 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
02174 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
02175 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
02176 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
02177 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
02178 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
02179 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
02180 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
02181 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
02182 };
02183
/* Filter passes required to build hpel[i], as a bit mask: 1 = horizontal,
 * 2 = vertical, 4 = horizontal followed by vertical, 15 = everything. */
02184 const static uint8_t needs[16]={
02185 0,1,0,0,
02186 2,4,2,0,
02187 0,1,0,0,
02188 15
02189 };
02190
02191 int x, y, b, r, l;
/* tmpIt holds unrounded horizontal filter output in rows of 64 entries;
 * tmp2t holds the three rounded half-sample planes. */
02192 int16_t tmpIt [64*(32+HTAPS_MAX)];
02193 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
02194 int16_t *tmpI= tmpIt;
02195 uint8_t *tmp2= tmp2t[0];
02196 const uint8_t *hpel[11];
02197 START_TIMER
02198 assert(dx<16 && dy<16);
02199 r= brane[dx + 16*dy]&15;
02200 l= brane[dx + 16*dy]>>4;
02201
02202 b= needs[l] | needs[r];
/* Without diag_mc every pass is run and the bilinear path below is taken. */
02203 if(p && !p->diag_mc)
02204 b= 15;
02205
/* Horizontal pass over b_h+HTAPS_MAX-1 rows: the extra rows feed the
 * vertical pass that runs on tmpI further down. */
02206 if(b&5){
02207 for(y=0; y < b_h+HTAPS_MAX-1; y++){
02208 for(x=0; x < b_w; x++){
02209 int a_1=src[x + HTAPS_MAX/2-4];
02210 int a0= src[x + HTAPS_MAX/2-3];
02211 int a1= src[x + HTAPS_MAX/2-2];
02212 int a2= src[x + HTAPS_MAX/2-1];
02213 int a3= src[x + HTAPS_MAX/2+0];
02214 int a4= src[x + HTAPS_MAX/2+1];
02215 int a5= src[x + HTAPS_MAX/2+2];
02216 int a6= src[x + HTAPS_MAX/2+3];
02217 int am=0;
02218 if(!p || p->fast_mc){
02219 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
02220 tmpI[x]= am;
02221 am= (am+16)>>5;
02222 }else{
02223 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
02224 tmpI[x]= am;
02225 am= (am+32)>>6;
02226 }
02227
/* Saturate so that the byte stored below is within 0..255. */
02228 if(am&(~255)) am= ~(am>>31);
02229 tmp2[x]= am;
02230 }
02231 tmpI+= 64;
02232 tmp2+= stride;
02233 src += stride;
02234 }
/* Rewind src to the first row. */
02235 src -= stride*y;
02236 }
02237 src += HTAPS_MAX/2 - 1;
02238 tmp2= tmp2t[1];
02239
/* Vertical pass on the source samples; b_w+1 columns because hpel[6] reads
 * this plane one sample to the right. */
02240 if(b&2){
02241 for(y=0; y < b_h; y++){
02242 for(x=0; x < b_w+1; x++){
02243 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
02244 int a0= src[x + (HTAPS_MAX/2-3)*stride];
02245 int a1= src[x + (HTAPS_MAX/2-2)*stride];
02246 int a2= src[x + (HTAPS_MAX/2-1)*stride];
02247 int a3= src[x + (HTAPS_MAX/2+0)*stride];
02248 int a4= src[x + (HTAPS_MAX/2+1)*stride];
02249 int a5= src[x + (HTAPS_MAX/2+2)*stride];
02250 int a6= src[x + (HTAPS_MAX/2+3)*stride];
02251 int am=0;
02252 if(!p || p->fast_mc)
02253 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
02254 else
02255 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
02256
02257 if(am&(~255)) am= ~(am>>31);
02258 tmp2[x]= am;
02259 }
02260 src += stride;
02261 tmp2+= stride;
02262 }
02263 src -= stride*y;
02264 }
02265 src += stride*(HTAPS_MAX/2 - 1);
02266 tmp2= tmp2t[2];
02267 tmpI= tmpIt;
/* Vertical pass on the unrounded horizontal output; the larger rounding
 * shift accounts for both filter gains. */
02268 if(b&4){
02269 for(y=0; y < b_h; y++){
02270 for(x=0; x < b_w; x++){
02271 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
02272 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
02273 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
02274 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
02275 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
02276 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
02277 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
02278 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
02279 int am=0;
02280 if(!p || p->fast_mc)
02281 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
02282 else
02283 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
02284 if(am&(~255)) am= ~(am>>31);
02285 tmp2[x]= am;
02286 }
02287 tmpI+= 64;
02288 tmp2+= stride;
02289 }
02290 }
02291
/* Half-sample grid around the block, index = column + 4*row:
 *   0 integer     1 horizontal   2 integer, one sample right
 *   4 vertical    5 both         6 vertical, one sample right
 *   8 integer, one row down   9 horizontal, one row down   10 both offsets
 * Entries 3 and 7 are never set. */
02292 hpel[ 0]= src;
02293 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
02294 hpel[ 2]= src + 1;
02295
02296 hpel[ 4]= tmp2t[1];
02297 hpel[ 5]= tmp2t[2];
02298 hpel[ 6]= tmp2t[1] + 1;
02299
02300 hpel[ 8]= src + stride;
02301 hpel[ 9]= hpel[1] + stride;
02302 hpel[10]= hpel[8] + 1;
02303
/* All planes available: bilinear blend of the four planes surrounding the
 * position, with weights in 1/8 units per axis. */
02304 if(b==15){
02305 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
02306 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
02307 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
02308 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
02309 dx&=7;
02310 dy&=7;
02311 for(y=0; y < b_h; y++){
02312 for(x=0; x < b_w; x++){
02313 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
02314 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
02315 }
02316 src1+=stride;
02317 src2+=stride;
02318 src3+=stride;
02319 src4+=stride;
02320 dst +=stride;
02321 }
02322 }else{
/* Otherwise blend the two planes selected by brane[]; note that this inner
 * `b` shadows the pass mask declared above. */
02323 const uint8_t *src1= hpel[l];
02324 const uint8_t *src2= hpel[r];
02325 int a= weight[((dx&7) + (8*(dy&7)))];
02326 int b= 8-a;
02327 for(y=0; y < b_h; y++){
02328 for(x=0; x < b_w; x++){
02329 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
02330 }
02331 src1+=stride;
02332 src2+=stride;
02333 dst +=stride;
02334 }
02335 }
02336 STOP_TIMER("mc_block")
02337 }
02338
/* Generates mc_block_hpel<dx><dy><b_w>(dst, src, stride, h): a wrapper that
 * runs mc_block() with no Plane (p == NULL) on a square b_w x b_w block at the
 * fixed sub-sample position (dx, dy). h must equal b_w (asserted). src is
 * moved back by HTAPS_MAX/2-1 rows and columns because mc_block() expects its
 * input to start that far before the block. The local tmp array is passed
 * through, but mc_block() does not reference its tmp parameter.
 * Instantiated below for the positions (0,0), (8,0), (0,8), (8,8) and the
 * block sizes 16 and 8. */
02339 #define mca(dx,dy,b_w)\
02340 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
02341 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
02342 assert(h==b_w);\
02343 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
02344 }
02345
02346 mca( 0, 0,16)
02347 mca( 8, 0,16)
02348 mca( 0, 8,16)
02349 mca( 8, 8,16)
02350 mca( 0, 0,8)
02351 mca( 8, 0,8)
02352 mca( 0, 8,8)
02353 mca( 8, 8,8)
02354
02355 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
02356 if(block->type & BLOCK_INTRA){
02357 int x, y;
02358 const int color = block->color[plane_index];
02359 const int color4= color*0x01010101;
02360 if(b_w==32){
02361 for(y=0; y < b_h; y++){
02362 *(uint32_t*)&dst[0 + y*stride]= color4;
02363 *(uint32_t*)&dst[4 + y*stride]= color4;
02364 *(uint32_t*)&dst[8 + y*stride]= color4;
02365 *(uint32_t*)&dst[12+ y*stride]= color4;
02366 *(uint32_t*)&dst[16+ y*stride]= color4;
02367 *(uint32_t*)&dst[20+ y*stride]= color4;
02368 *(uint32_t*)&dst[24+ y*stride]= color4;
02369 *(uint32_t*)&dst[28+ y*stride]= color4;
02370 }
02371 }else if(b_w==16){
02372 for(y=0; y < b_h; y++){
02373 *(uint32_t*)&dst[0 + y*stride]= color4;
02374 *(uint32_t*)&dst[4 + y*stride]= color4;
02375 *(uint32_t*)&dst[8 + y*stride]= color4;
02376 *(uint32_t*)&dst[12+ y*stride]= color4;
02377 }
02378 }else if(b_w==8){
02379 for(y=0; y < b_h; y++){
02380 *(uint32_t*)&dst[0 + y*stride]= color4;
02381 *(uint32_t*)&dst[4 + y*stride]= color4;
02382 }
02383 }else if(b_w==4){
02384 for(y=0; y < b_h; y++){
02385 *(uint32_t*)&dst[0 + y*stride]= color4;
02386 }
02387 }else{
02388 for(y=0; y < b_h; y++){
02389 for(x=0; x < b_w; x++){
02390 dst[x + y*stride]= color;
02391 }
02392 }
02393 }
02394 }else{
02395 uint8_t *src= s->last_picture[block->ref].data[plane_index];
02396 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
02397 int mx= block->mx*scale;
02398 int my= block->my*scale;
02399 const int dx= mx&15;
02400 const int dy= my&15;
02401 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
02402 sx += (mx>>4) - (HTAPS_MAX/2-1);
02403 sy += (my>>4) - (HTAPS_MAX/2-1);
02404 src += sx + sy*stride;
02405 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
02406 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
02407 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
02408 src= tmp + MB_SIZE;
02409 }
02410
02411
02412 assert(b_w>1 && b_h>1);
02413 assert((tab_index>=0 && tab_index<4) || b_w==32);
02414 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
02415 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
02416 else if(b_w==32){
02417 int y;
02418 for(y=0; y<b_h; y+=16){
02419 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
02420 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
02421 }
02422 }else if(b_w==b_h)
02423 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
02424 else if(b_w==2*b_h){
02425 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
02426 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
02427 }else{
02428 assert(2*b_w==b_h);
02429 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
02430 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
02431 }
02432 }
02433 }
02434
02435 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
02436 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
02437 int y, x;
02438 IDWTELEM * dst;
02439 for(y=0; y<b_h; y++){
02440
02441 const uint8_t *obmc1= obmc + y*obmc_stride;
02442 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02443 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02444 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02445 dst = slice_buffer_get_line(sb, src_y + y);
02446 for(x=0; x<b_w; x++){
02447 int v= obmc1[x] * block[3][x + y*src_stride]
02448 +obmc2[x] * block[2][x + y*src_stride]
02449 +obmc3[x] * block[1][x + y*src_stride]
02450 +obmc4[x] * block[0][x + y*src_stride];
02451
02452 v <<= 8 - LOG2_OBMC_MAX;
02453 if(FRAC_BITS != 8){
02454 v >>= 8 - FRAC_BITS;
02455 }
02456 if(add){
02457 v += dst[x + src_x];
02458 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
02459 if(v&(~255)) v= ~(v>>31);
02460 dst8[x + y*src_stride] = v;
02461 }else{
02462 dst[x + src_x] -= v;
02463 }
02464 }
02465 }
02466 }
02467
02468
/**
 * Predicts one b_w x b_h area with overlapped block motion compensation and
 * adds the prediction to, or subtracts it from, the coefficient data.
 *
 * The area is predicted once for each of the four block nodes around
 * (b_x, b_y) -- lt, rt, lb, rb -- and the predictions are blended with the
 * OBMC window. Nodes outside the block grid are replaced by their neighbour
 * inside it, the area is clipped to the plane (w x h), and predictions of
 * nodes that are same_block() are computed only once.
 *
 * @param sliced      non-zero: operate on slice buffer sb through
 *                    s->dsp.inner_add_yblock; zero: operate on dst
 * @param dst         coefficient plane (non-sliced), rows dst_stride apart
 * @param dst8        8-bit output picture plane, rows src_stride apart
 * @param obmc        window table, rows obmc_stride apart
 * @param src_x,src_y position of the area in the plane (may be negative)
 * @param add         non-zero: add, round and store to dst8; zero: subtract
 *                    from the coefficient data
 * @param offset_dst  non-sliced only: selects whether dst is advanced by the
 *                    clipped position (non-zero) or only by the amount
 *                    clipped off at the top/left (zero)
 */
02469 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
02470 const int b_width = s->b_width << s->block_max_depth;
02471 const int b_height= s->b_height << s->block_max_depth;
02472 const int b_stride= b_width;
02473 BlockNode *lt= &s->block[b_x + b_y*b_stride];
02474 BlockNode *rt= lt+1;
02475 BlockNode *lb= lt+b_stride;
02476 BlockNode *rb= lb+1;
02477 uint8_t *block[4];
/* Scratch areas for the predictions are tmp_step apart: MB_SIZE columns when
 * the stride is wide enough, MB_SIZE rows otherwise. */
02478 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
02479 uint8_t tmp[src_stride*7*MB_SIZE];
02480 uint8_t *ptmp;
02481 int x,y;
02482
/* Replace nodes that lie outside the block grid by their inside neighbour. */
02483 if(b_x<0){
02484 lt= rt;
02485 lb= rb;
02486 }else if(b_x + 1 >= b_width){
02487 rt= lt;
02488 rb= lb;
02489 }
02490 if(b_y<0){
02491 lt= lb;
02492 rt= rb;
02493 }else if(b_y + 1 >= b_height){
02494 lb= lt;
02495 rb= rt;
02496 }
02497
/* Clip the area to the plane; when clipping at the left/top the window
 * pointer is advanced by the same amount. */
02498 if(src_x<0){
02499 obmc -= src_x;
02500 b_w += src_x;
02501 if(!sliced && !offset_dst)
02502 dst -= src_x;
02503 src_x=0;
02504 }else if(src_x + b_w > w){
02505 b_w = w - src_x;
02506 }
02507 if(src_y<0){
02508 obmc -= src_y*obmc_stride;
02509 b_h += src_y;
02510 if(!sliced && !offset_dst)
02511 dst -= src_y*dst_stride;
02512 src_y=0;
02513 }else if(src_y + b_h> h){
02514 b_h = h - src_y;
02515 }
02516
02517 if(b_w<=0 || b_h<=0) return;
02518
02519 assert(src_stride > 2*MB_SIZE + 5);
02520 if(!sliced && offset_dst)
02521 dst += src_x + src_y*dst_stride;
02522 dst8+= src_x + src_y*src_stride;
02523
02524
/* The first three tmp_step units stay free for pred_block(), which uses
 * tmp + MB_SIZE for edge emulation. */
02525 ptmp= tmp + 3*tmp_step;
02526 block[0]= ptmp;
02527 ptmp+=tmp_step;
02528 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
02529
/* Reuse an earlier prediction whenever the node is equivalent. */
02530 if(same_block(lt, rt)){
02531 block[1]= block[0];
02532 }else{
02533 block[1]= ptmp;
02534 ptmp+=tmp_step;
02535 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
02536 }
02537
02538 if(same_block(lt, lb)){
02539 block[2]= block[0];
02540 }else if(same_block(rt, lb)){
02541 block[2]= block[1];
02542 }else{
02543 block[2]= ptmp;
02544 ptmp+=tmp_step;
02545 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
02546 }
02547
02548 if(same_block(lt, rb) ){
02549 block[3]= block[0];
02550 }else if(same_block(rt, rb)){
02551 block[3]= block[1];
02552 }else if(same_block(lb, rb)){
02553 block[3]= block[2];
02554 }else{
02555 block[3]= ptmp;
02556 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
02557 }
/* Disabled variant that applies the four window quadrants in separate
 * passes; it is never compiled. */
02558 #if 0
02559 for(y=0; y<b_h; y++){
02560 for(x=0; x<b_w; x++){
02561 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
02562 if(add) dst[x + y*dst_stride] += v;
02563 else dst[x + y*dst_stride] -= v;
02564 }
02565 }
02566 for(y=0; y<b_h; y++){
02567 uint8_t *obmc2= obmc + (obmc_stride>>1);
02568 for(x=0; x<b_w; x++){
02569 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
02570 if(add) dst[x + y*dst_stride] += v;
02571 else dst[x + y*dst_stride] -= v;
02572 }
02573 }
02574 for(y=0; y<b_h; y++){
02575 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02576 for(x=0; x<b_w; x++){
02577 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
02578 if(add) dst[x + y*dst_stride] += v;
02579 else dst[x + y*dst_stride] -= v;
02580 }
02581 }
02582 for(y=0; y<b_h; y++){
02583 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02584 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02585 for(x=0; x<b_w; x++){
02586 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
02587 if(add) dst[x + y*dst_stride] += v;
02588 else dst[x + y*dst_stride] -= v;
02589 }
02590 }
02591 #else
/* Sliced operation delegates the blend to the DSP context. */
02592 if(sliced){
02593 START_TIMER
02594
02595 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
02596 STOP_TIMER("inner_add_yblock")
02597 }else
/* Non-sliced operation: the same blend as ff_snow_inner_add_yblock(), but
 * on the dst plane instead of slice buffer lines. */
02598 for(y=0; y<b_h; y++){
02599
02600 const uint8_t *obmc1= obmc + y*obmc_stride;
02601 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02602 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02603 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02604 for(x=0; x<b_w; x++){
02605 int v= obmc1[x] * block[3][x + y*src_stride]
02606 +obmc2[x] * block[2][x + y*src_stride]
02607 +obmc3[x] * block[1][x + y*src_stride]
02608 +obmc4[x] * block[0][x + y*src_stride];
02609
02610 v <<= 8 - LOG2_OBMC_MAX;
02611 if(FRAC_BITS != 8){
02612 v >>= 8 - FRAC_BITS;
02613 }
02614 if(add){
02615 v += dst[x + y*dst_stride];
02616 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
/* saturate so that the stored byte is within 0..255 */
02617 if(v&(~255)) v= ~(v>>31);
02618 dst8[x + y*src_stride] = v;
02619 }else{
02620 dst[x + y*dst_stride] -= v;
02621 }
02622 }
02623 }
02624 #endif
02625 }
02626
02627 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
02628 Plane *p= &s->plane[plane_index];
02629 const int mb_w= s->b_width << s->block_max_depth;
02630 const int mb_h= s->b_height << s->block_max_depth;
02631 int x, y, mb_x;
02632 int block_size = MB_SIZE >> s->block_max_depth;
02633 int block_w = plane_index ? block_size/2 : block_size;
02634 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02635 int obmc_stride= plane_index ? block_size : 2*block_size;
02636 int ref_stride= s->current_picture.linesize[plane_index];
02637 uint8_t *dst8= s->current_picture.data[plane_index];
02638 int w= p->width;
02639 int h= p->height;
02640 START_TIMER
02641
02642 if(s->keyframe || (s->avctx->debug&512)){
02643 if(mb_y==mb_h)
02644 return;
02645
02646 if(add){
02647 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
02648 {
02649
02650 IDWTELEM * line = sb->line[y];
02651 for(x=0; x<w; x++)
02652 {
02653
02654 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02655 v >>= FRAC_BITS;
02656 if(v&(~255)) v= ~(v>>31);
02657 dst8[x + y*ref_stride]= v;
02658 }
02659 }
02660 }else{
02661 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
02662 {
02663
02664 IDWTELEM * line = sb->line[y];
02665 for(x=0; x<w; x++)
02666 {
02667 line[x] -= 128 << FRAC_BITS;
02668
02669 }
02670 }
02671 }
02672
02673 return;
02674 }
02675
02676 for(mb_x=0; mb_x<=mb_w; mb_x++){
02677 START_TIMER
02678
02679 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
02680 block_w*mb_x - block_w/2,
02681 block_w*mb_y - block_w/2,
02682 block_w, block_w,
02683 w, h,
02684 w, ref_stride, obmc_stride,
02685 mb_x - 1, mb_y - 1,
02686 add, 0, plane_index);
02687
02688 STOP_TIMER("add_yblock")
02689 }
02690
02691 STOP_TIMER("predict_slice")
02692 }
02693
02694 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
02695 Plane *p= &s->plane[plane_index];
02696 const int mb_w= s->b_width << s->block_max_depth;
02697 const int mb_h= s->b_height << s->block_max_depth;
02698 int x, y, mb_x;
02699 int block_size = MB_SIZE >> s->block_max_depth;
02700 int block_w = plane_index ? block_size/2 : block_size;
02701 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02702 const int obmc_stride= plane_index ? block_size : 2*block_size;
02703 int ref_stride= s->current_picture.linesize[plane_index];
02704 uint8_t *dst8= s->current_picture.data[plane_index];
02705 int w= p->width;
02706 int h= p->height;
02707 START_TIMER
02708
02709 if(s->keyframe || (s->avctx->debug&512)){
02710 if(mb_y==mb_h)
02711 return;
02712
02713 if(add){
02714 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02715 for(x=0; x<w; x++){
02716 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02717 v >>= FRAC_BITS;
02718 if(v&(~255)) v= ~(v>>31);
02719 dst8[x + y*ref_stride]= v;
02720 }
02721 }
02722 }else{
02723 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02724 for(x=0; x<w; x++){
02725 buf[x + y*w]-= 128<<FRAC_BITS;
02726 }
02727 }
02728 }
02729
02730 return;
02731 }
02732
02733 for(mb_x=0; mb_x<=mb_w; mb_x++){
02734 START_TIMER
02735
02736 add_yblock(s, 0, NULL, buf, dst8, obmc,
02737 block_w*mb_x - block_w/2,
02738 block_w*mb_y - block_w/2,
02739 block_w, block_w,
02740 w, h,
02741 w, ref_stride, obmc_stride,
02742 mb_x - 1, mb_y - 1,
02743 add, 1, plane_index);
02744
02745 STOP_TIMER("add_yblock")
02746 }
02747
02748 STOP_TIMER("predict_slice")
02749 }
02750
02751 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
02752 const int mb_h= s->b_height << s->block_max_depth;
02753 int mb_y;
02754 for(mb_y=0; mb_y<=mb_h; mb_y++)
02755 predict_slice(s, buf, plane_index, add, mb_y);
02756 }
02757
/**
 * Estimate the intra colour (DC value) of block (mb_x, mb_y) for one plane.
 *
 * The block is temporarily flagged intra with colour 0 for this plane, the
 * four block_w x block_w quadrants of its window are run through
 * add_yblock() into a zeroed scratch buffer, and an OBMC-weighted fit
 * against the input picture is accumulated (ab / aa). The block node is
 * restored before returning.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index plane to evaluate
 * @return fitted value, clipped to 0..255
 */
02758 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
02759 int i, x2, y2;
02760 Plane *p= &s->plane[plane_index];
02761 const int block_size = MB_SIZE >> s->block_max_depth;
02762 const int block_w = plane_index ? block_size/2 : block_size;
02763 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02764 const int obmc_stride= plane_index ? block_size : 2*block_size;
02765 const int ref_stride= s->current_picture.linesize[plane_index];
02766 uint8_t *src= s-> input_picture.data[plane_index];
/* per-plane region of the shared scratchpad, 4*block_size^2 elements apart */
02767 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
02768 const int b_stride = s->b_width << s->block_max_depth;
02769 const int w= p->width;
02770 const int h= p->height;
02771 int index= mb_x + mb_y*b_stride;
02772 BlockNode *b= &s->block[index];
02773 BlockNode backup= *b;
/* ab: weighted sum of residuals, aa: sum of squared weights */
02774 int ab=0;
02775 int aa=0;
02776
/* make this block contribute nothing itself: intra with colour 0 */
02777 b->type|= BLOCK_INTRA;
02778 b->color[plane_index]= 0;
02779 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
02780
/* visit the 2x2 quadrants of the window around the block */
02781 for(i=0; i<4; i++){
02782 int mb_x2= mb_x + (i &1) - 1;
02783 int mb_y2= mb_y + (i>>1) - 1;
02784 int x= block_w*mb_x2 + block_w/2;
02785 int y= block_w*mb_y2 + block_w/2;
02786
/* called with add=0 and no 8-bit destination, so the quadrant's
 * prediction is subtracted from the scratch buffer */
02787 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
02788 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
02789
/* only pixels of the quadrant that lie inside the plane take part */
02790 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
02791 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
/* position inside the window; note this shadows the outer 'index' */
02792 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
02793 int obmc_v= obmc[index];
02794 int d;
/* when the quadrant crosses a picture edge, also add the weight located
 * block_w rows/columns away in the OBMC table */
02795 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
02796 if(x<0) obmc_v += obmc[index + block_w];
02797 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
02798 if(x+block_w>w) obmc_v += obmc[index - block_w];
02799
02800
/* negate the scratch value, add the rounding constant and store it back */
02801 d = -dst[index] + (1<<(FRAC_BITS-1));
02802 dst[index] = d;
02803 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
02804 aa += obmc_v * obmc_v;
02805 }
02806 }
02807 }
/* undo the temporary intra marking */
02808 *b= backup;
02809
/* rounded ab*2^LOG2_OBMC_MAX / aa; NOTE(review): assumes aa != 0, i.e. at
 * least one window pixel lies inside the plane - confirm with callers */
02810 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255);
02811 }
02812
02813 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
02814 const int b_stride = s->b_width << s->block_max_depth;
02815 const int b_height = s->b_height<< s->block_max_depth;
02816 int index= x + y*b_stride;
02817 const BlockNode *b = &s->block[index];
02818 const BlockNode *left = x ? &s->block[index-1] : &null_block;
02819 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
02820 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
02821 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
02822 int dmx, dmy;
02823
02824
02825
02826 if(x<0 || x>=b_stride || y>=b_height)
02827 return 0;
02828
02829
02830
02831
02832
02833
02834
02835
02836
02837 if(b->type & BLOCK_INTRA){
02838 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
02839 + av_log2(2*FFABS(left->color[1] - b->color[1]))
02840 + av_log2(2*FFABS(left->color[2] - b->color[2])));
02841 }else{
02842 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
02843 dmx-= b->mx;
02844 dmy-= b->my;
02845 return 2*(1 + av_log2(2*FFABS(dmx))
02846 + av_log2(2*FFABS(dmy))
02847 + av_log2(2*b->ref));
02848 }
02849 }
02850
/**
 * Compute a rate-distortion score for block (mb_x, mb_y) with its current
 * parameters.
 *
 * The block's prediction for its whole 2*block_w window is produced with
 * pred_block(), blended with the values already stored in the scratchpad
 * using the weights in obmc_edged, and written into the current picture.
 * Distortion is then measured against the input picture, and for luma a bit
 * estimate of the affected block nodes is added, scaled by the penalty
 * factor.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index plane to evaluate
 * @param obmc_edged  weight window with row stride obmc_stride
 * @return distortion + rate * penalty_factor
 */
02851 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
02852 Plane *p= &s->plane[plane_index];
02853 const int block_size = MB_SIZE >> s->block_max_depth;
02854 const int block_w = plane_index ? block_size/2 : block_size;
02855 const int obmc_stride= plane_index ? block_size : 2*block_size;
02856 const int ref_stride= s->current_picture.linesize[plane_index];
02857 uint8_t *dst= s->current_picture.data[plane_index];
02858 uint8_t *src= s-> input_picture.data[plane_index];
/* same scratchpad address computation as in get_dc() */
02859 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
/* variable-length stack buffers sized from the picture stride */
02860 uint8_t cur[ref_stride*2*MB_SIZE];
02861 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
02862 const int b_stride = s->b_width << s->block_max_depth;
02863 const int b_height = s->b_height<< s->block_max_depth;
02864 const int w= p->width;
02865 const int h= p->height;
02866 int distortion;
02867 int rate= 0;
02868 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
/* top-left corner of the window in picture coordinates */
02869 int sx= block_w*mb_x - block_w/2;
02870 int sy= block_w*mb_y - block_w/2;
/* part of the window that lies inside the plane, in window coordinates */
02871 int x0= FFMAX(0,-sx);
02872 int y0= FFMAX(0,-sy);
02873 int x1= FFMIN(block_w*2, w-sx);
02874 int y1= FFMIN(block_w*2, h-sy);
02875 int i,x,y;
02876
/* prediction of this block alone over the full window, stored in cur */
02877 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
02878
/* blend: weight cur by obmc_edged, add the scratchpad value, scale down by
 * FRAC_BITS, clip to 0..255 and store into the current picture */
02879 for(y=y0; y<y1; y++){
02880 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
02881 const IDWTELEM *pred1 = pred + y*obmc_stride;
02882 uint8_t *cur1 = cur + y*ref_stride;
02883 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
02884 for(x=x0; x<x1; x++){
02885 #if FRAC_BITS >= LOG2_OBMC_MAX
02886 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
02887 #else
02888 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
02889 #endif
02890 v = (v + pred1[x]) >> FRAC_BITS;
02891 if(v&(~255)) v= ~(v>>31);
02892 dst1[x] = v;
02893 }
02894 }
02895
02896
/* corner blocks (first/last column AND first/last row): overwrite the
 * outermost block_w x block_w quadrant with the unblended prediction */
02897 if(LOG2_OBMC_MAX == 8
02898 && (mb_x == 0 || mb_x == b_stride-1)
02899 && (mb_y == 0 || mb_y == b_height-1)){
02900 if(mb_x == 0)
02901 x1 = block_w;
02902 else
02903 x0 = block_w;
02904 if(mb_y == 0)
02905 y1 = block_w;
02906 else
02907 y0 = block_w;
02908 for(y=y0; y<y1; y++)
02909 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
02910 }
02911
/* distortion over the whole window: 32x32 for block_w 16, else 16x16 */
02912 if(block_w==16){
02913
02914
02915
02916
02917
02918
02919 if(s->avctx->me_cmp == FF_CMP_W97)
02920 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02921 else if(s->avctx->me_cmp == FF_CMP_W53)
02922 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02923 else{
/* other metrics: sum the comparison over four 16x16 sub-blocks */
02924 distortion = 0;
02925 for(i=0; i<4; i++){
02926 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
02927 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
02928 }
02929 }
02930 }else{
02931 assert(block_w==8);
02932 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
02933 }
02934
/* rate is only counted for luma: this block plus the nodes at
 * (+1,0), (-1,+1) and (0,+1) relative to it */
02935 if(plane_index==0){
02936 for(i=0; i<4; i++){
02937
02938
02939
02940
02941 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
02942 }
/* in the second-to-last column the node at (+1,+1) is counted as well */
02943 if(mb_x == b_stride-2)
02944 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
02945 }
02946 return distortion + rate*penalty_factor;
02947 }
02948
/**
 * Compute a rate-distortion score for the 2x2 group of blocks whose
 * top-left member is (mb_x, mb_y).
 *
 * A 3x3 grid of block_w-sized tiles around the group is rebuilt in the
 * current picture with add_yblock() and compared against the input picture.
 * For luma, bit estimates of the affected block nodes are added, scaled by
 * the penalty factor.
 *
 * @param s           codec context
 * @param mb_x        column of the group's top-left block
 * @param mb_y        row of the group's top-left block
 * @param plane_index plane to evaluate
 * @return distortion + rate * penalty_factor
 */
02949 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
02950 int i, y2;
02951 Plane *p= &s->plane[plane_index];
02952 const int block_size = MB_SIZE >> s->block_max_depth;
02953 const int block_w = plane_index ? block_size/2 : block_size;
02954 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02955 const int obmc_stride= plane_index ? block_size : 2*block_size;
02956 const int ref_stride= s->current_picture.linesize[plane_index];
02957 uint8_t *dst= s->current_picture.data[plane_index];
02958 uint8_t *src= s-> input_picture.data[plane_index];
/* all-zero buffer handed to add_yblock() together with a stride of 0 */
02959 static const IDWTELEM zero_dst[4096];
02960 const int b_stride = s->b_width << s->block_max_depth;
02961 const int w= p->width;
02962 const int h= p->height;
02963 int distortion= 0;
02964 int rate= 0;
02965 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
02966
/* 3x3 tiles, offset by half a block relative to the block grid */
02967 for(i=0; i<9; i++){
02968 int mb_x2= mb_x + (i%3) - 1;
02969 int mb_y2= mb_y + (i/3) - 1;
02970 int x= block_w*mb_x2 + block_w/2;
02971 int y= block_w*mb_y2 + block_w/2;
02972
02973 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
02974 x, y, block_w, block_w, w, h, 0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
02975
02976
/* parts of the tile outside the plane are copied from the input picture,
 * presumably so that they add nothing to the distortion below */
02977 for(y2= y; y2<0; y2++)
02978 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02979 for(y2= h; y2<y+block_w; y2++)
02980 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02981 if(x<0){
02982 for(y2= y; y2<y+block_w; y2++)
02983 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
02984 }
02985 if(x+block_w > w){
02986 for(y2= y; y2<y+block_w; y2++)
02987 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
02988 }
02989
/* me_cmp[1] is used for 8-wide tiles, me_cmp[0] for 16-wide ones */
02990 assert(block_w== 8 || block_w==16);
02991 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
02992 }
02993
/* rate is only counted for luma */
02994 if(plane_index==0){
02995 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
02996 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
02997
02998
02999
03000
03001
03002
/* four identical blocks are costed once with width 2; in that case the
 * first four dxy entries (the group itself) are skipped */
03003 if(merged)
03004 rate = get_block_bits(s, mb_x, mb_y, 2);
03005 for(i=merged?4:0; i<9; i++){
03006 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
03007 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
03008 }
03009 }
03010 return distortion + rate*penalty_factor;
03011 }
03012
03013 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
03014 const int b_stride= s->b_width << s->block_max_depth;
03015 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
03016 BlockNode backup= *block;
03017 int rd, index, value;
03018
03019 assert(mb_x>=0 && mb_y>=0);
03020 assert(mb_x<b_stride);
03021
03022 if(intra){
03023 block->color[0] = p[0];
03024 block->color[1] = p[1];
03025 block->color[2] = p[2];
03026 block->type |= BLOCK_INTRA;
03027 }else{
03028 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
03029 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
03030 if(s->me_cache[index] == value)
03031 return 0;
03032 s->me_cache[index]= value;
03033
03034 block->mx= p[0];
03035 block->my= p[1];
03036 block->type &= ~BLOCK_INTRA;
03037 }
03038
03039 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
03040
03041
03042 if(rd < *best_rd){
03043 *best_rd= rd;
03044 return 1;
03045 }else{
03046 *block= backup;
03047 return 0;
03048 }
03049 }
03050
03051
03052 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
03053 int p[2] = {p0, p1};
03054 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
03055 }
03056
03057 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
03058 const int b_stride= s->b_width << s->block_max_depth;
03059 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
03060 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
03061 int rd, index, value;
03062
03063 assert(mb_x>=0 && mb_y>=0);
03064 assert(mb_x<b_stride);
03065 assert(((mb_x|mb_y)&1) == 0);
03066
03067 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
03068 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
03069 if(s->me_cache[index] == value)
03070 return 0;
03071 s->me_cache[index]= value;
03072
03073 block->mx= p0;
03074 block->my= p1;
03075 block->ref= ref;
03076 block->type &= ~BLOCK_INTRA;
03077 block[1]= block[b_stride]= block[b_stride+1]= *block;
03078
03079 rd= get_4block_rd(s, mb_x, mb_y, 0);
03080
03081
03082 if(rd < *best_rd){
03083 *best_rd= rd;
03084 return 1;
03085 }else{
03086 block[0]= backup[0];
03087 block[1]= backup[1];
03088 block[b_stride]= backup[2];
03089 block[b_stride+1]= backup[3];
03090 return 0;
03091 }
03092 }
03093
03094 static void iterative_me(SnowContext *s){
03095 int pass, mb_x, mb_y;
03096 const int b_width = s->b_width << s->block_max_depth;
03097 const int b_height= s->b_height << s->block_max_depth;
03098 const int b_stride= b_width;
03099 int color[3];
03100
03101 {
03102 RangeCoder r = s->c;
03103 uint8_t state[sizeof(s->block_state)];
03104 memcpy(state, s->block_state, sizeof(s->block_state));
03105 for(mb_y= 0; mb_y<s->b_height; mb_y++)
03106 for(mb_x= 0; mb_x<s->b_width; mb_x++)
03107 encode_q_branch(s, 0, mb_x, mb_y);
03108 s->c = r;
03109 memcpy(s->block_state, state, sizeof(s->block_state));
03110 }
03111
03112 for(pass=0; pass<25; pass++){
03113 int change= 0;
03114
03115 for(mb_y= 0; mb_y<b_height; mb_y++){
03116 for(mb_x= 0; mb_x<b_width; mb_x++){
03117 int dia_change, i, j, ref;
03118 int best_rd= INT_MAX, ref_rd;
03119 BlockNode backup, ref_b;
03120 const int index= mb_x + mb_y * b_stride;
03121 BlockNode *block= &s->block[index];
03122 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
03123 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
03124 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
03125 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
03126 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
03127 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
03128 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
03129 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
03130 const int b_w= (MB_SIZE >> s->block_max_depth);
03131 uint8_t obmc_edged[b_w*2][b_w*2];
03132
03133 if(pass && (block->type & BLOCK_OPT))
03134 continue;
03135 block->type |= BLOCK_OPT;
03136
03137 backup= *block;
03138
03139 if(!s->me_cache_generation)
03140 memset(s->me_cache, 0, sizeof(s->me_cache));
03141 s->me_cache_generation += 1<<22;
03142
03143
03144 {
03145 int x, y;
03146 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
03147 if(mb_x==0)
03148 for(y=0; y<b_w*2; y++)
03149 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
03150 if(mb_x==b_stride-1)
03151 for(y=0; y<b_w*2; y++)
03152 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
03153 if(mb_y==0){
03154 for(x=0; x<b_w*2; x++)
03155 obmc_edged[0][x] += obmc_edged[b_w-1][x];
03156 for(y=1; y<b_w; y++)
03157 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
03158 }
03159 if(mb_y==b_height-1){
03160 for(x=0; x<b_w*2; x++)
03161 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
03162 for(y=b_w; y<b_w*2-1; y++)
03163 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
03164 }
03165 }
03166
03167
03168 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
03169 {
03170 uint8_t *src= s-> input_picture.data[0];
03171 uint8_t *dst= s->current_picture.data[0];
03172 const int stride= s->current_picture.linesize[0];
03173 const int block_w= MB_SIZE >> s->block_max_depth;
03174 const int sx= block_w*mb_x - block_w/2;
03175 const int sy= block_w*mb_y - block_w/2;
03176 const int w= s->plane[0].width;
03177 const int h= s->plane[0].height;
03178 int y;
03179
03180 for(y=sy; y<0; y++)
03181 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03182 for(y=h; y<sy+block_w*2; y++)
03183 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03184 if(sx<0){
03185 for(y=sy; y<sy+block_w*2; y++)
03186 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
03187 }
03188 if(sx+block_w*2 > w){
03189 for(y=sy; y<sy+block_w*2; y++)
03190 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
03191 }
03192 }
03193
03194
03195 for(i=0; i<3; i++)
03196 color[i]= get_dc(s, mb_x, mb_y, i);
03197
03198
03199 if(pass > 0 && (block->type&BLOCK_INTRA)){
03200 int color0[3]= {block->color[0], block->color[1], block->color[2]};
03201 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
03202 }else
03203 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
03204
03205 ref_b= *block;
03206 ref_rd= best_rd;
03207 for(ref=0; ref < s->ref_frames; ref++){
03208 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
03209 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2)
03210 continue;
03211 block->ref= ref;
03212 best_rd= INT_MAX;
03213
03214 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
03215 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
03216 if(tb)
03217 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
03218 if(lb)
03219 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
03220 if(rb)
03221 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
03222 if(bb)
03223 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
03224
03225
03226
03227 do{
03228 dia_change=0;
03229 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
03230 for(j=0; j<i; j++){
03231 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03232 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03233 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03234 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03235 }
03236 }
03237 }while(dia_change);
03238
03239 do{
03240 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
03241 dia_change=0;
03242 for(i=0; i<8; i++)
03243 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
03244 }while(dia_change);
03245
03246
03247 mvr[0][0]= block->mx;
03248 mvr[0][1]= block->my;
03249 if(ref_rd > best_rd){
03250 ref_rd= best_rd;
03251 ref_b= *block;
03252 }
03253 }
03254 best_rd= ref_rd;
03255 *block= ref_b;
03256 #if 1
03257 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
03258
03259 #endif
03260 if(!same_block(block, &backup)){
03261 if(tb ) tb ->type &= ~BLOCK_OPT;
03262 if(lb ) lb ->type &= ~BLOCK_OPT;
03263 if(rb ) rb ->type &= ~BLOCK_OPT;
03264 if(bb ) bb ->type &= ~BLOCK_OPT;
03265 if(tlb) tlb->type &= ~BLOCK_OPT;
03266 if(trb) trb->type &= ~BLOCK_OPT;
03267 if(blb) blb->type &= ~BLOCK_OPT;
03268 if(brb) brb->type &= ~BLOCK_OPT;
03269 change ++;
03270 }
03271 }
03272 }
03273 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
03274 if(!change)
03275 break;
03276 }
03277
03278 if(s->block_max_depth == 1){
03279 int change= 0;
03280 for(mb_y= 0; mb_y<b_height; mb_y+=2){
03281 for(mb_x= 0; mb_x<b_width; mb_x+=2){
03282 int i;
03283 int best_rd, init_rd;
03284 const int index= mb_x + mb_y * b_stride;
03285 BlockNode *b[4];
03286
03287 b[0]= &s->block[index];
03288 b[1]= b[0]+1;
03289 b[2]= b[0]+b_stride;
03290 b[3]= b[2]+1;
03291 if(same_block(b[0], b[1]) &&
03292 same_block(b[0], b[2]) &&
03293 same_block(b[0], b[3]))
03294 continue;
03295
03296 if(!s->me_cache_generation)
03297 memset(s->me_cache, 0, sizeof(s->me_cache));
03298 s->me_cache_generation += 1<<22;
03299
03300 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
03301
03302
03303 check_4block_inter(s, mb_x, mb_y,
03304 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
03305 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
03306
03307 for(i=0; i<4; i++)
03308 if(!(b[i]->type&BLOCK_INTRA))
03309 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
03310
03311 if(init_rd != best_rd)
03312 change++;
03313 }
03314 }
03315 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
03316 }
03317 }
03318
03319 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
03320 const int level= b->level;
03321 const int w= b->width;
03322 const int h= b->height;
03323 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03324 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
03325 int x,y, thres1, thres2;
03326
03327
03328 if(s->qlog == LOSSLESS_QLOG){
03329 for(y=0; y<h; y++)
03330 for(x=0; x<w; x++)
03331 dst[x + y*stride]= src[x + y*stride];
03332 return;
03333 }
03334
03335 bias= bias ? 0 : (3*qmul)>>3;
03336 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
03337 thres2= 2*thres1;
03338
03339 if(!bias){
03340 for(y=0; y<h; y++){
03341 for(x=0; x<w; x++){
03342 int i= src[x + y*stride];
03343
03344 if((unsigned)(i+thres1) > thres2){
03345 if(i>=0){
03346 i<<= QEXPSHIFT;
03347 i/= qmul;
03348 dst[x + y*stride]= i;
03349 }else{
03350 i= -i;
03351 i<<= QEXPSHIFT;
03352 i/= qmul;
03353 dst[x + y*stride]= -i;
03354 }
03355 }else
03356 dst[x + y*stride]= 0;
03357 }
03358 }
03359 }else{
03360 for(y=0; y<h; y++){
03361 for(x=0; x<w; x++){
03362 int i= src[x + y*stride];
03363
03364 if((unsigned)(i+thres1) > thres2){
03365 if(i>=0){
03366 i<<= QEXPSHIFT;
03367 i= (i + bias) / qmul;
03368 dst[x + y*stride]= i;
03369 }else{
03370 i= -i;
03371 i<<= QEXPSHIFT;
03372 i= (i + bias) / qmul;
03373 dst[x + y*stride]= -i;
03374 }
03375 }else
03376 dst[x + y*stride]= 0;
03377 }
03378 }
03379 }
03380 if(level+1 == s->spatial_decomposition_count){
03381
03382 }
03383 }
03384
03385 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
03386 const int w= b->width;
03387 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03388 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03389 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03390 int x,y;
03391 START_TIMER
03392
03393 if(s->qlog == LOSSLESS_QLOG) return;
03394
03395 for(y=start_y; y<end_y; y++){
03396
03397 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03398 for(x=0; x<w; x++){
03399 int i= line[x];
03400 if(i<0){
03401 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT));
03402 }else if(i>0){
03403 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
03404 }
03405 }
03406 }
03407 if(w > 200 ){
03408 STOP_TIMER("dquant")
03409 }
03410 }
03411
03412 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
03413 const int w= b->width;
03414 const int h= b->height;
03415 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03416 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03417 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03418 int x,y;
03419 START_TIMER
03420
03421 if(s->qlog == LOSSLESS_QLOG) return;
03422
03423 for(y=0; y<h; y++){
03424 for(x=0; x<w; x++){
03425 int i= src[x + y*stride];
03426 if(i<0){
03427 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT));
03428 }else if(i>0){
03429 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
03430 }
03431 }
03432 }
03433 if(w > 200 ){
03434 STOP_TIMER("dquant")
03435 }
03436 }
03437
03438 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03439 const int w= b->width;
03440 const int h= b->height;
03441 int x,y;
03442
03443 for(y=h-1; y>=0; y--){
03444 for(x=w-1; x>=0; x--){
03445 int i= x + y*stride;
03446
03447 if(x){
03448 if(use_median){
03449 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03450 else src[i] -= src[i - 1];
03451 }else{
03452 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03453 else src[i] -= src[i - 1];
03454 }
03455 }else{
03456 if(y) src[i] -= src[i - stride];
03457 }
03458 }
03459 }
03460 }
03461
03462 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
03463 const int w= b->width;
03464 int x,y;
03465
03466
03467
03468 IDWTELEM * line=0;
03469 IDWTELEM * prev;
03470
03471 if (start_y != 0)
03472 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03473
03474 for(y=start_y; y<end_y; y++){
03475 prev = line;
03476
03477 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03478 for(x=0; x<w; x++){
03479 if(x){
03480 if(use_median){
03481 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
03482 else line[x] += line[x - 1];
03483 }else{
03484 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
03485 else line[x] += line[x - 1];
03486 }
03487 }else{
03488 if(y) line[x] += prev[x];
03489 }
03490 }
03491 }
03492
03493
03494 }
03495
03496 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03497 const int w= b->width;
03498 const int h= b->height;
03499 int x,y;
03500
03501 for(y=0; y<h; y++){
03502 for(x=0; x<w; x++){
03503 int i= x + y*stride;
03504
03505 if(x){
03506 if(use_median){
03507 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03508 else src[i] += src[i - 1];
03509 }else{
03510 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03511 else src[i] += src[i - 1];
03512 }
03513 }else{
03514 if(y) src[i] += src[i - stride];
03515 }
03516 }
03517 }
03518 }
03519
03520 static void encode_qlogs(SnowContext *s){
03521 int plane_index, level, orientation;
03522
03523 for(plane_index=0; plane_index<2; plane_index++){
03524 for(level=0; level<s->spatial_decomposition_count; level++){
03525 for(orientation=level ? 1:0; orientation<4; orientation++){
03526 if(orientation==2) continue;
03527 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
03528 }
03529 }
03530 }
03531 }
03532
03533 static void encode_header(SnowContext *s){
03534 int plane_index, i;
03535 uint8_t kstate[32];
03536
03537 memset(kstate, MID_STATE, sizeof(kstate));
03538
03539 put_rac(&s->c, kstate, s->keyframe);
03540 if(s->keyframe || s->always_reset){
03541 reset_contexts(s);
03542 s->last_spatial_decomposition_type=
03543 s->last_qlog=
03544 s->last_qbias=
03545 s->last_mv_scale=
03546 s->last_block_max_depth= 0;
03547 for(plane_index=0; plane_index<2; plane_index++){
03548 Plane *p= &s->plane[plane_index];
03549 p->last_htaps=0;
03550 p->last_diag_mc=0;
03551 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
03552 }
03553 }
03554 if(s->keyframe){
03555 put_symbol(&s->c, s->header_state, s->version, 0);
03556 put_rac(&s->c, s->header_state, s->always_reset);
03557 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
03558 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
03559 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03560 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
03561 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
03562 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
03563 put_rac(&s->c, s->header_state, s->spatial_scalability);
03564
03565 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
03566
03567 encode_qlogs(s);
03568 }
03569
03570 if(!s->keyframe){
03571 int update_mc=0;
03572 for(plane_index=0; plane_index<2; plane_index++){
03573 Plane *p= &s->plane[plane_index];
03574 update_mc |= p->last_htaps != p->htaps;
03575 update_mc |= p->last_diag_mc != p->diag_mc;
03576 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03577 }
03578 put_rac(&s->c, s->header_state, update_mc);
03579 if(update_mc){
03580 for(plane_index=0; plane_index<2; plane_index++){
03581 Plane *p= &s->plane[plane_index];
03582 put_rac(&s->c, s->header_state, p->diag_mc);
03583 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
03584 for(i= p->htaps/2; i; i--)
03585 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
03586 }
03587 }
03588 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
03589 put_rac(&s->c, s->header_state, 1);
03590 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03591 encode_qlogs(s);
03592 }else
03593 put_rac(&s->c, s->header_state, 0);
03594 }
03595
03596 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
03597 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
03598 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
03599 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
03600 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
03601
03602 }
03603
03604 static void update_last_header_values(SnowContext *s){
03605 int plane_index;
03606
03607 if(!s->keyframe){
03608 for(plane_index=0; plane_index<2; plane_index++){
03609 Plane *p= &s->plane[plane_index];
03610 p->last_diag_mc= p->diag_mc;
03611 p->last_htaps = p->htaps;
03612 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03613 }
03614 }
03615
03616 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
03617 s->last_qlog = s->qlog;
03618 s->last_qbias = s->qbias;
03619 s->last_mv_scale = s->mv_scale;
03620 s->last_block_max_depth = s->block_max_depth;
03621 s->last_spatial_decomposition_count= s->spatial_decomposition_count;
03622 }
03623
03624 static void decode_qlogs(SnowContext *s){
03625 int plane_index, level, orientation;
03626
03627 for(plane_index=0; plane_index<3; plane_index++){
03628 for(level=0; level<s->spatial_decomposition_count; level++){
03629 for(orientation=level ? 1:0; orientation<4; orientation++){
03630 int q;
03631 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
03632 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
03633 else q= get_symbol(&s->c, s->header_state, 1);
03634 s->plane[plane_index].band[level][orientation].qlog= q;
03635 }
03636 }
03637 }
03638 }
03639
03640 static int decode_header(SnowContext *s){
03641 int plane_index;
03642 uint8_t kstate[32];
03643
03644 memset(kstate, MID_STATE, sizeof(kstate));
03645
03646 s->keyframe= get_rac(&s->c, kstate);
03647 if(s->keyframe || s->always_reset){
03648 reset_contexts(s);
03649 s->spatial_decomposition_type=
03650 s->qlog=
03651 s->qbias=
03652 s->mv_scale=
03653 s->block_max_depth= 0;
03654 }
03655 if(s->keyframe){
03656 s->version= get_symbol(&s->c, s->header_state, 0);
03657 if(s->version>0){
03658 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
03659 return -1;
03660 }
03661 s->always_reset= get_rac(&s->c, s->header_state);
03662 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
03663 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03664 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03665 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
03666 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
03667 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
03668 s->spatial_scalability= get_rac(&s->c, s->header_state);
03669
03670 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
03671
03672 decode_qlogs(s);
03673 }
03674
03675 if(!s->keyframe){
03676 if(get_rac(&s->c, s->header_state)){
03677 for(plane_index=0; plane_index<2; plane_index++){
03678 int htaps, i, sum=0;
03679 Plane *p= &s->plane[plane_index];
03680 p->diag_mc= get_rac(&s->c, s->header_state);
03681 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
03682 if((unsigned)htaps > HTAPS_MAX || htaps==0)
03683 return -1;
03684 p->htaps= htaps;
03685 for(i= htaps/2; i; i--){
03686 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
03687 sum += p->hcoeff[i];
03688 }
03689 p->hcoeff[0]= 32-sum;
03690 }
03691 s->plane[2].diag_mc= s->plane[1].diag_mc;
03692 s->plane[2].htaps = s->plane[1].htaps;
03693 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
03694 }
03695 if(get_rac(&s->c, s->header_state)){
03696 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03697 decode_qlogs(s);
03698 }
03699 }
03700
03701 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
03702 if(s->spatial_decomposition_type > 1){
03703 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
03704 return -1;
03705 }
03706
03707 s->qlog += get_symbol(&s->c, s->header_state, 1);
03708 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
03709 s->qbias += get_symbol(&s->c, s->header_state, 1);
03710 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
03711 if(s->block_max_depth > 1 || s->block_max_depth < 0){
03712 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
03713 s->block_max_depth= 0;
03714 return -1;
03715 }
03716
03717 return 0;
03718 }
03719
03720 static void init_qexp(void){
03721 int i;
03722 double v=128;
03723
03724 for(i=0; i<QROOT; i++){
03725 qexp[i]= lrintf(v);
03726 v *= pow(2, 1.0 / QROOT);
03727 }
03728 }
03729
03730 static int common_init(AVCodecContext *avctx){
03731 SnowContext *s = avctx->priv_data;
03732 int width, height;
03733 int i, j;
03734
03735 s->avctx= avctx;
03736
03737 dsputil_init(&s->dsp, avctx);
03738
03739 #define mcf(dx,dy)\
03740 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
03741 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
03742 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
03743 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
03744 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
03745 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
03746
03747 mcf( 0, 0)
03748 mcf( 4, 0)
03749 mcf( 8, 0)
03750 mcf(12, 0)
03751 mcf( 0, 4)
03752 mcf( 4, 4)
03753 mcf( 8, 4)
03754 mcf(12, 4)
03755 mcf( 0, 8)
03756 mcf( 4, 8)
03757 mcf( 8, 8)
03758 mcf(12, 8)
03759 mcf( 0,12)
03760 mcf( 4,12)
03761 mcf( 8,12)
03762 mcf(12,12)
03763
03764 #define mcfh(dx,dy)\
03765 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
03766 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
03767 mc_block_hpel ## dx ## dy ## 16;\
03768 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
03769 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
03770 mc_block_hpel ## dx ## dy ## 8;
03771
03772 mcfh(0, 0)
03773 mcfh(8, 0)
03774 mcfh(0, 8)
03775 mcfh(8, 8)
03776
03777 if(!qexp[0])
03778 init_qexp();
03779
03780
03781
03782 width= s->avctx->width;
03783 height= s->avctx->height;
03784
03785 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
03786 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
03787
03788 for(i=0; i<MAX_REF_FRAMES; i++)
03789 for(j=0; j<MAX_REF_FRAMES; j++)
03790 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
03791
03792 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
03793
03794 return 0;
03795 }
03796
03797 static int common_init_after_header(AVCodecContext *avctx){
03798 SnowContext *s = avctx->priv_data;
03799 int plane_index, level, orientation;
03800
03801 for(plane_index=0; plane_index<3; plane_index++){
03802 int w= s->avctx->width;
03803 int h= s->avctx->height;
03804
03805 if(plane_index){
03806 w>>= s->chroma_h_shift;
03807 h>>= s->chroma_v_shift;
03808 }
03809 s->plane[plane_index].width = w;
03810 s->plane[plane_index].height= h;
03811
03812
03813 for(level=s->spatial_decomposition_count-1; level>=0; level--){
03814 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03815 SubBand *b= &s->plane[plane_index].band[level][orientation];
03816
03817 b->buf= s->spatial_dwt_buffer;
03818 b->level= level;
03819 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
03820 b->width = (w + !(orientation&1))>>1;
03821 b->height= (h + !(orientation>1))>>1;
03822
03823 b->stride_line = 1 << (s->spatial_decomposition_count - level);
03824 b->buf_x_offset = 0;
03825 b->buf_y_offset = 0;
03826
03827 if(orientation&1){
03828 b->buf += (w+1)>>1;
03829 b->buf_x_offset = (w+1)>>1;
03830 }
03831 if(orientation>1){
03832 b->buf += b->stride>>1;
03833 b->buf_y_offset = b->stride_line >> 1;
03834 }
03835 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
03836
03837 if(level)
03838 b->parent= &s->plane[plane_index].band[level-1][orientation];
03839
03840 av_freep(&b->x_coeff);
03841 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
03842 }
03843 w= (w+1)>>1;
03844 h= (h+1)>>1;
03845 }
03846 }
03847
03848 return 0;
03849 }
03850
03851 static int qscale2qlog(int qscale){
03852 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
03853 + 61*QROOT/8;
03854 }
03855
03856 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
03857 {
03858
03859
03860
03861 uint32_t coef_sum= 0;
03862 int level, orientation, delta_qlog;
03863
03864 for(level=0; level<s->spatial_decomposition_count; level++){
03865 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03866 SubBand *b= &s->plane[0].band[level][orientation];
03867 IDWTELEM *buf= b->ibuf;
03868 const int w= b->width;
03869 const int h= b->height;
03870 const int stride= b->stride;
03871 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
03872 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03873 const int qdiv= (1<<16)/qmul;
03874 int x, y;
03875
03876 for(y=0; y<h; y++)
03877 for(x=0; x<w; x++)
03878 buf[x+y*stride]= b->buf[x+y*stride];
03879 if(orientation==0)
03880 decorrelate(s, b, buf, stride, 1, 0);
03881 for(y=0; y<h; y++)
03882 for(x=0; x<w; x++)
03883 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
03884 }
03885 }
03886
03887
03888 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
03889 assert(coef_sum < INT_MAX);
03890
03891 if(pict->pict_type == I_TYPE){
03892 s->m.current_picture.mb_var_sum= coef_sum;
03893 s->m.current_picture.mc_mb_var_sum= 0;
03894 }else{
03895 s->m.current_picture.mc_mb_var_sum= coef_sum;
03896 s->m.current_picture.mb_var_sum= 0;
03897 }
03898
03899 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
03900 if (pict->quality < 0)
03901 return INT_MIN;
03902 s->lambda= pict->quality * 3/2;
03903 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
03904 s->qlog+= delta_qlog;
03905 return delta_qlog;
03906 }
03907
03908 static void calculate_visual_weight(SnowContext *s, Plane *p){
03909 int width = p->width;
03910 int height= p->height;
03911 int level, orientation, x, y;
03912
03913 for(level=0; level<s->spatial_decomposition_count; level++){
03914 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03915 SubBand *b= &p->band[level][orientation];
03916 IDWTELEM *ibuf= b->ibuf;
03917 int64_t error=0;
03918
03919 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
03920 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
03921 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
03922 for(y=0; y<height; y++){
03923 for(x=0; x<width; x++){
03924 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
03925 error += d*d;
03926 }
03927 }
03928
03929 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
03930
03931 }
03932 }
03933 }
03934
03935 #define QUANTIZE2 0
03936
03937 #if QUANTIZE2==1
03938 #define Q2_STEP 8
03939
03940 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
03941 SubBand *b= &p->band[level][orientation];
03942 int x, y;
03943 int xo=0;
03944 int yo=0;
03945 int step= 1 << (s->spatial_decomposition_count - level);
03946
03947 if(orientation&1)
03948 xo= step>>1;
03949 if(orientation&2)
03950 yo= step>>1;
03951
03952
03953
03954 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
03955 for(y=0; y<p->height; y++){
03956 for(x=0; x<p->width; x++){
03957 int sx= (x-xo + step/2) / step / Q2_STEP;
03958 int sy= (y-yo + step/2) / step / Q2_STEP;
03959 int v= r0[x + y*p->width] - r1[x + y*p->width];
03960 assert(sx>=0 && sy>=0 && sx < score_stride);
03961 v= ((v+8)>>4)<<4;
03962 score[sx + sy*score_stride] += v*v;
03963 assert(score[sx + sy*score_stride] >= 0);
03964 }
03965 }
03966 }
03967
03968 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
03969 int level, orientation;
03970
03971 for(level=0; level<s->spatial_decomposition_count; level++){
03972 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03973 SubBand *b= &p->band[level][orientation];
03974 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
03975
03976 dequantize(s, b, dst, b->stride);
03977 }
03978 }
03979 }
03980
03981 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
03982 int level, orientation, ys, xs, x, y, pass;
03983 IDWTELEM best_dequant[height * stride];
03984 IDWTELEM idwt2_buffer[height * stride];
03985 const int score_stride= (width + 10)/Q2_STEP;
03986 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP];
03987 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP];
03988 int threshold= (s->m.lambda * s->m.lambda) >> 6;
03989
03990
03991
03992
03993 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
03994
03995 for(level=0; level<s->spatial_decomposition_count; level++){
03996 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03997 SubBand *b= &p->band[level][orientation];
03998 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
03999 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
04000 assert(src == b->buf);
04001
04002 quantize(s, b, dst, src, b->stride, s->qbias);
04003 }
04004 }
04005 for(pass=0; pass<1; pass++){
04006 if(s->qbias == 0)
04007 continue;
04008 for(level=0; level<s->spatial_decomposition_count; level++){
04009 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04010 SubBand *b= &p->band[level][orientation];
04011 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
04012 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
04013
04014 for(ys= 0; ys<Q2_STEP; ys++){
04015 for(xs= 0; xs<Q2_STEP; xs++){
04016 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
04017 dequantize_all(s, p, idwt2_buffer, width, height);
04018 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
04019 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
04020 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
04021 for(y=ys; y<b->height; y+= Q2_STEP){
04022 for(x=xs; x<b->width; x+= Q2_STEP){
04023 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
04024 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
04025
04026 }
04027 }
04028 dequantize_all(s, p, idwt2_buffer, width, height);
04029 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
04030 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
04031 for(y=ys; y<b->height; y+= Q2_STEP){
04032 for(x=xs; x<b->width; x+= Q2_STEP){
04033 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
04034 if(score[score_idx] <= best_score[score_idx] + threshold){
04035 best_score[score_idx]= score[score_idx];
04036 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
04037 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
04038
04039 }
04040 }
04041 }
04042 }
04043 }
04044 }
04045 }
04046 }
04047 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
04048 }
04049
04050 #endif
04051
04052 static int encode_init(AVCodecContext *avctx)
04053 {
04054 SnowContext *s = avctx->priv_data;
04055 int plane_index;
04056
04057 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
04058 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
04059 "use vstrict=-2 / -strict -2 to use it anyway\n");
04060 return -1;
04061 }
04062
04063 if(avctx->prediction_method == DWT_97
04064 && (avctx->flags & CODEC_FLAG_QSCALE)
04065 && avctx->global_quality == 0){
04066 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
04067 return -1;
04068 }
04069
04070 s->spatial_decomposition_type= avctx->prediction_method;
04071
04072 s->chroma_h_shift= 1;
04073 s->chroma_v_shift= 1;
04074
04075 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
04076 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
04077
04078 for(plane_index=0; plane_index<3; plane_index++){
04079 s->plane[plane_index].diag_mc= 1;
04080 s->plane[plane_index].htaps= 6;
04081 s->plane[plane_index].hcoeff[0]= 40;
04082 s->plane[plane_index].hcoeff[1]= -10;
04083 s->plane[plane_index].hcoeff[2]= 2;
04084 s->plane[plane_index].fast_mc= 1;
04085 }
04086
04087 common_init(avctx);
04088 alloc_blocks(s);
04089
04090 s->version=0;
04091
04092 s->m.avctx = avctx;
04093 s->m.flags = avctx->flags;
04094 s->m.bit_rate= avctx->bit_rate;
04095
04096 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
04097 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
04098 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
04099 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
04100 h263_encode_init(&s->m);
04101
04102 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
04103
04104 if(avctx->flags&CODEC_FLAG_PASS1){
04105 if(!avctx->stats_out)
04106 avctx->stats_out = av_mallocz(256);
04107 }
04108 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
04109 if(ff_rate_control_init(&s->m) < 0)
04110 return -1;
04111 }
04112 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
04113
04114 avctx->coded_frame= &s->current_picture;
04115 switch(avctx->pix_fmt){
04116
04117
04118 case PIX_FMT_YUV420P:
04119 case PIX_FMT_GRAY8:
04120
04121
04122 s->colorspace_type= 0;
04123 break;
04124
04125
04126
04127 default:
04128 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
04129 return -1;
04130 }
04131
04132 s->chroma_h_shift= 1;
04133 s->chroma_v_shift= 1;
04134
04135 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
04136 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
04137
04138 s->avctx->get_buffer(s->avctx, &s->input_picture);
04139
04140 if(s->avctx->me_method == ME_ITER){
04141 int i;
04142 int size= s->b_width * s->b_height << 2*s->block_max_depth;
04143 for(i=0; i<s->max_ref_frames; i++){
04144 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
04145 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
04146 }
04147 }
04148
04149 return 0;
04150 }
04151
04152 #define USE_HALFPEL_PLANE 0
04153
04154 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
04155 int p,x,y;
04156
04157 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
04158
04159 for(p=0; p<3; p++){
04160 int is_chroma= !!p;
04161 int w= s->avctx->width >>is_chroma;
04162 int h= s->avctx->height >>is_chroma;
04163 int ls= frame->linesize[p];
04164 uint8_t *src= frame->data[p];
04165
04166 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04167 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04168 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04169
04170 halfpel[0][p]= src;
04171 for(y=0; y<h; y++){
04172 for(x=0; x<w; x++){
04173 int i= y*ls + x;
04174
04175 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
04176 }
04177 }
04178 for(y=0; y<h; y++){
04179 for(x=0; x<w; x++){
04180 int i= y*ls + x;
04181
04182 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
04183 }
04184 }
04185 src= halfpel[1][p];
04186 for(y=0; y<h; y++){
04187 for(x=0; x<w; x++){
04188 int i= y*ls + x;
04189
04190 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
04191 }
04192 }
04193
04194
04195 }
04196 }
04197
04198 static int frame_start(SnowContext *s){
04199 AVFrame tmp;
04200 int w= s->avctx->width;
04201 int h= s->avctx->height;
04202
04203 if(s->current_picture.data[0]){
04204 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
04205 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
04206 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
04207 }
04208
04209 tmp= s->last_picture[s->max_ref_frames-1];
04210 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
04211 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
04212 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
04213 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
04214 s->last_picture[0]= s->current_picture;
04215 s->current_picture= tmp;
04216
04217 if(s->keyframe){
04218 s->ref_frames= 0;
04219 }else{
04220 int i;
04221 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
04222 if(i && s->last_picture[i-1].key_frame)
04223 break;
04224 s->ref_frames= i;
04225 }
04226
04227 s->current_picture.reference= 1;
04228 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
04229 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
04230 return -1;
04231 }
04232
04233 s->current_picture.key_frame= s->keyframe;
04234
04235 return 0;
04236 }
04237
04238 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
04239 SnowContext *s = avctx->priv_data;
04240 RangeCoder * const c= &s->c;
04241 AVFrame *pict = data;
04242 const int width= s->avctx->width;
04243 const int height= s->avctx->height;
04244 int level, orientation, plane_index, i, y;
04245 uint8_t rc_header_bak[sizeof(s->header_state)];
04246 uint8_t rc_block_bak[sizeof(s->block_state)];
04247
04248 ff_init_range_encoder(c, buf, buf_size);
04249 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04250
04251 for(i=0; i<3; i++){
04252 int shift= !!i;
04253 for(y=0; y<(height>>shift); y++)
04254 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
04255 &pict->data[i][y * pict->linesize[i]],
04256 width>>shift);
04257 }
04258 s->new_picture = *pict;
04259
04260 s->m.picture_number= avctx->frame_number;
04261 if(avctx->flags&CODEC_FLAG_PASS2){
04262 s->m.pict_type =
04263 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
04264 s->keyframe= pict->pict_type==FF_I_TYPE;
04265 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
04266 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
04267 if (pict->quality < 0)
04268 return -1;
04269 }
04270 }else{
04271 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
04272 s->m.pict_type=
04273 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
04274 }
04275
04276 if(s->pass1_rc && avctx->frame_number == 0)
04277 pict->quality= 2*FF_QP2LAMBDA;
04278 if(pict->quality){
04279 s->qlog= qscale2qlog(pict->quality);
04280 s->lambda = pict->quality * 3/2;
04281 }
04282 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
04283 s->qlog= LOSSLESS_QLOG;
04284 s->lambda = 0;
04285 }
04286
04287 frame_start(s);
04288
04289 s->m.current_picture_ptr= &s->m.current_picture;
04290 if(pict->pict_type == P_TYPE){
04291 int block_width = (width +15)>>4;
04292 int block_height= (height+15)>>4;
04293 int stride= s->current_picture.linesize[0];
04294
04295 assert(s->current_picture.data[0]);
04296 assert(s->last_picture[0].data[0]);
04297
04298 s->m.avctx= s->avctx;
04299 s->m.current_picture.data[0]= s->current_picture.data[0];
04300 s->m. last_picture.data[0]= s->last_picture[0].data[0];
04301 s->m. new_picture.data[0]= s-> input_picture.data[0];
04302 s->m. last_picture_ptr= &s->m. last_picture;
04303 s->m.linesize=
04304 s->m. last_picture.linesize[0]=
04305 s->m. new_picture.linesize[0]=
04306 s->m.current_picture.linesize[0]= stride;
04307 s->m.uvlinesize= s->current_picture.linesize[1];
04308 s->m.width = width;
04309 s->m.height= height;
04310 s->m.mb_width = block_width;
04311 s->m.mb_height= block_height;
04312 s->m.mb_stride= s->m.mb_width+1;
04313 s->m.b8_stride= 2*s->m.mb_width+1;
04314 s->m.f_code=1;
04315 s->m.pict_type= pict->pict_type;
04316 s->m.me_method= s->avctx->me_method;
04317 s->m.me.scene_change_score=0;
04318 s->m.flags= s->avctx->flags;
04319 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
04320 s->m.out_format= FMT_H263;
04321 s->m.unrestricted_mv= 1;
04322
04323 s->m.lambda = s->lambda;
04324 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
04325 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
04326
04327 s->m.dsp= s->dsp;
04328 ff_init_me(&s->m);
04329 s->dsp= s->m.dsp;
04330 }
04331
04332 if(s->pass1_rc){
04333 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
04334 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
04335 }
04336
04337 redo_frame:
04338
04339 if(pict->pict_type == I_TYPE)
04340 s->spatial_decomposition_count= 5;
04341 else
04342 s->spatial_decomposition_count= 5;
04343
04344 s->m.pict_type = pict->pict_type;
04345 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
04346
04347 common_init_after_header(avctx);
04348
04349 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
04350 for(plane_index=0; plane_index<3; plane_index++){
04351 calculate_visual_weight(s, &s->plane[plane_index]);
04352 }
04353 }
04354
04355 encode_header(s);
04356 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
04357 encode_blocks(s, 1);
04358 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
04359
04360 for(plane_index=0; plane_index<3; plane_index++){
04361 Plane *p= &s->plane[plane_index];
04362 int w= p->width;
04363 int h= p->height;
04364 int x, y;
04365
04366
04367 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
04368
04369 if(pict->data[plane_index])
04370 for(y=0; y<h; y++){
04371 for(x=0; x<w; x++){
04372 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
04373 }
04374 }
04375 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
04376
04377 if( plane_index==0
04378 && pict->pict_type == P_TYPE
04379 && !(avctx->flags&CODEC_FLAG_PASS2)
04380 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
04381 ff_init_range_encoder(c, buf, buf_size);
04382 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04383 pict->pict_type= FF_I_TYPE;
04384 s->keyframe=1;
04385 s->current_picture.key_frame=1;
04386 goto redo_frame;
04387 }
04388
04389 if(s->qlog == LOSSLESS_QLOG){
04390 for(y=0; y<h; y++){
04391 for(x=0; x<w; x++){
04392 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
04393 }
04394 }
04395 }else{
04396 for(y=0; y<h; y++){
04397 for(x=0; x<w; x++){
04398 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
04399 }
04400 }
04401 }
04402
04403 if(QUANTIZE2)
04404 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
04405 else
04406 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
04407
04408 if(s->pass1_rc && plane_index==0){
04409 int delta_qlog = ratecontrol_1pass(s, pict);
04410 if (delta_qlog <= INT_MIN)
04411 return -1;
04412 if(delta_qlog){
04413
04414 ff_init_range_encoder(c, buf, buf_size);
04415 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
04416 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
04417 encode_header(s);
04418 encode_blocks(s, 0);
04419 }
04420 }
04421
04422 for(level=0; level<s->spatial_decomposition_count; level++){
04423 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04424 SubBand *b= &p->band[level][orientation];
04425
04426 if(!QUANTIZE2)
04427 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
04428 if(orientation==0)
04429 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
04430 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
04431 assert(b->parent==NULL || b->parent->stride == b->stride*2);
04432 if(orientation==0)
04433 correlate(s, b, b->ibuf, b->stride, 1, 0);
04434 }
04435 }
04436
04437
04438 for(level=0; level<s->spatial_decomposition_count; level++){
04439 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04440 SubBand *b= &p->band[level][orientation];
04441
04442 dequantize(s, b, b->ibuf, b->stride);
04443 }
04444 }
04445
04446 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
04447 if(s->qlog == LOSSLESS_QLOG){
04448 for(y=0; y<h; y++){
04449 for(x=0; x<w; x++){
04450 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
04451 }
04452 }
04453 }
04454 {START_TIMER
04455 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04456 STOP_TIMER("pred-conv")}
04457 }else{
04458
04459 if(pict->pict_type == I_TYPE){
04460 for(y=0; y<h; y++){
04461 for(x=0; x<w; x++){
04462 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
04463 pict->data[plane_index][y*pict->linesize[plane_index] + x];
04464 }
04465 }
04466 }else{
04467 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
04468 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04469 }
04470 }
04471 if(s->avctx->flags&CODEC_FLAG_PSNR){
04472 int64_t error= 0;
04473
04474 if(pict->data[plane_index])
04475 for(y=0; y<h; y++){
04476 for(x=0; x<w; x++){
04477 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
04478 error += d*d;
04479 }
04480 }
04481 s->avctx->error[plane_index] += error;
04482 s->current_picture.error[plane_index] = error;
04483 }
04484 }
04485
04486 update_last_header_values(s);
04487
04488 if(s->last_picture[s->max_ref_frames-1].data[0]){
04489 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
04490 for(i=0; i<9; i++)
04491 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
04492 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
04493 }
04494
04495 s->current_picture.coded_picture_number = avctx->frame_number;
04496 s->current_picture.pict_type = pict->pict_type;
04497 s->current_picture.quality = pict->quality;
04498 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
04499 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
04500 s->m.current_picture.display_picture_number =
04501 s->m.current_picture.coded_picture_number = avctx->frame_number;
04502 s->m.current_picture.quality = pict->quality;
04503 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
04504 if(s->pass1_rc)
04505 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
04506 return -1;
04507 if(avctx->flags&CODEC_FLAG_PASS1)
04508 ff_write_pass1_stats(&s->m);
04509 s->m.last_pict_type = s->m.pict_type;
04510 avctx->frame_bits = s->m.frame_bits;
04511 avctx->mv_bits = s->m.mv_bits;
04512 avctx->misc_bits = s->m.misc_bits;
04513 avctx->p_tex_bits = s->m.p_tex_bits;
04514
04515 emms_c();
04516
04517 return ff_rac_terminate(c);
04518 }
04519
04520 static void common_end(SnowContext *s){
04521 int plane_index, level, orientation, i;
04522
04523 av_freep(&s->spatial_dwt_buffer);
04524 av_freep(&s->spatial_idwt_buffer);
04525
04526 av_freep(&s->m.me.scratchpad);
04527 av_freep(&s->m.me.map);
04528 av_freep(&s->m.me.score_map);
04529 av_freep(&s->m.obmc_scratchpad);
04530
04531 av_freep(&s->block);
04532
04533 for(i=0; i<MAX_REF_FRAMES; i++){
04534 av_freep(&s->ref_mvs[i]);
04535 av_freep(&s->ref_scores[i]);
04536 if(s->last_picture[i].data[0])
04537 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
04538 }
04539
04540 for(plane_index=0; plane_index<3; plane_index++){
04541 for(level=s->spatial_decomposition_count-1; level>=0; level--){
04542 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04543 SubBand *b= &s->plane[plane_index].band[level][orientation];
04544
04545 av_freep(&b->x_coeff);
04546 }
04547 }
04548 }
04549 }
04550
04551 static int encode_end(AVCodecContext *avctx)
04552 {
04553 SnowContext *s = avctx->priv_data;
04554
04555 common_end(s);
04556 av_free(avctx->stats_out);
04557
04558 return 0;
04559 }
04560
04561 static int decode_init(AVCodecContext *avctx)
04562 {
04563 avctx->pix_fmt= PIX_FMT_YUV420P;
04564
04565 common_init(avctx);
04566
04567 return 0;
04568 }
04569
04570 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){
04571 SnowContext *s = avctx->priv_data;
04572 RangeCoder * const c= &s->c;
04573 int bytes_read;
04574 AVFrame *picture = data;
04575 int level, orientation, plane_index, i;
04576
04577 ff_init_range_decoder(c, buf, buf_size);
04578 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04579
04580 s->current_picture.pict_type= FF_I_TYPE;
04581 if(decode_header(s)<0)
04582 return -1;
04583 common_init_after_header(avctx);
04584
04585
04586 slice_buffer_destroy(&s->sb);
04587 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
04588
04589 for(plane_index=0; plane_index<3; plane_index++){
04590 Plane *p= &s->plane[plane_index];
04591 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
04592 && p->hcoeff[1]==-10
04593 && p->hcoeff[2]==2;
04594 }
04595
04596 if(!s->block) alloc_blocks(s);
04597
04598 frame_start(s);
04599
04600 if(avctx->debug&FF_DEBUG_PICT_INFO)
04601 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
04602
04603 decode_blocks(s);
04604
04605 for(plane_index=0; plane_index<3; plane_index++){
04606 Plane *p= &s->plane[plane_index];
04607 int w= p->width;
04608 int h= p->height;
04609 int x, y;
04610 int decode_state[MAX_DECOMPOSITIONS][4][1];
04611
04612 if(s->avctx->debug&2048){
04613 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
04614 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04615
04616 for(y=0; y<h; y++){
04617 for(x=0; x<w; x++){
04618 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
04619 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
04620 }
04621 }
04622 }
04623
04624 { START_TIMER
04625 for(level=0; level<s->spatial_decomposition_count; level++){
04626 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04627 SubBand *b= &p->band[level][orientation];
04628 unpack_coeffs(s, b, b->parent, orientation);
04629 }
04630 }
04631 STOP_TIMER("unpack coeffs");
04632 }
04633
04634 {START_TIMER
04635 const int mb_h= s->b_height << s->block_max_depth;
04636 const int block_size = MB_SIZE >> s->block_max_depth;
04637 const int block_w = plane_index ? block_size/2 : block_size;
04638 int mb_y;
04639 dwt_compose_t cs[MAX_DECOMPOSITIONS];
04640 int yd=0, yq=0;
04641 int y;
04642 int end_y;
04643
04644 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
04645 for(mb_y=0; mb_y<=mb_h; mb_y++){
04646
04647 int slice_starty = block_w*mb_y;
04648 int slice_h = block_w*(mb_y+1);
04649 if (!(s->keyframe || s->avctx->debug&512)){
04650 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
04651 slice_h -= (block_w >> 1);
04652 }
04653
04654 {
04655 START_TIMER
04656 for(level=0; level<s->spatial_decomposition_count; level++){
04657 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04658 SubBand *b= &p->band[level][orientation];
04659 int start_y;
04660 int end_y;
04661 int our_mb_start = mb_y;
04662 int our_mb_end = (mb_y + 1);
04663 const int extra= 3;
04664 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
04665 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
04666 if (!(s->keyframe || s->avctx->debug&512)){
04667 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
04668 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
04669 }
04670 start_y = FFMIN(b->height, start_y);
04671 end_y = FFMIN(b->height, end_y);
04672
04673 if (start_y != end_y){
04674 if (orientation == 0){
04675 SubBand * correlate_band = &p->band[0][0];
04676 int correlate_end_y = FFMIN(b->height, end_y + 1);
04677 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
04678 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
04679 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
04680 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
04681 }
04682 else
04683 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
04684 }
04685 }
04686 }
04687 STOP_TIMER("decode_subband_slice");
04688 }
04689
04690 { START_TIMER
04691 for(; yd<slice_h; yd+=4){
04692 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
04693 }
04694 STOP_TIMER("idwt slice");}
04695
04696
04697 if(s->qlog == LOSSLESS_QLOG){
04698 for(; yq<slice_h && yq<h; yq++){
04699 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
04700 for(x=0; x<w; x++){
04701 line[x] <<= FRAC_BITS;
04702 }
04703 }
04704 }
04705
04706 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
04707
04708 y = FFMIN(p->height, slice_starty);
04709 end_y = FFMIN(p->height, slice_h);
04710 while(y < end_y)
04711 slice_buffer_release(&s->sb, y++);
04712 }
04713
04714 slice_buffer_flush(&s->sb);
04715
04716 STOP_TIMER("idwt + predict_slices")}
04717 }
04718
04719 emms_c();
04720
04721 if(s->last_picture[s->max_ref_frames-1].data[0]){
04722 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
04723 for(i=0; i<9; i++)
04724 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
04725 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
04726 }
04727
04728 if(!(s->avctx->debug&2048))
04729 *picture= s->current_picture;
04730 else
04731 *picture= s->mconly_picture;
04732
04733 *data_size = sizeof(AVFrame);
04734
04735 bytes_read= c->bytestream - c->bytestream_start;
04736 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
04737
04738 return bytes_read;
04739 }
04740
04741 static int decode_end(AVCodecContext *avctx)
04742 {
04743 SnowContext *s = avctx->priv_data;
04744
04745 slice_buffer_destroy(&s->sb);
04746
04747 common_end(s);
04748
04749 return 0;
04750 }
04751
04752 AVCodec snow_decoder = {
04753 "snow",
04754 CODEC_TYPE_VIDEO,
04755 CODEC_ID_SNOW,
04756 sizeof(SnowContext),
04757 decode_init,
04758 NULL,
04759 decode_end,
04760 decode_frame,
04761 0 ,
04762 NULL
04763 };
04764
04765 #ifdef CONFIG_SNOW_ENCODER
04766 AVCodec snow_encoder = {
04767 "snow",
04768 CODEC_TYPE_VIDEO,
04769 CODEC_ID_SNOW,
04770 sizeof(SnowContext),
04771 encode_init,
04772 encode_frame,
04773 encode_end,
04774 };
04775 #endif
04776
04777
04778 #ifdef TEST
04779 #undef malloc
04780 #undef free
04781 #undef printf
04782 #undef random
04783
04784 int main(void){
04785 int width=256;
04786 int height=256;
04787 int buffer[2][width*height];
04788 SnowContext s;
04789 int i;
04790 s.spatial_decomposition_count=6;
04791 s.spatial_decomposition_type=1;
04792
04793 printf("testing 5/3 DWT\n");
04794 for(i=0; i<width*height; i++)
04795 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
04796
04797 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04798 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04799
04800 for(i=0; i<width*height; i++)
04801 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
04802
04803 printf("testing 9/7 DWT\n");
04804 s.spatial_decomposition_type=0;
04805 for(i=0; i<width*height; i++)
04806 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
04807
04808 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04809 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04810
04811 for(i=0; i<width*height; i++)
04812 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
04813
04814 #if 0
04815 printf("testing AC coder\n");
04816 memset(s.header_state, 0, sizeof(s.header_state));
04817 ff_init_range_encoder(&s.c, buffer[0], 256*256);
04818 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
04819
04820 for(i=-256; i<256; i++){
04821 START_TIMER
04822 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
04823 STOP_TIMER("put_symbol")
04824 }
04825 ff_rac_terminate(&s.c);
04826
04827 memset(s.header_state, 0, sizeof(s.header_state));
04828 ff_init_range_decoder(&s.c, buffer[0], 256*256);
04829 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
04830
04831 for(i=-256; i<256; i++){
04832 int j;
04833 START_TIMER
04834 j= get_symbol(&s.c, s.header_state, 1);
04835 STOP_TIMER("get_symbol")
04836 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
04837 }
04838 #endif
04839 {
04840 int level, orientation, x, y;
04841 int64_t errors[8][4];
04842 int64_t g=0;
04843
04844 memset(errors, 0, sizeof(errors));
04845 s.spatial_decomposition_count=3;
04846 s.spatial_decomposition_type=0;
04847 for(level=0; level<s.spatial_decomposition_count; level++){
04848 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04849 int w= width >> (s.spatial_decomposition_count-level);
04850 int h= height >> (s.spatial_decomposition_count-level);
04851 int stride= width << (s.spatial_decomposition_count-level);
04852 DWTELEM *buf= buffer[0];
04853 int64_t error=0;
04854
04855 if(orientation&1) buf+=w;
04856 if(orientation>1) buf+=stride>>1;
04857
04858 memset(buffer[0], 0, sizeof(int)*width*height);
04859 buf[w/2 + h/2*stride]= 256*256;
04860 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04861 for(y=0; y<height; y++){
04862 for(x=0; x<width; x++){
04863 int64_t d= buffer[0][x + y*width];
04864 error += d*d;
04865 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
04866 }
04867 if(FFABS(height/2-y)<9 && level==2) printf("\n");
04868 }
04869 error= (int)(sqrt(error)+0.5);
04870 errors[level][orientation]= error;
04871 if(g) g=ff_gcd(g, error);
04872 else g= error;
04873 }
04874 }
04875 printf("static int const visual_weight[][4]={\n");
04876 for(level=0; level<s.spatial_decomposition_count; level++){
04877 printf(" {");
04878 for(orientation=0; orientation<4; orientation++){
04879 printf("%8"PRId64",", errors[level][orientation]/g);
04880 }
04881 printf("},\n");
04882 }
04883 printf("};\n");
04884 {
04885 int level=2;
04886 int w= width >> (s.spatial_decomposition_count-level);
04887
04888 int stride= width << (s.spatial_decomposition_count-level);
04889 DWTELEM *buf= buffer[0];
04890 int64_t error=0;
04891
04892 buf+=w;
04893 buf+=stride>>1;
04894
04895 memset(buffer[0], 0, sizeof(int)*width*height);
04896 #if 1
04897 for(y=0; y<height; y++){
04898 for(x=0; x<width; x++){
04899 int tab[4]={0,2,3,1};
04900 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
04901 }
04902 }
04903 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04904 #else
04905 for(y=0; y<h; y++){
04906 for(x=0; x<w; x++){
04907 buf[x + y*stride ]=169;
04908 buf[x + y*stride-w]=64;
04909 }
04910 }
04911 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04912 #endif
04913 for(y=0; y<height; y++){
04914 for(x=0; x<width; x++){
04915 int64_t d= buffer[0][x + y*width];
04916 error += d*d;
04917 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
04918 }
04919 if(FFABS(height/2-y)<9) printf("\n");
04920 }
04921 }
04922
04923 }
04924 return 0;
04925 }
04926 #endif