OgreImageResampler.h

Go to the documentation of this file.
00001 /*
00002 -----------------------------------------------------------------------------
00003 This source file is part of OGRE
00004     (Object-oriented Graphics Rendering Engine)
00005 For the latest info, see http://www.ogre3d.org/
00006 
00007 Copyright (c) 2000-2012 Torus Knot Software Ltd
00008 
00009 Permission is hereby granted, free of charge, to any person obtaining a copy
00010 of this software and associated documentation files (the "Software"), to deal
00011 in the Software without restriction, including without limitation the rights
00012 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00013 copies of the Software, and to permit persons to whom the Software is
00014 furnished to do so, subject to the following conditions:
00015 
00016 The above copyright notice and this permission notice shall be included in
00017 all copies or substantial portions of the Software.
00018 
00019 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00020 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00021 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00022 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00023 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00024 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00025 THE SOFTWARE.
00026 -----------------------------------------------------------------------------
00027 */
00028 #ifndef OGREIMAGERESAMPLER_H
00029 #define OGREIMAGERESAMPLER_H
00030 
00031 #include <algorithm>
00032 
00033 // this file is inlined into OgreImage.cpp!
00034 // do not include anywhere else.
00035 namespace Ogre {
00043 // variable name hints:
00044 // sx_48 = 16/48-bit fixed-point x-position in source
00045 // stepx = difference between adjacent sx_48 values
00046 // sx1 = lower-bound integer x-position in source
00047 // sx2 = upper-bound integer x-position in source
00048 // sxf = fractional weight between sx1 and sx2
00049 // x,y,z = location of output pixel in destination
00050 
00051 // nearest-neighbor resampler, does not convert formats.
00052 // templated on bytes-per-pixel to allow compiler optimizations, such
00053 // as simplifying memcpy() and replacing multiplies with bitshifts
00054 template<unsigned int elemsize> struct NearestResampler {
00055     static void scale(const PixelBox& src, const PixelBox& dst) {
00056         // assert(src.format == dst.format);
00057 
00058         // srcdata stays at beginning, pdst is a moving pointer
00059         uchar* srcdata = (uchar*)src.data;
00060         uchar* pdst = (uchar*)dst.data;
00061 
00062         // sx_48,sy_48,sz_48 represent current position in source
00063         // using 16/48-bit fixed precision, incremented by steps
00064         uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
00065         uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
00066         uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
00067 
00068         // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
00069         // for the center of the destination pixel, not the top-left corner
00070         uint64 sz_48 = (stepz >> 1) - 1;
00071         for (size_t z = dst.front; z < dst.back; z++, sz_48 += stepz) {
00072             size_t srczoff = (size_t)(sz_48 >> 48) * src.slicePitch;
00073             
00074             uint64 sy_48 = (stepy >> 1) - 1;
00075             for (size_t y = dst.top; y < dst.bottom; y++, sy_48 += stepy) {
00076                 size_t srcyoff = (size_t)(sy_48 >> 48) * src.rowPitch;
00077             
00078                 uint64 sx_48 = (stepx >> 1) - 1;
00079                 for (size_t x = dst.left; x < dst.right; x++, sx_48 += stepx) {
00080                     uchar* psrc = srcdata +
00081                         elemsize*((size_t)(sx_48 >> 48) + srcyoff + srczoff);
00082                     memcpy(pdst, psrc, elemsize);
00083                     pdst += elemsize;
00084                 }
00085                 pdst += elemsize*dst.getRowSkip();
00086             }
00087             pdst += elemsize*dst.getSliceSkip();
00088         }
00089     }
00090 };
00091 
00092 
00093 // default floating-point linear resampler, does format conversion
00094 struct LinearResampler {
00095     static void scale(const PixelBox& src, const PixelBox& dst) {
00096         size_t srcelemsize = PixelUtil::getNumElemBytes(src.format);
00097         size_t dstelemsize = PixelUtil::getNumElemBytes(dst.format);
00098 
00099         // srcdata stays at beginning, pdst is a moving pointer
00100         uchar* srcdata = (uchar*)src.data;
00101         uchar* pdst = (uchar*)dst.data;
00102         
00103         // sx_48,sy_48,sz_48 represent current position in source
00104         // using 16/48-bit fixed precision, incremented by steps
00105         uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
00106         uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
00107         uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
00108         
00109         // temp is 16/16 bit fixed precision, used to adjust a source
00110         // coordinate (x, y, or z) backwards by half a pixel so that the
00111         // integer bits represent the first sample (eg, sx1) and the
00112         // fractional bits are the blend weight of the second sample
00113         unsigned int temp;
00114 
00115         // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
00116         // for the center of the destination pixel, not the top-left corner
00117         uint64 sz_48 = (stepz >> 1) - 1;
00118         for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) {
00119             temp = static_cast<unsigned int>(sz_48 >> 32);
00120             temp = (temp > 0x8000)? temp - 0x8000 : 0;
00121             size_t sz1 = temp >> 16;                 // src z, sample #1
00122             size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2
00123             float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2
00124 
00125             uint64 sy_48 = (stepy >> 1) - 1;
00126             for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
00127                 temp = static_cast<unsigned int>(sy_48 >> 32);
00128                 temp = (temp > 0x8000)? temp - 0x8000 : 0;
00129                 size_t sy1 = temp >> 16;                    // src y #1
00130                 size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2
00131                 float syf = (temp & 0xFFFF) / 65536.f; // weight of #2
00132                 
00133                 uint64 sx_48 = (stepx >> 1) - 1;
00134                 for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
00135                     temp = static_cast<unsigned int>(sx_48 >> 32);
00136                     temp = (temp > 0x8000)? temp - 0x8000 : 0;
00137                     size_t sx1 = temp >> 16;                    // src x #1
00138                     size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2
00139                     float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2
00140                 
00141                     ColourValue x1y1z1, x2y1z1, x1y2z1, x2y2z1;
00142                     ColourValue x1y1z2, x2y1z2, x1y2z2, x2y2z2;
00143 
00144 #define UNPACK(dst,x,y,z) PixelUtil::unpackColour(&dst, src.format, \
00145     srcdata + srcelemsize*((x)+(y)*src.rowPitch+(z)*src.slicePitch))
00146 
00147                     UNPACK(x1y1z1,sx1,sy1,sz1); UNPACK(x2y1z1,sx2,sy1,sz1);
00148                     UNPACK(x1y2z1,sx1,sy2,sz1); UNPACK(x2y2z1,sx2,sy2,sz1);
00149                     UNPACK(x1y1z2,sx1,sy1,sz2); UNPACK(x2y1z2,sx2,sy1,sz2);
00150                     UNPACK(x1y2z2,sx1,sy2,sz2); UNPACK(x2y2z2,sx2,sy2,sz2);
00151 #undef UNPACK
00152 
00153                     ColourValue accum =
00154                         x1y1z1 * ((1.0f - sxf)*(1.0f - syf)*(1.0f - szf)) +
00155                         x2y1z1 * (        sxf *(1.0f - syf)*(1.0f - szf)) +
00156                         x1y2z1 * ((1.0f - sxf)*        syf *(1.0f - szf)) +
00157                         x2y2z1 * (        sxf *        syf *(1.0f - szf)) +
00158                         x1y1z2 * ((1.0f - sxf)*(1.0f - syf)*        szf ) +
00159                         x2y1z2 * (        sxf *(1.0f - syf)*        szf ) +
00160                         x1y2z2 * ((1.0f - sxf)*        syf *        szf ) +
00161                         x2y2z2 * (        sxf *        syf *        szf );
00162 
00163                     PixelUtil::packColour(accum, dst.format, pdst);
00164 
00165                     pdst += dstelemsize;
00166                 }
00167                 pdst += dstelemsize*dst.getRowSkip();
00168             }
00169             pdst += dstelemsize*dst.getSliceSkip();
00170         }
00171     }
00172 };
00173 
00174 
00175 // float32 linear resampler, converts FLOAT32_RGB/FLOAT32_RGBA only.
00176 // avoids overhead of pixel unpack/repack function calls
00177 struct LinearResampler_Float32 {
00178     static void scale(const PixelBox& src, const PixelBox& dst) {
00179         size_t srcchannels = PixelUtil::getNumElemBytes(src.format) / sizeof(float);
00180         size_t dstchannels = PixelUtil::getNumElemBytes(dst.format) / sizeof(float);
00181         // assert(srcchannels == 3 || srcchannels == 4);
00182         // assert(dstchannels == 3 || dstchannels == 4);
00183 
00184         // srcdata stays at beginning, pdst is a moving pointer
00185         float* srcdata = (float*)src.data;
00186         float* pdst = (float*)dst.data;
00187         
00188         // sx_48,sy_48,sz_48 represent current position in source
00189         // using 16/48-bit fixed precision, incremented by steps
00190         uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
00191         uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
00192         uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
00193         
00194         // temp is 16/16 bit fixed precision, used to adjust a source
00195         // coordinate (x, y, or z) backwards by half a pixel so that the
00196         // integer bits represent the first sample (eg, sx1) and the
00197         // fractional bits are the blend weight of the second sample
00198         unsigned int temp;
00199 
00200         // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
00201         // for the center of the destination pixel, not the top-left corner
00202         uint64 sz_48 = (stepz >> 1) - 1;
00203         for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) {
00204             temp = static_cast<unsigned int>(sz_48 >> 32);
00205             temp = (temp > 0x8000)? temp - 0x8000 : 0;
00206             size_t sz1 = temp >> 16;                 // src z, sample #1
00207             size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2
00208             float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2
00209 
00210             uint64 sy_48 = (stepy >> 1) - 1;
00211             for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
00212                 temp = static_cast<unsigned int>(sy_48 >> 32);
00213                 temp = (temp > 0x8000)? temp - 0x8000 : 0;
00214                 size_t sy1 = temp >> 16;                    // src y #1
00215                 size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2
00216                 float syf = (temp & 0xFFFF) / 65536.f; // weight of #2
00217                 
00218                 uint64 sx_48 = (stepx >> 1) - 1;
00219                 for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
00220                     temp = static_cast<unsigned int>(sx_48 >> 32);
00221                     temp = (temp > 0x8000)? temp - 0x8000 : 0;
00222                     size_t sx1 = temp >> 16;                    // src x #1
00223                     size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2
00224                     float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2
00225                     
00226                     // process R,G,B,A simultaneously for cache coherence?
00227                     float accum[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
00228 
00229 #define ACCUM3(x,y,z,factor) \
00230     { float f = factor; \
00231     size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \
00232     accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \
00233     accum[2]+=srcdata[off+2]*f; }
00234 
00235 #define ACCUM4(x,y,z,factor) \
00236     { float f = factor; \
00237     size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \
00238     accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \
00239     accum[2]+=srcdata[off+2]*f; accum[3]+=srcdata[off+3]*f; }
00240 
00241                     if (srcchannels == 3 || dstchannels == 3) {
00242                         // RGB, no alpha
00243                         ACCUM3(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf));
00244                         ACCUM3(sx2,sy1,sz1,      sxf *(1.0f-syf)*(1.0f-szf));
00245                         ACCUM3(sx1,sy2,sz1,(1.0f-sxf)*      syf *(1.0f-szf));
00246                         ACCUM3(sx2,sy2,sz1,      sxf *      syf *(1.0f-szf));
00247                         ACCUM3(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)*      szf );
00248                         ACCUM3(sx2,sy1,sz2,      sxf *(1.0f-syf)*      szf );
00249                         ACCUM3(sx1,sy2,sz2,(1.0f-sxf)*      syf *      szf );
00250                         ACCUM3(sx2,sy2,sz2,      sxf *      syf *      szf );
00251                         accum[3] = 1.0f;
00252                     } else {
00253                         // RGBA
00254                         ACCUM4(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf));
00255                         ACCUM4(sx2,sy1,sz1,      sxf *(1.0f-syf)*(1.0f-szf));
00256                         ACCUM4(sx1,sy2,sz1,(1.0f-sxf)*      syf *(1.0f-szf));
00257                         ACCUM4(sx2,sy2,sz1,      sxf *      syf *(1.0f-szf));
00258                         ACCUM4(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)*      szf );
00259                         ACCUM4(sx2,sy1,sz2,      sxf *(1.0f-syf)*      szf );
00260                         ACCUM4(sx1,sy2,sz2,(1.0f-sxf)*      syf *      szf );
00261                         ACCUM4(sx2,sy2,sz2,      sxf *      syf *      szf );
00262                     }
00263 
00264                     memcpy(pdst, accum, sizeof(float)*dstchannels);
00265 
00266 #undef ACCUM3
00267 #undef ACCUM4
00268 
00269                     pdst += dstchannels;
00270                 }
00271                 pdst += dstchannels*dst.getRowSkip();
00272             }
00273             pdst += dstchannels*dst.getSliceSkip();
00274         }
00275     }
00276 };
00277 
00278 
00279 
00280 // byte linear resampler, does not do any format conversions.
00281 // only handles pixel formats that use 1 byte per color channel.
00282 // 2D only; punts 3D pixelboxes to default LinearResampler (slow).
00283 // templated on bytes-per-pixel to allow compiler optimizations, such
00284 // as unrolling loops and replacing multiplies with bitshifts
00285 template<unsigned int channels> struct LinearResampler_Byte {
00286     static void scale(const PixelBox& src, const PixelBox& dst) {
00287         // assert(src.format == dst.format);
00288 
00289         // only optimized for 2D
00290         if (src.getDepth() > 1 || dst.getDepth() > 1) {
00291             LinearResampler::scale(src, dst);
00292             return;
00293         }
00294 
00295         // srcdata stays at beginning of slice, pdst is a moving pointer
00296         uchar* srcdata = (uchar*)src.data;
00297         uchar* pdst = (uchar*)dst.data;
00298 
00299         // sx_48,sy_48 represent current position in source
00300         // using 16/48-bit fixed precision, incremented by steps
00301         uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
00302         uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
00303         
00304         // bottom 28 bits of temp are 16/12 bit fixed precision, used to
00305         // adjust a source coordinate backwards by half a pixel so that the
00306         // integer bits represent the first sample (eg, sx1) and the
00307         // fractional bits are the blend weight of the second sample
00308         unsigned int temp;
00309         
00310         uint64 sy_48 = (stepy >> 1) - 1;
00311         for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
00312             temp = static_cast<unsigned int>(sy_48 >> 36);
00313             temp = (temp > 0x800)? temp - 0x800: 0;
00314             unsigned int syf = temp & 0xFFF;
00315             size_t sy1 = temp >> 12;
00316             size_t sy2 = std::min(sy1+1, src.bottom-src.top-1);
00317             size_t syoff1 = sy1 * src.rowPitch;
00318             size_t syoff2 = sy2 * src.rowPitch;
00319 
00320             uint64 sx_48 = (stepx >> 1) - 1;
00321             for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
00322                 temp = static_cast<unsigned int>(sx_48 >> 36);
00323                 temp = (temp > 0x800)? temp - 0x800 : 0;
00324                 unsigned int sxf = temp & 0xFFF;
00325                 size_t sx1 = temp >> 12;
00326                 size_t sx2 = std::min(sx1+1, src.right-src.left-1);
00327 
00328                 unsigned int sxfsyf = sxf*syf;
00329                 for (unsigned int k = 0; k < channels; k++) {
00330                     unsigned int accum =
00331                         srcdata[(sx1 + syoff1)*channels+k]*(0x1000000-(sxf<<12)-(syf<<12)+sxfsyf) +
00332                         srcdata[(sx2 + syoff1)*channels+k]*((sxf<<12)-sxfsyf) +
00333                         srcdata[(sx1 + syoff2)*channels+k]*((syf<<12)-sxfsyf) +
00334                         srcdata[(sx2 + syoff2)*channels+k]*sxfsyf;
00335                     // accum is computed using 8/24-bit fixed-point math
00336                     // (maximum is 0xFF000000; rounding will not cause overflow)
00337                     *pdst++ = static_cast<uchar>((accum + 0x800000) >> 24);
00338                 }
00339             }
00340             pdst += channels*dst.getRowSkip();
00341         }
00342     }
00343 };
00347 }
00348 
00349 #endif