blob: fc80b387e9aa381eb5afc84be12059dc5ff04999 [file] [log] [blame]
/*
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* the C code (not assembly, mmx, ...) of the swscaler which has been written
* by Michael Niedermayer can be used under the LGPL license too
*/
#undef REAL_MOVNTQ
#undef MOVNTQ
#undef PAVGB
#undef PREFETCH
#undef PREFETCHW
#undef EMMS
#undef SFENCE
static inline void RENAME(vScale)(int16_t *lumFilter, uint8_t **lumSrc, int lumFilterSize,
uint8_t *dest, int dstW)
{
int i;
for(i=0; i<dstW; i++)
{
int val=0;
int j;
for(j=0; j<lumFilterSize; j++)
val += lumSrc[j][i] * lumFilter[j];
dest[i]= FFMIN(FFMAX(val>>12, 0), 255);
}
}
// Bilinear / Bicubic scaling
static inline void RENAME(hScale)(uint8_t *dst, int dstW, int dbpp, uint8_t *src, int srcW, int sbpp,
int16_t *filter, int16_t *filterPos, long filterSize, int xOffset)
{
int i;
int sOffset = xOffset;
int dOffset = xOffset;
if (sbpp == 4 && dbpp == 3)
sOffset++; // skip the alpha channel on the source
else if (sbpp == 3 && dbpp == 4)
{
dOffset++;
if (xOffset == 3)
{
// Fill in the alpha channel
for(i=0; i<dstW; i++)
{
dst[i*dbpp /*+ dOffset*/] = 0xFF;
}
return;
}
}
for(i=0; i<dstW; i++)
{
int j;
int srcPos= filterPos[i];
int val=0;
int fsi = filterSize*i;
// printf("filterPos: %d\n", filterPos[i]);
for(j=0; j<filterSize; j++)
{
// printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
val += ((int)src[(srcPos + j) * sbpp + sOffset])*filter[fsi + j];
}
// filter += hFilterSize;
dst[i*dbpp + dOffset] = FFMIN(FFMAX(0, val>>14), (1<<8)-1); // the cubic equation does overflow ...
// dst[i] = val>>7;
}
}
static int RENAME(swScale)(SwsContext *c, uint8_t* src, int srcStride, int srcSliceY,
int srcSliceH, uint8_t* dst, int dstStride){
/* load a few things into local vars to make the code more readable? and faster */
const int srcW= c->srcW;
const int dstW= c->dstW;
const int dstH= c->dstH;
const int dstFormat= c->dstFormat;
const int srcFormat= c->srcFormat;
const int flags= c->flags;
int16_t *vLumFilterPos= c->vLumFilterPos;
int16_t *hLumFilterPos= c->hLumFilterPos;
int16_t *vLumFilter= c->vLumFilter;
int16_t *hLumFilter= c->hLumFilter;
const int vLumFilterSize= c->vLumFilterSize;
const int hLumFilterSize= c->hLumFilterSize;
uint8_t **lumPixBuf= c->lumPixBuf;
const int vLumBufSize= c->vLumBufSize;
int lastDstY;
/* vars whch will change and which we need to storw back in the context */
int dstY= c->dstY;
int lumBufIndex= c->lumBufIndex;
int lastInLumBuf= c->lastInLumBuf;
// printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2],
// (int)dst[0], (int)dst[1], (int)dst[2]);
//printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
//dstStride[0],dstStride[1],dstStride[2]);
/* Note the user might start scaling the picture in the middle so this will not get executed
this is not really intended but works currently, so ppl might do it */
if(srcSliceY ==0){
lumBufIndex=0;
dstY=0;
lastInLumBuf= -1;
}
lastDstY= dstY;
int sbpp = ((c->srcFormat == PIX_FMT_RGB32) || (c->srcFormat == PIX_FMT_BGR32)) ? 4 : 3;
int dbpp = ((c->dstFormat == PIX_FMT_RGB32) || (c->dstFormat == PIX_FMT_BGR32)) ? 4 : 3;
for(;dstY < dstH; dstY++){
unsigned char *dest =dst+dstStride*dstY;
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
//printf("dstY:%d dstH:%d firstLumSrcY:%d lastInLumBuf:%d vLumBufSize: %d vChrBufSize: %d slice: %d %d vLumFilterSize: %d firstChrSrcY: %d vChrFilterSize: %d c->chrSrcVSubSample: %d\n",
// dstY, dstH, firstLumSrcY, lastInLumBuf, vLumBufSize, vChrBufSize, srcSliceY, srcSliceH, vLumFilterSize, firstChrSrcY, vChrFilterSize, c->chrSrcVSubSample);
//handle holes (FAST_BILINEAR & weird filters)
if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
//printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize);
ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1)
// Do we have enough lines in this slice to output the dstY line
if(lastLumSrcY < srcSliceY + srcSliceH)
{
//Do horizontal scaling
while(lastInLumBuf < lastLumSrcY)
{
uint8_t *s= src+(lastInLumBuf + 1 - srcSliceY)*srcStride;
lumBufIndex++;
// printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY);
ASSERT(lumBufIndex < 2*vLumBufSize)
ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
// printf("%d %d\n", lumBufIndex, vLumBufSize);
RENAME(hScale)(lumPixBuf[ lumBufIndex ], dstW, dbpp, s, srcW, sbpp, hLumFilter, hLumFilterPos, hLumFilterSize, 0);
RENAME(hScale)(lumPixBuf[ lumBufIndex ], dstW, dbpp, s, srcW, sbpp, hLumFilter, hLumFilterPos, hLumFilterSize, 1);
RENAME(hScale)(lumPixBuf[ lumBufIndex ], dstW, dbpp, s, srcW, sbpp, hLumFilter, hLumFilterPos, hLumFilterSize, 2);
if (dbpp == 4)
RENAME(hScale)(lumPixBuf[ lumBufIndex ], dstW, dbpp, s, srcW, sbpp, hLumFilter, hLumFilterPos, hLumFilterSize, 3);
lastInLumBuf++;
}
//wrap buf index around to stay inside the ring buffer
if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
}
else // not enough lines left in this slice -> load the rest in the buffer
{
/* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n",
firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY,
lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize,
vChrBufSize, vLumBufSize);*/
//Do horizontal scaling
while(lastInLumBuf+1 < srcSliceY + srcSliceH)
{
uint8_t *s= src+(lastInLumBuf + 1 - srcSliceY)*srcStride;
lumBufIndex++;
ASSERT(lumBufIndex < 2*vLumBufSize)
ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH)
ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
RENAME(hScale)(lumPixBuf[ lumBufIndex ], dstW, dbpp, s, srcW, sbpp, hLumFilter, hLumFilterPos, hLumFilterSize, 0);
RENAME(hScale)(lumPixBuf[ lumBufIndex ], dstW, dbpp, s, srcW, sbpp, hLumFilter, hLumFilterPos, hLumFilterSize, 1);
RENAME(hScale)(lumPixBuf[ lumBufIndex ], dstW, dbpp, s, srcW, sbpp, hLumFilter, hLumFilterPos, hLumFilterSize, 2);
if (dbpp == 4)
RENAME(hScale)(lumPixBuf[ lumBufIndex ], dstW, dbpp, s, srcW, sbpp, hLumFilter, hLumFilterPos, hLumFilterSize, 3);
lastInLumBuf++;
}
//wrap buf index around to stay inside the ring buffer
if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize;
break; //we can't output a dstY line so let's try with the next slice
}
// Vertical filtering happens in here
uint8_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
RENAME(vScale)(vLumFilter + dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
dest, dbpp*dstW);
}
/* store changed local vars back in the context */
c->dstY= dstY;
c->lumBufIndex= lumBufIndex;
c->lastInLumBuf= lastInLumBuf;
return dstY - lastDstY;
}