1 files changed, 887 insertions, 0 deletions
diff --git a/media-tv/xbmc/files/xbmc-9.11-shader-upscalers.patch b/media-tv/xbmc/files/xbmc-9.11-shader-upscalers.patch
new file mode 100644
index 00000000..d4feaa47
--- /dev/null
+++ b/media-tv/xbmc/files/xbmc-9.11-shader-upscalers.patch
@@ -0,0 +1,887 @@
+http://bugs.gentoo.org/306661
+
+Backport shader-based upscalers from svn trunk.
+
+--- language/English/strings.xml
++++ language/English/strings.xml
+@@ -1554,16 +1554,17 @@
+   <string id="16304">Lanczos2</string>
+   <string id="16305">Lanczos3</string>
+   <string id="16306">Sinc8</string>
+-
+   <string id="16307">Bicubic (software)</string>
+   <string id="16308">Lanczos (software)</string>
+   <string id="16309">Sinc (software)</string>
+-
+   <string id="16310">(VDPAU)Temporal</string>
+   <string id="16311">(VDPAU)Temporal/Spatial</string>
+   <string id="16312">(VDPAU)Noise Reduction</string>
+   <string id="16313">(VDPAU)Sharpness</string>
+   <string id="16314">Inverse Telecine</string>
++  <string id="16315">Lanczos3 optimized</string>
++  <string id="16316">Auto</string>
++
+   <string id="17500">Display sleep timeout</string>
+ 
+   <string id="19000">Switch to channel</string>
+--- system/shaders/convolution-6x6.glsl
++++ system/shaders/convolution-6x6.glsl
+@@ -0,0 +1,69 @@
++uniform sampler2D img;
++uniform float     stepx;
++uniform float     stepy;
++
++#if (HAS_FLOAT_TEXTURE)
++uniform sampler1D kernelTex; // float path: the kernel taps are read directly from a 1D texture
++
++vec3 weight(float pos) // returns the three taps stored in the r, g and b channels at kernel position pos
++{
++  return texture1D(kernelTex, pos).rgb;
++}
++#else
++uniform sampler2D kernelTex; // fallback path: two-row texture holding every tap as a high byte and a low byte
++
++vec3 weight(float pos) // same contract as the float variant, rebuilt from the two byte rows
++{
++  //row 0 contains the high byte, row 1 contains the low byte; (high * 256 + low) / 128.5 - 1.0 maps the pair back to [-1, 1]
++  return ((texture2D(kernelTex, vec2(pos, 0.0)) * 256.0 + texture2D(kernelTex, vec2(pos, 1.0)))).rgb / 128.5 - 1.0;
++}
++#endif
++
++vec3 pixel(float xpos, float ypos) // samples the source image at (xpos, ypos) and keeps only the rgb channels
++{
++  return texture2D(img, vec2(xpos, ypos)).rgb;
++}
++
++vec3 line (float ypos, vec3 xpos1, vec3 xpos2, vec3 linetaps1, vec3 linetaps2) // horizontal pass: weighted sum of six samples on row ypos
++{
++  vec3  pixels;
++
++  pixels  = pixel(xpos1.r, ypos) * linetaps1.r; // samples run left to right: xpos1.rgb, then xpos2.rgb
++  pixels += pixel(xpos1.g, ypos) * linetaps2.r; // weights alternate between linetaps1 and linetaps2
++  pixels += pixel(xpos1.b, ypos) * linetaps1.g;
++  pixels += pixel(xpos2.r, ypos) * linetaps2.g;
++  pixels += pixel(xpos2.g, ypos) * linetaps1.b;
++  pixels += pixel(xpos2.b, ypos) * linetaps2.b;
++
++  return pixels;
++}
++
++void main() // 6x6 separable convolution: six weighted rows, each a weighted sum of six samples
++{
++  float xf = fract(gl_TexCoord[0].x / stepx); // fractional position of the sample point, in units of stepx
++  float yf = fract(gl_TexCoord[0].y / stepy); // same for the vertical axis, in units of stepy
++
++  vec3 linetaps1   = weight((1.0 - xf) / 2.0); // lookup in the first half of the kernel texture: horizontal taps 1, 3 and 5
++  vec3 linetaps2   = weight((1.0 - xf) / 2.0 + 0.5); // lookup in the second half: horizontal taps 2, 4 and 6
++  vec3 columntaps1 = weight((1.0 - yf) / 2.0); // vertical taps 1, 3 and 5
++  vec3 columntaps2 = weight((1.0 - yf) / 2.0 + 0.5); // vertical taps 2, 4 and 6
++
++  vec3 xpos1 = vec3( // subtracting xf * stepx snaps to a multiple of stepx; the half-step offsets then pick six neighbouring columns
++      (-1.5 - xf) * stepx + gl_TexCoord[0].x,
++      (-0.5 - xf) * stepx + gl_TexCoord[0].x,
++      ( 0.5 - xf) * stepx + gl_TexCoord[0].x);
++  vec3 xpos2 = vec3( // the three columns to the right of xpos1
++      ( 1.5 - xf) * stepx + gl_TexCoord[0].x,
++      ( 2.5 - xf) * stepx + gl_TexCoord[0].x,
++      ( 3.5 - xf) * stepx + gl_TexCoord[0].x);
++
++  gl_FragColor.rgb  = line((-1.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps1.r; // vertical pass: rows use the same offsets as the columns
++  gl_FragColor.rgb += line((-0.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps2.r; // column weights alternate between columntaps1 and columntaps2
++  gl_FragColor.rgb += line(( 0.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps1.g;
++  gl_FragColor.rgb += line(( 1.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps2.g;
++  gl_FragColor.rgb += line(( 2.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps1.b;
++  gl_FragColor.rgb += line(( 3.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps2.b;
++
++  gl_FragColor.a = gl_Color.a; // alpha is not filtered; it is taken from the incoming colour
++}
++
+--- system/shaders/bicubic.glsl
++++ system/shaders/bicubic.glsl
+@@ -0,0 +1,47 @@
++uniform sampler2D img;
++uniform float stepx;
++uniform float stepy;
++uniform sampler2D kernelTex;
++
++vec4 cubicFilter(float xValue, vec4 c0, vec4 c1, vec4 c2, vec4 c3) // blends four samples with the weights found at xValue in the kernel texture
++{
++  vec4 h = texture2D(kernelTex, vec2(xValue, 0.5)); // h.r, h.g, h.b, h.a are the weights for c0, c1, c2, c3
++  vec4 r = c0 * h.r;
++  r += c1 * h.g;
++  r += c2 * h.b;
++  r += c3 * h.a;
++  return r;
++}
++
++void main() // bicubic filter: four horizontally filtered rows combined by one vertical filter
++{
++  vec2 f = vec2(gl_TexCoord[0].x / stepx , gl_TexCoord[0].y / stepy);
++  f = fract(f); // fractional position of the sample point, in units of (stepx, stepy)
++  vec4 t0 = cubicFilter(f.x, // row one step above the current coordinate
++  texture2D(img, gl_TexCoord[0].xy + vec2(-stepx,    -stepy)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(0.0,       -stepy)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(stepx,     -stepy)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, -stepy)));
++
++  vec4 t1 = cubicFilter(f.x, // row at the current coordinate
++  texture2D(img, gl_TexCoord[0].xy + vec2(-stepx,    0.0)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(0.0,       0.0)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(stepx,     0.0)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, 0.0)));
++
++  vec4 t2 = cubicFilter(f.x, // row one step below
++  texture2D(img, gl_TexCoord[0].xy + vec2(-stepx,    stepy)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(0.0,       stepy)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(stepx,     stepy)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, stepy)));
++
++  vec4 t3 = cubicFilter(f.x, // row two steps below
++  texture2D(img, gl_TexCoord[0].xy + vec2(-stepx,    2.0*stepy)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(0,         2.0*stepy)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(stepx,     2.0*stepy)),
++  texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, 2.0*stepy)));
++
++  gl_FragColor = cubicFilter(f.y, t0, t1, t2, t3);   // vertical pass over the four filtered rows
++  gl_FragColor.a = gl_Color.a; // the filtered alpha is discarded in favour of the incoming colour's alpha
++}
++
+--- system/shaders/convolution-4x4.glsl
++++ system/shaders/convolution-4x4.glsl
+@@ -0,0 +1,60 @@
++uniform sampler2D img;
++uniform float     stepx;
++uniform float     stepy;
++
++#if (HAS_FLOAT_TEXTURE)
++uniform sampler1D kernelTex; // float path: the kernel taps are read directly from a 1D texture
++
++vec4 weight(float pos) // returns the four taps stored in the r, g, b and a channels at kernel position pos
++{
++  return texture1D(kernelTex, pos);
++}
++#else
++uniform sampler2D kernelTex; // fallback path: two-row texture holding every tap as a high byte and a low byte
++
++vec4 weight(float pos) // same contract as the float variant, rebuilt from the two byte rows
++{
++  //row 0 contains the high byte, row 1 contains the low byte; (high * 256 + low) / 128.5 - 1.0 maps the pair back to [-1, 1]
++  return (texture2D(kernelTex, vec2(pos, 0.0)) * 256.0 + texture2D(kernelTex, vec2(pos, 1.0))) / 128.5 - 1.0;
++}
++#endif
++
++vec3 pixel(float xpos, float ypos) // samples the source image at (xpos, ypos) and keeps only the rgb channels
++{
++  return texture2D(img, vec2(xpos, ypos)).rgb;
++}
++
++vec3 line (float ypos, vec4 xpos, vec4 linetaps) // horizontal pass: weighted sum of four samples on row ypos
++{
++  vec3  pixels;
++
++  pixels  = pixel(xpos.r, ypos) * linetaps.r; // component k of xpos is weighted by component k of linetaps
++  pixels += pixel(xpos.g, ypos) * linetaps.g;
++  pixels += pixel(xpos.b, ypos) * linetaps.b;
++  pixels += pixel(xpos.a, ypos) * linetaps.a;
++
++  return pixels;
++}
++
++void main() // 4x4 separable convolution: four weighted rows, each a weighted sum of four samples
++{
++  float xf = fract(gl_TexCoord[0].x / stepx); // fractional position of the sample point, in units of stepx
++  float yf = fract(gl_TexCoord[0].y / stepy); // same for the vertical axis, in units of stepy
++
++  vec4 linetaps   = weight(1.0 - xf); // all four horizontal taps come from a single kernel lookup
++  vec4 columntaps = weight(1.0 - yf); // all four vertical taps likewise
++
++  vec4 xpos = vec4( // subtracting xf * stepx snaps to a multiple of stepx; the half-step offsets then pick four neighbouring columns
++      (-0.5 - xf) * stepx + gl_TexCoord[0].x,
++      ( 0.5 - xf) * stepx + gl_TexCoord[0].x,
++      ( 1.5 - xf) * stepx + gl_TexCoord[0].x,
++      ( 2.5 - xf) * stepx + gl_TexCoord[0].x);
++
++  gl_FragColor.rgb  = line((-0.5 - yf) * stepy + gl_TexCoord[0].y, xpos, linetaps) * columntaps.r; // vertical pass: rows use the same offsets as the columns
++  gl_FragColor.rgb += line(( 0.5 - yf) * stepy + gl_TexCoord[0].y, xpos, linetaps) * columntaps.g;
++  gl_FragColor.rgb += line(( 1.5 - yf) * stepy + gl_TexCoord[0].y, xpos, linetaps) * columntaps.b;
++  gl_FragColor.rgb += line(( 2.5 - yf) * stepy + gl_TexCoord[0].y, xpos, linetaps) * columntaps.a;
++
++  gl_FragColor.a = gl_Color.a; // alpha is not filtered; it is taken from the incoming colour
++}
++
+--- xbmc/settings/VideoSettings.h
++++ xbmc/settings/VideoSettings.h
+@@ -51,9 +51,10 @@
+ {
+   VS_SCALINGMETHOD_NEAREST=0,
+   VS_SCALINGMETHOD_LINEAR,
+-  
++
+   VS_SCALINGMETHOD_CUBIC,
+   VS_SCALINGMETHOD_LANCZOS2,
++  VS_SCALINGMETHOD_LANCZOS3_FAST,
+   VS_SCALINGMETHOD_LANCZOS3,
+   VS_SCALINGMETHOD_SINC8,
+   VS_SCALINGMETHOD_NEDI,
+@@ -61,7 +62,9 @@
+   VS_SCALINGMETHOD_BICUBIC_SOFTWARE,
+   VS_SCALINGMETHOD_LANCZOS_SOFTWARE,
+   VS_SCALINGMETHOD_SINC_SOFTWARE,
+-  VS_SCALINGMETHOD_VDPAU_HARDWARE
++  VS_SCALINGMETHOD_VDPAU_HARDWARE,
++
++  VS_SCALINGMETHOD_AUTO
+ };
+ 
+ class CVideoSettings
+--- xbmc/cores/VideoRenderers/VideoShaders/VideoFilterShader.cpp
++++ xbmc/cores/VideoRenderers/VideoShaders/VideoFilterShader.cpp
+@@ -21,6 +21,7 @@
+ #include "system.h"
+ #include "VideoFilterShader.h"
+ #include "utils/log.h"
++#include "ConvolutionKernels.h"
+ 
+ #include <string>
+ #include <math.h>
+@@ -63,60 +64,13 @@
+ 
+ BicubicFilterShader::BicubicFilterShader(float B, float C)
+ {
+-  string shaderf = 
+-    "uniform sampler2D img;"
+-    "uniform float stepx;"
+-    "uniform float stepy;"
+-    "uniform sampler2D kernelTex;"
+-    
+-    "vec4 cubicFilter(float xValue, vec4 c0, vec4 c1, vec4 c2, vec4 c3)"
+-    "{"
+-    " vec4 h = texture2D(kernelTex, vec2(xValue, 0.5));"
+-    " vec4 r = c0 * h.r;"
+-    " r += c1 * h.g;"
+-    " r += c2 * h.b;"
+-    " r += c3 * h.a;"
+-    " return r;"
+-    "}"
+-    ""
+-    "void main()"
+-    "{"
+-    "vec2 f = vec2(gl_TexCoord[0].x / stepx , gl_TexCoord[0].y / stepy);"
+-    "f = fract(f);"
+-    "vec4 t0 = cubicFilter(f.x,"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, -stepy)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(0.0, -stepy)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(stepx, -stepy)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, -stepy)));"
+-    ""
+-    "vec4 t1 = cubicFilter(f.x,"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, 0.0)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(0.0, 0.0)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(stepx, 0.0)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, 0.0)));"
+-    ""
+-    "vec4 t2 = cubicFilter(f.x,"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, stepy)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(0.0, stepy)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(stepx, stepy)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, stepy)));"
+-    ""
+-    "vec4 t3 = cubicFilter(f.x,"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, 2.0*stepy)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(0, 2.0*stepy)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(stepx, 2.0*stepy)),"
+-    "texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, 2.0*stepy)));"
+-    
+-    "gl_FragColor = cubicFilter(f.y, t0, t1, t2, t3) ;"    
+-    "gl_FragColor.a = gl_Color.a;"
+-    "}";
+-  PixelShader()->SetSource(shaderf);
++  PixelShader()->LoadSource("bicubic.glsl");
+   m_kernelTex1 = 0;
+   m_B = B;
+   m_C = C;
+-  if (B<=0)
++  if (B<0)
+     m_B=1.0f/3.0f;
+-  if (C<=0)
++  if (C<0)
+     m_C=1.0f/3.0f;
+ }
+ 
+@@ -209,8 +163,8 @@
+   glBindTexture(GL_TEXTURE_2D, m_kernelTex1);
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+-  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
+-  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
++  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
++  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
+   glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, size, 1, 0, GL_RGBA, GL_FLOAT, img);
+ 
+   glActiveTexture(GL_TEXTURE0);
+@@ -254,4 +208,110 @@
+   return val;
+ }
+ 
++ConvolutionFilterShader::ConvolutionFilterShader(ESCALINGMETHOD method) // picks the shader source and its defines for the given scaling method
++{
++  m_method = method;
++  m_kernelTex1 = 0; // no kernel texture yet; it is created in OnCompiledAndLinked()
++
++  string shadername;
++  string defines;
++
++  if (m_method == VS_SCALINGMETHOD_CUBIC || // these three kernels have four taps per axis
++      m_method == VS_SCALINGMETHOD_LANCZOS2 ||
++      m_method == VS_SCALINGMETHOD_LANCZOS3_FAST)
++    shadername = "convolution-4x4.glsl";
++  else if (m_method == VS_SCALINGMETHOD_LANCZOS3) // six taps per axis
++    shadername = "convolution-6x6.glsl"; // NOTE(review): for any other method shadername stays empty - confirm callers never pass one
++
++  m_floattex = glewIsSupported("GL_ARB_texture_float");
++
++  if (m_floattex) // the shader selects its kernel lookup code with #if (HAS_FLOAT_TEXTURE)
++    defines = "#define HAS_FLOAT_TEXTURE 1\n";
++  else
++    defines = "#define HAS_FLOAT_TEXTURE 0\n";
++
++  CLog::Log(LOGDEBUG, "GL: ConvolutionFilterShader: using %s defines: %s", shadername.c_str(), defines.c_str());
++  PixelShader()->LoadSource(shadername, defines);
++}
++
++void ConvolutionFilterShader::OnCompiledAndLinked() // caches uniform locations and uploads the convolution kernel as a texture
++{
++  // obtain shader uniform locations on successful compilation
++  m_hSourceTex = glGetUniformLocation(ProgramHandle(), "img");
++  m_hStepX     = glGetUniformLocation(ProgramHandle(), "stepx");
++  m_hStepY     = glGetUniformLocation(ProgramHandle(), "stepy");
++  m_hKernTex   = glGetUniformLocation(ProgramHandle(), "kernelTex");
++
++  CConvolutionKernel kernel(m_method, 256); // 256 kernel positions, one RGBA entry each
++
++  if (m_kernelTex1) // release the texture from an earlier link before creating a new one
++  {
++    glDeleteTextures(1, &m_kernelTex1);
++    m_kernelTex1 = 0;
++  }
++
++  glGenTextures(1, &m_kernelTex1);
++
++  if (m_kernelTex1 == 0) // GLuint is unsigned: still 0 means no texture name was generated
++  {
++    CLog::Log(LOGERROR, "GL: ConvolutionFilterShader: Error creating kernel texture");
++    return;
++  }
++
++  glActiveTexture(GL_TEXTURE2); // the kernel lives on texture unit 2, matching OnEnabled()
++
++  //if float textures are supported, we can load the kernel as a 1d float texture
++  //if not, we load it as a 2d texture with 2 rows, where row 0 contains the high byte
++  //and row 1 contains the low byte, which can be converted in the shader
++  if (m_floattex)
++  {
++    glBindTexture(GL_TEXTURE_1D, m_kernelTex1);
++    glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
++    glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
++    glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
++    glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); // a 1D texture is addressed by s only, so this setting is not used
++    glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA16F_ARB, kernel.GetSize(), 0, GL_RGBA, GL_FLOAT, kernel.GetFloatPixels());
++  }
++  else
++  {
++    glBindTexture(GL_TEXTURE_2D, m_kernelTex1);
++    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); // nearest: interpolating the high and low bytes separately would not decode correctly
++    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
++    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
++    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
++    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, kernel.GetSize(), 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, kernel.GetIntFractPixels());
++  }
++
++  glActiveTexture(GL_TEXTURE0); // restore the default active unit for the code that follows
++
++  VerifyGLState();
++}
++
++bool ConvolutionFilterShader::OnEnabled() // binds the kernel texture and sets the shader uniforms; always returns true
++{
++  // set shader uniforms once enabled
++  glActiveTexture(GL_TEXTURE2); // the kernel texture is bound on unit 2
++
++  if (m_floattex) // bind to the same target the texture was created with
++    glBindTexture(GL_TEXTURE_1D, m_kernelTex1);
++  else
++    glBindTexture(GL_TEXTURE_2D, m_kernelTex1);
++
++  glActiveTexture(GL_TEXTURE0);
++  glUniform1i(m_hSourceTex, m_sourceTexUnit);
++  glUniform1i(m_hKernTex, 2); // sampler index must match the GL_TEXTURE2 unit used above
++  glUniform1f(m_hStepX, m_stepX);
++  glUniform1f(m_hStepY, m_stepY);
++  VerifyGLState();
++  return true;
++}
++
++void ConvolutionFilterShader::Free() // deletes the kernel texture, then lets the base class free its own resources
++{
++  if (m_kernelTex1)
++    glDeleteTextures(1, &m_kernelTex1);
++  m_kernelTex1 = 0; // reset so a repeated Free() or a later relink does not delete a stale name
++  BaseVideoFilterShader::Free();
++}
++
+ #endif
+--- xbmc/cores/VideoRenderers/VideoShaders/ConvolutionKernels.cpp
++++ xbmc/cores/VideoRenderers/VideoShaders/ConvolutionKernels.cpp
+@@ -0,0 +1,226 @@
++/*
++ *      Copyright (C) 2005-2008 Team XBMC
++ *      http://www.xbmc.org
++ *
++ *  This Program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License as published by
++ *  the Free Software Foundation; either version 2, or (at your option)
++ *  any later version.
++ *
++ *  This Program is distributed in the hope that it will be useful,
++ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
++ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ *  GNU General Public License for more details.
++ *
++ *  You should have received a copy of the GNU General Public License
++ *  along with XBMC; see the file COPYING.  If not, write to
++ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ *  http://www.gnu.org/copyleft/gpl.html
++ *
++ */
++#ifdef _WIN32
++  #define _USE_MATH_DEFINES
++#endif
++
++#include "ConvolutionKernels.h"
++#include "MathUtils.h"
++
++#define SINC(x) (sin(M_PI * (x)) / (M_PI * (x)))
++
++CConvolutionKernel::CConvolutionKernel(ESCALINGMETHOD method, int size) // builds a kernel table of `size` RGBA entries for the given scaling method
++{
++  m_size = size;
++  m_floatpixels = new float[m_size * 4](); // zero-filled, so an unhandled method yields an all-zero kernel instead of indeterminate values
++
++  if (method == VS_SCALINGMETHOD_LANCZOS2)
++    Lanczos2();
++  else if (method == VS_SCALINGMETHOD_LANCZOS3_FAST)
++    Lanczos3Fast();
++  else if (method == VS_SCALINGMETHOD_LANCZOS3)
++    Lanczos3();
++  else if (method == VS_SCALINGMETHOD_CUBIC) 
++    Bicubic(1.0 / 3.0, 1.0 / 3.0); // B = C = 1/3
++
++  ToIntFract(); // also allocates m_intfractpixels, so both buffers exist after construction
++}
++
++CConvolutionKernel::~CConvolutionKernel() // releases both buffers owned by the kernel
++{
++  delete [] m_floatpixels; // allocated in the constructor
++  delete [] m_intfractpixels; // allocated in ToIntFract()
++}
++
++//generate a lanczos2 kernel which can be loaded with RGBA format
++//each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
++void CConvolutionKernel::Lanczos2()
++{
++  for (int i = 0; i < m_size; i++)
++  {
++    double x = (double)i / (double)m_size; //position of this entry within [0, 1)
++
++    //generate taps
++    for (int j = 0; j < 4; j++) //taps are evaluated at x - 2, x - 1, x and x + 1
++      m_floatpixels[i * 4 + j] = (float)LanczosWeight(x + (double)(j - 2), 2.0);
++
++    //any collection of 4 taps added together needs to be exactly 1.0
++    //for lanczos this is not always the case, so we take each collection of 4 taps
++    //and divide those taps by the sum of the taps
++    float weight = 0.0;
++    for (int j = 0; j < 4; j++)
++      weight += m_floatpixels[i * 4 + j];
++
++    for (int j = 0; j < 4; j++)
++      m_floatpixels[i * 4 + j] /= weight;
++  }
++}
++
++//generate a lanczos3 kernel which can be loaded with RGBA format
++//each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
++//the two outer lobes of the lanczos3 kernel are added to the two lobes one step to the middle
++//this basically looks the same as lanczos3, but the kernel only has 4 taps,
++//so it can use the 4x4 convolution shader which is twice as fast as the 6x6 one
++void CConvolutionKernel::Lanczos3Fast()
++{
++  for (int i = 0; i < m_size; i++)
++  {
++    double a = 3.0; //lanczos radius
++    double x = (double)i / (double)m_size; //position of this entry within [0, 1)
++
++    //generate taps
++    m_floatpixels[i * 4 + 0] = (float)(LanczosWeight(x - 2.0, a) + LanczosWeight(x - 3.0, a)); //outermost left tap folded into its neighbour
++    m_floatpixels[i * 4 + 1] = (float) LanczosWeight(x - 1.0, a);
++    m_floatpixels[i * 4 + 2] = (float) LanczosWeight(x      , a);
++    m_floatpixels[i * 4 + 3] = (float)(LanczosWeight(x + 1.0, a) + LanczosWeight(x + 2.0, a)); //outermost right tap folded into its neighbour
++
++    //any collection of 4 taps added together needs to be exactly 1.0
++    //for lanczos this is not always the case, so we take each collection of 4 taps
++    //and divide those taps by the sum of the taps
++    float weight = 0.0;
++    for (int j = 0; j < 4; j++)
++      weight += m_floatpixels[i * 4 + j];
++
++    for (int j = 0; j < 4; j++)
++      m_floatpixels[i * 4 + j] /= weight;
++  }
++}
++
++//generate a lanczos3 kernel which can be loaded with RGBA format
++//each value of RGB has one tap, so a shader can load 3 taps with a single pixel lookup
++void CConvolutionKernel::Lanczos3()
++{
++  for (int i = 0; i < m_size; i++)
++  {
++    double x = (double)i / (double)m_size; //position of this entry within [0, 1)
++
++    //generate taps
++    for (int j = 0; j < 3; j++) //taps are two units apart; entries i and i + m_size / 2 interleave to give all six
++      m_floatpixels[i * 4 + j] = (float)LanczosWeight(x * 2.0 + (double)(j * 2 - 3), 3.0);
++
++    m_floatpixels[i * 4 + 3] = 0.0; //the alpha channel carries no tap
++  }
++
++  //any collection of 6 taps added together needs to be exactly 1.0
++  //for lanczos this is not always the case, so we take each collection of 6 taps
++  //and divide those taps by the sum of the taps
++  for (int i = 0; i < m_size / 2; i++) //each entry in the first half is paired with its partner in the second half
++  {
++    float weight = 0.0;
++    for (int j = 0; j < 3; j++)
++    {
++      weight += m_floatpixels[i * 4 + j];
++      weight += m_floatpixels[(i + m_size / 2) * 4 + j];
++    }
++    for (int j = 0; j < 3; j++)
++    {
++      m_floatpixels[i * 4 + j] /= weight;
++      m_floatpixels[(i + m_size / 2) * 4 + j] /= weight;
++    }
++  }
++}
++
++//generate a bicubic kernel which can be loaded with RGBA format
++//each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
++void CConvolutionKernel::Bicubic(double B, double C)
++{
++  for (int i = 0; i < m_size; i++)
++  {
++    double x = (double)i / (double)m_size; //position of this entry within [0, 1)
++
++    //generate taps
++    for (int j = 0; j < 4; j++) //taps are evaluated at x - 2, x - 1, x and x + 1; unlike the lanczos kernels they are not renormalized
++      m_floatpixels[i * 4 + j] = (float)BicubicWeight(x + (double)(j - 2), B, C);
++  }
++}
++
++double CConvolutionKernel::LanczosWeight(double x, double radius) // lanczos window value at distance x: sinc(x) * sinc(x / radius) inside the radius, 0 outside
++{
++  double ax = fabs(x); // the kernel is symmetric around 0
++
++  if (ax == 0.0) // handled separately because SINC(0) would divide by zero
++    return 1.0;
++  else if (ax < radius)
++    return SINC(ax) * SINC(ax / radius);
++  else
++    return 0.0;
++}
++
++double CConvolutionKernel::BicubicWeight(double x, double B, double C) // Mitchell-Netravali cubic with parameters B and C, evaluated at distance x
++{
++  double ax = fabs(x); // the kernel is symmetric around 0
++
++  if (ax<1.0) // inner segment, |x| < 1
++  {
++    return ((12 - 9*B - 6*C) * ax * ax * ax +
++            (-18 + 12*B + 6*C) * ax * ax +
++            (6 - 2*B))/6;
++  }
++  else if (ax<2.0) // outer segment, 1 <= |x| < 2
++  {
++    return ((-B - 6*C) * ax * ax * ax + 
++            (6*B + 30*C) * ax * ax + (-12*B - 48*C) * 
++             ax + (8*B + 24*C)) / 6;
++  }
++  else // the kernel is zero from |x| = 2 outwards
++  {
++    return 0.0;
++  }
++}
++
++
++//convert float to high byte/low byte, so the kernel can be loaded into an 8 bit texture
++//with height 2 and converted back to real float in the shader
++//it only works when the kernel texture uses nearest neighbour, but there's almost no difference
++//between that and linear interpolation
++void CConvolutionKernel::ToIntFract()
++{
++  m_intfractpixels = new uint8_t[m_size * 8]; //two rows of m_size RGBA entries: high bytes first, then low bytes
++
++  for (int i = 0; i < m_size * 4; i++)
++  {
++    int value = MathUtils::round_int((m_floatpixels[i] + 1.0) / 2.0 * 65535.0); //map [-1, 1] onto the 16 bit range 0..65535
++    if (value < 0) //taps outside [-1, 1] are clamped to the representable range
++      value = 0;
++    else if (value > 65535)
++      value = 65535;
++    
++    int integer = value / 256; //high byte
++    int fract   = value % 256; //low byte
++
++    m_intfractpixels[i] = (uint8_t)integer;
++    m_intfractpixels[i + m_size * 4] = (uint8_t)fract; //second row starts m_size * 4 bytes in
++  }
++
++#if 0
++  for (int i = 0; i < 4; i++) //disabled debug dump of the table
++  {
++    for (int j = 0; j < m_size; j++)
++    {
++      printf("%i %f %f\n",
++          i * m_size + j,
++          ((double)m_intfractpixels[j * 4 + i] + (double)m_intfractpixels[j * 4 + i + m_size * 4] / 255.0) / 255.0 * 2.0 - 1.0,
++          m_floatpixels[j * 4 + i]);
++    }
++  }
++#endif
++}
++
+--- xbmc/cores/VideoRenderers/VideoShaders/VideoFilterShader.h
++++ xbmc/cores/VideoRenderers/VideoShaders/VideoFilterShader.h
+@@ -4,6 +4,7 @@
+ #ifdef HAS_GL
+ 
+ #include "../../../../guilib/Shader.h"
++#include "../../../settings/VideoSettings.h"
+ 
+ using namespace Shaders;
+ 
+@@ -35,7 +36,7 @@
+   class BicubicFilterShader : public BaseVideoFilterShader
+   {
+   public:
+-    BicubicFilterShader(float B=0.0f, float C=0.0f);
++    BicubicFilterShader(float B=-1.0f, float C=-1.0f);
+     void OnCompiledAndLinked();
+     bool OnEnabled();
+     void Free();
+@@ -55,6 +56,25 @@
+     float m_C;
+   };
+ 
++  class ConvolutionFilterShader : public BaseVideoFilterShader // video filter shader that scales with a convolution kernel stored in a texture
++  {
++  public:
++    ConvolutionFilterShader(ESCALINGMETHOD method); // selects the shader source for the given scaling method
++    void OnCompiledAndLinked(); // looks up uniform locations and uploads the kernel texture
++    bool OnEnabled(); // binds the kernel texture and sets the uniforms
++    void Free(); // deletes the kernel texture and frees the base class resources
++
++  protected:
++    // kernel texture name, 0 while no texture exists
++    GLuint m_kernelTex1;
++
++    // uniform location of the kernel texture sampler
++    GLint m_hKernTex;
++
++    ESCALINGMETHOD m_method; // scaling method the kernel is generated for
++    bool           m_floattex; //if float textures are supported
++  };
++
+ } // end namespace
+ 
+ #endif
+--- xbmc/cores/VideoRenderers/VideoShaders/ConvolutionKernels.h
++++ xbmc/cores/VideoRenderers/VideoShaders/ConvolutionKernels.h
+@@ -0,0 +1,55 @@
++/*
++ *      Copyright (C) 2005-2008 Team XBMC
++ *      http://www.xbmc.org
++ *
++ *  This Program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License as published by
++ *  the Free Software Foundation; either version 2, or (at your option)
++ *  any later version.
++ *
++ *  This Program is distributed in the hope that it will be useful,
++ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
++ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ *  GNU General Public License for more details.
++ *
++ *  You should have received a copy of the GNU General Public License
++ *  along with XBMC; see the file COPYING.  If not, write to
++ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ *  http://www.gnu.org/copyleft/gpl.html
++ *
++ */
++
++#ifndef CONVOLUTIONKERNELS
++#define CONVOLUTIONKERNELS
++
++#include "system.h"
++#include "../../../settings/VideoSettings.h"
++
++class CConvolutionKernel // table of filter taps for one scaling method, available as floats and as split high/low bytes
++{
++  public:
++    CConvolutionKernel(ESCALINGMETHOD method, int size); // size is the number of RGBA entries in the table
++    ~CConvolutionKernel();
++
++    int      GetSize()           { return m_size; }
++    float*   GetFloatPixels()    { return m_floatpixels; } // m_size * 4 floats, owned by this object
++    uint8_t* GetIntFractPixels() { return m_intfractpixels; } // m_size * 8 bytes (high-byte row, then low-byte row), owned by this object
++
++  private:
++
++    void Lanczos2();
++    void Lanczos3Fast();
++    void Lanczos3();
++    void Bicubic(double B, double C);
++
++    double LanczosWeight(double x, double radius);
++    double BicubicWeight(double x, double B, double C);
++
++    void ToIntFract();
++
++    int      m_size;
++    float*   m_floatpixels; // NOTE(review): both buffers are freed in the destructor and no copy operations are declared - do not copy instances
++    uint8_t* m_intfractpixels;
++};
++
++#endif //CONVOLUTIONKERNELS
+--- xbmc/cores/VideoRenderers/VideoShaders/Makefile
++++ xbmc/cores/VideoRenderers/VideoShaders/Makefile
+@@ -1,5 +1,5 @@
+ INCLUDES=-I. -I.. -I../../ -I../../../ -I../../../linux -I../../../../guilib
+-SRCS=YUV2RGBShader.cpp VideoFilterShader.cpp
++SRCS=YUV2RGBShader.cpp VideoFilterShader.cpp ConvolutionKernels.cpp
+ 
+ LIB=VideoShaders.a
+ 
+--- xbmc/cores/VideoRenderers/LinuxRendererGL.cpp
++++ xbmc/cores/VideoRenderers/LinuxRendererGL.cpp
+@@ -886,6 +886,19 @@
+ 
+   VerifyGLState();
+ 
++  if (m_scalingMethod == VS_SCALINGMETHOD_AUTO) // resolve "Auto" to a concrete method; m_scalingMethod is overwritten with the result
++  {
++    bool scaleSD = (int)m_sourceWidth < m_upscalingWidth && (int)m_sourceHeight < m_upscalingHeight && // source is smaller than the upscaling size in both dimensions
++                   m_sourceHeight < 720 && m_sourceWidth < 1280; // and is below 1280x720
++
++    if (Supports(VS_SCALINGMETHOD_VDPAU_HARDWARE)) // first choice whenever it is supported
++      m_scalingMethod = VS_SCALINGMETHOD_VDPAU_HARDWARE;
++    else if (Supports(VS_SCALINGMETHOD_LANCZOS3_FAST) && scaleSD) // shader upscaling is used only when scaleSD holds
++      m_scalingMethod = VS_SCALINGMETHOD_LANCZOS3_FAST;
++    else // everything else falls back to linear scaling
++      m_scalingMethod = VS_SCALINGMETHOD_LINEAR;
++  }
++
+   switch (m_scalingMethod)
+   {
+   case VS_SCALINGMETHOD_NEAREST:
+@@ -898,13 +911,10 @@
+     m_renderQuality = RQ_SINGLEPASS;
+     return;
+ 
++  case VS_SCALINGMETHOD_LANCZOS2:
++  case VS_SCALINGMETHOD_LANCZOS3_FAST:
++  case VS_SCALINGMETHOD_LANCZOS3:
+   case VS_SCALINGMETHOD_CUBIC:
+-    if(!glewIsSupported("GL_ARB_texture_float"))
+-    {
+-      CLog::Log(LOGERROR, "GL: hardware doesn't support GL_ARB_texture_float");
+-      break;
+-    }
+-
+     if (!m_fbo.Initialize())
+     {
+       CLog::Log(LOGERROR, "GL: Error initializing FBO");
+@@ -917,7 +927,7 @@
+       break;
+     }
+ 
+-    m_pVideoFilterShader = new BicubicFilterShader(0.3f, 0.3f);
++    m_pVideoFilterShader = new ConvolutionFilterShader(m_scalingMethod);
+     if (!m_pVideoFilterShader->CompileAndLink())
+     {
+       CLog::Log(LOGERROR, "GL: Error compiling and linking video filter shader");
+@@ -928,8 +938,6 @@
+     m_renderQuality = RQ_MULTIPASS;
+     return;
+ 
+-  case VS_SCALINGMETHOD_LANCZOS2:
+-  case VS_SCALINGMETHOD_LANCZOS3:
+   case VS_SCALINGMETHOD_SINC8:
+   case VS_SCALINGMETHOD_NEDI:
+     CLog::Log(LOGERROR, "GL: TODO: This scaler has not yet been implemented");
+@@ -1895,16 +1903,19 @@
+ bool CLinuxRendererGL::Supports(ESCALINGMETHOD method)
+ {
+   if(method == VS_SCALINGMETHOD_NEAREST
+-  || method == VS_SCALINGMETHOD_LINEAR)
++  || method == VS_SCALINGMETHOD_LINEAR
++  || method == VS_SCALINGMETHOD_AUTO)
+     return true;
+ 
+-
+-  if(method == VS_SCALINGMETHOD_CUBIC 
+-  && glewIsSupported("GL_ARB_texture_float")
+-  && glewIsSupported("GL_EXT_framebuffer_object")
+-  && m_renderMethod == RENDER_GLSL)
+-    return true;
+-
++  if(method == VS_SCALINGMETHOD_CUBIC
++  || method == VS_SCALINGMETHOD_LANCZOS2
++  || method == VS_SCALINGMETHOD_LANCZOS3_FAST
++  || method == VS_SCALINGMETHOD_LANCZOS3)
++  {
++    if (glewIsSupported("GL_EXT_framebuffer_object") && (m_renderMethod & RENDER_GLSL))
++      return true;
++  }
++ 
+   if (g_advancedSettings.m_videoHighQualityScaling != SOFTWARE_UPSCALING_DISABLED)
+   {
+     if(method == VS_SCALINGMETHOD_BICUBIC_SOFTWARE
+--- xbmc/GUIDialogVideoSettings.cpp
++++ xbmc/GUIDialogVideoSettings.cpp
+@@ -103,6 +103,7 @@
+     entries.push_back(make_pair(VS_SCALINGMETHOD_LINEAR           , 16302));
+     entries.push_back(make_pair(VS_SCALINGMETHOD_CUBIC            , 16303));
+     entries.push_back(make_pair(VS_SCALINGMETHOD_LANCZOS2         , 16304));
++    entries.push_back(make_pair(VS_SCALINGMETHOD_LANCZOS3_FAST    , 16315));
+     entries.push_back(make_pair(VS_SCALINGMETHOD_LANCZOS3         , 16305));
+     entries.push_back(make_pair(VS_SCALINGMETHOD_SINC8            , 16306));
+ //    entries.push_back(make_pair(VS_SCALINGMETHOD_NEDI             , ?????));
+@@ -110,6 +111,7 @@
+     entries.push_back(make_pair(VS_SCALINGMETHOD_LANCZOS_SOFTWARE , 16308));
+     entries.push_back(make_pair(VS_SCALINGMETHOD_SINC_SOFTWARE    , 16309));
+     entries.push_back(make_pair(VS_SCALINGMETHOD_VDPAU_HARDWARE   , 13120));
++    entries.push_back(make_pair(VS_SCALINGMETHOD_AUTO             , 16316));
+ 
+     /* remove unsupported methods */
+     for(vector<pair<int, int> >::iterator it = entries.begin(); it != entries.end();)
+--- xbmc/Settings.cpp
++++ xbmc/Settings.cpp
+@@ -772,7 +772,7 @@
+     GetInteger(pElement, "interlacemethod", interlaceMethod, VS_INTERLACEMETHOD_NONE, VS_INTERLACEMETHOD_NONE, VS_INTERLACEMETHOD_INVERSE_TELECINE);
+     m_stSettings.m_defaultVideoSettings.m_InterlaceMethod = (EINTERLACEMETHOD)interlaceMethod;
+     int scalingMethod;
+-    GetInteger(pElement, "scalingmethod", scalingMethod, VS_SCALINGMETHOD_LINEAR, VS_SCALINGMETHOD_NEAREST, VS_SCALINGMETHOD_CUBIC);
++    GetInteger(pElement, "scalingmethod", scalingMethod, VS_SCALINGMETHOD_LINEAR, VS_SCALINGMETHOD_NEAREST, VS_SCALINGMETHOD_AUTO);
+     m_stSettings.m_defaultVideoSettings.m_ScalingMethod = (ESCALINGMETHOD)scalingMethod;
+ 
+     GetInteger(pElement, "viewmode", m_stSettings.m_defaultVideoSettings.m_ViewMode, VIEW_MODE_NORMAL, VIEW_MODE_NORMAL, VIEW_MODE_CUSTOM);