summaryrefslogtreecommitdiff
path: root/media-tv/xbmc/files/xbmc-9.11-shader-upscalers.patch
diff options
context:
space:
mode:
Diffstat (limited to 'media-tv/xbmc/files/xbmc-9.11-shader-upscalers.patch')
-rw-r--r--media-tv/xbmc/files/xbmc-9.11-shader-upscalers.patch887
1 files changed, 887 insertions, 0 deletions
diff --git a/media-tv/xbmc/files/xbmc-9.11-shader-upscalers.patch b/media-tv/xbmc/files/xbmc-9.11-shader-upscalers.patch
new file mode 100644
index 00000000..d4feaa47
--- /dev/null
+++ b/media-tv/xbmc/files/xbmc-9.11-shader-upscalers.patch
@@ -0,0 +1,887 @@
+http://bugs.gentoo.org/306661
+
+backport shader based upscalers from svn trunk
+
+--- language/English/strings.xml
++++ language/English/strings.xml
+@@ -1554,16 +1554,17 @@
+ <string id="16304">Lanczos2</string>
+ <string id="16305">Lanczos3</string>
+ <string id="16306">Sinc8</string>
+-
+ <string id="16307">Bicubic (software)</string>
+ <string id="16308">Lanczos (software)</string>
+ <string id="16309">Sinc (software)</string>
+-
+ <string id="16310">(VDPAU)Temporal</string>
+ <string id="16311">(VDPAU)Temporal/Spatial</string>
+ <string id="16312">(VDPAU)Noise Reduction</string>
+ <string id="16313">(VDPAU)Sharpness</string>
+ <string id="16314">Inverse Telecine</string>
++ <string id="16315">Lanczos3 optimized</string>
++ <string id="16316">Auto</string>
++
+ <string id="17500">Display sleep timeout</string>
+
+ <string id="19000">Switch to channel</string>
+--- system/shaders/convolution-6x6.glsl
++++ system/shaders/convolution-6x6.glsl
+@@ -0,0 +1,69 @@
++uniform sampler2D img;
++uniform float stepx;
++uniform float stepy;
++
++#if (HAS_FLOAT_TEXTURE)
++uniform sampler1D kernelTex;
++
++vec3 weight(float pos)
++{
++ return texture1D(kernelTex, pos).rgb;
++}
++#else
++uniform sampler2D kernelTex;
++
++vec3 weight(float pos)
++{
++ //row 0 contains the high byte, row 1 contains the low byte
++ return ((texture2D(kernelTex, vec2(pos, 0.0)) * 256.0 + texture2D(kernelTex, vec2(pos, 1.0)))).rgb / 128.5 - 1.0;
++}
++#endif
++
++vec3 pixel(float xpos, float ypos)
++{
++ return texture2D(img, vec2(xpos, ypos)).rgb;
++}
++
++vec3 line (float ypos, vec3 xpos1, vec3 xpos2, vec3 linetaps1, vec3 linetaps2)
++{
++ vec3 pixels;
++
++ pixels = pixel(xpos1.r, ypos) * linetaps1.r;
++ pixels += pixel(xpos1.g, ypos) * linetaps2.r;
++ pixels += pixel(xpos1.b, ypos) * linetaps1.g;
++ pixels += pixel(xpos2.r, ypos) * linetaps2.g;
++ pixels += pixel(xpos2.g, ypos) * linetaps1.b;
++ pixels += pixel(xpos2.b, ypos) * linetaps2.b;
++
++ return pixels;
++}
++
++void main()
++{
++ float xf = fract(gl_TexCoord[0].x / stepx);
++ float yf = fract(gl_TexCoord[0].y / stepy);
++
++ vec3 linetaps1 = weight((1.0 - xf) / 2.0);
++ vec3 linetaps2 = weight((1.0 - xf) / 2.0 + 0.5);
++ vec3 columntaps1 = weight((1.0 - yf) / 2.0);
++ vec3 columntaps2 = weight((1.0 - yf) / 2.0 + 0.5);
++
++ vec3 xpos1 = vec3(
++ (-1.5 - xf) * stepx + gl_TexCoord[0].x,
++ (-0.5 - xf) * stepx + gl_TexCoord[0].x,
++ ( 0.5 - xf) * stepx + gl_TexCoord[0].x);
++ vec3 xpos2 = vec3(
++ ( 1.5 - xf) * stepx + gl_TexCoord[0].x,
++ ( 2.5 - xf) * stepx + gl_TexCoord[0].x,
++ ( 3.5 - xf) * stepx + gl_TexCoord[0].x);
++
++ gl_FragColor.rgb = line((-1.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps1.r;
++ gl_FragColor.rgb += line((-0.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps2.r;
++ gl_FragColor.rgb += line(( 0.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps1.g;
++ gl_FragColor.rgb += line(( 1.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps2.g;
++ gl_FragColor.rgb += line(( 2.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps1.b;
++ gl_FragColor.rgb += line(( 3.5 - yf) * stepy + gl_TexCoord[0].y, xpos1, xpos2, linetaps1, linetaps2) * columntaps2.b;
++
++ gl_FragColor.a = gl_Color.a;
++}
++
+--- system/shaders/bicubic.glsl
++++ system/shaders/bicubic.glsl
+@@ -0,0 +1,47 @@
++uniform sampler2D img;
++uniform float stepx;
++uniform float stepy;
++uniform sampler2D kernelTex;
++
++vec4 cubicFilter(float xValue, vec4 c0, vec4 c1, vec4 c2, vec4 c3)
++{
++ vec4 h = texture2D(kernelTex, vec2(xValue, 0.5));
++ vec4 r = c0 * h.r;
++ r += c1 * h.g;
++ r += c2 * h.b;
++ r += c3 * h.a;
++ return r;
++}
++
++void main()
++{
++ vec2 f = vec2(gl_TexCoord[0].x / stepx , gl_TexCoord[0].y / stepy);
++ f = fract(f); //fractional texel position, used as the lookup coordinate into kernelTex
++ vec4 t0 = cubicFilter(f.x, //horizontal pass over the row at y - stepy
++ texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, -stepy)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(0.0, -stepy)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(stepx, -stepy)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, -stepy)));
++
++ vec4 t1 = cubicFilter(f.x, //horizontal pass over the row at y
++ texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, 0.0)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(0.0, 0.0)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(stepx, 0.0)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, 0.0)));
++
++ vec4 t2 = cubicFilter(f.x, //horizontal pass over the row at y + stepy
++ texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, stepy)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(0.0, stepy)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(stepx, stepy)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, stepy)));
++
++ vec4 t3 = cubicFilter(f.x, //horizontal pass over the row at y + 2 * stepy
++ texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, 2.0*stepy)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(0.0, 2.0*stepy)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(stepx, 2.0*stepy)),
++ texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, 2.0*stepy)));
++
++ gl_FragColor = cubicFilter(f.y, t0, t1, t2, t3); //vertical pass over the four filtered rows
++ gl_FragColor.a = gl_Color.a; //alpha comes from the vertex colour, not from the filtered texels
++}
++
+--- system/shaders/convolution-4x4.glsl
++++ system/shaders/convolution-4x4.glsl
+@@ -0,0 +1,60 @@
++uniform sampler2D img;
++uniform float stepx;
++uniform float stepy;
++
++#if (HAS_FLOAT_TEXTURE)
++uniform sampler1D kernelTex;
++
++vec4 weight(float pos)
++{
++ return texture1D(kernelTex, pos);
++}
++#else
++uniform sampler2D kernelTex;
++
++vec4 weight(float pos)
++{
++ //row 0 contains the high byte, row 1 contains the low byte
++ return (texture2D(kernelTex, vec2(pos, 0.0)) * 256.0 + texture2D(kernelTex, vec2(pos, 1.0))) / 128.5 - 1.0;
++}
++#endif
++
++vec3 pixel(float xpos, float ypos)
++{
++ return texture2D(img, vec2(xpos, ypos)).rgb;
++}
++
++vec3 line (float ypos, vec4 xpos, vec4 linetaps)
++{
++ vec3 pixels;
++
++ pixels = pixel(xpos.r, ypos) * linetaps.r;
++ pixels += pixel(xpos.g, ypos) * linetaps.g;
++ pixels += pixel(xpos.b, ypos) * linetaps.b;
++ pixels += pixel(xpos.a, ypos) * linetaps.a;
++
++ return pixels;
++}
++
++void main()
++{
++ float xf = fract(gl_TexCoord[0].x / stepx);
++ float yf = fract(gl_TexCoord[0].y / stepy);
++
++ vec4 linetaps = weight(1.0 - xf);
++ vec4 columntaps = weight(1.0 - yf);
++
++ vec4 xpos = vec4(
++ (-0.5 - xf) * stepx + gl_TexCoord[0].x,
++ ( 0.5 - xf) * stepx + gl_TexCoord[0].x,
++ ( 1.5 - xf) * stepx + gl_TexCoord[0].x,
++ ( 2.5 - xf) * stepx + gl_TexCoord[0].x);
++
++ gl_FragColor.rgb = line((-0.5 - yf) * stepy + gl_TexCoord[0].y, xpos, linetaps) * columntaps.r;
++ gl_FragColor.rgb += line(( 0.5 - yf) * stepy + gl_TexCoord[0].y, xpos, linetaps) * columntaps.g;
++ gl_FragColor.rgb += line(( 1.5 - yf) * stepy + gl_TexCoord[0].y, xpos, linetaps) * columntaps.b;
++ gl_FragColor.rgb += line(( 2.5 - yf) * stepy + gl_TexCoord[0].y, xpos, linetaps) * columntaps.a;
++
++ gl_FragColor.a = gl_Color.a;
++}
++
+--- xbmc/settings/VideoSettings.h
++++ xbmc/settings/VideoSettings.h
+@@ -51,9 +51,10 @@
+ {
+ VS_SCALINGMETHOD_NEAREST=0,
+ VS_SCALINGMETHOD_LINEAR,
+-
++
+ VS_SCALINGMETHOD_CUBIC,
+ VS_SCALINGMETHOD_LANCZOS2,
++ VS_SCALINGMETHOD_LANCZOS3_FAST,
+ VS_SCALINGMETHOD_LANCZOS3,
+ VS_SCALINGMETHOD_SINC8,
+ VS_SCALINGMETHOD_NEDI,
+@@ -61,7 +62,9 @@
+ VS_SCALINGMETHOD_BICUBIC_SOFTWARE,
+ VS_SCALINGMETHOD_LANCZOS_SOFTWARE,
+ VS_SCALINGMETHOD_SINC_SOFTWARE,
+- VS_SCALINGMETHOD_VDPAU_HARDWARE
++ VS_SCALINGMETHOD_VDPAU_HARDWARE,
++
++ VS_SCALINGMETHOD_AUTO
+ };
+
+ class CVideoSettings
+--- xbmc/cores/VideoRenderers/VideoShaders/VideoFilterShader.cpp
++++ xbmc/cores/VideoRenderers/VideoShaders/VideoFilterShader.cpp
+@@ -21,6 +21,7 @@
+ #include "system.h"
+ #include "VideoFilterShader.h"
+ #include "utils/log.h"
++#include "ConvolutionKernels.h"
+
+ #include <string>
+ #include <math.h>
+@@ -63,60 +64,13 @@
+
+ BicubicFilterShader::BicubicFilterShader(float B, float C)
+ {
+- string shaderf =
+- "uniform sampler2D img;"
+- "uniform float stepx;"
+- "uniform float stepy;"
+- "uniform sampler2D kernelTex;"
+-
+- "vec4 cubicFilter(float xValue, vec4 c0, vec4 c1, vec4 c2, vec4 c3)"
+- "{"
+- " vec4 h = texture2D(kernelTex, vec2(xValue, 0.5));"
+- " vec4 r = c0 * h.r;"
+- " r += c1 * h.g;"
+- " r += c2 * h.b;"
+- " r += c3 * h.a;"
+- " return r;"
+- "}"
+- ""
+- "void main()"
+- "{"
+- "vec2 f = vec2(gl_TexCoord[0].x / stepx , gl_TexCoord[0].y / stepy);"
+- "f = fract(f);"
+- "vec4 t0 = cubicFilter(f.x,"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, -stepy)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(0.0, -stepy)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(stepx, -stepy)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, -stepy)));"
+- ""
+- "vec4 t1 = cubicFilter(f.x,"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, 0.0)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(0.0, 0.0)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(stepx, 0.0)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, 0.0)));"
+- ""
+- "vec4 t2 = cubicFilter(f.x,"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, stepy)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(0.0, stepy)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(stepx, stepy)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, stepy)));"
+- ""
+- "vec4 t3 = cubicFilter(f.x,"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(-stepx, 2.0*stepy)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(0, 2.0*stepy)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(stepx, 2.0*stepy)),"
+- "texture2D(img, gl_TexCoord[0].xy + vec2(2.0*stepx, 2.0*stepy)));"
+-
+- "gl_FragColor = cubicFilter(f.y, t0, t1, t2, t3) ;"
+- "gl_FragColor.a = gl_Color.a;"
+- "}";
+- PixelShader()->SetSource(shaderf);
++ PixelShader()->LoadSource("bicubic.glsl");
+ m_kernelTex1 = 0;
+ m_B = B;
+ m_C = C;
+- if (B<=0)
++ if (B<0)
+ m_B=1.0f/3.0f;
+- if (C<=0)
++ if (C<0)
+ m_C=1.0f/3.0f;
+ }
+
+@@ -209,8 +163,8 @@
+ glBindTexture(GL_TEXTURE_2D, m_kernelTex1);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
+- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
++ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
++ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, size, 1, 0, GL_RGBA, GL_FLOAT, img);
+
+ glActiveTexture(GL_TEXTURE0);
+@@ -254,4 +208,110 @@
+ return val;
+ }
+
++ConvolutionFilterShader::ConvolutionFilterShader(ESCALINGMETHOD method)
++{
++ m_method = method;
++ m_kernelTex1 = 0;
++
++ string shadername;
++ string defines;
++
++ if (m_method == VS_SCALINGMETHOD_CUBIC ||
++ m_method == VS_SCALINGMETHOD_LANCZOS2 ||
++ m_method == VS_SCALINGMETHOD_LANCZOS3_FAST)
++ shadername = "convolution-4x4.glsl";
++ else if (m_method == VS_SCALINGMETHOD_LANCZOS3)
++ shadername = "convolution-6x6.glsl";
++
++ m_floattex = glewIsSupported("GL_ARB_texture_float");
++
++ if (m_floattex)
++ defines = "#define HAS_FLOAT_TEXTURE 1\n";
++ else
++ defines = "#define HAS_FLOAT_TEXTURE 0\n";
++
++ CLog::Log(LOGDEBUG, "GL: ConvolutionFilterShader: using %s defines: %s", shadername.c_str(), defines.c_str());
++ PixelShader()->LoadSource(shadername, defines);
++}
++
++void ConvolutionFilterShader::OnCompiledAndLinked()
++{
++ // obtain shader attribute handles on successful compilation
++ m_hSourceTex = glGetUniformLocation(ProgramHandle(), "img");
++ m_hStepX = glGetUniformLocation(ProgramHandle(), "stepx");
++ m_hStepY = glGetUniformLocation(ProgramHandle(), "stepy");
++ m_hKernTex = glGetUniformLocation(ProgramHandle(), "kernelTex");
++
++ CConvolutionKernel kernel(m_method, 256);
++
++ if (m_kernelTex1)
++ {
++ glDeleteTextures(1, &m_kernelTex1);
++ m_kernelTex1 = 0;
++ }
++
++ glGenTextures(1, &m_kernelTex1);
++
++ if ((m_kernelTex1<=0))
++ {
++ CLog::Log(LOGERROR, "GL: ConvolutionFilterShader: Error creating kernel texture");
++ return;
++ }
++
++ glActiveTexture(GL_TEXTURE2);
++
++ //if float textures are supported, we can load the kernel as a 1d float texture
++ //if not, we load it as a 2d texture with 2 rows, where row 0 contains the high byte
++ //and row 1 contains the low byte, which can be converted in the shader
++ if (m_floattex)
++ {
++ glBindTexture(GL_TEXTURE_1D, m_kernelTex1);
++ glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
++ glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
++ glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
++ glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
++ glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA16F_ARB, kernel.GetSize(), 0, GL_RGBA, GL_FLOAT, kernel.GetFloatPixels());
++ }
++ else
++ {
++ glBindTexture(GL_TEXTURE_2D, m_kernelTex1);
++ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
++ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
++ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
++ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
++ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, kernel.GetSize(), 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, kernel.GetIntFractPixels());
++ }
++
++ glActiveTexture(GL_TEXTURE0);
++
++ VerifyGLState();
++}
++
++bool ConvolutionFilterShader::OnEnabled()
++{
++ // set shader attributes once enabled
++ glActiveTexture(GL_TEXTURE2);
++
++ if (m_floattex)
++ glBindTexture(GL_TEXTURE_1D, m_kernelTex1);
++ else
++ glBindTexture(GL_TEXTURE_2D, m_kernelTex1);
++
++ glActiveTexture(GL_TEXTURE0);
++ glUniform1i(m_hSourceTex, m_sourceTexUnit);
++ glUniform1i(m_hKernTex, 2);
++ glUniform1f(m_hStepX, m_stepX);
++ glUniform1f(m_hStepY, m_stepY);
++ VerifyGLState();
++ return true;
++}
++
++void ConvolutionFilterShader::Free()
++{
++ if (m_kernelTex1)
++ glDeleteTextures(1, &m_kernelTex1);
++ m_kernelTex1 = 0;
++ BaseVideoFilterShader::Free();
++}
++
+ #endif
+--- xbmc/cores/VideoRenderers/VideoShaders/ConvolutionKernels.cpp
++++ xbmc/cores/VideoRenderers/VideoShaders/ConvolutionKernels.cpp
+@@ -0,0 +1,226 @@
++/*
++ * Copyright (C) 2005-2008 Team XBMC
++ * http://www.xbmc.org
++ *
++ * This Program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2, or (at your option)
++ * any later version.
++ *
++ * This Program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with XBMC; see the file COPYING. If not, write to
++ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ * http://www.gnu.org/copyleft/gpl.html
++ *
++ */
++#ifdef _WIN32
++ #define _USE_MATH_DEFINES
++#endif
++
++#include "ConvolutionKernels.h"
++#include "MathUtils.h"
++
++#define SINC(x) (sin(M_PI * (x)) / (M_PI * (x)))
++
++CConvolutionKernel::CConvolutionKernel(ESCALINGMETHOD method, int size)
++{
++ m_size = size;
++ m_floatpixels = new float[m_size * 4](); //zero-filled: an unhandled method yields an all-zero kernel instead of indeterminate taps
++
++ if (method == VS_SCALINGMETHOD_LANCZOS2)
++ Lanczos2();
++ else if (method == VS_SCALINGMETHOD_LANCZOS3_FAST)
++ Lanczos3Fast();
++ else if (method == VS_SCALINGMETHOD_LANCZOS3)
++ Lanczos3();
++ else if (method == VS_SCALINGMETHOD_CUBIC)
++ Bicubic(1.0 / 3.0, 1.0 / 3.0);
++
++ ToIntFract(); //always runs: allocates and fills m_intfractpixels, which the destructor frees
++}
++
++CConvolutionKernel::~CConvolutionKernel()
++{
++ delete [] m_floatpixels;
++ delete [] m_intfractpixels;
++}
++
++//generate a lanczos2 kernel which can be loaded with RGBA format
++//each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
++void CConvolutionKernel::Lanczos2()
++{
++ for (int i = 0; i < m_size; i++)
++ {
++ double x = (double)i / (double)m_size;
++
++ //generate taps
++ for (int j = 0; j < 4; j++)
++ m_floatpixels[i * 4 + j] = (float)LanczosWeight(x + (double)(j - 2), 2.0);
++
++ //any collection of 4 taps added together needs to be exactly 1.0
++ //for lanczos this is not always the case, so we take each collection of 4 taps
++ //and divide those taps by the sum of the taps
++ float weight = 0.0;
++ for (int j = 0; j < 4; j++)
++ weight += m_floatpixels[i * 4 + j];
++
++ for (int j = 0; j < 4; j++)
++ m_floatpixels[i * 4 + j] /= weight;
++ }
++}
++
++//generate a lanczos3 kernel which can be loaded with RGBA format
++//each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
++//the two outer lobes of the lanczos3 kernel are added to the two lobes one step to the middle
++//this basically looks the same as lanczos3, but the kernel only has 4 taps,
++//so it can use the 4x4 convolution shader which is twice as fast as the 6x6 one
++void CConvolutionKernel::Lanczos3Fast()
++{
++ for (int i = 0; i < m_size; i++)
++ {
++ double a = 3.0;
++ double x = (double)i / (double)m_size;
++
++ //generate taps
++ m_floatpixels[i * 4 + 0] = (float)(LanczosWeight(x - 2.0, a) + LanczosWeight(x - 3.0, a));
++ m_floatpixels[i * 4 + 1] = (float) LanczosWeight(x - 1.0, a);
++ m_floatpixels[i * 4 + 2] = (float) LanczosWeight(x , a);
++ m_floatpixels[i * 4 + 3] = (float)(LanczosWeight(x + 1.0, a) + LanczosWeight(x + 2.0, a));
++
++ //any collection of 4 taps added together needs to be exactly 1.0
++ //for lanczos this is not always the case, so we take each collection of 4 taps
++ //and divide those taps by the sum of the taps
++ float weight = 0.0;
++ for (int j = 0; j < 4; j++)
++ weight += m_floatpixels[i * 4 + j];
++
++ for (int j = 0; j < 4; j++)
++ m_floatpixels[i * 4 + j] /= weight;
++ }
++}
++
++//generate a lanczos3 kernel which can be loaded with RGBA format
++//each value of RGB has one tap, so a shader can load 3 taps with a single pixel lookup
++void CConvolutionKernel::Lanczos3()
++{
++ for (int i = 0; i < m_size; i++)
++ {
++ double x = (double)i / (double)m_size;
++
++ //generate taps
++ for (int j = 0; j < 3; j++)
++ m_floatpixels[i * 4 + j] = (float)LanczosWeight(x * 2.0 + (double)(j * 2 - 3), 3.0);
++
++ m_floatpixels[i * 4 + 3] = 0.0;
++ }
++
++ //any collection of 6 taps added together needs to be exactly 1.0
++ //for lanczos this is not always the case, so we take each collection of 6 taps
++ //and divide those taps by the sum of the taps
++ for (int i = 0; i < m_size / 2; i++)
++ {
++ float weight = 0.0;
++ for (int j = 0; j < 3; j++)
++ {
++ weight += m_floatpixels[i * 4 + j];
++ weight += m_floatpixels[(i + m_size / 2) * 4 + j];
++ }
++ for (int j = 0; j < 3; j++)
++ {
++ m_floatpixels[i * 4 + j] /= weight;
++ m_floatpixels[(i + m_size / 2) * 4 + j] /= weight;
++ }
++ }
++}
++
++//generate a bicubic kernel which can be loaded with RGBA format
++//each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
++void CConvolutionKernel::Bicubic(double B, double C)
++{
++ for (int i = 0; i < m_size; i++)
++ {
++ double x = (double)i / (double)m_size;
++
++ //generate taps
++ for (int j = 0; j < 4; j++)
++ m_floatpixels[i * 4 + j] = (float)BicubicWeight(x + (double)(j - 2), B, C);
++ }
++}
++
++double CConvolutionKernel::LanczosWeight(double x, double radius)
++{
++ double ax = fabs(x);
++
++ if (ax == 0.0)
++ return 1.0;
++ else if (ax < radius)
++ return SINC(ax) * SINC(ax / radius);
++ else
++ return 0.0;
++}
++
++double CConvolutionKernel::BicubicWeight(double x, double B, double C)
++{
++ double ax = fabs(x);
++
++ if (ax<1.0)
++ {
++ return ((12 - 9*B - 6*C) * ax * ax * ax +
++ (-18 + 12*B + 6*C) * ax * ax +
++ (6 - 2*B))/6;
++ }
++ else if (ax<2.0)
++ {
++ return ((-B - 6*C) * ax * ax * ax +
++ (6*B + 30*C) * ax * ax + (-12*B - 48*C) *
++ ax + (8*B + 24*C)) / 6;
++ }
++ else
++ {
++ return 0.0;
++ }
++}
++
++
++//convert float to high byte/low byte, so the kernel can be loaded into an 8 bit texture
++//with height 2 and converted back to real float in the shader
++//it only works when the kernel texture uses nearest neighbour, but there's almost no difference
++//between that and linear interpolation
++void CConvolutionKernel::ToIntFract()
++{
++ m_intfractpixels = new uint8_t[m_size * 8];
++
++ for (int i = 0; i < m_size * 4; i++)
++ {
++ int value = MathUtils::round_int((m_floatpixels[i] + 1.0) / 2.0 * 65535.0);
++ if (value < 0)
++ value = 0;
++ else if (value > 65535)
++ value = 65535;
++
++ int integer = value / 256;
++ int fract = value % 256;
++
++ m_intfractpixels[i] = (uint8_t)integer;
++ m_intfractpixels[i + m_size * 4] = (uint8_t)fract;
++ }
++
++#if 0
++ for (int i = 0; i < 4; i++)
++ {
++ for (int j = 0; j < m_size; j++)
++ {
++ printf("%i %f %f\n",
++ i * m_size + j,
++ ((double)m_intfractpixels[j * 4 + i] + (double)m_intfractpixels[j * 4 + i + m_size * 4] / 255.0) / 255.0 * 2.0 - 1.0,
++ m_floatpixels[j * 4 + i]);
++ }
++ }
++#endif
++}
++
+--- xbmc/cores/VideoRenderers/VideoShaders/VideoFilterShader.h
++++ xbmc/cores/VideoRenderers/VideoShaders/VideoFilterShader.h
+@@ -4,6 +4,7 @@
+ #ifdef HAS_GL
+
+ #include "../../../../guilib/Shader.h"
++#include "../../../settings/VideoSettings.h"
+
+ using namespace Shaders;
+
+@@ -35,7 +36,7 @@
+ class BicubicFilterShader : public BaseVideoFilterShader
+ {
+ public:
+- BicubicFilterShader(float B=0.0f, float C=0.0f);
++ BicubicFilterShader(float B=-1.0f, float C=-1.0f);
+ void OnCompiledAndLinked();
+ bool OnEnabled();
+ void Free();
+@@ -55,6 +56,25 @@
+ float m_C;
+ };
+
++ class ConvolutionFilterShader : public BaseVideoFilterShader
++ {
++ public:
++ ConvolutionFilterShader(ESCALINGMETHOD method);
++ void OnCompiledAndLinked();
++ bool OnEnabled();
++ void Free();
++
++ protected:
++ // kernel textures
++ GLuint m_kernelTex1;
++
++ // shader handles to kernel textures
++ GLint m_hKernTex;
++
++ ESCALINGMETHOD m_method;
++ bool m_floattex; //if float textures are supported
++ };
++
+ } // end namespace
+
+ #endif
+--- xbmc/cores/VideoRenderers/VideoShaders/ConvolutionKernels.h
++++ xbmc/cores/VideoRenderers/VideoShaders/ConvolutionKernels.h
+@@ -0,0 +1,55 @@
++/*
++ * Copyright (C) 2005-2008 Team XBMC
++ * http://www.xbmc.org
++ *
++ * This Program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2, or (at your option)
++ * any later version.
++ *
++ * This Program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with XBMC; see the file COPYING. If not, write to
++ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ * http://www.gnu.org/copyleft/gpl.html
++ *
++ */
++
++#ifndef CONVOLUTIONKERNELS
++#define CONVOLUTIONKERNELS
++
++#include "system.h"
++#include "../../../settings/VideoSettings.h"
++
++class CConvolutionKernel
++{
++ public:
++ CConvolutionKernel(ESCALINGMETHOD method, int size);
++ ~CConvolutionKernel();
++
++ int GetSize() { return m_size; } //number of kernel entries, 4 floats each
++ float* GetFloatPixels() { return m_floatpixels; } //m_size * 4 floats, owned by this object
++ uint8_t* GetIntFractPixels() { return m_intfractpixels; } //m_size * 4 high bytes followed by m_size * 4 low bytes, owned by this object
++
++ private:
++ CConvolutionKernel(const CConvolutionKernel&); //declared but never defined: a copy would delete[] both buffers twice
++ CConvolutionKernel& operator=(const CConvolutionKernel&);
++
++ void Lanczos2();
++ void Lanczos3Fast();
++ void Lanczos3();
++ void Bicubic(double B, double C);
++ double LanczosWeight(double x, double radius);
++ double BicubicWeight(double x, double B, double C);
++ void ToIntFract();
++
++ int m_size;
++ float* m_floatpixels;
++ uint8_t* m_intfractpixels;
++};
++
++#endif //CONVOLUTIONKERNELS
+--- xbmc/cores/VideoRenderers/VideoShaders/Makefile
++++ xbmc/cores/VideoRenderers/VideoShaders/Makefile
+@@ -1,5 +1,5 @@
+ INCLUDES=-I. -I.. -I../../ -I../../../ -I../../../linux -I../../../../guilib
+-SRCS=YUV2RGBShader.cpp VideoFilterShader.cpp
++SRCS=YUV2RGBShader.cpp VideoFilterShader.cpp ConvolutionKernels.cpp
+
+ LIB=VideoShaders.a
+
+--- xbmc/cores/VideoRenderers/LinuxRendererGL.cpp
++++ xbmc/cores/VideoRenderers/LinuxRendererGL.cpp
+@@ -886,6 +886,19 @@
+
+ VerifyGLState();
+
++ if (m_scalingMethod == VS_SCALINGMETHOD_AUTO)
++ {
++ bool scaleSD = (int)m_sourceWidth < m_upscalingWidth && (int)m_sourceHeight < m_upscalingHeight &&
++ m_sourceHeight < 720 && m_sourceWidth < 1280;
++
++ if (Supports(VS_SCALINGMETHOD_VDPAU_HARDWARE))
++ m_scalingMethod = VS_SCALINGMETHOD_VDPAU_HARDWARE;
++ else if (Supports(VS_SCALINGMETHOD_LANCZOS3_FAST) && scaleSD)
++ m_scalingMethod = VS_SCALINGMETHOD_LANCZOS3_FAST;
++ else
++ m_scalingMethod = VS_SCALINGMETHOD_LINEAR;
++ }
++
+ switch (m_scalingMethod)
+ {
+ case VS_SCALINGMETHOD_NEAREST:
+@@ -898,13 +911,10 @@
+ m_renderQuality = RQ_SINGLEPASS;
+ return;
+
++ case VS_SCALINGMETHOD_LANCZOS2:
++ case VS_SCALINGMETHOD_LANCZOS3_FAST:
++ case VS_SCALINGMETHOD_LANCZOS3:
+ case VS_SCALINGMETHOD_CUBIC:
+- if(!glewIsSupported("GL_ARB_texture_float"))
+- {
+- CLog::Log(LOGERROR, "GL: hardware doesn't support GL_ARB_texture_float");
+- break;
+- }
+-
+ if (!m_fbo.Initialize())
+ {
+ CLog::Log(LOGERROR, "GL: Error initializing FBO");
+@@ -917,7 +927,7 @@
+ break;
+ }
+
+- m_pVideoFilterShader = new BicubicFilterShader(0.3f, 0.3f);
++ m_pVideoFilterShader = new ConvolutionFilterShader(m_scalingMethod);
+ if (!m_pVideoFilterShader->CompileAndLink())
+ {
+ CLog::Log(LOGERROR, "GL: Error compiling and linking video filter shader");
+@@ -928,8 +938,6 @@
+ m_renderQuality = RQ_MULTIPASS;
+ return;
+
+- case VS_SCALINGMETHOD_LANCZOS2:
+- case VS_SCALINGMETHOD_LANCZOS3:
+ case VS_SCALINGMETHOD_SINC8:
+ case VS_SCALINGMETHOD_NEDI:
+ CLog::Log(LOGERROR, "GL: TODO: This scaler has not yet been implemented");
+@@ -1895,16 +1903,19 @@
+ bool CLinuxRendererGL::Supports(ESCALINGMETHOD method)
+ {
+ if(method == VS_SCALINGMETHOD_NEAREST
+- || method == VS_SCALINGMETHOD_LINEAR)
++ || method == VS_SCALINGMETHOD_LINEAR
++ || method == VS_SCALINGMETHOD_AUTO)
+ return true;
+
+-
+- if(method == VS_SCALINGMETHOD_CUBIC
+- && glewIsSupported("GL_ARB_texture_float")
+- && glewIsSupported("GL_EXT_framebuffer_object")
+- && m_renderMethod == RENDER_GLSL)
+- return true;
+-
++ if(method == VS_SCALINGMETHOD_CUBIC
++ || method == VS_SCALINGMETHOD_LANCZOS2
++ || method == VS_SCALINGMETHOD_LANCZOS3_FAST
++ || method == VS_SCALINGMETHOD_LANCZOS3)
++ {
++ if (glewIsSupported("GL_EXT_framebuffer_object") && (m_renderMethod & RENDER_GLSL))
++ return true;
++ }
++
+ if (g_advancedSettings.m_videoHighQualityScaling != SOFTWARE_UPSCALING_DISABLED)
+ {
+ if(method == VS_SCALINGMETHOD_BICUBIC_SOFTWARE
+--- xbmc/GUIDialogVideoSettings.cpp
++++ xbmc/GUIDialogVideoSettings.cpp
+@@ -103,6 +103,7 @@
+ entries.push_back(make_pair(VS_SCALINGMETHOD_LINEAR , 16302));
+ entries.push_back(make_pair(VS_SCALINGMETHOD_CUBIC , 16303));
+ entries.push_back(make_pair(VS_SCALINGMETHOD_LANCZOS2 , 16304));
++ entries.push_back(make_pair(VS_SCALINGMETHOD_LANCZOS3_FAST , 16315));
+ entries.push_back(make_pair(VS_SCALINGMETHOD_LANCZOS3 , 16305));
+ entries.push_back(make_pair(VS_SCALINGMETHOD_SINC8 , 16306));
+ // entries.push_back(make_pair(VS_SCALINGMETHOD_NEDI , ?????));
+@@ -110,6 +111,7 @@
+ entries.push_back(make_pair(VS_SCALINGMETHOD_LANCZOS_SOFTWARE , 16308));
+ entries.push_back(make_pair(VS_SCALINGMETHOD_SINC_SOFTWARE , 16309));
+ entries.push_back(make_pair(VS_SCALINGMETHOD_VDPAU_HARDWARE , 13120));
++ entries.push_back(make_pair(VS_SCALINGMETHOD_AUTO , 16316));
+
+ /* remove unsupported methods */
+ for(vector<pair<int, int> >::iterator it = entries.begin(); it != entries.end();)
+--- xbmc/Settings.cpp
++++ xbmc/Settings.cpp
+@@ -772,7 +772,7 @@
+ GetInteger(pElement, "interlacemethod", interlaceMethod, VS_INTERLACEMETHOD_NONE, VS_INTERLACEMETHOD_NONE, VS_INTERLACEMETHOD_INVERSE_TELECINE);
+ m_stSettings.m_defaultVideoSettings.m_InterlaceMethod = (EINTERLACEMETHOD)interlaceMethod;
+ int scalingMethod;
+- GetInteger(pElement, "scalingmethod", scalingMethod, VS_SCALINGMETHOD_LINEAR, VS_SCALINGMETHOD_NEAREST, VS_SCALINGMETHOD_CUBIC);
++ GetInteger(pElement, "scalingmethod", scalingMethod, VS_SCALINGMETHOD_LINEAR, VS_SCALINGMETHOD_NEAREST, VS_SCALINGMETHOD_AUTO);
+ m_stSettings.m_defaultVideoSettings.m_ScalingMethod = (ESCALINGMETHOD)scalingMethod;
+
+ GetInteger(pElement, "viewmode", m_stSettings.m_defaultVideoSettings.m_ViewMode, VIEW_MODE_NORMAL, VIEW_MODE_NORMAL, VIEW_MODE_CUSTOM);