view libvo/gl_common.c @ 24903:b4cdc74f4496

remove the registration descriptor from the PSM: writing the fourcc in it makes it total crap
author nicodvb
date Fri, 02 Nov 2007 17:43:39 +0000
parents 5abb8336cba8
children 7934f12c76f0
line wrap: on
line source

/**
 * \file gl_common.c
 * \brief OpenGL helper functions used by vo_gl.c and vo_gl2.c
 *
 * Common OpenGL routines.
 * Copyleft (C) Reimar Döffinger <Reimar.Doeffinger@stud.uni-karlsruhe.de>, 2005
 * Licensed under the GNU GPL v2 or later.
 * Special thanks go to the xine team and Matthias Hopf, whose video_out_opengl.c
 * gave me lots of good ideas.
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include "gl_common.h"

/**
 * \defgroup glextfunctions OpenGL extension functions
 *
 * the pointers to these functions are acquired when the OpenGL
 * context is created
 * \{
 */
void (APIENTRY *GenBuffers)(GLsizei, GLuint *);
void (APIENTRY *DeleteBuffers)(GLsizei, const GLuint *);
void (APIENTRY *BindBuffer)(GLenum, GLuint);
GLvoid* (APIENTRY *MapBuffer)(GLenum, GLenum); 
GLboolean (APIENTRY *UnmapBuffer)(GLenum);
void (APIENTRY *BufferData)(GLenum, intptr_t, const GLvoid *, GLenum);
void (APIENTRY *CombinerParameterfv)(GLenum, const GLfloat *);
void (APIENTRY *CombinerParameteri)(GLenum, GLint);
void (APIENTRY *CombinerInput)(GLenum, GLenum, GLenum, GLenum, GLenum,
                               GLenum);
void (APIENTRY *CombinerOutput)(GLenum, GLenum, GLenum, GLenum, GLenum,
                                GLenum, GLenum, GLboolean, GLboolean,
                                GLboolean);
void (APIENTRY *BeginFragmentShader)(void);
void (APIENTRY *EndFragmentShader)(void);
void (APIENTRY *SampleMap)(GLuint, GLuint, GLenum);
void (APIENTRY *ColorFragmentOp2)(GLenum, GLuint, GLuint, GLuint, GLuint,
                                  GLuint, GLuint, GLuint, GLuint, GLuint);
void (APIENTRY *ColorFragmentOp3)(GLenum, GLuint, GLuint, GLuint, GLuint,
                                  GLuint, GLuint, GLuint, GLuint, GLuint,
                                  GLuint, GLuint, GLuint);
void (APIENTRY *SetFragmentShaderConstant)(GLuint, const GLfloat *);
void (APIENTRY *ActiveTexture)(GLenum);
void (APIENTRY *BindTexture)(GLenum, GLuint);
void (APIENTRY *MultiTexCoord2f)(GLenum, GLfloat, GLfloat);
void (APIENTRY *GenPrograms)(GLsizei, GLuint *);
void (APIENTRY *DeletePrograms)(GLsizei, const GLuint *);
void (APIENTRY *BindProgram)(GLenum, GLuint);
void (APIENTRY *ProgramString)(GLenum, GLenum, GLsizei, const GLvoid *);
void (APIENTRY *GetProgramiv)(GLenum, GLenum, GLint *);
void (APIENTRY *ProgramEnvParameter4f)(GLenum, GLuint, GLfloat, GLfloat,
                                       GLfloat, GLfloat);
int (APIENTRY *SwapInterval)(int);
void (APIENTRY *TexImage3D)(GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei,
                            GLint, GLenum, GLenum, const GLvoid *);
/** \} */ // end of glextfunctions group

//! \defgroup glgeneral OpenGL general helper functions

//! \defgroup glcontext OpenGL context management helper functions

//! \defgroup gltexture OpenGL texture handling helper functions

//! \defgroup glconversion OpenGL conversion helper functions

static GLint hqtexfmt;

/**
 * \brief adjusts the GL_UNPACK_ALIGNMENT to fit the stride.
 * \param stride number of bytes per line for which alignment should fit.
 * \ingroup glgeneral
 */
void glAdjustAlignment(int stride) {
  GLint gl_alignment;
  if (stride % 8 == 0)
    gl_alignment=8;
  else if (stride % 4 == 0)
    gl_alignment=4;
  else if (stride % 2 == 0)
    gl_alignment=2;
  else
    gl_alignment=1;
  glPixelStorei (GL_UNPACK_ALIGNMENT, gl_alignment);
}

struct gl_name_map_struct {
  GLint value;
  const char *name;
};

#undef MAP
#define MAP(a) {a, #a}
//! mapping table for the glValName function
static const struct gl_name_map_struct gl_name_map[] = {
  // internal format
  MAP(GL_R3_G3_B2), MAP(GL_RGB4), MAP(GL_RGB5), MAP(GL_RGB8),
  MAP(GL_RGB10), MAP(GL_RGB12), MAP(GL_RGB16), MAP(GL_RGBA2),
  MAP(GL_RGBA4), MAP(GL_RGB5_A1), MAP(GL_RGBA8), MAP(GL_RGB10_A2),
  MAP(GL_RGBA12), MAP(GL_RGBA16), MAP(GL_LUMINANCE8),

  // format
  MAP(GL_RGB), MAP(GL_RGBA), MAP(GL_RED), MAP(GL_GREEN), MAP(GL_BLUE),
  MAP(GL_ALPHA), MAP(GL_LUMINANCE), MAP(GL_LUMINANCE_ALPHA),
  MAP(GL_COLOR_INDEX),
  // rest 1.2 only
  MAP(GL_BGR), MAP(GL_BGRA),

  //type
  MAP(GL_BYTE), MAP(GL_UNSIGNED_BYTE), MAP(GL_SHORT), MAP(GL_UNSIGNED_SHORT),
  MAP(GL_INT), MAP(GL_UNSIGNED_INT), MAP(GL_FLOAT), MAP(GL_DOUBLE),
  MAP(GL_2_BYTES), MAP(GL_3_BYTES), MAP(GL_4_BYTES),
  // rest 1.2 only
  MAP(GL_UNSIGNED_BYTE_3_3_2), MAP(GL_UNSIGNED_BYTE_2_3_3_REV),
  MAP(GL_UNSIGNED_SHORT_5_6_5), MAP(GL_UNSIGNED_SHORT_5_6_5_REV),
  MAP(GL_UNSIGNED_SHORT_4_4_4_4), MAP(GL_UNSIGNED_SHORT_4_4_4_4_REV),
  MAP(GL_UNSIGNED_SHORT_5_5_5_1), MAP(GL_UNSIGNED_SHORT_1_5_5_5_REV),
  MAP(GL_UNSIGNED_INT_8_8_8_8), MAP(GL_UNSIGNED_INT_8_8_8_8_REV),
  MAP(GL_UNSIGNED_INT_10_10_10_2), MAP(GL_UNSIGNED_INT_2_10_10_10_REV),
  {0, 0}
};
#undef MAP

/**
 * \brief return the name of an OpenGL constant
 * \param value the constant
 * \return name of the constant or "Unknown format!"
 * \ingroup glgeneral
 */
const char *glValName(GLint value)
{
  int i = 0;

  while (gl_name_map[i].name) {
    if (gl_name_map[i].value == value)
      return gl_name_map[i].name;
    i++;
  }
  return "Unknown format!";
}

//! always return this format as internal texture format in glFindFormat
#define TEXTUREFORMAT_ALWAYS GL_RGB8
#undef TEXTUREFORMAT_ALWAYS

/**
 * \brief find the OpenGL settings coresponding to format.
 *
 * All parameters may be NULL.
 * \param fmt MPlayer format to analyze.
 * \param bpp [OUT] bits per pixel of that format.
 * \param gl_texfmt [OUT] internal texture format that fits the
 * image format, not necessarily the best for performance.
 * \param gl_format [OUT] OpenGL format for this image format.
 * \param gl_type [OUT] OpenGL type for this image format.
 * \return 1 if format is supported by OpenGL, 0 if not.
 * \ingroup gltexture
 */
int glFindFormat(uint32_t fmt, int *bpp, GLint *gl_texfmt,
                  GLenum *gl_format, GLenum *gl_type)
{
  int supported = 1;
  int dummy1;
  GLenum dummy2;
  GLint dummy3;
  if (bpp == NULL) bpp = &dummy1;
  if (gl_texfmt == NULL) gl_texfmt = &dummy3;
  if (gl_format == NULL) gl_format = &dummy2;
  if (gl_type == NULL) gl_type = &dummy2;
  
  *bpp = IMGFMT_IS_BGR(fmt)?IMGFMT_BGR_DEPTH(fmt):IMGFMT_RGB_DEPTH(fmt);
  *gl_texfmt = 3;
  switch (fmt) {
    case IMGFMT_RGB24:
      *gl_format = GL_RGB;
      *gl_type = GL_UNSIGNED_BYTE;
      break;
    case IMGFMT_RGBA:
      *gl_texfmt = 4;
      *gl_format = GL_RGBA;
      *gl_type = GL_UNSIGNED_BYTE;
      break;
    case IMGFMT_YV12:
      supported = 0; // no native YV12 support
    case IMGFMT_Y800:
    case IMGFMT_Y8:
      *gl_texfmt = 1;
      *bpp = 8;
      *gl_format = GL_LUMINANCE;
      *gl_type = GL_UNSIGNED_BYTE;
      break;
#if 0
    // we do not support palettized formats, although the format the
    // swscale produces works
    case IMGFMT_RGB8:
      gl_format = GL_RGB;
      gl_type = GL_UNSIGNED_BYTE_2_3_3_REV;
      break;
#endif
    case IMGFMT_RGB15:
      *gl_format = GL_RGBA;
      *gl_type = GL_UNSIGNED_SHORT_1_5_5_5_REV;
      break;
    case IMGFMT_RGB16:
      *gl_format = GL_RGB;
      *gl_type = GL_UNSIGNED_SHORT_5_6_5_REV;
      break;
#if 0
    case IMGFMT_BGR8:
      // special case as red and blue have a differen number of bits.
      // GL_BGR and GL_UNSIGNED_BYTE_3_3_2 isn't supported at least
      // by nVidia drivers, and in addition would give more bits to
      // blue than to red, which isn't wanted
      gl_format = GL_RGB;
      gl_type = GL_UNSIGNED_BYTE_3_3_2;
      break;
#endif
    case IMGFMT_BGR15:
      *gl_format = GL_BGRA;
      *gl_type = GL_UNSIGNED_SHORT_1_5_5_5_REV;
      break;
    case IMGFMT_BGR16:
      *gl_format = GL_RGB;
      *gl_type = GL_UNSIGNED_SHORT_5_6_5;
      break;
    case IMGFMT_BGR24:
      *gl_format = GL_BGR;
      *gl_type = GL_UNSIGNED_BYTE;
      break;
    case IMGFMT_BGRA:
      *gl_texfmt = 4;
      *gl_format = GL_BGRA;
      *gl_type = GL_UNSIGNED_BYTE;
      break;
    default:
      *gl_texfmt = 4;
      *gl_format = GL_RGBA;
      *gl_type = GL_UNSIGNED_BYTE;
      supported = 0;
  }
#ifdef TEXTUREFORMAT_ALWAYS
  *gl_texfmt = TEXTUREFORMAT_ALWAYS;
#endif
  return supported;
}

static void *setNull(const GLubyte *s) {
  return NULL;
}

typedef struct {
  void **funcptr;
  const char *extstr;
  const char *funcnames[7];
} extfunc_desc_t;

static const extfunc_desc_t extfuncs[] = {
  {(void **)&GenBuffers, NULL, {"glGenBuffers", "glGenBuffersARB", NULL}},
  {(void **)&DeleteBuffers, NULL, {"glDeleteBuffers", "glDeleteBuffersARB", NULL}},
  {(void **)&BindBuffer, NULL, {"glBindBuffer", "glBindBufferARB", NULL}},
  {(void **)&MapBuffer, NULL, {"glMapBuffer", "glMapBufferARB", NULL}},
  {(void **)&UnmapBuffer, NULL, {"glUnmapBuffer", "glUnmapBufferARB", NULL}},
  {(void **)&BufferData, NULL, {"glBufferData", "glBufferDataARB", NULL}},
  {(void **)&CombinerParameterfv, "NV_register_combiners", {"glCombinerParameterfv", "glCombinerParameterfvNV", NULL}},
  {(void **)&CombinerParameteri, "NV_register_combiners", {"glCombinerParameteri", "glCombinerParameteriNV", NULL}},
  {(void **)&CombinerInput, "NV_register_combiners", {"glCombinerInput", "glCombinerInputNV", NULL}},
  {(void **)&CombinerOutput, "NV_register_combiners", {"glCombinerOutput", "glCombinerOutputNV", NULL}},
  {(void **)&BeginFragmentShader, "ATI_fragment_shader", {"glBeginFragmentShaderATI", NULL}},
  {(void **)&EndFragmentShader, "ATI_fragment_shader", {"glEndFragmentShaderATI", NULL}},
  {(void **)&SampleMap, "ATI_fragment_shader", {"glSampleMapATI", NULL}},
  {(void **)&ColorFragmentOp2, "ATI_fragment_shader", {"glColorFragmentOp2ATI", NULL}},
  {(void **)&ColorFragmentOp3, "ATI_fragment_shader", {"glColorFragmentOp3ATI", NULL}},
  {(void **)&SetFragmentShaderConstant, "ATI_fragment_shader", {"glSetFragmentShaderConstantATI", NULL}},
  {(void **)&ActiveTexture, NULL, {"glActiveTexture", "glActiveTextureARB", NULL}},
  {(void **)&BindTexture, NULL, {"glBindTexture", "glBindTextureARB", "glBindTextureEXT", NULL}},
  {(void **)&MultiTexCoord2f, NULL, {"glMultiTexCoord2f", "glMultiTexCoord2fARB", NULL}},
  {(void **)&GenPrograms, "_program", {"glGenProgramsARB", NULL}},
  {(void **)&DeletePrograms, "_program", {"glDeleteProgramsARB", NULL}},
  {(void **)&BindProgram, "_program", {"glBindProgramARB", NULL}},
  {(void **)&ProgramString, "_program", {"glProgramStringARB", NULL}},
  {(void **)&GetProgramiv, "_program", {"glGetProgramivARB", NULL}},
  {(void **)&ProgramEnvParameter4f, "_program", {"glProgramEnvParameter4fARB", NULL}},
  {(void **)&SwapInterval, "_swap_control", {"glXSwapInterval", "glXSwapIntervalEXT", "glXSwapIntervalSGI", "wglSwapInterval", "wglSwapIntervalEXT", "wglSwapIntervalSGI", NULL}},
  {(void **)&TexImage3D, NULL, {"glTexImage3D", NULL}},
  {NULL}
};

/**
 * \brief find the function pointers of some useful OpenGL extensions
 * \param getProcAddress function to resolve function names, may be NULL
 * \param ext2 an extra extension string
 */
static void getFunctions(void *(*getProcAddress)(const GLubyte *),
                         const char *ext2) {
  const extfunc_desc_t *dsc;
  const char *extensions = (const char *)glGetString(GL_EXTENSIONS);
  char *allexts;
  if (!extensions) extensions = "";
  if (!ext2) ext2 = "";
  allexts = malloc(strlen(extensions) + strlen(ext2) + 2);
  strcpy(allexts, extensions);
  strcat(allexts, " ");
  strcat(allexts, ext2);
  mp_msg(MSGT_VO, MSGL_V, "OpenGL extensions string:\n%s\n", allexts);
  if (!getProcAddress)
    getProcAddress = setNull;
  for (dsc = extfuncs; dsc->funcptr; dsc++) {
    void *ptr = NULL;
    int i;
    if (!dsc->extstr || strstr(allexts, dsc->extstr)) {
      for (i = 0; !ptr && dsc->funcnames[i]; i++)
        ptr = getProcAddress((const GLubyte *)dsc->funcnames[i]);
    }
    *(dsc->funcptr) = ptr;
  }
  if (strstr(allexts, "_texture_float"))
    hqtexfmt = GL_RGB32F;
  else if (strstr(allexts, "NV_float_buffer"))
    hqtexfmt = GL_FLOAT_RGB32_NV;
  else
    hqtexfmt = GL_RGB16;
  free(allexts);
}

/**
 * \brief create a texture and set some defaults
 * \param target texture taget, usually GL_TEXTURE_2D
 * \param fmt internal texture format
 * \param filter filter used for scaling, e.g. GL_LINEAR
 * \param w texture width
 * \param h texture height
 * \param val luminance value to fill texture with
 * \ingroup gltexture
 */
void glCreateClearTex(GLenum target, GLenum fmt, GLint filter,
                      int w, int h, unsigned char val) {
  GLfloat fval = (GLfloat)val / 255.0;
  GLfloat border[4] = {fval, fval, fval, fval};
  GLenum clrfmt = (fmt == GL_ALPHA) ? GL_ALPHA : GL_LUMINANCE;
  char *init = malloc(w * h);
  memset(init, val, w * h);
  glAdjustAlignment(w);
  glPixelStorei(GL_UNPACK_ROW_LENGTH, w);
  glTexImage2D(target, 0, fmt, w, h, 0, clrfmt, GL_UNSIGNED_BYTE, init);
  glTexParameterf(target, GL_TEXTURE_PRIORITY, 1.0);
  glTexParameteri(target, GL_TEXTURE_MIN_FILTER, filter);
  glTexParameteri(target, GL_TEXTURE_MAG_FILTER, filter);
  glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
  glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
  // Border texels should not be used with CLAMP_TO_EDGE
  // We set a sane default anyway.
  glTexParameterfv(target, GL_TEXTURE_BORDER_COLOR, border);
  free(init);
}

/**
 * \brief skips whitespace and comments
 * \param f file to read from
 */
static void ppm_skip(FILE *f) {
  int c, comment = 0;
  do {
    c = fgetc(f);
    if (c == '#')
      comment = 1;
    if (c == '\n')
      comment = 0;
  } while (c != EOF && (isspace(c) || comment));
  if (c != EOF)
    ungetc(c, f);
}

#define MAXDIM (16 * 1024)

/**
 * \brief creates a texture from a PPM file
 * \param target texture taget, usually GL_TEXTURE_2D
 * \param fmt internal texture format, 0 for default
 * \param filter filter used for scaling, e.g. GL_LINEAR
 * \param f file to read PPM from
 * \param width [out] width of texture
 * \param height [out] height of texture
 * \param maxval [out] maxval value from PPM file
 * \return 0 on error, 1 otherwise
 * \ingroup gltexture
 */
int glCreatePPMTex(GLenum target, GLenum fmt, GLint filter,
                   FILE *f, int *width, int *height, int *maxval) {
  unsigned w, h, m, val, bpp;
  char *data;
  ppm_skip(f);
  if (fgetc(f) != 'P' || fgetc(f) != '6')
    return 0;
  ppm_skip(f);
  if (fscanf(f, "%u", &w) != 1)
    return 0;
  ppm_skip(f);
  if (fscanf(f, "%u", &h) != 1)
    return 0;
  ppm_skip(f);
  if (fscanf(f, "%u", &m) != 1)
    return 0;
  val = fgetc(f);
  if (!isspace(val))
    return 0;
  if (w > MAXDIM || h > MAXDIM)
    return 0;
  bpp = (m > 255) ? 6 : 3;
  data = malloc(w * h * bpp);
  if (fread(data, w * bpp, h, f) != h)
    return 0;
  if (!fmt) {
    fmt = (m > 255) ? hqtexfmt : 3;
    if (fmt == GL_FLOAT_RGB32_NV && target != GL_TEXTURE_RECTANGLE)
      fmt = GL_RGB16;
  }
  glCreateClearTex(target, fmt, filter, w, h, 0);
  glUploadTex(target, GL_RGB, (m > 255) ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
              data, w * bpp, 0, 0, w, h, 0);
  free(data);
  if (width) *width = w;
  if (height) *height = h;
  if (maxval) *maxval = m;
  return 1;
}

/**
 * \brief return the number of bytes per pixel for the given format
 * \param format OpenGL format
 * \param type OpenGL type
 * \return bytes per pixel
 * \ingroup glgeneral
 *
 * Does not handle all possible variants, just those used by MPlayer
 */
int glFmt2bpp(GLenum format, GLenum type) {
  int component_size = 0;
  switch (type) {
    case GL_UNSIGNED_BYTE_3_3_2:
    case GL_UNSIGNED_BYTE_2_3_3_REV:
      return 1;
    case GL_UNSIGNED_SHORT_5_5_5_1:
    case GL_UNSIGNED_SHORT_1_5_5_5_REV:
    case GL_UNSIGNED_SHORT_5_6_5:
    case GL_UNSIGNED_SHORT_5_6_5_REV:
      return 2;
    case GL_UNSIGNED_BYTE:
      component_size = 1;
      break;
    case GL_UNSIGNED_SHORT:
      component_size = 2;
      break;
  }
  switch (format) {
    case GL_LUMINANCE:
    case GL_ALPHA:
      return component_size;
    case GL_RGB:
    case GL_BGR:
      return 3 * component_size;
    case GL_RGBA:
    case GL_BGRA:
      return 4 * component_size;
  }
  return 0; // unknown
}

/**
 * \brief upload a texture, handling things like stride and slices
 * \param target texture target, usually GL_TEXTURE_2D
 * \param format OpenGL format of data
 * \param type OpenGL type of data
 * \param dataptr data to upload
 * \param stride data stride
 * \param x x offset in texture
 * \param y y offset in texture
 * \param w width of the texture part to upload
 * \param h height of the texture part to upload
 * \param slice height of an upload slice, 0 for all at once
 * \ingroup gltexture
 */
void glUploadTex(GLenum target, GLenum format, GLenum type,
                 const void *dataptr, int stride,
                 int x, int y, int w, int h, int slice) {
  const uint8_t *data = dataptr;
  int y_max = y + h;
  if (w <= 0 || h <= 0) return;
  if (slice <= 0)
    slice = h;
  if (stride < 0) {
    data += (h - 1) * stride;
    stride = -stride;
  }
  // this is not always correct, but should work for MPlayer
  glAdjustAlignment(stride);
  glPixelStorei(GL_UNPACK_ROW_LENGTH, stride / glFmt2bpp(format, type));
  for (; y + slice <= y_max; y += slice) {
    glTexSubImage2D(target, 0, x, y, w, slice, format, type, data);
    data += stride * slice;
  }
  if (y < y_max)
    glTexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data);
}

static void fillUVcoeff(GLfloat *ucoef, GLfloat *vcoef,
                        float uvcos, float uvsin) {
  int i;
  ucoef[0] = 0 * uvcos + 1.403 * uvsin;
  vcoef[0] = 0 * uvsin + 1.403 * uvcos;
  ucoef[1] = -0.344 * uvcos + -0.714 * uvsin;
  vcoef[1] = -0.344 * uvsin + -0.714 * uvcos;
  ucoef[2] = 1.770 * uvcos + 0 * uvsin;
  vcoef[2] = 1.770 * uvsin + 0 * uvcos;
  ucoef[3] = 0;
  vcoef[3] = 0;
  // Coefficients (probably) must be in [0, 1] range, whereas they originally
  // are in [-2, 2] range, so here comes the trick:
  // First put them in the [-0.5, 0.5] range, then add 0.5.
  // This can be undone with the HALF_BIAS and SCALE_BY_FOUR arguments
  // for CombinerInput and CombinerOutput (or the respective ATI variants)
  for (i = 0; i < 4; i++) {
    ucoef[i] = ucoef[i] * 0.25 + 0.5;
    vcoef[i] = vcoef[i] * 0.25 + 0.5;
  }
}

/**
 * \brief Setup register combiners for YUV to RGB conversion.
 * \param uvcos used for saturation and hue adjustment
 * \param uvsin used for saturation and hue adjustment
 */
static void glSetupYUVCombiners(float uvcos, float uvsin) {
  GLfloat ucoef[4];
  GLfloat vcoef[4];
  GLint i;
  if (!CombinerInput || !CombinerOutput ||
      !CombinerParameterfv || !CombinerParameteri) {
    mp_msg(MSGT_VO, MSGL_FATAL, "[gl] Combiner functions missing!\n");
    return;
  }
  glGetIntegerv(GL_MAX_GENERAL_COMBINERS_NV, &i);
  if (i < 2)
    mp_msg(MSGT_VO, MSGL_ERR,
           "[gl] 2 general combiners needed for YUV combiner support (found %i)\n", i);
  glGetIntegerv (GL_MAX_TEXTURE_UNITS, &i);
  if (i < 3)
    mp_msg(MSGT_VO, MSGL_ERR,
           "[gl] 3 texture units needed for YUV combiner support (found %i)\n", i);
  fillUVcoeff(ucoef, vcoef, uvcos, uvsin);
  CombinerParameterfv(GL_CONSTANT_COLOR0_NV, ucoef);
  CombinerParameterfv(GL_CONSTANT_COLOR1_NV, vcoef);

  // UV first, like this green component cannot overflow
  CombinerInput(GL_COMBINER0_NV, GL_RGB, GL_VARIABLE_A_NV,
                GL_TEXTURE1, GL_HALF_BIAS_NORMAL_NV, GL_RGB);
  CombinerInput(GL_COMBINER0_NV, GL_RGB, GL_VARIABLE_B_NV,
                GL_CONSTANT_COLOR0_NV, GL_HALF_BIAS_NORMAL_NV, GL_RGB);
  CombinerInput(GL_COMBINER0_NV, GL_RGB, GL_VARIABLE_C_NV,
                GL_TEXTURE2, GL_HALF_BIAS_NORMAL_NV, GL_RGB);
  CombinerInput(GL_COMBINER0_NV, GL_RGB, GL_VARIABLE_D_NV,
                GL_CONSTANT_COLOR1_NV, GL_HALF_BIAS_NORMAL_NV, GL_RGB);
  CombinerOutput(GL_COMBINER0_NV, GL_RGB, GL_DISCARD_NV, GL_DISCARD_NV,
                 GL_SPARE0_NV, GL_SCALE_BY_FOUR_NV, GL_NONE, GL_FALSE,
                 GL_FALSE, GL_FALSE);

  // stage 2
  CombinerInput(GL_COMBINER1_NV, GL_RGB, GL_VARIABLE_A_NV, GL_SPARE0_NV,
                GL_SIGNED_IDENTITY_NV, GL_RGB);
  CombinerInput(GL_COMBINER1_NV, GL_RGB, GL_VARIABLE_B_NV, GL_ZERO,
                 GL_UNSIGNED_INVERT_NV, GL_RGB);
  CombinerInput(GL_COMBINER1_NV, GL_RGB, GL_VARIABLE_C_NV,
                GL_TEXTURE0, GL_SIGNED_IDENTITY_NV, GL_RGB);
  CombinerInput(GL_COMBINER1_NV, GL_RGB, GL_VARIABLE_D_NV, GL_ZERO,
                GL_UNSIGNED_INVERT_NV, GL_RGB);
  CombinerOutput(GL_COMBINER1_NV, GL_RGB, GL_DISCARD_NV, GL_DISCARD_NV,
                 GL_SPARE0_NV, GL_NONE, GL_NONE, GL_FALSE,
                 GL_FALSE, GL_FALSE);

  // leave final combiner stage in default mode
  CombinerParameteri(GL_NUM_GENERAL_COMBINERS_NV, 2);
}

/**
 * \brief Setup ATI version of register combiners for YUV to RGB conversion.
 * \param uvcos used for saturation and hue adjustment
 * \param uvsin used for saturation and hue adjustment
 *
 * ATI called this fragment shader, but the name is confusing in the
 * light of a very different OpenGL 2.0 extension with the same name
 */
static void glSetupYUVCombinersATI(float uvcos, float uvsin) {
  GLfloat ucoef[4];
  GLfloat vcoef[4];
  GLint i;
  if (!BeginFragmentShader || !EndFragmentShader ||
      !SetFragmentShaderConstant || !SampleMap ||
      !ColorFragmentOp2 || !ColorFragmentOp3) {
    mp_msg(MSGT_VO, MSGL_FATAL, "[gl] Combiner (ATI) functions missing!\n");
    return;
  }
  glGetIntegerv(GL_NUM_FRAGMENT_REGISTERS_ATI, &i);
  if (i < 3)
    mp_msg(MSGT_VO, MSGL_ERR,
           "[gl] 3 registers needed for YUV combiner (ATI) support (found %i)\n", i);
  glGetIntegerv (GL_MAX_TEXTURE_UNITS, &i);
  if (i < 3)
    mp_msg(MSGT_VO, MSGL_ERR,
           "[gl] 3 texture units needed for YUV combiner (ATI) support (found %i)\n", i);
  fillUVcoeff(ucoef, vcoef, uvcos, uvsin);
  BeginFragmentShader();
  SetFragmentShaderConstant(GL_CON_0_ATI, ucoef);
  SetFragmentShaderConstant(GL_CON_1_ATI, vcoef);
  SampleMap(GL_REG_0_ATI, GL_TEXTURE0, GL_SWIZZLE_STR_ATI);
  SampleMap(GL_REG_1_ATI, GL_TEXTURE1, GL_SWIZZLE_STR_ATI);
  SampleMap(GL_REG_2_ATI, GL_TEXTURE2, GL_SWIZZLE_STR_ATI);
  // UV first, like this green component cannot overflow
  ColorFragmentOp2(GL_MUL_ATI, GL_REG_1_ATI, GL_NONE, GL_NONE,
                   GL_REG_1_ATI, GL_NONE, GL_BIAS_BIT_ATI,
                   GL_CON_0_ATI, GL_NONE, GL_BIAS_BIT_ATI);
  ColorFragmentOp3(GL_MAD_ATI, GL_REG_2_ATI, GL_NONE, GL_4X_BIT_ATI,
                   GL_REG_2_ATI, GL_NONE, GL_BIAS_BIT_ATI,
                   GL_CON_1_ATI, GL_NONE, GL_BIAS_BIT_ATI,
                   GL_REG_1_ATI, GL_NONE, GL_NONE);
  ColorFragmentOp2(GL_ADD_ATI, GL_REG_0_ATI, GL_NONE, GL_NONE,
                   GL_REG_0_ATI, GL_NONE, GL_NONE,
                   GL_REG_2_ATI, GL_NONE, GL_NONE);
  EndFragmentShader();
}

/**
 * \brief helper function for gen_spline_lookup_tex
 * \param x subpixel-position ((0,1) range) to calculate weights for
 * \param dst where to store transformed weights, must provide space for 4 GLfloats
 *
 * calculates the weights and stores them after appropriate transformation
 * for the scaler fragment program.
 */
static void store_weights(float x, GLfloat *dst) {
  float w0 = (((-1 * x + 3) * x - 3) * x + 1) / 6;
  float w1 = ((( 3 * x - 6) * x + 0) * x + 4) / 6;
  float w2 = (((-3 * x + 3) * x + 3) * x + 1) / 6;
  float w3 = ((( 1 * x + 0) * x + 0) * x + 0) / 6;
  *dst++ = 1 + x - w1 / (w0 + w1);
  *dst++ = 1 - x + w3 / (w2 + w3);
  *dst++ = w0 + w1;
  *dst++ = 0;
}

//! to avoid artefacts this should be rather large
#define LOOKUP_BSPLINE_RES (2 * 1024)
/**
 * \brief creates the 1D lookup texture needed for fast higher-order filtering
 * \param unit texture unit to attach texture to
 */
static void gen_spline_lookup_tex(GLenum unit) {
  GLfloat tex[4 * LOOKUP_BSPLINE_RES];
  GLfloat *tp = tex;
  int i;
  for (i = 0; i < LOOKUP_BSPLINE_RES; i++) {
    float x = (float)(i + 0.5) / LOOKUP_BSPLINE_RES;
    store_weights(x, tp);
    tp += 4;
  }
  store_weights(0, tex);
  store_weights(1, &tex[4 * (LOOKUP_BSPLINE_RES - 1)]);
  ActiveTexture(unit);
  glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA16, LOOKUP_BSPLINE_RES, 0, GL_RGBA, GL_FLOAT, tex);
  glTexParameterf(GL_TEXTURE_1D, GL_TEXTURE_PRIORITY, 1.0);
  glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
  glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_REPEAT);
  ActiveTexture(GL_TEXTURE0);
}

static const char *bilin_filt_template =
  "TEX yuv.%c, fragment.texcoord[%c], texture[%c], %s;";

#define BICUB_FILT_MAIN(textype) \
  /* first y-interpolation */ \
  "ADD coord, fragment.texcoord[%c].xyxy, cdelta.xyxw;" \
  "ADD coord2, fragment.texcoord[%c].xyxy, cdelta.zyzw;" \
  "TEX a.r, coord.xyxy, texture[%c], "textype";" \
  "TEX a.g, coord.zwzw, texture[%c], "textype";" \
  /* second y-interpolation */ \
  "TEX b.r, coord2.xyxy, texture[%c], "textype";" \
  "TEX b.g, coord2.zwzw, texture[%c], "textype";" \
  "LRP a.b, parmy.b, a.rrrr, a.gggg;" \
  "LRP a.a, parmy.b, b.rrrr, b.gggg;" \
  /* x-interpolation */ \
  "LRP yuv.%c, parmx.b, a.bbbb, a.aaaa;"

static const char *bicub_filt_template_2D =
  "MAD coord.xy, fragment.texcoord[%c], {%f, %f}, {0.5, 0.5};"
  "TEX parmx, coord.x, texture[%c], 1D;"
  "MUL cdelta.xz, parmx.rrgg, {-%f, 0, %f, 0};"
  "TEX parmy, coord.y, texture[%c], 1D;"
  "MUL cdelta.yw, parmy.rrgg, {0, -%f, 0, %f};"
  BICUB_FILT_MAIN("2D");

static const char *bicub_filt_template_RECT =
  "ADD coord, fragment.texcoord[%c], {0.5, 0.5};"
  "TEX parmx, coord.x, texture[%c], 1D;"
  "MUL cdelta.xz, parmx.rrgg, {-1, 0, 1, 0};"
  "TEX parmy, coord.y, texture[%c], 1D;"
  "MUL cdelta.yw, parmy.rrgg, {0, -1, 0, 1};"
  BICUB_FILT_MAIN("RECT");

#define CALCWEIGHTS(t, s) \
  "MAD "t", {-0.5, 0.1666, 0.3333, -0.3333}, "s", {1, 0, -0.5, 0.5};" \
  "MAD "t", "t", "s", {0, 0, -0.5, 0.5};" \
  "MAD "t", "t", "s", {-0.6666, 0, 0.8333, 0.1666};" \
  "RCP a.x, "t".z;" \
  "RCP a.y, "t".w;" \
  "MAD "t".xy, "t".xyxy, a.xyxy, {1, 1, 0, 0};" \
  "ADD "t".x, "t".xxxx, "s";" \
  "SUB "t".y, "t".yyyy, "s";"

static const char *bicub_notex_filt_template_2D =
  "MAD coord.xy, fragment.texcoord[%c], {%f, %f}, {0.5, 0.5};"
  "FRC coord.xy, coord.xyxy;"
  CALCWEIGHTS("parmx", "coord.xxxx")
  "MUL cdelta.xz, parmx.rrgg, {-%f, 0, %f, 0};"
  CALCWEIGHTS("parmy", "coord.yyyy")
  "MUL cdelta.yw, parmy.rrgg, {0, -%f, 0, %f};"
  BICUB_FILT_MAIN("2D");

static const char *bicub_notex_filt_template_RECT =
  "ADD coord, fragment.texcoord[%c], {0.5, 0.5};"
  "FRC coord.xy, coord.xyxy;"
  CALCWEIGHTS("parmx", "coord.xxxx")
  "MUL cdelta.xz, parmx.rrgg, {-1, 0, 1, 0};"
  CALCWEIGHTS("parmy", "coord.yyyy")
  "MUL cdelta.yw, parmy.rrgg, {0, -1, 0, 1};"
  BICUB_FILT_MAIN("RECT");

#define BICUB_X_FILT_MAIN(textype) \
  "ADD coord, fragment.texcoord[%c].xyxy, cdelta.xyxw;" \
  "ADD coord2, fragment.texcoord[%c].xyxy, cdelta.zyzw;" \
  "TEX a.r, coord, texture[%c], "textype";" \
  "TEX b.r, coord2, texture[%c], "textype";" \
  /* x-interpolation */ \
  "LRP yuv.%c, parmx.b, a.rrrr, b.rrrr;"

static const char *bicub_x_filt_template_2D =
  "MAD coord.x, fragment.texcoord[%c], {%f, %f}, {0.5, 0.5};"
  "TEX parmx, coord, texture[%c], 1D;"
  "MUL cdelta.xz, parmx.rrgg, {-%f, 0, %f, 0};"
  BICUB_X_FILT_MAIN("2D");

static const char *bicub_x_filt_template_RECT =
  "ADD coord.x, fragment.texcoord[%c], {0.5, 0.5};"
  "TEX parmx, coord, texture[%c], 1D;"
  "MUL cdelta.xz, parmx.rrgg, {-1, 0, 1, 0};"
  BICUB_X_FILT_MAIN("RECT");

static const char *yuv_prog_template =
  "PARAM ycoef = {%.4f, %.4f, %.4f};"
  "PARAM ucoef = {%.4f, %.4f, %.4f};"
  "PARAM vcoef = {%.4f, %.4f, %.4f};"
  "PARAM offsets = {%.4f, %.4f, %.4f};"
  "TEMP res;"
  "MAD res.rgb, yuv.rrrr, ycoef, offsets;"
  "MAD res.rgb, yuv.gggg, ucoef, res;"
  "MAD result.color.rgb, yuv.bbbb, vcoef, res;"
  "END";

static const char *yuv_pow_prog_template =
  "PARAM ycoef = {%.4f, %.4f, %.4f};"
  "PARAM ucoef = {%.4f, %.4f, %.4f};"
  "PARAM vcoef = {%.4f, %.4f, %.4f};"
  "PARAM offsets = {%.4f, %.4f, %.4f};"
  "PARAM gamma = {%.4f, %.4f, %.4f};"
  "TEMP res;"
  "MAD res.rgb, yuv.rrrr, ycoef, offsets;"
  "MAD res.rgb, yuv.gggg, ucoef, res;"
  "MAD_SAT res.rgb, yuv.bbbb, vcoef, res;"
  "POW result.color.r, res.r, gamma.r;"
  "POW result.color.g, res.g, gamma.g;"
  "POW result.color.b, res.b, gamma.b;"
  "END";

static const char *yuv_lookup_prog_template =
  "PARAM ycoef = {%.4f, %.4f, %.4f, 0};"
  "PARAM ucoef = {%.4f, %.4f, %.4f, 0};"
  "PARAM vcoef = {%.4f, %.4f, %.4f, 0};"
  "PARAM offsets = {%.4f, %.4f, %.4f, 0.125};"
  "TEMP res;"
  "MAD res, yuv.rrrr, ycoef, offsets;"
  "MAD res.rgb, yuv.gggg, ucoef, res;"
  "MAD res.rgb, yuv.bbbb, vcoef, res;"
  "TEX result.color.r, res.raaa, texture[%c], 2D;"
  "ADD res.a, res.a, 0.25;"
  "TEX result.color.g, res.gaaa, texture[%c], 2D;"
  "ADD res.a, res.a, 0.25;"
  "TEX result.color.b, res.baaa, texture[%c], 2D;"
  "END";

static const char *yuv_lookup3d_prog_template =
  "TEX result.color, yuv, texture[%c], 3D;"
  "END";

/**
 * \brief creates and initializes helper textures needed for scaling texture read
 * \param scaler scaler type to create texture for
 * \param texu contains next free texture unit number
 * \param texs texture unit ids for the scaler are stored in this array
 */
static void create_scaler_textures(int scaler, int *texu, char *texs) {
  switch (scaler) {
    case YUV_SCALER_BILIN:
      break;
    case YUV_SCALER_BICUB:
    case YUV_SCALER_BICUB_X:
      texs[0] = (*texu)++;
      gen_spline_lookup_tex(GL_TEXTURE0 + texs[0]);
      texs[0] += '0';
      break;
    default:
      mp_msg(MSGT_VO, MSGL_ERR, "[gl] unknown scaler type %i\n", scaler);
  }
}

static void gen_gamma_map(unsigned char *map, int size, float gamma);

static void get_yuv2rgb_coeffs(float brightness, float contrast, float uvcos, float uvsin,
                               float *ry, float *ru, float *rv, float *rc,
                               float *gy, float *gu, float *gv, float *gc,
                               float *by, float *bu, float *bv, float *bc) {
  *ry = 1.164 * contrast;
  *gy = 1.164 * contrast;
  *by = 1.164 * contrast;
  *ru = 0 * uvcos + 1.596 * uvsin;
  *rv = 0 * uvsin + 1.596 * uvcos;
  *gu = -0.391 * uvcos + -0.813 * uvsin;
  *gv = -0.391 * uvsin + -0.813 * uvcos;
  *bu = 2.018 * uvcos + 0 * uvsin;
  *bv = 2.018 * uvsin + 0 * uvcos;
  *rc = (-16 * *ry + (-128) * *ru + (-128) * *rv) / 255.0 + brightness;
  *gc = (-16 * *gy + (-128) * *gu + (-128) * *gv) / 255.0 + brightness;
  *bc = (-16 * *by + (-128) * *bu + (-128) * *bv) / 255.0 + brightness;
  // these "center" contrast control so that e.g. a contrast of 0
  // leads to a grey image, not a black one
  *rc += 0.5 - contrast / 2.0;
  *gc += 0.5 - contrast / 2.0;
  *bc += 0.5 - contrast / 2.0;
}

//! size of gamma map use to avoid slow exp function in gen_yuv2rgb_map
#define GMAP_SIZE (1024)
/**
 * \brief generate a 3D YUV -> RGB map
 * \param map where to store map. Must provide space for (size + 2)^3 elements
 * \param size size of the map, excluding border
 * \param brightness desired brightness adjustment for conversion
 * \param contrast desired contrast adjustment for conversion
 * \param uvcos desired hue/saturation adjustment for conversion
 * \param uvsin desired hue/saturation adjustment for conversion
 * \param rgamma desired red gamma adjustment for conversion
 * \param ggamma desired green gamma adjustment for conversion
 * \param bgamma desired blue gamma adjustment for conversion
 */
static void gen_yuv2rgb_map(unsigned char *map, int size, float brightness,
                            float contrast, float uvcos, float uvsin,
                            float rgamma, float ggamma, float bgamma) {
  int i, j, k;
  float step = 1.0 / size;
  float y, u, v;
  float r, g, b;
  float ry, ru, rv, rc;
  float gy, gu, gv, gc;
  float by, bu, bv, bc;
  unsigned char gmaps[3][GMAP_SIZE];
  gen_gamma_map(gmaps[0], GMAP_SIZE, rgamma);
  gen_gamma_map(gmaps[1], GMAP_SIZE, ggamma);
  gen_gamma_map(gmaps[2], GMAP_SIZE, bgamma);
  get_yuv2rgb_coeffs(brightness, contrast, uvcos, uvsin,
          &ry, &ru, &rv, &rc, &gy, &gu, &gv, &gc, &by, &bu, &bv, &bc);
  ry *= GMAP_SIZE - 1; ru *= GMAP_SIZE - 1; rv *= GMAP_SIZE - 1; rc *= GMAP_SIZE - 1;
  gy *= GMAP_SIZE - 1; gu *= GMAP_SIZE - 1; gv *= GMAP_SIZE - 1; gc *= GMAP_SIZE - 1;
  by *= GMAP_SIZE - 1; bu *= GMAP_SIZE - 1; bv *= GMAP_SIZE - 1; bc *= GMAP_SIZE - 1;
  v = 0;
  for (i = -1; i <= size; i++) {
    u = 0;
    for (j = -1; j <= size; j++) {
      y = 0;
      for (k = -1; k <= size; k++) {
        r = ry * y + ru * u + rv * v + rc;
        g = gy * y + gu * u + gv * v + gc;
        b = by * y + bu * u + bv * v + bc;
        if (r > GMAP_SIZE - 1) r = GMAP_SIZE - 1;
        if (r < 0) r = 0;
        if (g > GMAP_SIZE - 1) g = GMAP_SIZE - 1;
        if (g < 0) g = 0;
        if (b > GMAP_SIZE - 1) b = GMAP_SIZE - 1;
        if (b < 0) b = 0;
        *map++ = gmaps[0][(int)r];
        *map++ = gmaps[1][(int)g];
        *map++ = gmaps[2][(int)b];
        y += (k == -1 || k == size - 1) ? step / 2 : step;
      }
      u += (j == -1 || j == size - 1) ? step / 2 : step;
    }
    v += (i == -1 || i == size - 1) ? step / 2 : step;
  }
}

//! resolution of texture for gamma lookup table
#define LOOKUP_RES 512
//! resolution for 3D yuv->rgb conversion lookup table
#define LOOKUP_3DRES 32
/**
 * \brief creates and initializes helper textures needed for yuv conversion
 * \param texu contains next free texture unit number
 * \param texs texture unit ids for the conversion are stored in this array
 * \param brightness desired brightness adjustment for conversion
 * \param contrast desired contrast adjustment for conversion
 * \param uvcos desired hue/saturation adjustment for conversion
 * \param uvsin desired hue/saturation adjustment for conversion
 * \param rgamma desired red gamma adjustment for conversion
 * \param ggamma desired green gamma adjustment for conversion
 * \param bgamma desired blue gamma adjustment for conversion
 */
static void create_conv_textures(int conv, int *texu, char *texs,
              float brightness, float contrast, float uvcos, float uvsin,
              float rgamma, float ggamma, float bgamma) {
  unsigned char *lookup_data = NULL;
  switch (conv) {
    case YUV_CONVERSION_FRAGMENT:
    case YUV_CONVERSION_FRAGMENT_POW:
     break;
    case YUV_CONVERSION_FRAGMENT_LOOKUP:
      texs[0] = (*texu)++;
      ActiveTexture(GL_TEXTURE0 + texs[0]);
      lookup_data = malloc(4 * LOOKUP_RES);
      gen_gamma_map(lookup_data, LOOKUP_RES, rgamma);
      gen_gamma_map(&lookup_data[LOOKUP_RES], LOOKUP_RES, ggamma);
      gen_gamma_map(&lookup_data[2 * LOOKUP_RES], LOOKUP_RES, bgamma);
      glCreateClearTex(GL_TEXTURE_2D, GL_LUMINANCE8, GL_LINEAR,
                       LOOKUP_RES, 4, 0);
      glUploadTex(GL_TEXTURE_2D, GL_LUMINANCE, GL_UNSIGNED_BYTE, lookup_data,
                  LOOKUP_RES, 0, 0, LOOKUP_RES, 4, 0);
      ActiveTexture(GL_TEXTURE0);
      texs[0] += '0';
      break;
    case YUV_CONVERSION_FRAGMENT_LOOKUP3D:
      {
        int sz = LOOKUP_3DRES + 2; // texture size including borders
        if (!TexImage3D) {
          mp_msg(MSGT_VO, MSGL_ERR, "[gl] Missing 3D texture function!\n");
          break;
        }
        texs[0] = (*texu)++;
        ActiveTexture(GL_TEXTURE0 + texs[0]);
        lookup_data = malloc(3 * sz * sz * sz);
        gen_yuv2rgb_map(lookup_data, LOOKUP_3DRES, brightness, contrast,
                        uvcos, uvsin, rgamma, ggamma, bgamma);
        glAdjustAlignment(sz);
        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
        TexImage3D(GL_TEXTURE_3D, 0, 3, sz, sz, sz, 1,
                   GL_RGB, GL_UNSIGNED_BYTE, lookup_data);
        glTexParameterf(GL_TEXTURE_3D, GL_TEXTURE_PRIORITY, 1.0);
        glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
        glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
        glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP);
        glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP);
        glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_CLAMP);
        ActiveTexture(GL_TEXTURE0);
        texs[0] += '0';
      }
      break;
    default:
      mp_msg(MSGT_VO, MSGL_ERR, "[gl] unknown conversion type %i\n", conv);
  }
  if (lookup_data)
    free(lookup_data);
}

/**
 * \brief adds a scaling texture read at the current fragment program position
 * \param scaler type of scaler to insert
 * \param prog_pos current position in fragment program
 * \param remain how many bytes remain in the buffer given by prog_pos
 * \param texs array containing the texture unit identifiers for this scaler
 * \param in_tex texture unit the scaler should read from
 * \param out_comp component of the yuv variable the scaler stores the result in
 * \param rect if rectangular (pixel) adressing should be used for in_tex
 * \param texw width of the in_tex texture
 * \param texh height of the in_tex texture
 */
static void add_scaler(int scaler, char **prog_pos, int *remain, char *texs,
                    char in_tex, char out_comp, int rect, int texw, int texh) {
  switch (scaler) {
    case YUV_SCALER_BILIN:
      snprintf(*prog_pos, *remain, bilin_filt_template, out_comp, in_tex,
                 in_tex, rect ? "RECT" : "2D");
      break;
    case YUV_SCALER_BICUB:
      if (rect)
        snprintf(*prog_pos, *remain, bicub_filt_template_RECT,
                 in_tex, texs[0], texs[0],
                 in_tex, in_tex, in_tex, in_tex, in_tex, in_tex, out_comp);
      else
        snprintf(*prog_pos, *remain, bicub_filt_template_2D,
                 in_tex, (float)texw, (float)texh,
                 texs[0], (float)1.0 / texw, (float)1.0 / texw,
                 texs[0], (float)1.0 / texh, (float)1.0 / texh,
                 in_tex, in_tex, in_tex, in_tex, in_tex, in_tex, out_comp);
      break;
    case YUV_SCALER_BICUB_X:
      if (rect)
        snprintf(*prog_pos, *remain, bicub_x_filt_template_RECT,
                 in_tex, texs[0],
                 in_tex, in_tex, in_tex, in_tex, out_comp);
      else
        snprintf(*prog_pos, *remain, bicub_x_filt_template_2D,
                 in_tex, (float)texw, (float)texh,
                 texs[0], (float)1.0 / texw, (float)1.0 / texw,
                 in_tex, in_tex, in_tex, in_tex, out_comp);
      break;
    case YUV_SCALER_BICUB_NOTEX:
      if (rect)
        snprintf(*prog_pos, *remain, bicub_notex_filt_template_RECT,
                 in_tex,
                 in_tex, in_tex, in_tex, in_tex, in_tex, in_tex, out_comp);
      else
        snprintf(*prog_pos, *remain, bicub_notex_filt_template_2D,
                 in_tex, (float)texw, (float)texh,
                 (float)1.0 / texw, (float)1.0 / texw,
                 (float)1.0 / texh, (float)1.0 / texh,
                 in_tex, in_tex, in_tex, in_tex, in_tex, in_tex, out_comp);
      break;
  }
  *remain -= strlen(*prog_pos);
  *prog_pos += strlen(*prog_pos);
}

static const struct {
  const char *name;
  GLenum cur;
  GLenum max;
} progstats[] = {
  {"instructions", 0x88A0, 0x88A1},
  {"native instructions", 0x88A2, 0x88A3},
  {"temporaries", 0x88A4, 0x88A5},
  {"native temporaries", 0x88A6, 0x88A7},
  {"parameters", 0x88A8, 0x88A9},
  {"native parameters", 0x88AA, 0x88AB},
  {"attribs", 0x88AC, 0x88AD},
  {"native attribs", 0x88AE, 0x88AF},
  {"ALU instructions", 0x8805, 0x880B},
  {"TEX instructions", 0x8806, 0x880C},
  {"TEX indirections", 0x8807, 0x880D},
  {"native ALU instructions", 0x8808, 0x880E},
  {"native TEX instructions", 0x8809, 0x880F},
  {"native TEX indirections", 0x880A, 0x8810},
  {NULL, 0, 0}
};

/**
 * \brief load the specified GPU Program
 * \param target program target to load into, only GL_FRAGMENT_PROGRAM is tested
 * \param prog program string
 * \return 1 on success, 0 otherwise
 */
int loadGPUProgram(GLenum target, char *prog) {
  int i;
  GLint cur = 0, max = 0, err = 0;
  if (!ProgramString) {
    mp_msg(MSGT_VO, MSGL_ERR, "[gl] Missing GPU program function\n");
    return 0;
  }
  ProgramString(target, GL_PROGRAM_FORMAT_ASCII, strlen(prog), prog);
  glGetIntegerv(GL_PROGRAM_ERROR_POSITION, &err);
  if (err != -1) {
    mp_msg(MSGT_VO, MSGL_ERR,
      "[gl] Error compiling fragment program, make sure your card supports\n"
      "[gl]   GL_ARB_fragment_program (use glxinfo to check).\n"
      "[gl]   Error message:\n  %s at %.10s\n",
      glGetString(GL_PROGRAM_ERROR_STRING), &prog[err]);
    return 0;
  }
  if (!GetProgramiv || !mp_msg_test(MSGT_VO, MSGL_V))
    return 1;
  mp_msg(MSGT_VO, MSGL_V, "[gl] Program statistics:\n");
  for (i = 0; progstats[i].name; i++) {
    GetProgramiv(target, progstats[i].cur, &cur);
    GetProgramiv(target, progstats[i].max, &max);
    mp_msg(MSGT_VO, MSGL_V, "[gl]   %s: %i/%i\n", progstats[i].name, cur, max);
  }
  return 1;
}

/**
 * \brief setup a fragment program that will do YUV->RGB conversion
 * \param brightness brightness adjustment offset
 * \param contrast contrast adjustment factor
 * \param uvcos used for saturation and hue adjustment
 * \param uvsin used for saturation and hue adjustment
 * \param lookup use fragment program that uses texture unit 4 to
 *               do additional conversion via lookup.
 */
static void glSetupYUVFragprog(float brightness, float contrast,
                        float uvcos, float uvsin, float rgamma,
                        float ggamma, float bgamma, int type, int rect,
                        int texw, int texh) {
  char yuv_prog[4000] =
    "!!ARBfp1.0\n"
    "OPTION ARB_precision_hint_fastest;"
    // all scaler variables must go here so they aren't defined
    // multiple times when the same scaler is used more than once
    "TEMP coord, coord2, cdelta, parmx, parmy, a, b, yuv;";
  int prog_remain = sizeof(yuv_prog) - strlen(yuv_prog);
  char *prog_pos = &yuv_prog[strlen(yuv_prog)];
  int cur_texu = 3;
  char lum_scale_texs[1];
  char chrom_scale_texs[1];
  char conv_texs[1];
  GLint i;
  // this is the conversion matrix, with y, u, v factors
  // for red, green, blue and the constant offsets
  float ry, ru, rv, rc;
  float gy, gu, gv, gc;
  float by, bu, bv, bc;
  create_scaler_textures(YUV_LUM_SCALER(type), &cur_texu, lum_scale_texs);
  if (YUV_CHROM_SCALER(type) == YUV_LUM_SCALER(type))
    memcpy(chrom_scale_texs, lum_scale_texs, sizeof(chrom_scale_texs));
  else
    create_scaler_textures(YUV_CHROM_SCALER(type), &cur_texu, chrom_scale_texs);
  create_conv_textures(YUV_CONVERSION(type), &cur_texu, conv_texs,
      brightness, contrast, uvcos, uvsin, rgamma, ggamma, bgamma);
  glGetIntegerv(GL_MAX_TEXTURE_UNITS, &i);
  if (i < cur_texu)
    mp_msg(MSGT_VO, MSGL_ERR,
           "[gl] %i texture units needed for this type of YUV fragment support (found %i)\n",
           cur_texu, i);
  if (!ProgramString) {
    mp_msg(MSGT_VO, MSGL_FATAL, "[gl] ProgramString function missing!\n");
    return;
  }
  add_scaler(YUV_LUM_SCALER(type), &prog_pos, &prog_remain, lum_scale_texs,
             '0', 'r', rect, texw, texh);
  add_scaler(YUV_CHROM_SCALER(type), &prog_pos, &prog_remain, chrom_scale_texs,
             '1', 'g', rect, texw / 2, texh / 2);
  add_scaler(YUV_CHROM_SCALER(type), &prog_pos, &prog_remain, chrom_scale_texs,
             '2', 'b', rect, texw / 2, texh / 2);
  get_yuv2rgb_coeffs(brightness, contrast, uvcos, uvsin,
          &ry, &ru, &rv, &rc, &gy, &gu, &gv, &gc, &by, &bu, &bv, &bc);
  switch (YUV_CONVERSION(type)) {
    case YUV_CONVERSION_FRAGMENT:
      snprintf(prog_pos, prog_remain, yuv_prog_template,
               ry, gy, by, ru, gu, bu, rv, gv, bv, rc, gc, bc);
      break;
    case YUV_CONVERSION_FRAGMENT_POW:
      snprintf(prog_pos, prog_remain, yuv_pow_prog_template,
               ry, gy, by, ru, gu, bu, rv, gv, bv, rc, gc, bc,
               (float)1.0 / rgamma, (float)1.0 / bgamma, (float)1.0 / bgamma);
      break;
    case YUV_CONVERSION_FRAGMENT_LOOKUP:
      snprintf(prog_pos, prog_remain, yuv_lookup_prog_template,
               ry, gy, by, ru, gu, bu, rv, gv, bv, rc, gc, bc,
               conv_texs[0], conv_texs[0], conv_texs[0]);
      break;
    case YUV_CONVERSION_FRAGMENT_LOOKUP3D:
      snprintf(prog_pos, prog_remain, yuv_lookup3d_prog_template, conv_texs[0]);
      break;
    default:
      mp_msg(MSGT_VO, MSGL_ERR, "[gl] unknown conversion type %i\n", YUV_CONVERSION(type));
      break;
  }
  mp_msg(MSGT_VO, MSGL_V, "[gl] generated fragment program:\n%s\n", yuv_prog);
  loadGPUProgram(GL_FRAGMENT_PROGRAM, yuv_prog);
}

/**
 * \brief little helper function to create a lookup table for gamma
 * \param map buffer to create map into
 * \param size size of buffer
 * \param gamma gamma value
 */
static void gen_gamma_map(unsigned char *map, int size, float gamma) {
  int i;
  if (gamma == 1.0) {
    for (i = 0; i < size; i++)
      map[i] = 255 * i / (size - 1);
    return;
  }
  gamma = 1.0 / gamma;
  for (i = 0; i < size; i++) {
    float tmp = (float)i / (size - 1.0);
    tmp = pow(tmp, gamma);
    if (tmp > 1.0) tmp = 1.0;
    if (tmp < 0.0) tmp = 0.0;
    map[i] = 255 * tmp;
  }
}

/**
 * \brief setup YUV->RGB conversion
 * \param target texture target for Y, U and V textures (e.g. GL_TEXTURE_2D)
 * \param type YUV conversion type
 * \param brightness brightness adjustment offset
 * \param contrast contrast adjustment factor
 * \param hue hue adjustment angle
 * \param saturation saturation adjustment factor
 * \param rgamma gamma value for red channel
 * \param ggamma gamma value for green channel
 * \param bgamma gamma value for blue channel
 * \ingroup glconversion
 */
void glSetupYUVConversion(GLenum target, int type,
                          float brightness, float contrast,
                          float hue, float saturation,
                          float rgamma, float ggamma, float bgamma,
                          int texw, int texh) {
  float uvcos = saturation * cos(hue);
  float uvsin = saturation * sin(hue);
  switch (YUV_CONVERSION(type)) {
    case YUV_CONVERSION_COMBINERS:
      glSetupYUVCombiners(uvcos, uvsin);
      break;
    case YUV_CONVERSION_COMBINERS_ATI:
      glSetupYUVCombinersATI(uvcos, uvsin);
      break;
    case YUV_CONVERSION_FRAGMENT_LOOKUP:
    case YUV_CONVERSION_FRAGMENT_LOOKUP3D:
    case YUV_CONVERSION_FRAGMENT:
    case YUV_CONVERSION_FRAGMENT_POW:
      glSetupYUVFragprog(brightness, contrast, uvcos, uvsin,
                         rgamma, ggamma, bgamma, type,
                         target == GL_TEXTURE_RECTANGLE,
                         texw, texh);
      break;
    default:
      mp_msg(MSGT_VO, MSGL_ERR, "[gl] unknown conversion type %i\n", YUV_CONVERSION(type));
  }
}

/**
 * \brief enable the specified YUV conversion
 * \param target texture target for Y, U and V textures (e.g. GL_TEXTURE_2D)
 * \param type type of YUV conversion
 * \ingroup glconversion
 */
void glEnableYUVConversion(GLenum target, int type) {
  if (type <= 0) return;
  switch (YUV_CONVERSION(type)) {
    case YUV_CONVERSION_COMBINERS:
      ActiveTexture(GL_TEXTURE1);
      glEnable(target);
      ActiveTexture(GL_TEXTURE2);
      glEnable(target);
      ActiveTexture(GL_TEXTURE0);
      glEnable(GL_REGISTER_COMBINERS_NV);
      break;
    case YUV_CONVERSION_COMBINERS_ATI:
      ActiveTexture(GL_TEXTURE1);
      glEnable(target);
      ActiveTexture(GL_TEXTURE2);
      glEnable(target);
      ActiveTexture(GL_TEXTURE0);
      glEnable(GL_FRAGMENT_SHADER_ATI);
      break;
    case YUV_CONVERSION_FRAGMENT_LOOKUP3D:
    case YUV_CONVERSION_FRAGMENT_LOOKUP:
    case YUV_CONVERSION_FRAGMENT_POW:
    case YUV_CONVERSION_FRAGMENT:
      glEnable(GL_FRAGMENT_PROGRAM);
      break;
  }
}

/**
 * \brief disable the specified YUV conversion
 * \param target texture target for Y, U and V textures (e.g. GL_TEXTURE_2D)
 * \param type type of YUV conversion
 * \ingroup glconversion
 */
void glDisableYUVConversion(GLenum target, int type) {
  if (type <= 0) return;
  switch (YUV_CONVERSION(type)) {
    case YUV_CONVERSION_COMBINERS:
      ActiveTexture(GL_TEXTURE1);
      glDisable(target);
      ActiveTexture(GL_TEXTURE2);
      glDisable(target);
      ActiveTexture(GL_TEXTURE0);
      glDisable(GL_REGISTER_COMBINERS_NV);
      break;
    case YUV_CONVERSION_COMBINERS_ATI:
      ActiveTexture(GL_TEXTURE1);
      glDisable(target);
      ActiveTexture(GL_TEXTURE2);
      glDisable(target);
      ActiveTexture(GL_TEXTURE0);
      glDisable(GL_FRAGMENT_SHADER_ATI);
      break;
    case YUV_CONVERSION_FRAGMENT_LOOKUP3D:
    case YUV_CONVERSION_FRAGMENT_LOOKUP:
    case YUV_CONVERSION_FRAGMENT_POW:
    case YUV_CONVERSION_FRAGMENT:
      glDisable(GL_FRAGMENT_PROGRAM);
      break;
  }
}

/**
 * \brief draw a texture part at given 2D coordinates
 * \param x screen top coordinate
 * \param y screen left coordinate
 * \param w screen width coordinate
 * \param h screen height coordinate
 * \param tx texture top coordinate in pixels
 * \param ty texture left coordinate in pixels
 * \param tw texture part width in pixels
 * \param th texture part height in pixels
 * \param sx width of texture in pixels
 * \param sy height of texture in pixels
 * \param rect_tex whether this texture uses texture_rectangle extension
 * \param is_yv12 if set, also draw the textures from units 1 and 2
 * \param flip flip the texture upside down
 * \ingroup gltexture
 */
void glDrawTex(GLfloat x, GLfloat y, GLfloat w, GLfloat h,
               GLfloat tx, GLfloat ty, GLfloat tw, GLfloat th,
               int sx, int sy, int rect_tex, int is_yv12, int flip) {
  GLfloat tx2 = tx / 2, ty2 = ty / 2, tw2 = tw / 2, th2 = th / 2;
  if (!rect_tex) {
    tx /= sx; ty /= sy; tw /= sx; th /= sy;
    tx2 = tx, ty2 = ty, tw2 = tw, th2 = th;
  }
  if (flip) {
    y += h;
    h = -h;
  }
  glBegin(GL_QUADS);
  glTexCoord2f(tx, ty);
  if (is_yv12) {
    MultiTexCoord2f(GL_TEXTURE1, tx2, ty2);
    MultiTexCoord2f(GL_TEXTURE2, tx2, ty2);
  }
  glVertex2f(x, y);
  glTexCoord2f(tx, ty + th);
  if (is_yv12) {
    MultiTexCoord2f(GL_TEXTURE1, tx2, ty2 + th2);
    MultiTexCoord2f(GL_TEXTURE2, tx2, ty2 + th2);
  }
  glVertex2f(x, y + h);
  glTexCoord2f(tx + tw, ty + th);
  if (is_yv12) {
    MultiTexCoord2f(GL_TEXTURE1, tx2 + tw2, ty2 + th2);
    MultiTexCoord2f(GL_TEXTURE2, tx2 + tw2, ty2 + th2);
  }
  glVertex2f(x + w, y + h);
  glTexCoord2f(tx + tw, ty);
  if (is_yv12) {
    MultiTexCoord2f(GL_TEXTURE1, tx2 + tw2, ty2);
    MultiTexCoord2f(GL_TEXTURE2, tx2 + tw2, ty2);
  }
  glVertex2f(x + w, y);
  glEnd();
}

#ifdef GL_WIN32
#include "w32_common.h"
/**
 * \brief little helper since wglGetProcAddress definition does not fit our 
 *        getProcAddress
 * \param procName name of function to look up
 * \return function pointer returned by wglGetProcAddress
 */
static void *w32gpa(const GLubyte *procName) {
  return wglGetProcAddress(procName);
}

int setGlWindow(int *vinfo, HGLRC *context, HWND win)
{
  int new_vinfo;
  HDC windc = GetDC(win);
  HGLRC new_context = 0;
  int keep_context = 0;
  int res = SET_WINDOW_FAILED;

  // should only be needed when keeping context, but not doing glFinish
  // can cause flickering even when we do not keep it.
  if (*context)
  glFinish();
  new_vinfo = GetPixelFormat(windc);
  if (*context && *vinfo && new_vinfo && *vinfo == new_vinfo) {
      // we can keep the wglContext
      new_context = *context;
      keep_context = 1;
  } else {
    // create a context
    new_context = wglCreateContext(windc);
    if (!new_context) {
      mp_msg(MSGT_VO, MSGL_FATAL, "[gl] Could not create GL context!\n");
      goto out;
    }
  }

  // set context
  if (!wglMakeCurrent(windc, new_context)) {
    mp_msg (MSGT_VO, MSGL_FATAL, "[gl] Could not set GL context!\n");
    if (!keep_context) {
      wglDeleteContext(new_context);
    }
    goto out;
  }

  // set new values
  vo_w32_window = win;
  {
    RECT rect;
    GetClientRect(win, &rect);
    vo_dwidth = rect.right;
    vo_dheight = rect.bottom;
  }
  if (!keep_context) {
    if (*context)
      wglDeleteContext(*context);
    *context = new_context;
    *vinfo = new_vinfo;
    getFunctions(w32gpa, NULL);

    // and inform that reinit is neccessary
    res = SET_WINDOW_REINIT;
  } else
    res = SET_WINDOW_OK;

out:
  ReleaseDC(win, windc);
  return res;
}

void releaseGlContext(int *vinfo, HGLRC *context) {
  *vinfo = 0;
  if (*context) {
    wglMakeCurrent(0, 0);
    wglDeleteContext(*context);
  }
  *context = 0;
}

void swapGlBuffers() {
  HDC vo_hdc = GetDC(vo_w32_window);
  SwapBuffers(vo_hdc);
  ReleaseDC(vo_w32_window, vo_hdc);
}
#else
#ifdef HAVE_LIBDL
#include <dlfcn.h>
#endif
#include "x11_common.h"
/**
 * \brief find address of a linked function
 * \param s name of function to find
 * \return address of function or NULL if not found
 *
 * Copied from xine
 */
static void *getdladdr(const char *s) {
#ifdef HAVE_LIBDL
#if defined(__sun) || defined(__sgi)
  static void *handle = NULL;
  if (!handle)
    handle = dlopen(NULL, RTLD_LAZY);
  return dlsym(handle, s);
#else
  return dlsym(0, s);
#endif
#else
  return NULL;
#endif
}

/**
 * \brief Returns the XVisualInfo associated with Window win.
 * \param win Window whose XVisualInfo is returne.
 * \return XVisualInfo of the window. Caller must use XFree to free it.
 */
static XVisualInfo *getWindowVisualInfo(Window win) {
  XWindowAttributes xw_attr;
  XVisualInfo vinfo_template;
  int tmp;
  XGetWindowAttributes(mDisplay, win, &xw_attr);
  vinfo_template.visualid = XVisualIDFromVisual(xw_attr.visual);
  return XGetVisualInfo(mDisplay, VisualIDMask, &vinfo_template, &tmp);
}

/**
 * \brief Changes the window in which video is displayed.
 * If possible only transfers the context to the new window, otherwise
 * creates a new one, which must be initialized by the caller.
 * \param vinfo Currently used visual.
 * \param context Currently used context.
 * \param win window that should be used for drawing.
 * \return one of SET_WINDOW_FAILED, SET_WINDOW_OK or SET_WINDOW_REINIT.
 * In case of SET_WINDOW_REINIT the context could not be transfered
 * and the caller must initialize it correctly.
 * \ingroup glcontext
 */
int setGlWindow(XVisualInfo **vinfo, GLXContext *context, Window win)
{
  XVisualInfo *new_vinfo;
  GLXContext new_context = NULL;
  int keep_context = 0;

  // should only be needed when keeping context, but not doing glFinish
  // can cause flickering even when we do not keep it.
  if (*context)
  glFinish();
  new_vinfo = getWindowVisualInfo(win);
  if (*context && *vinfo && new_vinfo &&
      (*vinfo)->visualid == new_vinfo->visualid) {
      // we can keep the GLXContext
      new_context = *context;
      XFree(new_vinfo);
      new_vinfo = *vinfo;
      keep_context = 1;
  } else {
    // create a context
    new_context = glXCreateContext(mDisplay, new_vinfo, NULL, True);
    if (!new_context) {
      mp_msg(MSGT_VO, MSGL_FATAL, "[gl] Could not create GLX context!\n");
      XFree(new_vinfo);
      return SET_WINDOW_FAILED;
    }
  }

  // set context
  if (!glXMakeCurrent(mDisplay, vo_window, new_context)) {
    mp_msg (MSGT_VO, MSGL_FATAL, "[gl] Could not set GLX context!\n");
    if (!keep_context) {
      glXDestroyContext (mDisplay, new_context);
      XFree(new_vinfo);
    }
    return SET_WINDOW_FAILED;
  }

  // set new values
  vo_window = win;
  {
    Window root;
    int tmp;
    unsigned utmp;
    XGetGeometry(mDisplay, vo_window, &root, &tmp, &tmp,
        (unsigned *)&vo_dwidth, (unsigned *)&vo_dheight, &utmp, &utmp);
  }
  if (!keep_context) {
    void *(*getProcAddress)(const GLubyte *);
    const char *(*glXExtStr)(Display *, int);
    if (*context)
      glXDestroyContext(mDisplay, *context);
    *context = new_context;
    if (*vinfo)
      XFree(*vinfo);
    *vinfo = new_vinfo;
      getProcAddress = getdladdr("glXGetProcAddress");
    if (!getProcAddress)
      getProcAddress = getdladdr("glXGetProcAddressARB");
    if (!getProcAddress)
      getProcAddress = (void *)getdladdr;
    glXExtStr = getdladdr("glXQueryExtensionsString");
    getFunctions(getProcAddress, !glXExtStr ? NULL :
                 glXExtStr(mDisplay, DefaultScreen(mDisplay)));

    // and inform that reinit is neccessary
    return SET_WINDOW_REINIT;
  }
  return SET_WINDOW_OK;
}

/**
 * \brief free the VisualInfo and GLXContext of an OpenGL context.
 * \ingroup glcontext
 */
void releaseGlContext(XVisualInfo **vinfo, GLXContext *context) {
  if (*vinfo)
    XFree(*vinfo);
  *vinfo = NULL;
  if (*context)
  {
    glFinish();
    glXMakeCurrent(mDisplay, None, NULL);
    glXDestroyContext(mDisplay, *context);
  }
  *context = 0;
}

void swapGlBuffers(void) {
  glXSwapBuffers(mDisplay, vo_window);
}
#endif