view sub/sub_cc.c @ 36812:e6823a005ab6

Add some extra checks to avoid potential crashes. Should avoid the issues described in #2127.
author reimar
date Sun, 23 Feb 2014 19:09:08 +0000
parents d206960484fe
children
line wrap: on
line source

/*
 * decoder for Closed Captions
 *
 * This decoder relies on MPlayer's OSD to display subtitles.
 * Be warned that decoding is somewhat preliminary, though it basically works.
 *
 * Most notably, only the text information is decoded as of now, discarding
 * color, background and position info (see source below).
 *
 * uses source from the xine closed captions decoder
 *
 * Copyright (C) 2002 Matteo Giani
 *
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "config.h"
#include "mp_msg.h"
#include "sub_cc.h"

#include "subreader.h"

#include "libvo/video_out.h"
#include "sub.h"

#include "libavutil/avutil.h"
#include "libavutil/common.h"


/* Size in bytes of every caption line buffer allocated by append_char().
 * append_char() stops writing at CC_MAX_LINE_LENGTH-1, so the final byte of
 * the zero-filled buffer always remains a terminating NUL. */
#define CC_MAX_LINE_LENGTH 64

/* Maps a 7-bit caption code to the character that is stored in the line
 * buffers; filled in by build_char_table(). */
static char chartbl[128];

/* The two caption buffers; fb and bb are pointed at them by subcc_init()
 * and exchanged by swap_buffers(). */
static subtitle buf1,buf2;
/* bb is the buffer append_char() writes into, fb the other one. */
static subtitle *fb,*bb;

/* Write offset inside the last line of bb. */
static unsigned int cursor_pos=0;

/* Set to 1 by subcc_init(). */
static int initialized=0;
/* Set by subcc_process_data() after it has seen a 2-byte packet; while set,
 * packets are no longer handed to subcc_decode(). */
static int wtv_format;

/* Values of cc_mode. */
#define CC_ROLLON 1
#define CC_ROLLUP 2

/* Current caption mode, changed by control codes in cc_decode_EIA608(). */
static int cc_mode=CC_ROLLON;
static int cc_lines=4; ///< number of visible rows in CC roll-up mode, not used in CC roll-on mode

/*
 * Populate chartbl: every code first maps to itself, then the handful of
 * codes that the caption character set assigns differently from ASCII are
 * overridden.
 */
static void build_char_table(void)
{
  static const struct {
    unsigned char code;
    unsigned char glyph;
  } special[] = {
    { 0x2a, 0xe1 }, /* Latin Small Letter A with acute */
    { 0x5c, 0xe9 }, /* Latin Small Letter E with acute */
    { 0x5e, 0xed }, /* Latin Small Letter I with acute */
    { 0x5f, 0xf3 }, /* Latin Small Letter O with acute */
    { 0x60, 0xfa }, /* Latin Small Letter U with acute */
    { 0x7b, 0xe7 }, /* Latin Small Letter C with cedilla */
    { 0x7c, 0xf7 }, /* Division sign */
    { 0x7d, 0xd1 }, /* Latin Capital letter N with tilde */
    { 0x7e, 0xf1 }, /* Latin Small Letter N with tilde */
    { 0x7f, 0xa4 }, /* Currency sign FIXME: this should be a solid block */
  };
  unsigned int k;

  /* identity mapping for the plain ASCII range */
  for (k = 0; k < sizeof(chartbl); k++)
    chartbl[k] = (char) k;

  /* special codes replace their ASCII counterparts */
  for (k = 0; k < sizeof(special) / sizeof(special[0]); k++)
    chartbl[special[k].code] = special[k].glyph;
}

/* Free every line held by buf and leave it empty (lines == 0, all text
 * slots NULL). */
static void clear_buffer(subtitle *buf)
{
	int row = SUB_MAX_TEXT;

	/* walk all slots, not just the first buf->lines ones */
	while (row-- > 0) {
		free(buf->text[row]);
		buf->text[row] = NULL;
	}
	buf->lines = 0;
}


/**
 \brief drop lines from the top of a buffer until at most cc_lines remain
 \param buf buffer to scroll

 Each pass frees the topmost line, moves the remaining lines up by one slot
 and clears the slot that became vacant at the bottom.
*/
static void scroll_buffer(subtitle* buf)
{
	while (buf->lines > cc_lines) {
		const int last = buf->lines - 1;
		int row;

		free(buf->text[0]);
		for (row = 0; row < last; row++)
			buf->text[row] = buf->text[row + 1];

		buf->text[last] = NULL;
		buf->lines = last;
	}
}

/* Channel bit (0 or 1) taken from the most recent control code seen by
 * cc_decode_EIA608(); subcc_init() sets it to -1. */
static int channel;

/*
 * Set up the decoder state: build the character table, empty both caption
 * buffers, assign them to fb/bb and mark the decoder as initialized.
 * The text slots are overwritten with NULL without being freed.
 */
void subcc_init(void)
{
	int row = 0;

	build_char_table();

	while (row < SUB_MAX_TEXT) {
		buf1.text[row] = NULL;
		buf2.text[row] = NULL;
		row++;
	}
	buf1.lines = 0;
	buf2.lines = 0;

	fb = &buf1;
	bb = &buf2;
	channel = -1;	/* no control code seen yet */

	wtv_format = 0;
	initialized = 1;
}

/*
 * Forget the detected packet format and, if the decoder has been
 * initialized, discard the contents of both caption buffers.
 */
void subcc_reset(void)
{
    wtv_format = 0;

    if (initialized) {
        clear_buffer(&buf1);
        clear_buffer(&buf2);
    }
}

/*
 * Hand buf to the OSD as the current subtitle and flag the subtitle OSD
 * element as changed. Callers in this file also pass NULL.
 */
static void display_buffer(subtitle *buf)
{
	vo_sub = buf;

	vo_osd_changed(OSDTYPE_SUBTITLE);
}


/*
 * Append one character to the last line of the back buffer (bb).
 *
 * The first line, and any line whose slot is still NULL, is allocated on
 * demand as a zero-filled buffer of CC_MAX_LINE_LENGTH bytes.
 *
 * '\n' starts a new line, but only when the current line is not empty and
 * the buffer still has a free slot. In roll-up mode the new line is
 * allocated right away and the buffer is scrolled so that at most cc_lines
 * lines remain.
 *
 * Any other character is stored at cursor_pos. The character is dropped
 * (with a message on stderr) when the line is full, and dropped silently
 * when the line buffer could not be allocated.
 *
 * In roll-up mode the back buffer is displayed after every successful call.
 */
static void append_char(char c)
{
	if(!bb->lines) {bb->lines++; cursor_pos=0;}
	if(bb->text[bb->lines - 1]==NULL)
	{
		bb->text[bb->lines - 1] = calloc(1, CC_MAX_LINE_LENGTH);
		cursor_pos=0;
	}

	if(c=='\n')
	{
		if(cursor_pos>0 && bb->lines < SUB_MAX_TEXT)
		{
			bb->lines++;cursor_pos=0;
			if(cc_mode==CC_ROLLUP){ //Carriage return - scroll buffer one line up
				bb->text[bb->lines - 1]=calloc(1, CC_MAX_LINE_LENGTH);
				scroll_buffer(bb);
			}
		}
	}
	else
	{
		// calloc() may have failed above; never write through a NULL line.
		// The next call retries the allocation.
		if(bb->text[bb->lines - 1]==NULL)
			return;
		if(cursor_pos==CC_MAX_LINE_LENGTH-1)
		{
			fprintf(stderr,"CC: append_char() reached CC_MAX_LINE_LENGTH!\n");
			return;
		}
		bb->text[bb->lines - 1][cursor_pos++]=c;
	}
	//In CC roll-up mode data should be shown immediately
	if(cc_mode==CC_ROLLUP) display_buffer(bb);
}


/* Exchange the roles of the two caption buffers: fb and bb trade places. */
static void swap_buffers(void)
{
	subtitle *const previous_front = fb;

	fb = bb;
	bb = previous_front;
}

/*
 * Zero-based number of the caption channel requested by the user.
 * subcc_enabled is one-based (0 means captions are off), hence the -1.
 * Callers in this file test individual bits of the result.
 */
static int selected_channel(void)
{
    const int one_based = subcc_enabled;

    return one_based - 1;
}

/*
 * Decode one 16-bit caption word.
 *
 * The low byte of data is the first code (c1), the high byte the second
 * (c2); the top bit of each is masked off before use (presumably the
 * parity bit - confirm against the caller's "FIXME check parity" note).
 *
 * - c1 in 0x20..0x7f: c1 (and c2, if it is also >= 0x20) are appended as
 *   text, provided the channel set by the last control code matches the
 *   low bit of the selected channel.
 * - c1 in 0x10..0x1f: control code. Bit 3 of c1 becomes the current
 *   channel; codes for the other channel are ignored. A control word that
 *   is identical to the previously processed word is acted upon only once.
 *
 * lastcode is only updated when the function runs to its end, i.e. not on
 * the early returns taken for a non-selected channel.
 */
static void cc_decode_EIA608(unsigned short int data)
{

  static unsigned short int lastcode=0x0000;
  uint8_t c1 = data & 0x7f;
  uint8_t c2 = (data >> 8) & 0x7f;

  if (c1 & 0x60) {		/* normal character, 0x20 <= c1 <= 0x7f */
	   if (channel != (selected_channel() & 1))
		   return;
	   append_char(chartbl[c1]);
	   if(c2 & 0x60)	/*c2 might not be a normal char even if c1 is*/
		   append_char(chartbl[c2]);
  }
  else if (c1 & 0x10)		// control code / special char
  {
	  /* bit 3 of the first byte selects the channel the code belongs to */
	  channel = (c1 & 0x08) >> 3;
	  if (channel != (selected_channel() & 1))
		return;
	  /* drop the channel bit so both channels share the cases below */
	  c1&=~0x08;
	  /* skip a control word that merely repeats the previous word */
	  if(data!=lastcode)
	  {
	  	if(c2 & 0x40) {	/*PAC, Preamble Address Code */
			append_char('\n'); /*FIXME properly interpret PACs*/
		}
		else
			switch(c1)
			{
				case 0x10:	break; // ext attribute
				case 0x11:
					if((c2 & 0x30)==0x30)
					{
						//printf("[debug]:Special char (ignored)\n");
						/*cc_decode_special_char()*/;
					}
					else if (c2 & 0x20)
					{
						//printf("[debug]: midrow_attr (ignored)\n");
						/*cc_decode_midrow_attr()*/;
					}
					break;
				case 0x14:
					switch(c2)
					{
						/* NOTE(review): c2 is masked to 7 bits, so this case
						 * needs a raw second byte of 0x00 or 0x80. EIA-608
						 * lists "Resume Caption Loading" as 0x20 - confirm
						 * that 0x00 is really the intended value. */
						case 0x00: //CC roll-on mode
							   cc_mode=CC_ROLLON;
							   break;
						case 0x25: //CC roll-up, 2 rows
						case 0x26: //CC roll-up, 3 rows
						case 0x27: //CC roll-up, 4 rows
							   /* 0x25..0x27 -> 2..4 visible rows */
							   cc_lines=c2-0x23;
							   cc_mode=CC_ROLLUP;
							   break;
						case 0x2C: display_buffer(NULL); //EDM
							   clear_buffer(fb); break;
						case 0x2d: append_char('\n');	//carriage return
							   break;
						case 0x2e: clear_buffer(bb);	//ENM
							   break;
						case 0x2f: swap_buffers();	//Swap buffers
							   display_buffer(fb);
							   clear_buffer(bb);
							   break;
					}
					break;
				case 0x17:
					/* tab offsets are recognised but not acted upon */
					if( c2>=0x21 && c2<=0x23) //TAB
					{
						break;
					}
			}
	  }
  }
  lastcode=data;
}

/*
 * Walk a buffer of 3-byte caption triplets (one code byte followed by two
 * data bytes) and feed the triplets that belong to the selected channel to
 * cc_decode_EIA608().
 *
 * inputbuffer  start of the triplet data
 * inputlength  number of valid bytes in inputbuffer; a trailing partial
 *              triplet (1 or 2 bytes) is ignored
 */
static void subcc_decode(const uint8_t *inputbuffer, unsigned int inputlength)
{
  /* The first number may denote a channel number. I don't have the
   * EIA-708 standard, so it is hard to say.
   * From what I could figure out so far, the general format seems to be:
   *
   * repeat
   *
   *   0xfe starts 2 byte sequence of unknown purpose. It might denote
   *        field #2 in line 21 of the VBI.
   *        Treating it identically to 0xff fixes
   *        http://samples.mplayerhq.hu/MPEG-VOB/ClosedCaptions/Starship_Troopers.vob
   *
   *   0xff starts 2 byte EIA-608 sequence, field #1 in line 21 of the VBI.
   *        Followed by a 3-code triplet that starts either with 0xff or
   *        0xfe. In either case, the following triplet needs to be ignored
   *        for line 21, field 1.
   *
   *   0x00 is padding, followed by 2 more 0x00.
   *
   *   0x01 always seems to appear at the beginning, always seems to
   *        be followed by 0xf8, 8-bit number.
   *        The lower 7 bits of this 8-bit number seem to denote the
   *        number of code triplets that follow.
   *        The most significant bit denotes whether the Line 21 field 1
   *        captioning information is at odd or even triplet offsets from this
   *        beginning triplet. 1 denotes odd offsets, 0 denotes even offsets.
   *
   *        Most captions are encoded with odd offsets, so this is what we
   *        will assume.
   *
   * until end of packet
   */
  const uint8_t *current = inputbuffer;
  unsigned int curbytes = 0;
  uint8_t data1, data2;
  uint8_t cc_code;
  int odd_offset = 1;

  while (curbytes < inputlength) {
    /* Every entry is a complete triplet: all three bytes are read below,
     * so stop at a truncated tail instead of reading past the buffer. */
    if (inputlength - curbytes < 3) {
#ifdef LOG_DEBUG
      fprintf(stderr, "Not enough data for 2-byte CC encoding\n");
#endif
      break;
    }

    cc_code = current[0];
    data1 = current[1];
    data2 = current[2];
    current += 3; curbytes += 3;

    // 0xfe/0xff are both used on plain EIA-608 CC and
    // for extended EIA-708 (where 0xfc/0xfd is used for
    // compatibility layer).
    // Allow using channel bit 2 to select between which
    // ones to look in.
    switch (cc_code) {
    case 0xfc:
    case 0xfd:
    case 0xfe:
    case 0xff:
      /* channel bit 2 clear: use 0xfe/0xff; set: use 0xfc/0xfd */
      if ((cc_code & 2) == (selected_channel() & 4) >> 1)
          break;
      /* triplets alternate between the two fields; channel bit 1 picks one */
      odd_offset ^= 1;
      if (odd_offset != (selected_channel() & 2) >> 1)
          break;
      /* expect EIA-608 CC1/CC2 encoding */
      // FIXME check parity!
      // Parity check omitted assuming we are reading from a DVD and therefore
      // we should encounter no "transmission errors".
      cc_decode_EIA608(data1 | (data2 << 8));
      break;

    case 0xfa:
    case 0x00:
      /* This seems to be just padding */
      break;

    case 0x01:
      /* header triplet: top bit of the count byte gives the field parity */
      odd_offset = data2 >> 7;
      break;

    default:
//#ifdef LOG_DEBUG
      fprintf(stderr, "Unknown CC encoding: %x\n", cc_code);
//#endif
      break;
    }
  }
}

/* 8-byte prefixes identifying the two kinds of 10-byte caption records that
 * mov_subcc_decode() understands; the two caption data bytes follow at
 * offsets 8 and 9. */
static const uint8_t mov_cc_signature_1[] = {0, 0, 0, 0xa, 'c', 'd', 'a', 't'};
static const uint8_t mov_cc_signature_2[] = {0, 0, 0, 0xa, 'c', 'd', 't', '2'};
/**
 * MOV stores EIA 608 CC data far more verbosely than DVDs do: every pair of
 * caption bytes is wrapped in a 10-byte record that starts with one of the
 * two signatures above.
 *
 * Records carrying the first signature are decoded when bit 1 of the
 * selected channel is clear, those carrying the second when it is set.
 * Bytes that do not start a known record are skipped one at a time.
 */
static void mov_subcc_decode(const uint8_t *data, unsigned len)
{
    while (len >= 10) {
        int record_kind;

        if (!memcmp(data, mov_cc_signature_1, sizeof(mov_cc_signature_1))) {
            record_kind = 0;
        } else if (!memcmp(data, mov_cc_signature_2, sizeof(mov_cc_signature_2))) {
            record_kind = 1;
        } else {
            /* no record starts here, try again one byte further on */
            mp_msg(MSGT_OSD, MSGL_V, "Unknown MOV 608 CC formatting\n");
            data += 1;
            len  -= 1;
            continue;
        }

        if ((selected_channel() >> 1) == record_kind)
            cc_decode_EIA608(data[8] | (data[9] << 8));

        data += 10;
        len  -= 10;
    }
}

/*
 * Entry point for caption packets.
 *
 * Does nothing while captions are disabled; initializes the decoder on
 * first use. Three packet layouts are distinguished:
 *  - packets starting with the first MOV signature go to mov_subcc_decode();
 *  - 2-byte packets are decoded directly as one EIA-608 word and switch on
 *    wtv_format, after which even-length packets are ignored until an
 *    odd-length packet switches it off again;
 *  - everything else is treated as a sequence of triplets by subcc_decode().
 */
void subcc_process_data(const uint8_t *inputdata, unsigned int len)
{
	int mov_mode = 0;

	if (len >= 10)
		mov_mode = !memcmp(inputdata, mov_cc_signature_1, sizeof(mov_cc_signature_1));

	if (!subcc_enabled)
		return;
	if (!initialized)
		subcc_init();

	if (mov_mode) {
		mov_subcc_decode(inputdata, len);
		return;
	}

	/* an odd length cannot come from the 2-byte format */
	if (len % 2)
		wtv_format = 0;

	if (len == 2) {
		// EIA-608 compatibility part.
		// Full EIA-708 parts have length >= 4 (multiple of 2).
		cc_decode_EIA608(inputdata[0] | (inputdata[1] << 8));
		wtv_format = 1;
	}

	if (!wtv_format)
		subcc_decode(inputdata, len);
}

/**
 * Handles CC captions in the layout used by ATSC broadcasts.
 * As with DVD CC the data lives in the MPEG-frame userdata, with two
 * differences:
 * 1) It starts with "GA" instead of "CC"
 * 2) It _must_ be reordered in the way the decoder reorders the video frames
 * The second point makes things difficult and is the reason why there is no
 * support for this yet beyond this function.
 *
 * data is expected to begin with '9', '4', 3 (presumably the rest of the
 * "GA94" identifier plus the type code), followed by a flags/count byte, one
 * byte that is skipped and then the 3-byte caption entries. Only entries
 * whose first byte is 0xfc or 0xfd are decoded, and of those only the ones
 * whose low bit equals bit 1 of the selected channel.
 */
void subcc_process_eia708(const uint8_t *data, int len)
{
    const uint8_t *entry;
    int cc_count;

    if (!subcc_enabled)
        return;
    if (!initialized)
        subcc_init();
    if (len <= 5)
        return;

    if (!(data[0] == '9' && data[1] == '4' && data[2] == 3)) {
        mp_msg(MSGT_OSD, MSGL_ERR, "Unknown ATSC CC type "
                                   "0x%"PRIx8" 0x%"PRIx8" 0x%"PRIx8"\n",
                                   data[0], data[1], data[2]);
        return;
    }

    // process_cc_data_flag
    if ((data[3] & 0x40) == 0)
        return;

    /* never trust the announced count beyond what the buffer can hold */
    cc_count = data[3] & 0x1f;
    cc_count = FFMIN(cc_count, (len - 5) / 3);

    for (entry = data + 5; cc_count > 0; cc_count--, entry += 3) {
        // EIA-608 data
        if ((entry[0] & 0xfe) != 0xfc)
            continue;
        if ((entry[0] & 1) != (selected_channel() >> 1))
            continue;
        cc_decode_EIA608(entry[1] | (entry[2] << 8));
    }
}