view libmpdemux/demux_rtp_codec.cpp @ 32267:f6bebbee678b

Change the way encoders necessary for both MPlayer and MEncoder are enabled. The new implementation is more modular and easier to extend.
author diego
date Sat, 25 Sep 2010 16:42:20 +0000
parents 4614728cab25
children 8fa2f43cb760
line wrap: on
line source

/*
 * codec-specific routines used to interface between MPlayer
 * and the "LIVE555 Streaming Media" libraries
 *
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "demux_rtp_internal.h"
extern "C" {
#include <limits.h>
#include <math.h>
#include "stheader.h"
#include "libavutil/base64.h"
}

#ifdef CONFIG_FFMPEG
// FFmpeg H.264 parser state, only available when MPlayer is built with
// CONFIG_FFMPEG.  Both objects have external linkage and are (re)assigned by
// rtpCodecInitialize_video() each time an "H264" subsession is initialized:
// "h264parserctx" from av_parser_init(CODEC_ID_H264) and "avcctx" from
// avcodec_alloc_context().  Nothing in this file releases them.
AVCodecParserContext * h264parserctx;
AVCodecContext *avcctx;
#endif

// Copied from vlc
//
// Converts an SDP "sprop-parameter-sets" value (a comma-separated list of
// base64-encoded records) into a single binary buffer in which every decoded
// record is preceded by the 3-byte start code 00 00 01.
//
// configStr:  the comma-separated base64 string; may be NULL or empty.
// configSize: result parameter; always reset, then set to the number of
//             bytes written to the returned buffer.
// Returns a buffer allocated with new[] (the caller must delete[] it), or
// NULL (with configSize == 0) when there is nothing to decode or the string
// cannot be duplicated.
static unsigned char* parseH264ConfigStr( char const* configStr,
                                          unsigned int& configSize )
{
    configSize = 0;
    if( configStr == NULL || *configStr == '\0' )
        return NULL;

    // Measure the whole string *before* the commas are overwritten with
    // terminators; afterwards strlen() would only see the first record.
    size_t totalLen = strlen( configStr );
    if( totalLen > (size_t)INT_MAX / 4 )
        return NULL; // keeps the capacity below representable as an int

    char *dup = strdup( configStr );
    if( dup == NULL )
        return NULL;

    /* Split the copy at the commas and count the records */
    unsigned int i_records = 1;
    for( char *scan = dup; *scan != '\0'; ++scan )
    {
        if( *scan == ',' )
        {
            ++i_records;
            *scan = '\0';
        }
    }

    // Base64 never decodes to more bytes than it has characters, so the
    // input length plus one start code per record is always sufficient.
    size_t cfgCapacity = totalLen + 3 * (size_t)i_records;
    unsigned char *cfg = new unsigned char[cfgCapacity];

    char *psz = dup;
    for( unsigned int i = 0; i < i_records; i++ )
    {
        size_t recordLen = strlen( psz );

        // Decode behind the space reserved for the start code, limited to
        // what is really left in the buffer.
        int decoded = av_base64_decode( (uint8_t*)&cfg[configSize + 3],
                                        psz,
                                        (int)(cfgCapacity - configSize - 3) );
        if( decoded >= 0 )
        {
            cfg[configSize++] = 0x00;
            cfg[configSize++] = 0x00;
            cfg[configSize++] = 0x01;
            configSize += (unsigned int)decoded;
        }
        // A record that is not valid base64 (negative return) is skipped
        // instead of corrupting the running size.

        psz += recordLen + 1;
    }
    free( dup );

    return cfg;
}

// Forward declarations of file-local helpers that are defined further down
// in this file but are called from the rtpCodecInitialize_*() functions.

// Sets the video stream header's "fps" and "frametime" fields.
static void
needVideoFrameRate(demuxer_t* demuxer, MediaSubsession* subsession); // forward
// Reads the video fourcc out of the QuickTime "sdAtom"; returns False while
// no usable atom is available.
static Boolean
parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
		   unsigned& fourcc); // forward
// Reads the audio fourcc and channel count out of the QuickTime "sdAtom";
// returns False while no usable atom is available.
static Boolean
parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
		   unsigned& fourcc, unsigned& numChannels); // forward

// Appends "size" bytes of codec extradata directly behind a heap-allocated
// BITMAPINFOHEADER and updates biSize to cover header plus extradata.
//
// bih:       header previously obtained from malloc/calloc/realloc.
// extraData: bytes to append; nothing is done when this is NULL.
// size:      number of bytes in extraData; nothing is done when this is 0
//            or so large that the combined size would exceed INT_MAX.
// Returns the (possibly moved) header.  The original pointer is returned
// unchanged when nothing was appended or when realloc() failed, so the
// caller can always store the result back.
static BITMAPINFOHEADER * insertVideoExtradata(BITMAPINFOHEADER *bih,
                                               unsigned char * extraData,
                                               unsigned size)
{
    // A config parser may hand us NULL data; never memcpy() from it.
    if (!extraData || !size || size > INT_MAX - sizeof(BITMAPINFOHEADER))
        return bih;

    BITMAPINFOHEADER *grown
        = (BITMAPINFOHEADER*)realloc(bih, sizeof(BITMAPINFOHEADER) + size);
    if (!grown)
        return bih; // realloc() left the old block intact

    grown->biSize = sizeof(BITMAPINFOHEADER) + size;
    memcpy(grown+1, extraData, size); // extradata lives right after the header
    return grown;
}

// Sets up MPlayer's video stream header for an RTP video subsession.
//
// demuxer:    demuxer whose video stream ("demuxer->video") is initialized.
// subsession: LIVE555 subsession; its codecName() selects the format.
// flags:      result parameter; reset to 0 and then OR-ed with RTPSTATE_*
//             bits for MPEG-1/2 and multiplexed streams.
//
// A BITMAPINFOHEADER is allocated and attached to a new sh_video_t; for
// codecs that need it, extradata from the SDP description (or from the
// QuickTime sample description atom) is appended to that header and the
// frame rate is determined via needVideoFrameRate().  For the QuickTime
// formats the function returns early, leaving the format unset, when no
// further RTP packet can be read.
void rtpCodecInitialize_video(demuxer_t* demuxer,
                              MediaSubsession* subsession,
                              unsigned& flags) {
  flags = 0;
  // Create a dummy video stream header
  // to make the main MPlayer code happy:
  sh_video_t* sh_video = new_sh_video(demuxer,0);
  BITMAPINFOHEADER* bih
    = (BITMAPINFOHEADER*)calloc(1,sizeof(BITMAPINFOHEADER));
  if (bih == NULL) {
    // Nothing below can work without the header; bail out instead of
    // dereferencing a NULL pointer.
    fprintf(stderr, "demux_rtp: Failed to allocate the video stream header\n");
    return;
  }
  bih->biSize = sizeof(BITMAPINFOHEADER);
  sh_video->bih = bih;
  demux_stream_t* d_video = demuxer->video;
  d_video->sh = sh_video; sh_video->ds = d_video;

  // Map known video MIME types to the BITMAPINFOHEADER parameters
  // that this program uses.  (Note that not all types need all
  // of the parameters to be set.)
  if (strcmp(subsession->codecName(), "MPV") == 0) {
    flags |= RTPSTATE_IS_MPEG12_VIDEO;
  } else if (strcmp(subsession->codecName(), "MP1S") == 0 ||
             strcmp(subsession->codecName(), "MP2T") == 0) {
    flags |= RTPSTATE_IS_MPEG12_VIDEO|RTPSTATE_IS_MULTIPLEXED;
  } else if (strcmp(subsession->codecName(), "H263") == 0 ||
             strcmp(subsession->codecName(), "H263-2000") == 0 ||
             strcmp(subsession->codecName(), "H263-1998") == 0) {
    bih->biCompression = sh_video->format
      = mmioFOURCC('H','2','6','3');
    needVideoFrameRate(demuxer, subsession);
  } else if (strcmp(subsession->codecName(), "H264") == 0) {
    bih->biCompression = sh_video->format
      = mmioFOURCC('H','2','6','4');
    unsigned int configLen = 0;
    unsigned char* configData
      = parseH264ConfigStr(subsession->fmtp_spropparametersets(), configLen);
    sh_video->bih = bih = insertVideoExtradata(bih, configData, configLen);
#ifdef CONFIG_FFMPEG
    avcodec_register_all();
    h264parserctx = av_parser_init(CODEC_ID_H264);
    avcctx = avcodec_alloc_context();
    // Pass the config to the parser, but only if both contexts could be
    // created and there actually is config data to hand over.
    if (h264parserctx != NULL && avcctx != NULL &&
        configData != NULL && configLen != 0) {
      int fooLen;
      const uint8_t* fooData;
      h264parserctx->parser->parser_parse(h264parserctx, avcctx,
                    &fooData, &fooLen, configData, configLen);
    }
#endif
    delete[] configData;
    needVideoFrameRate(demuxer, subsession);
  } else if (strcmp(subsession->codecName(), "H261") == 0) {
    bih->biCompression = sh_video->format
      = mmioFOURCC('H','2','6','1');
    needVideoFrameRate(demuxer, subsession);
  } else if (strcmp(subsession->codecName(), "JPEG") == 0) {
    bih->biCompression = sh_video->format
      = mmioFOURCC('M','J','P','G');
    needVideoFrameRate(demuxer, subsession);
  } else if (strcmp(subsession->codecName(), "MP4V-ES") == 0) {
    bih->biCompression = sh_video->format
      = mmioFOURCC('m','p','4','v');
    // For the codec to work correctly, it may need a 'VOL Header' to be
    // inserted at the front of the data stream.  Construct this from the
    // "config" MIME parameter, which was present (hopefully) in the
    // session's SDP description:
    unsigned configLen = 0; // stays 0 if the parser does not set it
    unsigned char* configData
      = parseGeneralConfigStr(subsession->fmtp_config(), configLen);
    if (configData != NULL)
      sh_video->bih = bih = insertVideoExtradata(bih, configData, configLen);
    // The extradata was copied into the header, so the parsed buffer
    // (allocated with new[] by the LIVE555 library) is no longer needed.
    delete[] configData;
    needVideoFrameRate(demuxer, subsession);
  } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
             strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
    // QuickTime generic RTP format, as described in
    // http://developer.apple.com/quicktime/icefloe/dispatch026.html

    // We can't initialize this stream until we've received the first packet
    // that has QuickTime "sdAtom" information in the header.  So, keep
    // reading packets until we get one:
    unsigned char* packetData; unsigned packetDataLen; float pts;
    QuickTimeGenericRTPSource* qtRTPSource
      = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
    unsigned fourcc;
    do {
      if (!awaitRTPPacket(demuxer, demuxer->video,
                          packetData, packetDataLen, pts)) {
        return;
      }
    } while (!parseQTState_video(qtRTPSource->qtState, fourcc));

    bih->biCompression = sh_video->format = fourcc;
    bih->biWidth = qtRTPSource->qtState.width;
    bih->biHeight = qtRTPSource->qtState.height;

    // Work on unsigned bytes and offsets so that no out-of-range pointer is
    // ever formed and no byte is sign-extended.
    uint8_t *atoms = (uint8_t*)qtRTPSource->qtState.sdAtom;
    unsigned atomsSize = qtRTPSource->qtState.sdAtomSize;
    if (atomsSize > 83)
      bih->biBitCount = atoms[83];

    // Walk the child atoms that follow the fixed 86-byte part of the sample
    // description, looking for codec extradata.
    unsigned offset = 86;
    while (offset < atomsSize && atomsSize - offset > 8) {
      uint8_t *pos = atoms + offset;
      unsigned atomLength = (unsigned)pos[0]<<24 | (unsigned)pos[1]<<16 |
                            (unsigned)pos[2]<<8 | (unsigned)pos[3];
      if (atomLength == 0 || atomLength > atomsSize - offset) break;
      if (((!memcmp(pos+4, "avcC", 4) && fourcc==mmioFOURCC('a','v','c','1')) ||
           !memcmp(pos+4, "esds", 4) ||
           (!memcmp(pos+4, "SMI ", 4) && fourcc==mmioFOURCC('S','V','Q','3'))) &&
          atomLength > 8) {
        sh_video->bih = bih =
            insertVideoExtradata(bih, pos+8, atomLength-8);
        break;
      }
      offset += atomLength;
    }
    needVideoFrameRate(demuxer, subsession);
  } else {
    fprintf(stderr,
            "Unknown MPlayer format code for MIME type \"video/%s\"\n",
            subsession->codecName());
  }
}

// Sets up MPlayer's audio stream header for an RTP audio subsession.
//
// demuxer:    demuxer whose audio stream ("demuxer->audio") is initialized.
// subsession: LIVE555 subsession; its codecName() selects the format.
// flags:      result parameter; always set to 0 by this function.
//
// A WAVEFORMATEX is allocated and attached to a new sh_audio_t, then filled
// in according to the subsession's MIME type.  For the MPEG-4 types the
// parsed "config" data is stored in sh_audio->codecdata.  For the QuickTime
// formats the function returns early, leaving the format unset, when no
// further RTP packet can be read.
void rtpCodecInitialize_audio(demuxer_t* demuxer,
                              MediaSubsession* subsession,
                              unsigned& flags) {
  flags = 0;
  // Create a dummy audio stream header
  // to make the main MPlayer code happy:
  sh_audio_t* sh_audio = new_sh_audio(demuxer,0, NULL);
  WAVEFORMATEX* wf = (WAVEFORMATEX*)calloc(1,sizeof(WAVEFORMATEX));
  if (wf == NULL) {
    // Nothing below can work without the header; bail out instead of
    // dereferencing a NULL pointer.
    fprintf(stderr, "demux_rtp: Failed to allocate the audio stream header\n");
    return;
  }
  sh_audio->wf = wf;
  demux_stream_t* d_audio = demuxer->audio;
  d_audio->sh = sh_audio; sh_audio->ds = d_audio;
  d_audio->id = sh_audio->aid;

  wf->nChannels = subsession->numChannels();

  // Map known audio MIME types to the WAVEFORMATEX parameters
  // that this program uses.  (Note that not all types need all
  // of the parameters to be set.)
  wf->nSamplesPerSec
    = subsession->rtpSource()->timestampFrequency(); // by default
  if (strcmp(subsession->codecName(), "MPA") == 0 ||
      strcmp(subsession->codecName(), "MPA-ROBUST") == 0 ||
      strcmp(subsession->codecName(), "X-MP3-DRAFT-00") == 0) {
    wf->wFormatTag = sh_audio->format = 0x55;
    // Note: 0x55 is for layer III, but should work for I,II also
    wf->nSamplesPerSec = 0; // sample rate is deduced from the data
  } else if (strcmp(subsession->codecName(), "AC3") == 0) {
    wf->wFormatTag = sh_audio->format = 0x2000;
    wf->nSamplesPerSec = 0; // sample rate is deduced from the data
  } else if (strcmp(subsession->codecName(), "L16") == 0) {
    wf->wFormatTag = sh_audio->format = 0x736f7774; // "twos"
    wf->nBlockAlign = 1;
    wf->wBitsPerSample = 16;
    wf->cbSize = 0;
  } else if (strcmp(subsession->codecName(), "L8") == 0) {
    wf->wFormatTag = sh_audio->format = 0x20776172; // "raw "
    wf->nBlockAlign = 1;
    wf->wBitsPerSample = 8;
    wf->cbSize = 0;
  } else if (strcmp(subsession->codecName(), "PCMU") == 0) {
    wf->wFormatTag = sh_audio->format = 0x7;
    wf->nAvgBytesPerSec = 8000;
    wf->nBlockAlign = 1;
    wf->wBitsPerSample = 8;
    wf->cbSize = 0;
  } else if (strcmp(subsession->codecName(), "PCMA") == 0) {
    wf->wFormatTag = sh_audio->format = 0x6;
    wf->nAvgBytesPerSec = 8000;
    wf->nBlockAlign = 1;
    wf->wBitsPerSample = 8;
    wf->cbSize = 0;
  } else if (strcmp(subsession->codecName(), "AMR") == 0) {
    wf->wFormatTag = sh_audio->format = mmioFOURCC('s','a','m','r');
  } else if (strcmp(subsession->codecName(), "AMR-WB") == 0) {
    wf->wFormatTag = sh_audio->format = mmioFOURCC('s','a','w','b');
  } else if (strcmp(subsession->codecName(), "GSM") == 0) {
    wf->wFormatTag = sh_audio->format = mmioFOURCC('a','g','s','m');
    wf->nAvgBytesPerSec = 1650;
    wf->nBlockAlign = 33;
    wf->wBitsPerSample = 16;
    wf->cbSize = 0;
  } else if (strcmp(subsession->codecName(), "QCELP") == 0) {
    wf->wFormatTag = sh_audio->format = mmioFOURCC('Q','c','l','p');
    wf->nAvgBytesPerSec = 1750;
    wf->nBlockAlign = 35;
    wf->wBitsPerSample = 16;
    wf->cbSize = 0;
  } else if (strcmp(subsession->codecName(), "MP4A-LATM") == 0) {
    wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
    // For the codec to work correctly, it needs "AudioSpecificConfig"
    // data, which is parsed from the "StreamMuxConfig" string that
    // was present (hopefully) in the SDP description:
    unsigned codecdata_len;
    sh_audio->codecdata
      = parseStreamMuxConfigStr(subsession->fmtp_config(),
                                codecdata_len);
    sh_audio->codecdata_len = codecdata_len;
    //faad doesn't understand LATM's data length field, so omit it
    ((MPEG4LATMAudioRTPSource*)subsession->rtpSource())->omitLATMDataLengthField();
  } else if (strcmp(subsession->codecName(), "MPEG4-GENERIC") == 0) {
    wf->wFormatTag = sh_audio->format = mmioFOURCC('m','p','4','a');
    // For the codec to work correctly, it needs "AudioSpecificConfig"
    // data, which was present (hopefully) in the SDP description:
    unsigned codecdata_len;
    sh_audio->codecdata
      = parseGeneralConfigStr(subsession->fmtp_config(),
                              codecdata_len);
    sh_audio->codecdata_len = codecdata_len;
  } else if (strcmp(subsession->codecName(), "X-QT") == 0 ||
             strcmp(subsession->codecName(), "X-QUICKTIME") == 0) {
    // QuickTime generic RTP format, as described in
    // http://developer.apple.com/quicktime/icefloe/dispatch026.html

    // We can't initialize this stream until we've received the first packet
    // that has QuickTime "sdAtom" information in the header.  So, keep
    // reading packets until we get one:
    unsigned char* packetData; unsigned packetDataLen; float pts;
    QuickTimeGenericRTPSource* qtRTPSource
      = (QuickTimeGenericRTPSource*)(subsession->rtpSource());
    unsigned fourcc, numChannels;
    do {
      if (!awaitRTPPacket(demuxer, demuxer->audio,
                          packetData, packetDataLen, pts)) {
        return;
      }
    } while (!parseQTState_audio(qtRTPSource->qtState, fourcc, numChannels));

    wf->wFormatTag = sh_audio->format = fourcc;
    wf->nChannels = numChannels;

    // Read the atom as unsigned bytes: "sdAtom" is a plain char array, and
    // with a signed char any byte >= 0x80 (e.g. the 0xAC of 44100 Hz) would
    // be sign-extended and corrupt the assembled value.
    uint8_t *atoms = (uint8_t*)qtRTPSource->qtState.sdAtom;
    unsigned atomsSize = qtRTPSource->qtState.sdAtomSize;
    if (atomsSize > 33) {
      wf->wBitsPerSample = atoms[27];
      wf->nSamplesPerSec = (unsigned)atoms[32]<<8 | atoms[33];
    }

    // Walk the child atoms that follow the fixed 52-byte part of the sample
    // description, using offsets so no out-of-range pointer is ever formed.
    unsigned offset = 52;
    while (offset < atomsSize && atomsSize - offset > 8) {
      uint8_t *pos = atoms + offset;
      unsigned atomLength = (unsigned)pos[0]<<24 | (unsigned)pos[1]<<16 |
                            (unsigned)pos[2]<<8 | (unsigned)pos[3];
      if (atomLength == 0 || atomLength > atomsSize - offset) break;
      if (!memcmp(pos+4, "wave", 4) && fourcc==mmioFOURCC('Q','D','M','2') &&
          atomLength > 8 &&
          atomLength <= INT_MAX) {
        // QDM2 needs the contents of the "wave" atom as codec data.
        sh_audio->codecdata = (unsigned char*) malloc(atomLength-8);
        if (sh_audio->codecdata) {
          memcpy(sh_audio->codecdata, pos+8, atomLength-8);
          sh_audio->codecdata_len = atomLength-8;
        }
        break;
      }
      offset += atomLength;
    }
  } else {
    fprintf(stderr,
            "Unknown MPlayer format code for MIME type \"audio/%s\"\n",
            subsession->codecName());
  }
}

// Determines the frame rate of the demuxer's video stream and stores it in
// the stream header's "fps" and "frametime" fields.
//
// Nothing is done when the user forced a rate with "-fps".  Otherwise the
// subsession's own rate (truncated to an integer) is used if it is non-zero;
// failing that, up to 300 packets are read and the rate is guessed from the
// difference between successive presentation timestamps.  A guess is
// accepted once it equals the highest guess seen so far.
static void needVideoFrameRate(demuxer_t* demuxer,
                               MediaSubsession* subsession) {
  // For some codecs, MPlayer's decoding software can't (or refuses to :-)
  // figure out the frame rate by itself, so (unless the user specifies
  // it manually, using "-fps") we figure it out ourselves here, using the
  // presentation timestamps in successive packets,
  extern double force_fps; if (force_fps != 0.0) return; // user used "-fps"

  demux_stream_t* d_video = demuxer->video;
  sh_video_t* sh_video = (sh_video_t*)(d_video->sh);

  // If we already know the subsession's video frame rate, use it:
  int fps = (int)(subsession->videoFPS());
  if (fps != 0) {
    sh_video->fps = fps;
    sh_video->frametime = 1.0f/fps;
    return;
  }

  // Keep looking at incoming frames until we see two with different,
  // non-zero "pts" timestamps:
  unsigned char* packetData; unsigned packetDataLen;
  float lastPTS = 0.0, curPTS;
  unsigned const maxNumFramesToWaitFor = 300;
  int lastfps = 0;
  for (unsigned i = 0; i < maxNumFramesToWaitFor; ++i) {
    if (!awaitRTPPacket(demuxer, d_video, packetData, packetDataLen, curPTS)) {
      break;
    }

    if (curPTS != lastPTS && lastPTS != 0.0) {
      // Use the difference between these two "pts"s to guess the frame rate.
      // (should really check that there were no missing frames inbetween)#####
      // Guess the frame rate as an integer.  If it's not, use "-fps" instead.
      double guess = 1/fabs(curPTS-lastPTS) + 0.5; // rounding
      // Ignore guesses that round to 0 (they would match the initial
      // "lastfps" and yield a division by zero below) or that do not fit
      // into an int (timestamps that are almost identical).
      if (guess >= 1.0 && guess < (double)INT_MAX) {
        fps = (int)guess;
        if (fps == lastfps) {
          fprintf(stderr, "demux_rtp: Guessed the video frame rate as %d frames-per-second.\n\t(If this is wrong, use the \"-fps <frame-rate>\" option instead.)\n", fps);
          sh_video->fps = fps;
          sh_video->frametime = 1.0f/fps;
          return;
        }
        if (fps > lastfps) lastfps = fps;
      }
    }
    lastPTS = curPTS;
  }
  fprintf(stderr, "demux_rtp: Failed to guess the video frame rate\n");
}

// Extracts the video fourcc from a QuickTime 'sample description' atom.
//
// qtState: QuickTime state whose "sdAtom"/"sdAtomSize" fields are examined.
// fourcc:  result parameter; receives the four bytes at offset 4 of the
//          atom (the atom's name), copied verbatim in memory order.
// Returns False, leaving fourcc untouched, when there is no atom or it is
// shorter than 8 bytes; True otherwise.
static Boolean
parseQTState_video(QuickTimeGenericRTPSource::QTState const& qtState,
                   unsigned& fourcc) {
  // qtState's "sdAtom" field is supposed to contain a QuickTime video
  // 'sample description' atom.  This atom's name is the 'fourcc' that we want:
  char const* sdAtom = qtState.sdAtom;
  if (sdAtom == NULL || qtState.sdAtomSize < 2*4) return False;

  // memcpy() rather than a pointer cast: the atom is a byte buffer with no
  // alignment guarantee, and reading it through an "unsigned*" would also
  // violate the aliasing rules.
  memcpy(&fourcc, &sdAtom[4], sizeof(fourcc));
  return True;
}

// Extracts the audio fourcc and channel count from a QuickTime
// 'sample description' atom.
//
// qtState:     QuickTime state whose "sdAtom"/"sdAtomSize" fields are
//              examined.
// fourcc:      result parameter; receives the four bytes at offset 4 of the
//              atom (the atom's name), copied verbatim in memory order.
// numChannels: result parameter; receives the big-endian 16-bit value stored
//              at byte offset 24 of the atom.
// Returns False, leaving both results untouched, when there is no atom or it
// is shorter than 28 bytes; True otherwise.
static Boolean
parseQTState_audio(QuickTimeGenericRTPSource::QTState const& qtState,
                   unsigned& fourcc, unsigned& numChannels) {
  // qtState's "sdAtom" field is supposed to contain a QuickTime audio
  // 'sample description' atom.  This atom's name is the 'fourcc' that we want.
  // Also, the top half of the 5th word following the atom name should
  // contain the number of channels ("numChannels") that we want:
  char const* sdAtom = qtState.sdAtom;
  if (sdAtom == NULL || qtState.sdAtomSize < 7*4) return False;

  // memcpy() rather than a pointer cast: the atom is a byte buffer with no
  // alignment guarantee, and reading it through an "unsigned*" would also
  // violate the aliasing rules.
  memcpy(&fourcc, &sdAtom[4], sizeof(fourcc));

  // Assemble the channel count from unsigned bytes so that a byte >= 0x80
  // is not sign-extended where plain char is signed.
  unsigned char const* word7Ptr = (unsigned char const*)&sdAtom[6*4];
  numChannels = ((unsigned)word7Ptr[0]<<8)|(unsigned)word7Ptr[1];
  return True;
}