splitstream - uue doesn't work.

joe666boxer@y... joe666boxer at y...
Sun, 14 Oct 2001 04:25:49 -0000


Apparently, Yahoo processes the uuencoded attachment and inserts all
sorts of garbage into it.


Here is the code:

It should compile with VC++. It should also be very portable.

#define WIN32_LEAN_AND_MEAN	// Exclude rarely-used stuff from Windows headers
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <io.h>

/*
 * Record type codes. parse_chunk() compares these against byte 3 of each
 * 16-byte record header. NON_EMPTY_ID is a non-audio/video record type that
 * nevertheless carries payload bytes which must be skipped over.
 */
#define VIDEO_ID	0xE0
#define AUDIO_ID	0xC0
#define NON_EMPTY_ID	0x03
/* Expected sum of the first 8 header bytes; checked in check_video_sequence(). */
#define CHECKSUM	0x3FC
/* NOTE(review): not referenced anywhere in the code visible here. */
#define MAGIC_SIGNATURE 0x91231ebc
/* Size in bytes of one chunk (0x20000 = 128 KB); main() reads the input in units of this. */
#define CHUNK_SIZE	(0x20000)
/* NOTE(review): not referenced anywhere in the code visible here. */
#define min(a, b)	((a) <= (b) ? (a) : (b))
/* Non-zero enables extra diagnostics on stderr; nothing visible here ever sets it. */
int verbose = 0;

/* When defined, parse_chunk() scans a mismatching audio record for the next audio header. */
#define SCAN_TO_NEXT_HEADER 1
/* When defined, check_audio_sequence()/check_video_sequence() actually verify sequence values. */
#define CHECK_SEQ

/*

Ver 0.1 first release

DONE:
- skip chunks with bad seq entirely.
- better video header location (thanks to displaystream)
- non-audio/video record with payload! (type = 0x03) => fixes "audio
failure" and "failure" problems
- audio seq matching
- dump seq mismatched chunks
- audio start from the beginning of first chunk
- read from standard in

TODO:
- large seq matching

WEIRDNESS:
- Why are some chunks duplicated? (1 case in 1.2 GB file)
- Why are some chunks near power of 2 boundaries (2048, 4096, 8192,
etc) out of sequence?
- Why are there lots of out of sequence chunks at the end of the stream?
*/

/*
 * Description of one record inside a chunk, filled in by parse_chunk().
 *
 * The typedef makes the bare name `TyStreamHeader` (used by the sequence
 * helpers below) valid in C as well as in C++; `struct TyStreamHeader`
 * keeps working unchanged.
 */
typedef struct TyStreamHeader
{
	int	type;		/* record type byte (header[3]): VIDEO_ID, AUDIO_ID, NON_EMPTY_ID, ... */
	int	PESHeader;	/* non-zero when parse_chunk() flagged the record as a PES header */
	int	UnRecognized;	/* NOTE(review): not referenced by the code visible here */
	int	size;		/* payload length in bytes, decoded from header[0..2] */
	unsigned char	*header;	/* start of the 16-byte record header inside the chunk buffer */
	unsigned char	*data;		/* start of the record payload inside the chunk buffer */
} TyStreamHeader;

/*
 * Forward declaration of the per-chunk parser (defined further down).
 * Each output stream may be NULL, in which case nothing is written to it.
 */
static void parse_chunk
(
FILE	*audio_out_fd,	/* receives audio payload, or NULL */
FILE	*video_out_fd,	/* receives video payload, or NULL */
FILE	*bad_chunk_out_fd,	/* receives whole chunks that fail sequence checks, or NULL */
unsigned char	*buf,	/* one chunk of CHUNK_SIZE bytes */
unsigned long	chunk_num	/* chunk index, used only in diagnostics */
);

/* Program entry point (defined further down). */
int
main(int argc, char *argv[]);

/*
=======================================================================================================================
=======================================================================================================================
*/

/*
 * Return the 24-bit value stored most-significant-byte first in bytes
 * 5, 6 and 7 of the record header.
 */
static unsigned long int getCurrentSequence(TyStreamHeader *tys)
{
	const unsigned char	*bytes = tys->header + 5;
	unsigned long int	seq = 0;
	int	n;

	/* Fold the three bytes in, high byte first. */
	for (n = 0; n < 3; n++)
	{
		seq = (seq << 8) | (unsigned long int) bytes[n];
	}
	return seq;
}

/*
 * Check that an audio record continues the running audio sequence.
 *
 * PES-header records are never checked. Otherwise (and only when CHECK_SEQ
 * is defined) the record's sequence value is compared with the predicted
 * one; the prediction is (re)started when nothing has been predicted yet or
 * when a zero-size record carries the value 0x2D1C40. On a match the
 * prediction advances by the record's payload size.
 *
 * audio_tys  - record to check
 * chunk_num  - chunk index, for diagnostics only
 * record_num - record index inside the chunk, for diagnostics only
 *
 * Returns 0 when the record is accepted, 1 when the sequence does not match.
 */
static int check_audio_sequence(TyStreamHeader *audio_tys, unsigned long chunk_num, int record_num)
{
	/* Sequence value expected for the next record; 0 means "not started". */
	static unsigned long int	predict_audio_sequence = 0;

	if (audio_tys->PESHeader) return 0; /* no matching on PES Headers */
#ifdef CHECK_SEQ
	unsigned long int current_seq = getCurrentSequence(audio_tys);

	/* All unsigned long values are printed with %lu/%lx (was %d/%x: undefined behaviour). */
	if (verbose)
	{
		fprintf(stderr, "chunk %lu record %d seq %lx pred %lx\n",
			chunk_num, record_num, current_seq, predict_audio_sequence);
	}

	if (!predict_audio_sequence || (current_seq == 0x2D1C40 && audio_tys->size == 0))
	{
		/* start/restart */
		predict_audio_sequence = current_seq;
	}

	if (audio_tys->size == 0 && current_seq != 0x2D1C40)
	{
		fprintf(stderr, "hmpf: audio sequence not 0x2D1C40 at chunk %lu record %d\n",
			chunk_num, record_num);
	}

	if (current_seq != predict_audio_sequence)
	{
		fprintf(stderr, "Audio sequence number bogus %lx!=%lx at chunk %lu record %d\n",
			predict_audio_sequence, current_seq, chunk_num, record_num);
		return 1; /* not matched */
	}
	predict_audio_sequence += audio_tys->size;
#endif
	return 0; /* matched */
}

/*
 * Check that a video record continues the running video sequence.
 *
 * PES-header records are never checked. Otherwise (and only when CHECK_SEQ
 * is defined):
 *  - the prediction is (re)started when nothing has been predicted yet and
 *    the value is >= 0x200000, or when the value matches the 0x205040
 *    pattern under mask 0xF0F0F0;
 *  - records whose header[4] exceeds 0x30 are validated by summing the first
 *    8 header bytes against CHECKSUM instead of by sequence value;
 *  - other records with header[5] >= 0x20 must match the prediction;
 *  - the prediction advances by the payload size for data records
 *    (header[5] >= 0x20) and for records whose header[2] is 0x42 or 0x8c.
 *
 * video_tys  - record to check
 * chunk_num  - chunk index, for diagnostics only
 * record_num - record index inside the chunk, for diagnostics only
 *
 * Returns 0 when the record is accepted, 1 on checksum or sequence mismatch.
 */
static int check_video_sequence(TyStreamHeader *video_tys, unsigned long chunk_num, int record_num)
{
	/* Sequence value expected for the next record; 0 means "not started". */
	static unsigned long int	predict_video_sequence = 0;

	if (video_tys->PESHeader) return 0; /* no matching on PES Headers */
#ifdef CHECK_SEQ
	unsigned long int current_seq = getCurrentSequence(video_tys);

	/* All unsigned long values are printed with %lu/%lx (was %d/%x: undefined behaviour). */
	if (verbose)
	{
		fprintf(stderr, "chunk %lu record %d seq %lx pred %lx\n",
			chunk_num, record_num, current_seq, predict_video_sequence);
	}

	if ((!predict_video_sequence && current_seq >= 0x200000)
	||  (current_seq & 0xF0F0F0) == 0x205040)
	{
		/* start/restart */
		predict_video_sequence = current_seq;
	}

	if (video_tys->header[4] > 0x30)
	{
		int k, chksum = 0;

		for (k = 0; k < 8; k++) chksum += video_tys->header[k];
		if (chksum != CHECKSUM)
		{
			/* The original printed the chunk number under the label "record". */
			fprintf(stderr, "Checksum bogus %x!=%x at chunk %lu record %d\n",
				(unsigned int) chksum, (unsigned int) CHECKSUM, chunk_num, record_num);
			return 1;
		}
	}
	else if (current_seq != predict_video_sequence && video_tys->header[5] >= 0x20)
	{
		fprintf(stderr, "Video Sequence number bogus %lx!=%lx at chunk %lu record %d\n",
			predict_video_sequence, current_seq, chunk_num, record_num);
		return 1; /* not matched */
	}

	if (video_tys->header[5] >= 0x20	/* data */
	||  video_tys->header[2] == 0x42	/* marker */
	||  video_tys->header[2] == 0x8c)	/* frame beginning */
	{
		predict_video_sequence += video_tys->size;
	}
#endif
	return 0; /* matched */
}

static void parse_chunk(FILE *audio_out_fd, FILE *video_out_fd, FILE
*bad_chunk_out_fd, 
unsigned char *buf, unsigned long chunk_num)
{
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
struct TyStreamHeader	TyStream[0xff];
int	num_recs = (int) buf[0] /* +(((int)buf[1]) << 8) */ ;
int	i,j;
static int	foundfirstframe = 0;
int	first_video = -1;
int	first_audio = -1;
int	last_unrecognized = 0;
static int	expect_more = 0;
int	header_pos;
int	data_pos;
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
static unsigned char audio_header[3] = {0xFF, 0xFD, 0xA8};

if(buf[0] == 0xF5 && buf[1] == 0x46 && buf[2] == 0x7a && buf[3] == 0xbd)
{
fprintf(stderr, "Bad chunk (header?) %d, skipping\n", chunk_num);
return;
}

if (buf[1] != 0x00)
{
fprintf(stderr, "Warning: second byte in chunk %d is non-zero
%x:%x\n",chunk_num,
(unsigned int) buf[0],(unsigned int)buf[1]);
}

if(verbose)
{
fprintf(stderr, "Records = %d\n", num_recs);
}

// first pass: fill in TyStream array and find the first record to be
dumped.
data_pos = num_recs*16+4;
for(i=0;i<num_recs;i++)
{
header_pos = i*16+4;
TyStream[i].header = buf + header_pos;
TyStream[i].data = buf + data_pos;
TyStream[i].type = buf[header_pos + 3];
TyStream[i].PESHeader = 0;
/*
if((buf[header_pos + 1] == 0x01) && (buf[header_pos + 2] == 0x03 ||
buf[header_pos + 2] == 0x06))
*/
if ((TyStream[i].type == AUDIO_ID && TyStream[i].header[2] == 0x03) ||
(TyStream[i].type == VIDEO_ID && TyStream[i].header[2] == 0x06))
{
TyStream[i].PESHeader = 1;
}
if (TyStream[i].type == AUDIO_ID || TyStream[i].type == VIDEO_ID)
{
TyStream[i].size = (((unsigned int) buf[header_pos]) << 12) | (((unsigned
int) buf[header_pos + 1]) << 4) | (((unsigned int) buf[header_pos +
2]) >> 4);
data_pos += TyStream[i].size;
if(!foundfirstframe)
{
if(TyStream[i].type == VIDEO_ID)
{
if((buf[header_pos + 2] & 0xF) == 0x7)
{	/* video header record */
foundfirstframe = 1;
first_video = i;
first_audio = last_unrecognized;
if (verbose) fprintf(stderr, "first video %d first audio %d\n",first_video,
first_audio);
}
}
}
if (foundfirstframe) //note: this is not the same as "else"
{
if (TyStream[i].type == VIDEO_ID)
{
if (check_video_sequence(&(TyStream[i]), chunk_num, i))
{
if (bad_chunk_out_fd != NULL)
fwrite(buf, sizeof(char), CHUNK_SIZE, bad_chunk_out_fd);
fprintf(stderr, "Chunk %d has bad sequence, ignoring chunk\n", chunk_num);
return;
}
}
else if (TyStream[i].type == AUDIO_ID)
{
if (check_audio_sequence(&(TyStream[i]), chunk_num, i))
{
if (bad_chunk_out_fd != NULL)
fwrite(buf, sizeof(char), CHUNK_SIZE, bad_chunk_out_fd);
fprintf(stderr, "Chunk %d has bad sequence, ignoring chunk\n", chunk_num);
return;
}
}
}
}
else
{
last_unrecognized = i;
if (TyStream[i].type == NON_EMPTY_ID)
{
TyStream[i].size = (((unsigned int) buf[header_pos]) << 12) | (((unsigned
int) buf[header_pos + 1]) << 4) | (((unsigned int) buf[header_pos +
2]) >> 4);
data_pos += TyStream[i].size;
}
}
}
if(!foundfirstframe)
{
fprintf(stderr, "No first frame, skipping block\n");
return;
}
for(i=0;i<num_recs;i++)
{
if (TyStream[i].type == AUDIO_ID)
{
if (/*i >= first_audio && */ !TyStream[i].PESHeader)
{
if(!expect_more && TyStream[i].data[0] != 0xFF && TyStream[i].data[1] != 0xFD 
&& TyStream[i].data[2] != 0xA8)
{
if(TyStream[i].size != 0)
{
fprintf
(
stderr,
"audio failure %d:0x%x 0x%x:0x%x:0x%x chunk %d\n",
i,
TyStream[i].size,
TyStream[i].data[0],
TyStream[i].data[1],
TyStream[i].data[2],
chunk_num
);
}
#ifdef SCAN_TO_NEXT_HEADER
for(j=0;j<TyStream[i].size;j++)
{
if (TyStream[i].data[j] == 0xFF && TyStream[i].data[j+1] == 0xFD && 
TyStream[i].data[j+2] == 0xA8)
{
fprintf(stderr, "found new header at offset %d\n",j);
if (audio_out_fd != NULL) 
fwrite(&(TyStream[i].data[j]), sizeof(char), TyStream[i].size-j, audio_out_fd);
}
}
#endif
}
else
{
if(TyStream[i].data[0] == 0xFF && TyStream[i].data[1] == 0xFD && 
TyStream[i].data[2] == 0xA8)
{
if(TyStream[i].size < 0x360)
{
expect_more = 1;
}
else expect_more = 0;
}
if (audio_out_fd != NULL) 
fwrite(TyStream[i].data, sizeof(char), TyStream[i].size, audio_out_fd);
}
}
}
else if (TyStream[i].type == VIDEO_ID)
{
if (i >= first_video && !TyStream[i].PESHeader)
{
if (video_out_fd != NULL) 
fwrite(TyStream[i].data, sizeof(char), TyStream[i].size, video_out_fd);
}

}
else
{
// unknown record type; ignore it
}
}
}

/*
=======================================================================================================================
}
=======================================================================================================================
*/
/*
 * Print the command-line synopsis to stdout and terminate the process with
 * exit status 0. Never returns.
 *
 * The string literals that had been split across lines by line wrapping
 * (which does not compile) are joined again; the text itself is unchanged.
 */
void usage(void)
{
	fprintf(stdout, "Usage: \n");
	fprintf(stdout, "splitstream stream.ty program.m2a program.m2v badchunk.ty\n\n");
	fprintf(stdout, "stream.ty can be '-' for stdin\n");
	fprintf(stdout, "other arguments can be '+' to avoid creating those\n\n");
	fprintf(stdout, "stream.ty is the unprocessed stream\n");
	fprintf(stdout, "program.m2a is the MPEG-1 Layer 2 audio file to be written\n");
	fprintf(stdout, "program.m2v is the MPEG-2 video file to be written\n");
	fprintf(stdout, "badchunk.ty is all the chunks that could not be processed\n");
	exit(0);
}

/*
=======================================================================================================================
=======================================================================================================================
*/
/*
 * Open one output file for binary writing.
 * A path of "+" means "do not create this output" and yields NULL.
 * Any failure to open is fatal (message on stderr, exit status 1).
 */
static FILE *ty_open_output(const char *path)
{
	FILE	*fp;

	if (!strcmp(path, "+")) return NULL;

	fp = fopen(path, "wb");
	if (fp == NULL)
	{
		fprintf(stderr, "could not open %s\n", path);
		exit(1);
	}
	return fp;
}

/*
 * splitstream stream.ty program.m2a program.m2v badchunk.ty
 *
 * Reads the input ("-" = stdin) in CHUNK_SIZE-byte chunks and hands each
 * complete chunk to parse_chunk(). Output arguments given as "+" are not
 * created. Returns 0; exits with status 1 when a file cannot be opened or
 * the chunk buffer cannot be allocated.
 *
 * Changes relative to the original:
 *  - the bad-chunk file's open result is now checked (the original re-tested
 *    audio_out_fp, which aborted valid runs with audio disabled and missed
 *    real failures);
 *  - the read loop is driven by fread()'s return value, so a read error ends
 *    the loop instead of spinning forever on !feof();
 *  - the malloc() result is checked and the buffer is released;
 *  - the byte counter is a size_t and no longer named `read`;
 *  - the binary-mode switch for stdin is compiled only where _O_BINARY exists.
 */
int main(int argc, char *argv[])
{
	FILE	*in_fp, *video_out_fp, *audio_out_fp, *bad_chunk_out_fp;
	unsigned char	*buf;
	size_t	filled = 0;	/* bytes of the current chunk already in buf */
	size_t	got;
	unsigned long	chunk_count = 0;

	if (argc != 5) usage();

	if (!strcmp(argv[1], "-"))
	{
#ifdef _O_BINARY
		/* Keep the C runtime from translating line endings / Ctrl-Z on stdin. */
		setmode(fileno(stdin), _O_BINARY);
#endif
		in_fp = stdin;
	}
	else
	{
		in_fp = fopen(argv[1], "rb");
	}
	if (in_fp == NULL)
	{
		fprintf(stderr, "could not open %s\n", argv[1]);
		exit(1);
	}

	audio_out_fp = ty_open_output(argv[2]);
	video_out_fp = ty_open_output(argv[3]);
	bad_chunk_out_fp = ty_open_output(argv[4]);

	/* The cast is kept so the file still builds as C++ (VC++). */
	buf = (unsigned char *) malloc(CHUNK_SIZE);
	if (buf == NULL)
	{
		fprintf(stderr, "could not allocate %lu byte chunk buffer\n", (unsigned long) CHUNK_SIZE);
		exit(1);
	}

	/* fread() may return short counts (e.g. on pipes); accumulate until a chunk is complete. */
	while ((got = fread(buf + filled, sizeof(char), CHUNK_SIZE - filled, in_fp)) > 0)
	{
		filled += got;
		if (filled == CHUNK_SIZE)
		{
			parse_chunk(audio_out_fp, video_out_fp, bad_chunk_out_fp, buf, chunk_count++);
			filled = 0;
		}
	}

	if (ferror(in_fp)) fprintf(stderr, "read error on %s\n", argv[1]);
	if (filled > 0) fprintf(stderr, "File not a multiple of 128 KB chunks!\n");

	free(buf);
	fclose(in_fp);
	if (audio_out_fp != NULL) fclose(audio_out_fp);
	if (bad_chunk_out_fp != NULL) fclose(bad_chunk_out_fp);
	if (video_out_fp != NULL) fclose(video_out_fp);
	return 0;
}