| /* |
| * ALAC (Apple Lossless Audio Codec) decoder |
| * Copyright (c) 2005 David Hammerton |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| /** |
| * @file libavcodec/alac.c |
| * ALAC (Apple Lossless Audio Codec) decoder |
| * @author 2005 David Hammerton |
| * |
| * For more information on the ALAC format, visit: |
| * http://crazney.net/programs/itunes/alac.html |
| * |
| * Note: This decoder expects a 36- (0x24-)byte QuickTime atom to be |
| * passed through the extradata[_size] fields. This atom is tacked onto |
| * the end of an 'alac' stsd atom and has the following format: |
| * bytes 0-3 atom size (0x24), big-endian |
| * bytes 4-7 atom type ('alac', not the 'alac' tag from start of stsd) |
| * bytes 8-35 data bytes needed by decoder |
| * |
| * Extradata: |
| * 32bit size |
| * 32bit tag (=alac) |
| * 32bit zero? |
| * 32bit max sample per frame |
| * 8bit ?? (zero?) |
| * 8bit sample size |
| * 8bit history mult |
| * 8bit initial history |
| * 8bit kmodifier |
| * 8bit channels? |
| * 16bit ?? |
| * 32bit max coded frame size |
| * 32bit bitrate? |
| * 32bit samplerate |
| */ |
| |
| |
| #include "avcodec.h" |
| #include "bitstream.h" |
| #include "bytestream.h" |
| #include "unary.h" |
| |
| #define ALAC_EXTRADATA_SIZE 36 |
| #define MAX_CHANNELS 2 |
| |
| typedef struct { |
| |
| AVCodecContext *avctx; |
| GetBitContext gb; |
| /* init to 0; first frame decode should initialize from extradata and |
| * set this to 1 */ |
| int context_initialized; |
| |
| int numchannels; |
| int bytespersample; |
| |
| /* buffers */ |
| int32_t *predicterror_buffer[MAX_CHANNELS]; |
| |
| int32_t *outputsamples_buffer[MAX_CHANNELS]; |
| |
| /* stuff from setinfo */ |
| uint32_t setinfo_max_samples_per_frame; /* 0x1000 = 4096 */ /* max samples per frame? */ |
| uint8_t setinfo_sample_size; /* 0x10 */ |
| uint8_t setinfo_rice_historymult; /* 0x28 */ |
| uint8_t setinfo_rice_initialhistory; /* 0x0a */ |
| uint8_t setinfo_rice_kmodifier; /* 0x0e */ |
| /* end setinfo stuff */ |
| |
| } ALACContext; |
| |
| static void allocate_buffers(ALACContext *alac) |
| { |
| int chan; |
| for (chan = 0; chan < MAX_CHANNELS; chan++) { |
| alac->predicterror_buffer[chan] = |
| av_malloc(alac->setinfo_max_samples_per_frame * 4); |
| |
| alac->outputsamples_buffer[chan] = |
| av_malloc(alac->setinfo_max_samples_per_frame * 4); |
| } |
| } |
| |
| static int alac_set_info(ALACContext *alac) |
| { |
| const unsigned char *ptr = alac->avctx->extradata; |
| |
| ptr += 4; /* size */ |
| ptr += 4; /* alac */ |
| ptr += 4; /* 0 ? */ |
| |
| if(AV_RB32(ptr) >= UINT_MAX/4){ |
| av_log(alac->avctx, AV_LOG_ERROR, "setinfo_max_samples_per_frame too large\n"); |
| return -1; |
| } |
| |
| /* buffer size / 2 ? */ |
| alac->setinfo_max_samples_per_frame = bytestream_get_be32(&ptr); |
| ptr++; /* ??? */ |
| alac->setinfo_sample_size = *ptr++; |
| if (alac->setinfo_sample_size > 32) { |
| av_log(alac->avctx, AV_LOG_ERROR, "setinfo_sample_size too large\n"); |
| return -1; |
| } |
| alac->setinfo_rice_historymult = *ptr++; |
| alac->setinfo_rice_initialhistory = *ptr++; |
| alac->setinfo_rice_kmodifier = *ptr++; |
| ptr++; /* channels? */ |
| bytestream_get_be16(&ptr); /* ??? */ |
| bytestream_get_be32(&ptr); /* max coded frame size */ |
| bytestream_get_be32(&ptr); /* bitrate ? */ |
| bytestream_get_be32(&ptr); /* samplerate */ |
| |
| allocate_buffers(alac); |
| |
| return 0; |
| } |
| |
| static inline int decode_scalar(GetBitContext *gb, int k, int limit, int readsamplesize){ |
| /* read x - number of 1s before 0 represent the rice */ |
| int x = get_unary_0_9(gb); |
| |
| if (x > 8) { /* RICE THRESHOLD */ |
| /* use alternative encoding */ |
| x = get_bits(gb, readsamplesize); |
| } else { |
| if (k >= limit) |
| k = limit; |
| |
| if (k != 1) { |
| int extrabits = show_bits(gb, k); |
| |
| /* multiply x by 2^k - 1, as part of their strange algorithm */ |
| x = (x << k) - x; |
| |
| if (extrabits > 1) { |
| x += extrabits - 1; |
| skip_bits(gb, k); |
| } else |
| skip_bits(gb, k - 1); |
| } |
| } |
| return x; |
| } |
| |
| static void bastardized_rice_decompress(ALACContext *alac, |
| int32_t *output_buffer, |
| int output_size, |
| int readsamplesize, /* arg_10 */ |
| int rice_initialhistory, /* arg424->b */ |
| int rice_kmodifier, /* arg424->d */ |
| int rice_historymult, /* arg424->c */ |
| int rice_kmodifier_mask /* arg424->e */ |
| ) |
| { |
| int output_count; |
| unsigned int history = rice_initialhistory; |
| int sign_modifier = 0; |
| |
| for (output_count = 0; output_count < output_size; output_count++) { |
| int32_t x; |
| int32_t x_modified; |
| int32_t final_val; |
| |
| /* standard rice encoding */ |
| int k; /* size of extra bits */ |
| |
| /* read k, that is bits as is */ |
| k = av_log2((history >> 9) + 3); |
| x= decode_scalar(&alac->gb, k, rice_kmodifier, readsamplesize); |
| |
| x_modified = sign_modifier + x; |
| final_val = (x_modified + 1) / 2; |
| if (x_modified & 1) final_val *= -1; |
| |
| output_buffer[output_count] = final_val; |
| |
| sign_modifier = 0; |
| |
| /* now update the history */ |
| history += x_modified * rice_historymult |
| - ((history * rice_historymult) >> 9); |
| |
| if (x_modified > 0xffff) |
| history = 0xffff; |
| |
| /* special case: there may be compressed blocks of 0 */ |
| if ((history < 128) && (output_count+1 < output_size)) { |
| int k; |
| unsigned int block_size; |
| |
| sign_modifier = 1; |
| |
| k = 7 - av_log2(history) + ((history + 16) >> 6 /* / 64 */); |
| |
| block_size= decode_scalar(&alac->gb, k, rice_kmodifier, 16); |
| |
| if (block_size > 0) { |
| if(block_size >= output_size - output_count){ |
| av_log(alac->avctx, AV_LOG_ERROR, "invalid zero block size of %d %d %d\n", block_size, output_size, output_count); |
| block_size= output_size - output_count - 1; |
| } |
| memset(&output_buffer[output_count+1], 0, block_size * 4); |
| output_count += block_size; |
| } |
| |
| if (block_size > 0xffff) |
| sign_modifier = 0; |
| |
| history = 0; |
| } |
| } |
| } |
| |
| static inline int32_t extend_sign32(int32_t val, int bits) |
| { |
| return (val << (32 - bits)) >> (32 - bits); |
| } |
| |
| static inline int sign_only(int v) |
| { |
| return v ? FFSIGN(v) : 0; |
| } |
| |
| static void predictor_decompress_fir_adapt(int32_t *error_buffer, |
| int32_t *buffer_out, |
| int output_size, |
| int readsamplesize, |
| int16_t *predictor_coef_table, |
| int predictor_coef_num, |
| int predictor_quantitization) |
| { |
| int i; |
| |
| /* first sample always copies */ |
| *buffer_out = *error_buffer; |
| |
| if (!predictor_coef_num) { |
| if (output_size <= 1) |
| return; |
| |
| memcpy(buffer_out+1, error_buffer+1, (output_size-1) * 4); |
| return; |
| } |
| |
| if (predictor_coef_num == 0x1f) { /* 11111 - max value of predictor_coef_num */ |
| /* second-best case scenario for fir decompression, |
| * error describes a small difference from the previous sample only |
| */ |
| if (output_size <= 1) |
| return; |
| for (i = 0; i < output_size - 1; i++) { |
| int32_t prev_value; |
| int32_t error_value; |
| |
| prev_value = buffer_out[i]; |
| error_value = error_buffer[i+1]; |
| buffer_out[i+1] = |
| extend_sign32((prev_value + error_value), readsamplesize); |
| } |
| return; |
| } |
| |
| /* read warm-up samples */ |
| if (predictor_coef_num > 0) |
| for (i = 0; i < predictor_coef_num; i++) { |
| int32_t val; |
| |
| val = buffer_out[i] + error_buffer[i+1]; |
| val = extend_sign32(val, readsamplesize); |
| buffer_out[i+1] = val; |
| } |
| |
| #if 0 |
| /* 4 and 8 are very common cases (the only ones i've seen). these |
| * should be unrolled and optimized |
| */ |
| if (predictor_coef_num == 4) { |
| /* FIXME: optimized general case */ |
| return; |
| } |
| |
| if (predictor_coef_table == 8) { |
| /* FIXME: optimized general case */ |
| return; |
| } |
| #endif |
| |
| /* general case */ |
| if (predictor_coef_num > 0) { |
| for (i = predictor_coef_num + 1; i < output_size; i++) { |
| int j; |
| int sum = 0; |
| int outval; |
| int error_val = error_buffer[i]; |
| |
| for (j = 0; j < predictor_coef_num; j++) { |
| sum += (buffer_out[predictor_coef_num-j] - buffer_out[0]) * |
| predictor_coef_table[j]; |
| } |
| |
| outval = (1 << (predictor_quantitization-1)) + sum; |
| outval = outval >> predictor_quantitization; |
| outval = outval + buffer_out[0] + error_val; |
| outval = extend_sign32(outval, readsamplesize); |
| |
| buffer_out[predictor_coef_num+1] = outval; |
| |
| if (error_val > 0) { |
| int predictor_num = predictor_coef_num - 1; |
| |
| while (predictor_num >= 0 && error_val > 0) { |
| int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num]; |
| int sign = sign_only(val); |
| |
| predictor_coef_table[predictor_num] -= sign; |
| |
| val *= sign; /* absolute value */ |
| |
| error_val -= ((val >> predictor_quantitization) * |
| (predictor_coef_num - predictor_num)); |
| |
| predictor_num--; |
| } |
| } else if (error_val < 0) { |
| int predictor_num = predictor_coef_num - 1; |
| |
| while (predictor_num >= 0 && error_val < 0) { |
| int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num]; |
| int sign = - sign_only(val); |
| |
| predictor_coef_table[predictor_num] -= sign; |
| |
| val *= sign; /* neg value */ |
| |
| error_val -= ((val >> predictor_quantitization) * |
| (predictor_coef_num - predictor_num)); |
| |
| predictor_num--; |
| } |
| } |
| |
| buffer_out++; |
| } |
| } |
| } |
| |
| static void reconstruct_stereo_16(int32_t *buffer[MAX_CHANNELS], |
| int16_t *buffer_out, |
| int numchannels, int numsamples, |
| uint8_t interlacing_shift, |
| uint8_t interlacing_leftweight) |
| { |
| int i; |
| if (numsamples <= 0) |
| return; |
| |
| /* weighted interlacing */ |
| if (interlacing_leftweight) { |
| for (i = 0; i < numsamples; i++) { |
| int32_t a, b; |
| |
| a = buffer[0][i]; |
| b = buffer[1][i]; |
| |
| a -= (b * interlacing_leftweight) >> interlacing_shift; |
| b += a; |
| |
| buffer_out[i*numchannels] = b; |
| buffer_out[i*numchannels + 1] = a; |
| } |
| |
| return; |
| } |
| |
| /* otherwise basic interlacing took place */ |
| for (i = 0; i < numsamples; i++) { |
| int16_t left, right; |
| |
| left = buffer[0][i]; |
| right = buffer[1][i]; |
| |
| buffer_out[i*numchannels] = left; |
| buffer_out[i*numchannels + 1] = right; |
| } |
| } |
| |
| static int alac_decode_frame(AVCodecContext *avctx, |
| void *outbuffer, int *outputsize, |
| const uint8_t *inbuffer, int input_buffer_size) |
| { |
| ALACContext *alac = avctx->priv_data; |
| |
| int channels; |
| unsigned int outputsamples; |
| int hassize; |
| unsigned int readsamplesize; |
| int wasted_bytes; |
| int isnotcompressed; |
| uint8_t interlacing_shift; |
| uint8_t interlacing_leftweight; |
| |
| /* short-circuit null buffers */ |
| if (!inbuffer || !input_buffer_size) |
| return input_buffer_size; |
| |
| /* initialize from the extradata */ |
| if (!alac->context_initialized) { |
| if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) { |
| av_log(avctx, AV_LOG_ERROR, "alac: expected %d extradata bytes\n", |
| ALAC_EXTRADATA_SIZE); |
| return input_buffer_size; |
| } |
| if (alac_set_info(alac)) { |
| av_log(avctx, AV_LOG_ERROR, "alac: set_info failed\n"); |
| return input_buffer_size; |
| } |
| alac->context_initialized = 1; |
| } |
| |
| init_get_bits(&alac->gb, inbuffer, input_buffer_size * 8); |
| |
| channels = get_bits(&alac->gb, 3) + 1; |
| if (channels > MAX_CHANNELS) { |
| av_log(avctx, AV_LOG_ERROR, "channels > %d not supported\n", |
| MAX_CHANNELS); |
| return input_buffer_size; |
| } |
| |
| /* 2^result = something to do with output waiting. |
| * perhaps matters if we read > 1 frame in a pass? |
| */ |
| skip_bits(&alac->gb, 4); |
| |
| skip_bits(&alac->gb, 12); /* unknown, skip 12 bits */ |
| |
| /* the output sample size is stored soon */ |
| hassize = get_bits1(&alac->gb); |
| |
| wasted_bytes = get_bits(&alac->gb, 2); /* unknown ? */ |
| |
| /* whether the frame is compressed */ |
| isnotcompressed = get_bits1(&alac->gb); |
| |
| if (hassize) { |
| /* now read the number of samples as a 32bit integer */ |
| outputsamples = get_bits_long(&alac->gb, 32); |
| if(outputsamples > alac->setinfo_max_samples_per_frame){ |
| av_log(avctx, AV_LOG_ERROR, "outputsamples %d > %d\n", outputsamples, alac->setinfo_max_samples_per_frame); |
| return -1; |
| } |
| } else |
| outputsamples = alac->setinfo_max_samples_per_frame; |
| |
| if(outputsamples > *outputsize / alac->bytespersample){ |
| av_log(avctx, AV_LOG_ERROR, "sample buffer too small\n"); |
| return -1; |
| } |
| |
| *outputsize = outputsamples * alac->bytespersample; |
| readsamplesize = alac->setinfo_sample_size - (wasted_bytes * 8) + channels - 1; |
| if (readsamplesize > MIN_CACHE_BITS) { |
| av_log(avctx, AV_LOG_ERROR, "readsamplesize too big (%d)\n", readsamplesize); |
| return -1; |
| } |
| |
| if (!isnotcompressed) { |
| /* so it is compressed */ |
| int16_t predictor_coef_table[channels][32]; |
| int predictor_coef_num[channels]; |
| int prediction_type[channels]; |
| int prediction_quantitization[channels]; |
| int ricemodifier[channels]; |
| int i, chan; |
| |
| interlacing_shift = get_bits(&alac->gb, 8); |
| interlacing_leftweight = get_bits(&alac->gb, 8); |
| |
| for (chan = 0; chan < channels; chan++) { |
| prediction_type[chan] = get_bits(&alac->gb, 4); |
| prediction_quantitization[chan] = get_bits(&alac->gb, 4); |
| |
| ricemodifier[chan] = get_bits(&alac->gb, 3); |
| predictor_coef_num[chan] = get_bits(&alac->gb, 5); |
| |
| /* read the predictor table */ |
| for (i = 0; i < predictor_coef_num[chan]; i++) |
| predictor_coef_table[chan][i] = (int16_t)get_bits(&alac->gb, 16); |
| } |
| |
| if (wasted_bytes) |
| av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n"); |
| |
| for (chan = 0; chan < channels; chan++) { |
| bastardized_rice_decompress(alac, |
| alac->predicterror_buffer[chan], |
| outputsamples, |
| readsamplesize, |
| alac->setinfo_rice_initialhistory, |
| alac->setinfo_rice_kmodifier, |
| ricemodifier[chan] * alac->setinfo_rice_historymult / 4, |
| (1 << alac->setinfo_rice_kmodifier) - 1); |
| |
| if (prediction_type[chan] == 0) { |
| /* adaptive fir */ |
| predictor_decompress_fir_adapt(alac->predicterror_buffer[chan], |
| alac->outputsamples_buffer[chan], |
| outputsamples, |
| readsamplesize, |
| predictor_coef_table[chan], |
| predictor_coef_num[chan], |
| prediction_quantitization[chan]); |
| } else { |
| av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type[chan]); |
| /* I think the only other prediction type (or perhaps this is |
| * just a boolean?) runs adaptive fir twice.. like: |
| * predictor_decompress_fir_adapt(predictor_error, tempout, ...) |
| * predictor_decompress_fir_adapt(predictor_error, outputsamples ...) |
| * little strange.. |
| */ |
| } |
| } |
| } else { |
| /* not compressed, easy case */ |
| int i, chan; |
| for (i = 0; i < outputsamples; i++) |
| for (chan = 0; chan < channels; chan++) { |
| int32_t audiobits; |
| |
| audiobits = get_bits_long(&alac->gb, alac->setinfo_sample_size); |
| audiobits = extend_sign32(audiobits, alac->setinfo_sample_size); |
| |
| alac->outputsamples_buffer[chan][i] = audiobits; |
| } |
| /* wasted_bytes = 0; */ |
| interlacing_shift = 0; |
| interlacing_leftweight = 0; |
| } |
| if (get_bits(&alac->gb, 3) != 7) |
| av_log(avctx, AV_LOG_ERROR, "Error : Wrong End Of Frame\n"); |
| |
| switch(alac->setinfo_sample_size) { |
| case 16: |
| if (channels == 2) { |
| reconstruct_stereo_16(alac->outputsamples_buffer, |
| (int16_t*)outbuffer, |
| alac->numchannels, |
| outputsamples, |
| interlacing_shift, |
| interlacing_leftweight); |
| } else { |
| int i; |
| for (i = 0; i < outputsamples; i++) { |
| int16_t sample = alac->outputsamples_buffer[0][i]; |
| ((int16_t*)outbuffer)[i * alac->numchannels] = sample; |
| } |
| } |
| break; |
| case 20: |
| case 24: |
| // It is not clear if there exist any encoder that creates 24 bit ALAC |
| // files. iTunes convert 24 bit raw files to 16 bit before encoding. |
| case 32: |
| av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size); |
| break; |
| default: |
| break; |
| } |
| |
| if (input_buffer_size * 8 - get_bits_count(&alac->gb) > 8) |
| av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n", input_buffer_size * 8 - get_bits_count(&alac->gb)); |
| |
| return input_buffer_size; |
| } |
| |
| static av_cold int alac_decode_init(AVCodecContext * avctx) |
| { |
| ALACContext *alac = avctx->priv_data; |
| alac->avctx = avctx; |
| alac->context_initialized = 0; |
| |
| alac->numchannels = alac->avctx->channels; |
| alac->bytespersample = 2 * alac->numchannels; |
| avctx->sample_fmt = SAMPLE_FMT_S16; |
| |
| return 0; |
| } |
| |
| static av_cold int alac_decode_close(AVCodecContext *avctx) |
| { |
| ALACContext *alac = avctx->priv_data; |
| |
| int chan; |
| for (chan = 0; chan < MAX_CHANNELS; chan++) { |
| av_free(alac->predicterror_buffer[chan]); |
| av_free(alac->outputsamples_buffer[chan]); |
| } |
| |
| return 0; |
| } |
| |
| AVCodec alac_decoder = { |
| "alac", |
| CODEC_TYPE_AUDIO, |
| CODEC_ID_ALAC, |
| sizeof(ALACContext), |
| alac_decode_init, |
| NULL, |
| alac_decode_close, |
| alac_decode_frame, |
| .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"), |
| }; |