src/libprojectM/Renderer/SOIL/image_DXT.c - vendor/opensource/projectM - Git at Google

 /*
 	Jonathan Dummer
 	2007-07-31-10.32

 	simple DXT compression / decompression code

 	public domain
 */

 #include "image_DXT.h"
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>

 /*	set this =1 if you want to use the covariance matrix method...
 	which is better than my method of using standard deviations
 	overall, except on the infinitesimal chance that the power
 	method fails for finding the largest eigenvector	*/
 #define USE_COV_MAT	1

 /********* Function Prototypes *********/
 /*
 	Takes a 4x4 block of pixels and compresses it into 8 bytes
 	in DXT1 format (color only, no alpha).  Speed is valued
 	over prettyness, at least for now.
 */
 void compress_DDS_color_block(
 				int channels,
 				const unsigned char *const uncompressed,
 				unsigned char compressed[8] );
 /*
 	Takes a 4x4 block of pixels and compresses the alpha
 	component it into 8 bytes for use in DXT5 DDS files.
 	Speed is valued over prettyness, at least for now.
 */
 void compress_DDS_alpha_block(
 				const unsigned char *const uncompressed,
 				unsigned char compressed[8] );

 /********* Actual Exposed Functions *********/
 /*
 	Compresses an image and writes it to `filename` as a DDS file.
 	An odd channel count (1 or 3) is stored as DXT1, an even
 	channel count (2 or 4) as DXT5.

 	filename	path of the file to create (opened with "wb")
 	width		image width in pixels, must be >= 1
 	height		image height in pixels, must be >= 1
 	channels	bytes per pixel in `data`, 1..4
 	data		the pixels, handed unchanged to the converter

 	Returns 1 on success.  Returns 0 on invalid arguments, when
 	the conversion fails, or when the file cannot be opened,
 	written completely, or closed.
 */
 int
 	save_image_as_DDS
 	(
 		const char *filename,
 		int width, int height, int channels,
 		const unsigned char *const data
 	)
 {
 	/*	variables	*/
 	FILE *fout;
 	unsigned char *DDS_data;
 	DDS_header header;
 	int DDS_size = 0;
 	int use_dxt1;
 	int ok;
 	/*	error check	*/
 	if( (NULL == filename) || (NULL == data) ||
 		(width < 1) || (height < 1) ||
 		(channels < 1) || (channels > 4) )
 	{
 		return 0;
 	}
 	/*	odd channel counts carry no alpha, so DXT1 is enough	*/
 	use_dxt1 = ( (channels & 1) == 1 );
 	/*	Convert the image	*/
 	if( use_dxt1 )
 	{
 		DDS_data = convert_image_to_DXT1( data, width, height, channels, &DDS_size );
 	} else
 	{
 		DDS_data = convert_image_to_DXT5( data, width, height, channels, &DDS_size );
 	}
 	/*	the converters return NULL on failure: nothing to write	*/
 	if( (NULL == DDS_data) || (DDS_size < 1) )
 	{
 		free( DDS_data );
 		return 0;
 	}
 	/*	build the header	*/
 	memset( &header, 0, sizeof( DDS_header ) );
 	header.dwMagic = ('D' << 0) | ('D' << 8) | ('S' << 16) | (' ' << 24);
 	header.dwSize = 124;
 	header.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT | DDSD_LINEARSIZE;
 	header.dwWidth = width;
 	header.dwHeight = height;
 	header.dwPitchOrLinearSize = DDS_size;
 	header.sPixelFormat.dwSize = 32;
 	header.sPixelFormat.dwFlags = DDPF_FOURCC;
 	header.sPixelFormat.dwFourCC =
 		('D' << 0) | ('X' << 8) | ('T' << 16) | ((use_dxt1 ? '1' : '5') << 24);
 	header.sCaps.dwCaps1 = DDSCAPS_TEXTURE;
 	/*	write it out	*/
 	fout = fopen( filename, "wb" );
 	if( NULL == fout )
 	{
 		free( DDS_data );
 		return 0;
 	}
 	ok = ( fwrite( &header, sizeof( DDS_header ), 1, fout ) == 1 );
 	if( ok )
 	{
 		ok = ( fwrite( DDS_data, 1, (size_t)DDS_size, fout ) == (size_t)DDS_size );
 	}
 	/*	fclose flushes buffered data, so its result matters too	*/
 	if( fclose( fout ) != 0 )
 	{
 		ok = 0;
 	}
 	/*	done	*/
 	free( DDS_data );
 	return ok;
 }

 /*
 	Compresses an image into DXT1 data: 8 bytes per 4x4 pixel
 	block, blocks emitted in order of increasing x within each
 	row of blocks, rows of blocks in order of increasing y.

 	uncompressed	row-major pixels, `channels` bytes per pixel and
 			width*channels bytes per row
 	width, height	image size in pixels, both must be >= 1
 	channels	1..4; for 1 or 2 the first byte of each pixel is
 			used for R, G and B alike; any 4th byte is ignored
 	out_size	receives the size in bytes of the returned buffer
 			(0 on failure)

 	Blocks that hang over the right/bottom edge are padded with
 	the first pixel of that block.

 	Returns a malloc()ed buffer the caller must free(), or NULL on
 	invalid arguments or allocation failure.
 */
 unsigned char* convert_image_to_DXT1(
 		const unsigned char *const uncompressed,
 		int width, int height, int channels,
 		int *out_size )
 {
 	unsigned char *compressed;
 	int i, j, x, y;
 	unsigned char ublock[16*3];
 	unsigned char cblock[8];
 	int index = 0, chan_step = 1;
 	/*	error check	*/
 	if( NULL == out_size )
 	{
 		return NULL;
 	}
 	*out_size = 0;
 	if( (width < 1) || (height < 1) ||
 		(NULL == uncompressed) ||
 		(channels < 1) || (channels > 4) )
 	{
 		return NULL;
 	}
 	/*	for channels == 1 or 2, I do not step forward for R,G,B values	*/
 	if( channels < 3 )
 	{
 		chan_step = 0;
 	}
 	/*	get the RAM for the compressed image
 		(8 bytes per 4x4 pixel block)	*/
 	*out_size = ((width+3) >> 2) * ((height+3) >> 2) * 8;
 	compressed = (unsigned char*)malloc( (size_t)*out_size );
 	if( NULL == compressed )
 	{
 		*out_size = 0;
 		return NULL;
 	}
 	/*	go through each block	*/
 	for( j = 0; j < height; j += 4 )
 	{
 		/*	rows of this block that lie inside the image	*/
 		const int my = (height - j < 4) ? (height - j) : 4;
 		for( i = 0; i < width; i += 4 )
 		{
 			/*	columns of this block that lie inside the image	*/
 			const int mx = (width - i < 4) ? (width - i) : 4;
 			int idx = 0;
 			for( y = 0; y < my; ++y )
 			{
 				for( x = 0; x < mx; ++x )
 				{
 					/*	size_t math so big images cannot overflow int	*/
 					const unsigned char *px = uncompressed +
 						((size_t)(j+y) * (size_t)width + (size_t)(i+x)) * (size_t)channels;
 					ublock[idx++] = px[0];
 					ublock[idx++] = px[chan_step];
 					ublock[idx++] = px[chan_step+chan_step];
 				}
 				/*	pad the missing columns with the block's first pixel	*/
 				for( x = mx; x < 4; ++x )
 				{
 					ublock[idx++] = ublock[0];
 					ublock[idx++] = ublock[1];
 					ublock[idx++] = ublock[2];
 				}
 			}
 			/*	pad the missing rows the same way	*/
 			for( y = my; y < 4; ++y )
 			{
 				for( x = 0; x < 4; ++x )
 				{
 					ublock[idx++] = ublock[0];
 					ublock[idx++] = ublock[1];
 					ublock[idx++] = ublock[2];
 				}
 			}
 			/*	compress the block and append it to the output	*/
 			compress_DDS_color_block( 3, ublock, cblock );
 			memcpy( compressed + index, cblock, 8 );
 			index += 8;
 		}
 	}
 	return compressed;
 }

 /*
 	Compresses an image into DXT5 data: 16 bytes per 4x4 pixel
 	block (8 bytes of alpha followed by 8 bytes of color), blocks
 	emitted in order of increasing x within each row of blocks,
 	rows of blocks in order of increasing y.

 	uncompressed	row-major pixels, `channels` bytes per pixel and
 			width*channels bytes per row
 	width, height	image size in pixels, both must be >= 1
 	channels	1..4; for 1 or 2 the first byte of each pixel is
 			used for R, G and B alike; with 2 or 4 channels
 			the last byte of a pixel is its alpha, otherwise
 			alpha is taken as 255
 	out_size	receives the size in bytes of the returned buffer
 			(0 on failure)

 	Blocks that hang over the right/bottom edge are padded with
 	the first pixel of that block.

 	Returns a malloc()ed buffer the caller must free(), or NULL on
 	invalid arguments or allocation failure.
 */
 unsigned char* convert_image_to_DXT5(
 		const unsigned char *const uncompressed,
 		int width, int height, int channels,
 		int *out_size )
 {
 	unsigned char *compressed;
 	int i, j, x, y;
 	unsigned char ublock[16*4];
 	unsigned char cblock[8];
 	int index = 0, chan_step = 1;
 	int has_alpha;
 	/*	error check	*/
 	if( NULL == out_size )
 	{
 		return NULL;
 	}
 	*out_size = 0;
 	if( (width < 1) || (height < 1) ||
 		(NULL == uncompressed) ||
 		(channels < 1) || (channels > 4) )
 	{
 		return NULL;
 	}
 	/*	for channels == 1 or 2, I do not step forward for R,G,B values	*/
 	if( channels < 3 )
 	{
 		chan_step = 0;
 	}
 	/*	# channels = 1 or 3 have no alpha, 2 & 4 do have alpha	*/
 	has_alpha = 1 - (channels & 1);
 	/*	get the RAM for the compressed image
 		(16 bytes per 4x4 pixel block)	*/
 	*out_size = ((width+3) >> 2) * ((height+3) >> 2) * 16;
 	compressed = (unsigned char*)malloc( (size_t)*out_size );
 	if( NULL == compressed )
 	{
 		*out_size = 0;
 		return NULL;
 	}
 	/*	go through each block	*/
 	for( j = 0; j < height; j += 4 )
 	{
 		/*	rows of this block that lie inside the image	*/
 		const int my = (height - j < 4) ? (height - j) : 4;
 		for( i = 0; i < width; i += 4 )
 		{
 			/*	columns of this block that lie inside the image	*/
 			const int mx = (width - i < 4) ? (width - i) : 4;
 			int idx = 0;
 			for( y = 0; y < my; ++y )
 			{
 				for( x = 0; x < mx; ++x )
 				{
 					/*	size_t math so big images cannot overflow int	*/
 					const unsigned char *px = uncompressed +
 						((size_t)(j+y) * (size_t)width + (size_t)(i+x)) * (size_t)channels;
 					ublock[idx++] = px[0];
 					ublock[idx++] = px[chan_step];
 					ublock[idx++] = px[chan_step+chan_step];
 					ublock[idx++] = has_alpha ? px[channels-1] : 255;
 				}
 				/*	pad the missing columns with the block's first pixel	*/
 				for( x = mx; x < 4; ++x )
 				{
 					ublock[idx++] = ublock[0];
 					ublock[idx++] = ublock[1];
 					ublock[idx++] = ublock[2];
 					ublock[idx++] = ublock[3];
 				}
 			}
 			/*	pad the missing rows the same way	*/
 			for( y = my; y < 4; ++y )
 			{
 				for( x = 0; x < 4; ++x )
 				{
 					ublock[idx++] = ublock[0];
 					ublock[idx++] = ublock[1];
 					ublock[idx++] = ublock[2];
 					ublock[idx++] = ublock[3];
 				}
 			}
 			/*	the alpha block comes first in the output...	*/
 			compress_DDS_alpha_block( ublock, cblock );
 			memcpy( compressed + index, cblock, 8 );
 			index += 8;
 			/*	...followed by the color block	*/
 			compress_DDS_color_block( 4, ublock, cblock );
 			memcpy( compressed + index, cblock, 8 );
 			index += 8;
 		}
 	}
 	return compressed;
 }

 /********* Helper Functions *********/
 /*
 	Rescales c from a from_bits wide range to a to_bits wide range
 	using only shifts, adds and one multiply.  With
 	s = 2^(from_bits-1) + c * (2^to_bits - 1)
 	the result is (s + (s >> from_bits)) >> from_bits.
 */
 int convert_bit_range( int c, int from_bits, int to_bits )
 {
 	const int to_max = (1 << to_bits) - 1;
 	const int half_step = 1 << (from_bits - 1);
 	const int scaled = half_step + c * to_max;
 	return (scaled + (scaled >> from_bits)) >> from_bits;
 }

 /*
 	Packs 8 bit r, g, b into one value: red rescaled to 5 bits and
 	shifted to bit 11, green rescaled to 6 bits and shifted to
 	bit 5, blue rescaled to 5 bits in the low bits.
 */
 int rgb_to_565( int r, int g, int b )
 {
 	const int r5 = convert_bit_range( r, 8, 5 );
 	const int g6 = convert_bit_range( g, 8, 6 );
 	const int b5 = convert_bit_range( b, 8, 5 );
 	return (r5 << 11) | (g6 << 5) | (b5 << 0);
 }

 /*
 	Unpacks a 565 color: the 5 bits at bit 11 go to *r, the 6 bits
 	at bit 5 go to *g and the low 5 bits go to *b, each rescaled
 	to 8 bits.
 */
 void rgb_888_from_565( unsigned int c, int *r, int *g, int *b )
 {
 	const unsigned int red5 = (c >> 11) & 31;
 	const unsigned int green6 = (c >> 5) & 63;
 	const unsigned int blue5 = c & 31;
 	*r = convert_bit_range( red5, 5, 8 );
 	*g = convert_bit_range( green6, 6, 8 );
 	*b = convert_bit_range( blue5, 5, 8 );
 }

 /*
 	Fits a line through the colors of 16 pixels.

 	uncompressed	16 pixels, `channels` bytes apart; the first
 			three bytes of each pixel are read as R, G, B
 	point		receives the average color (a point on the line)
 	direction	receives the (unnormalized) direction of the line

 	With USE_COV_MAT set, the direction is the result of three
 	un-normalized power-method steps on the covariance sums;
 	otherwise it is built from the per-channel standard
 	deviations, with signs taken from the covariances.
 */
 void compute_color_line_STDEV(
 		const unsigned char *const uncompressed,
 		int channels,
 		float point[3], float direction[3] )
 {
 	const float inv_16 = 1.0f / 16.0f;
 	int offset;
 	float mean_r = 0.0f, mean_g = 0.0f, mean_b = 0.0f;
 	float cov_rr = 0.0f, cov_gg = 0.0f, cov_bb = 0.0f;
 	float cov_rg = 0.0f, cov_rb = 0.0f, cov_gb = 0.0f;
 	/*	accumulate the plain sums and the sums of products	*/
 	for( offset = 0; offset < 16*channels; offset += channels )
 	{
 		const unsigned char *px = uncompressed + offset;
 		const int r = px[0];
 		const int g = px[1];
 		const int b = px[2];
 		mean_r += r;
 		mean_g += g;
 		mean_b += b;
 		cov_rr += r * r;
 		cov_gg += g * g;
 		cov_bb += b * b;
 		cov_rg += r * g;
 		cov_rb += r * b;
 		cov_gb += g * b;
 	}
 	/*	sums -> averages	*/
 	mean_r *= inv_16;
 	mean_g *= inv_16;
 	mean_b *= inv_16;
 	/*	sums of products -> sums of products of (value - average)	*/
 	cov_rr -= 16.0f * mean_r * mean_r;
 	cov_gg -= 16.0f * mean_g * mean_g;
 	cov_bb -= 16.0f * mean_b * mean_b;
 	cov_rg -= 16.0f * mean_r * mean_g;
 	cov_rb -= 16.0f * mean_r * mean_b;
 	cov_gb -= 16.0f * mean_g * mean_b;
 	/*	the average color lies on the line	*/
 	point[0] = mean_r;
 	point[1] = mean_g;
 	point[2] = mean_b;
 	#if USE_COV_MAT
 	{
 		/*
 			Power method, idea from ryg
 			(https://mollyrocket.com/forums/viewtopic.php?t=392).
 			The start vector is deliberately not {1,1,1}: for a
 			covariance matrix such as
 				|  1 -1  0 |
 				| -1  1  0 |
 				|  0  0  0 |
 			that start would be mapped to all zeros.  Such a
 			failure is still possible, just very unlikely.
 		*/
 		float vr = 1.0f;
 		float vg = 2.718281828f;
 		float vb = 3.141592654f;
 		int pass;
 		/*	three multiplications by the covariance matrix	*/
 		for( pass = 0; pass < 3; ++pass )
 		{
 			direction[0] = vr*cov_rr + vg*cov_rg + vb*cov_rb;
 			direction[1] = vr*cov_rg + vg*cov_gg + vb*cov_gb;
 			direction[2] = vr*cov_rb + vg*cov_gb + vb*cov_bb;
 			vr = direction[0];
 			vg = direction[1];
 			vb = direction[2];
 		}
 	}
 	#else
 	{
 		/*	standard deviation method
 			(very robust, a tiny bit slower and less accurate)	*/
 		/*	signs are based off green when it varies more than red	*/
 		const int green_major = ( cov_gg > cov_rr );
 		const int flip_rg = green_major ? 0 : 1;
 		const float cov_with_b = green_major ? cov_gb : cov_rb;
 		direction[0] = sqrt( cov_rr );
 		direction[1] = sqrt( cov_gg );
 		direction[2] = sqrt( cov_bb );
 		if( cov_rg < 0.0f )
 		{
 			direction[flip_rg] = -direction[flip_rg];
 		}
 		if( cov_with_b < 0.0f )
 		{
 			direction[2] = -direction[2];
 		}
 	}
 	#endif
 }

 /*
 	Picks the two 565 end-point colors for a block of 16 pixels.
 	The pixels are projected onto the line computed by
 	compute_color_line_STDEV; the two extreme projections are
 	turned back into colors, clamped to 0..255 and packed with
 	rgb_to_565.

 	cmax, cmin	receive the numerically larger / smaller packed
 			color; both are set to 0 when `channels` is invalid
 	channels	bytes per pixel in `uncompressed`, must be 3 or 4
 	uncompressed	the 16 pixels

 	Does nothing if cmax or cmin is NULL.
 */
 void LSE_master_colors_max_min(
 		int *cmax, int *cmin,
 		int channels,
 		const unsigned char *const uncompressed )
 {
 	int i, j;
 	/*	the master colors	*/
 	int c0[3], c1[3];
 	/*	the fitted line: a point on it and its direction	*/
 	float avg[] = { 0.0f, 0.0f, 0.0f };
 	float dir[] = { 0.0f, 0.0f, 0.0f };
 	float dot_max, dot_min;
 	float vec_len2;
 	float dot;
 	/*	error check	*/
 	if( (NULL == cmax) || (NULL == cmin) )
 	{
 		return;
 	}
 	if( (channels < 3) || (channels > 4) )
 	{
 		/*	never hand indeterminate values back to the caller	*/
 		*cmax = 0;
 		*cmin = 0;
 		return;
 	}
 	compute_color_line_STDEV( uncompressed, channels, avg, dir );
 	/*	the small constant keeps this finite for a zero direction	*/
 	vec_len2 = 1.0f / ( 0.00001f +
 			dir[0]*dir[0] + dir[1]*dir[1] + dir[2]*dir[2] );
 	/*	finding the max and min vector values	*/
 	dot_max =
 			(
 				dir[0] * uncompressed[0] +
 				dir[1] * uncompressed[1] +
 				dir[2] * uncompressed[2]
 			);
 	dot_min = dot_max;
 	for( i = 1; i < 16; ++i )
 	{
 		dot =
 			(
 				dir[0] * uncompressed[i*channels+0] +
 				dir[1] * uncompressed[i*channels+1] +
 				dir[2] * uncompressed[i*channels+2]
 			);
 		if( dot < dot_min )
 		{
 			dot_min = dot;
 		} else if( dot > dot_max )
 		{
 			dot_max = dot;
 		}
 	}
 	/*	and the offset (from the average location)	*/
 	dot = dir[0]*avg[0] + dir[1]*avg[1] + dir[2]*avg[2];
 	dot_min -= dot;
 	dot_max -= dot;
 	/*	post multiply by the scaling factor	*/
 	dot_min *= vec_len2;
 	dot_max *= vec_len2;
 	/*	OK, build the master colors, clamped to 0..255	*/
 	for( i = 0; i < 3; ++i )
 	{
 		/*	color 0	*/
 		c0[i] = (int)(0.5f + avg[i] + dot_max * dir[i]);
 		if( c0[i] < 0 )
 		{
 			c0[i] = 0;
 		} else if( c0[i] > 255 )
 		{
 			c0[i] = 255;
 		}
 		/*	color 1	*/
 		c1[i] = (int)(0.5f + avg[i] + dot_min * dir[i]);
 		if( c1[i] < 0 )
 		{
 			c1[i] = 0;
 		} else if( c1[i] > 255 )
 		{
 			c1[i] = 255;
 		}
 	}
 	/*	down sample to 565 and order the two results	*/
 	i = rgb_to_565( c0[0], c0[1], c0[2] );
 	j = rgb_to_565( c1[0], c1[1], c1[2] );
 	if( i > j )
 	{
 		*cmax = i;
 		*cmin = j;
 	} else
 	{
 		*cmax = j;
 		*cmin = i;
 	}
 }

 /*
 	Compresses the color of 16 pixels into an 8 byte block:
 	bytes 0-1 hold color 0 and bytes 2-3 color 1 (565, low byte
 	first), bytes 4-7 hold one 2 bit index per pixel.

 	channels	bytes per pixel in `uncompressed`, must be 3 or 4;
 			for any other value the block is zero filled
 	uncompressed	the 16 pixels, first three bytes of each are R,G,B
 	compressed	receives the 8 output bytes
 */
 void
 	compress_DDS_color_block
 	(
 		int channels,
 		const unsigned char *const uncompressed,
 		unsigned char compressed[8]
 	)
 {
 	/*	variables	*/
 	int i;
 	int next_bit;
 	int enc_c0 = 0, enc_c1 = 0;
 	int c0[3], c1[3];
 	float color_line[] = { 0.0f, 0.0f, 0.0f };
 	float vec_len2 = 0.0f, dot_offset = 0.0f;
 	/*	position along the line (0..3) -> stored 2 bit index	*/
 	const int swizzle4[] = { 0, 2, 3, 1 };
 	/*	the helper below only handles 3 or 4 channels; bail out
 		instead of encoding end points it never produced	*/
 	if( (channels < 3) || (channels > 4) )
 	{
 		for( i = 0; i < 8; ++i )
 		{
 			compressed[i] = 0;
 		}
 		return;
 	}
 	/*	get the master colors	*/
 	LSE_master_colors_max_min( &enc_c0, &enc_c1, channels, uncompressed );
 	/*	store the 565 color 0 and color 1	*/
 	compressed[0] = (enc_c0 >> 0) & 255;
 	compressed[1] = (enc_c0 >> 8) & 255;
 	compressed[2] = (enc_c1 >> 0) & 255;
 	compressed[3] = (enc_c1 >> 8) & 255;
 	/*	zero out the index bytes, they are OR-ed into below	*/
 	compressed[4] = 0;
 	compressed[5] = 0;
 	compressed[6] = 0;
 	compressed[7] = 0;
 	/*	reconstitute the master color vectors	*/
 	rgb_888_from_565( enc_c0, &c0[0], &c0[1], &c0[2] );
 	rgb_888_from_565( enc_c1, &c1[0], &c1[1], &c1[2] );
 	/*	the vector from color 0 to color 1	*/
 	vec_len2 = 0.0f;
 	for( i = 0; i < 3; ++i )
 	{
 		color_line[i] = (float)(c1[i] - c0[i]);
 		vec_len2 += color_line[i] * color_line[i];
 	}
 	/*	identical end points give a zero vector: leave it at zero	*/
 	if( vec_len2 > 0.0f )
 	{
 		vec_len2 = 1.0f / vec_len2;
 	}
 	/*	pre-perform the scaling	*/
 	color_line[0] *= vec_len2;
 	color_line[1] *= vec_len2;
 	color_line[2] *= vec_len2;
 	/*	compute the offset (constant) portion of the dot product	*/
 	dot_offset = color_line[0]*c0[0] + color_line[1]*c0[1] + color_line[2]*c0[2];
 	/*	store the rest of the bits	*/
 	next_bit = 8*4;
 	for( i = 0; i < 16; ++i )
 	{
 		/*	place this color on the line:
 			0 is at color 0, 1 is at color 1	*/
 		int next_value = 0;
 		float dot_product =
 			color_line[0] * uncompressed[i*channels+0] +
 			color_line[1] * uncompressed[i*channels+1] +
 			color_line[2] * uncompressed[i*channels+2] -
 			dot_offset;
 		/*	map to [0,3]	*/
 		next_value = (int)( dot_product * 3.0f + 0.5f );
 		if( next_value > 3 )
 		{
 			next_value = 3;
 		} else if( next_value < 0 )
 		{
 			next_value = 0;
 		}
 		/*	OK, store this value	*/
 		compressed[next_bit >> 3] |= swizzle4[ next_value ] << (next_bit & 7);
 		next_bit += 2;
 	}
 	/*	done compressing the color block	*/
 }

 /*
 	Compresses the alpha of 16 pixels into an 8 byte block:
 	byte 0 holds the largest alpha, byte 1 the smallest, bytes
 	2-7 hold one 3 bit index per pixel.

 	uncompressed	16 pixels of 4 bytes each; byte 3 of every pixel
 			is its alpha
 	compressed	receives the 8 output bytes
 */
 void
 	compress_DDS_alpha_block
 	(
 		const unsigned char *const uncompressed,
 		unsigned char compressed[8]
 	)
 {
 	/*	variables	*/
 	int i;
 	int next_bit;
 	int a0, a1;
 	float scale_me;
 	/*	position between a1 and a0 (0..7) -> stored 3 bit index	*/
 	const int swizzle8[] = { 1, 7, 6, 5, 4, 3, 2, 0 };
 	/*	get the alpha limits (a0 >= a1)	*/
 	a0 = a1 = uncompressed[3];
 	for( i = 4+3; i < 16*4; i += 4 )
 	{
 		if( uncompressed[i] > a0 )
 		{
 			a0 = uncompressed[i];
 		} else if( uncompressed[i] < a1 )
 		{
 			a1 = uncompressed[i];
 		}
 	}
 	/*	store those limits	*/
 	compressed[0] = a0;
 	compressed[1] = a1;
 	/*	zero out the index bytes, they are OR-ed into below	*/
 	for( i = 2; i < 8; ++i )
 	{
 		compressed[i] = 0;
 	}
 	/*	uniform alpha (a0 == a1) must not divide by zero: with a
 		zero scale every pixel maps to position 0, i.e. to a1	*/
 	scale_me = ( a0 > a1 ) ? ( 7.9999f / (a0 - a1) ) : 0.0f;
 	/*	store all of the alpha values	*/
 	next_bit = 8*2;
 	for( i = 3; i < 16*4; i += 4 )
 	{
 		/*	convert this alpha value to a 3 bit number	*/
 		int svalue;
 		int value = (int)((uncompressed[i] - a1) * scale_me);
 		svalue = swizzle8[ value&7 ];
 		/*	OK, store this value, start with the 1st byte	*/
 		compressed[next_bit >> 3] |= svalue << (next_bit & 7);
 		if( (next_bit & 7) > 5 )
 		{
 			/*	spans 2 bytes, fill in the start of the 2nd byte	*/
 			compressed[1 + (next_bit >> 3)] |= svalue >> (8 - (next_bit & 7) );
 		}
 		next_bit += 3;
 	}
 	/*	done compressing the alpha block	*/
 }
	/*
	Jonathan Dummer
	2007-07-31-10.32

	simple DXT compression / decompression code

	public domain
	*/

	#include "image_DXT.h"
	#include <math.h>
	#include <stdlib.h>
	#include <string.h>
	#include <stdio.h>

	/* set this =1 if you want to use the covariance matrix method...
	which is better than my method of using standard deviations
	overall, except on the infinitesimal chance that the power
	method fails for finding the largest eigenvector */
	#define USE_COV_MAT 1

	/******* Function Prototypes *******/
	/*
	Takes a 4x4 block of pixels and compresses it into 8 bytes
	in DXT1 format (color only, no alpha). Speed is valued
	over prettyness, at least for now.
	*/
	void compress_DDS_color_block(
	int channels,
	const unsigned char *const uncompressed,
	unsigned char compressed[8] );
	/*
	Takes a 4x4 block of pixels and compresses the alpha
	component it into 8 bytes for use in DXT5 DDS files.
	Speed is valued over prettyness, at least for now.
	*/
	void compress_DDS_alpha_block(
	const unsigned char *const uncompressed,
	unsigned char compressed[8] );

	/******* Actual Exposed Functions *******/
	int
	save_image_as_DDS
	(
	const char *filename,
	int width, int height, int channels,
	const unsigned char *const data
	)
	{
	/* variables */
	FILE *fout;
	unsigned char *DDS_data;
	DDS_header header;
	int DDS_size;
	/* error check */
	if( (NULL == filename) \|\|
	(width < 1) \|\| (height < 1) \|\|
	(channels < 1) \|\| (channels > 4) \|\|
	(data == NULL ) )
	{
	return 0;
	}
	/* Convert the image */
	if( (channels & 1) == 1 )
	{
	/* no alpha, just use DXT1 */
	DDS_data = convert_image_to_DXT1( data, width, height, channels, &DDS_size );
	} else
	{
	/* has alpha, so use DXT5 */
	DDS_data = convert_image_to_DXT5( data, width, height, channels, &DDS_size );
	}
	/* save it */
	memset( &header, 0, sizeof( DDS_header ) );
	header.dwMagic = ('D' << 0) \| ('D' << 8) \| ('S' << 16) \| (' ' << 24);
	header.dwSize = 124;
	header.dwFlags = DDSD_CAPS \| DDSD_HEIGHT \| DDSD_WIDTH \| DDSD_PIXELFORMAT \| DDSD_LINEARSIZE;
	header.dwWidth = width;
	header.dwHeight = height;
	header.dwPitchOrLinearSize = DDS_size;
	header.sPixelFormat.dwSize = 32;
	header.sPixelFormat.dwFlags = DDPF_FOURCC;
	if( (channels & 1) == 1 )
	{
	header.sPixelFormat.dwFourCC = ('D' << 0) \| ('X' << 8) \| ('T' << 16) \| ('1' << 24);
	} else
	{
	header.sPixelFormat.dwFourCC = ('D' << 0) \| ('X' << 8) \| ('T' << 16) \| ('5' << 24);
	}
	header.sCaps.dwCaps1 = DDSCAPS_TEXTURE;
	/* write it out */
	fout = fopen( filename, "wb");
	fwrite( &header, sizeof( DDS_header ), 1, fout );
	fwrite( DDS_data, 1, DDS_size, fout );
	fclose( fout );
	/* done */
	free( DDS_data );
	return 1;
	}

	/*
		Compresses an image into DXT1 data: 8 bytes per 4x4 pixel
		block, blocks emitted in order of increasing x within each
		row of blocks, rows of blocks in order of increasing y.

		uncompressed	row-major pixels, `channels` bytes per pixel and
				width*channels bytes per row
		width, height	image size in pixels, both must be >= 1
		channels	1..4; for 1 or 2 the first byte of each pixel is
				used for R, G and B alike; any 4th byte is ignored
		out_size	receives the size in bytes of the returned buffer
				(0 on failure)

		Blocks that hang over the right/bottom edge are padded with
		the first pixel of that block.

		Returns a malloc()ed buffer the caller must free(), or NULL on
		invalid arguments or allocation failure.
	*/
	unsigned char* convert_image_to_DXT1(
			const unsigned char *const uncompressed,
			int width, int height, int channels,
			int *out_size )
	{
		unsigned char *compressed;
		int i, j, x, y;
		unsigned char ublock[16*3];
		unsigned char cblock[8];
		int index = 0, chan_step = 1;
		/*	error check	*/
		if( NULL == out_size )
		{
			return NULL;
		}
		*out_size = 0;
		if( (width < 1) || (height < 1) ||
			(NULL == uncompressed) ||
			(channels < 1) || (channels > 4) )
		{
			return NULL;
		}
		/*	for channels == 1 or 2, I do not step forward for R,G,B values	*/
		if( channels < 3 )
		{
			chan_step = 0;
		}
		/*	get the RAM for the compressed image
			(8 bytes per 4x4 pixel block)	*/
		*out_size = ((width+3) >> 2) * ((height+3) >> 2) * 8;
		compressed = (unsigned char*)malloc( (size_t)*out_size );
		if( NULL == compressed )
		{
			*out_size = 0;
			return NULL;
		}
		/*	go through each block	*/
		for( j = 0; j < height; j += 4 )
		{
			/*	rows of this block that lie inside the image	*/
			const int my = (height - j < 4) ? (height - j) : 4;
			for( i = 0; i < width; i += 4 )
			{
				/*	columns of this block that lie inside the image	*/
				const int mx = (width - i < 4) ? (width - i) : 4;
				int idx = 0;
				for( y = 0; y < my; ++y )
				{
					for( x = 0; x < mx; ++x )
					{
						/*	size_t math so big images cannot overflow int	*/
						const unsigned char *px = uncompressed +
							((size_t)(j+y) * (size_t)width + (size_t)(i+x)) * (size_t)channels;
						ublock[idx++] = px[0];
						ublock[idx++] = px[chan_step];
						ublock[idx++] = px[chan_step+chan_step];
					}
					/*	pad the missing columns with the block's first pixel	*/
					for( x = mx; x < 4; ++x )
					{
						ublock[idx++] = ublock[0];
						ublock[idx++] = ublock[1];
						ublock[idx++] = ublock[2];
					}
				}
				/*	pad the missing rows the same way	*/
				for( y = my; y < 4; ++y )
				{
					for( x = 0; x < 4; ++x )
					{
						ublock[idx++] = ublock[0];
						ublock[idx++] = ublock[1];
						ublock[idx++] = ublock[2];
					}
				}
				/*	compress the block and append it to the output	*/
				compress_DDS_color_block( 3, ublock, cblock );
				memcpy( compressed + index, cblock, 8 );
				index += 8;
			}
		}
		return compressed;
	}

	/*
		Compresses an image into DXT5 data: 16 bytes per 4x4 pixel
		block (8 bytes of alpha followed by 8 bytes of color), blocks
		emitted in order of increasing x within each row of blocks,
		rows of blocks in order of increasing y.

		uncompressed	row-major pixels, `channels` bytes per pixel and
				width*channels bytes per row
		width, height	image size in pixels, both must be >= 1
		channels	1..4; for 1 or 2 the first byte of each pixel is
				used for R, G and B alike; with 2 or 4 channels
				the last byte of a pixel is its alpha, otherwise
				alpha is taken as 255
		out_size	receives the size in bytes of the returned buffer
				(0 on failure)

		Blocks that hang over the right/bottom edge are padded with
		the first pixel of that block.

		Returns a malloc()ed buffer the caller must free(), or NULL on
		invalid arguments or allocation failure.
	*/
	unsigned char* convert_image_to_DXT5(
			const unsigned char *const uncompressed,
			int width, int height, int channels,
			int *out_size )
	{
		unsigned char *compressed;
		int i, j, x, y;
		unsigned char ublock[16*4];
		unsigned char cblock[8];
		int index = 0, chan_step = 1;
		int has_alpha;
		/*	error check	*/
		if( NULL == out_size )
		{
			return NULL;
		}
		*out_size = 0;
		if( (width < 1) || (height < 1) ||
			(NULL == uncompressed) ||
			(channels < 1) || (channels > 4) )
		{
			return NULL;
		}
		/*	for channels == 1 or 2, I do not step forward for R,G,B values	*/
		if( channels < 3 )
		{
			chan_step = 0;
		}
		/*	# channels = 1 or 3 have no alpha, 2 & 4 do have alpha	*/
		has_alpha = 1 - (channels & 1);
		/*	get the RAM for the compressed image
			(16 bytes per 4x4 pixel block)	*/
		*out_size = ((width+3) >> 2) * ((height+3) >> 2) * 16;
		compressed = (unsigned char*)malloc( (size_t)*out_size );
		if( NULL == compressed )
		{
			*out_size = 0;
			return NULL;
		}
		/*	go through each block	*/
		for( j = 0; j < height; j += 4 )
		{
			/*	rows of this block that lie inside the image	*/
			const int my = (height - j < 4) ? (height - j) : 4;
			for( i = 0; i < width; i += 4 )
			{
				/*	columns of this block that lie inside the image	*/
				const int mx = (width - i < 4) ? (width - i) : 4;
				int idx = 0;
				for( y = 0; y < my; ++y )
				{
					for( x = 0; x < mx; ++x )
					{
						/*	size_t math so big images cannot overflow int	*/
						const unsigned char *px = uncompressed +
							((size_t)(j+y) * (size_t)width + (size_t)(i+x)) * (size_t)channels;
						ublock[idx++] = px[0];
						ublock[idx++] = px[chan_step];
						ublock[idx++] = px[chan_step+chan_step];
						ublock[idx++] = has_alpha ? px[channels-1] : 255;
					}
					/*	pad the missing columns with the block's first pixel	*/
					for( x = mx; x < 4; ++x )
					{
						ublock[idx++] = ublock[0];
						ublock[idx++] = ublock[1];
						ublock[idx++] = ublock[2];
						ublock[idx++] = ublock[3];
					}
				}
				/*	pad the missing rows the same way	*/
				for( y = my; y < 4; ++y )
				{
					for( x = 0; x < 4; ++x )
					{
						ublock[idx++] = ublock[0];
						ublock[idx++] = ublock[1];
						ublock[idx++] = ublock[2];
						ublock[idx++] = ublock[3];
					}
				}
				/*	the alpha block comes first in the output...	*/
				compress_DDS_alpha_block( ublock, cblock );
				memcpy( compressed + index, cblock, 8 );
				index += 8;
				/*	...followed by the color block	*/
				compress_DDS_color_block( 4, ublock, cblock );
				memcpy( compressed + index, cblock, 8 );
				index += 8;
			}
		}
		return compressed;
	}

	/******* Helper Functions *******/
	/*
		Rescales c from a from_bits wide range to a to_bits wide range
		using only shifts, adds and one multiply.  With
		s = 2^(from_bits-1) + c * (2^to_bits - 1)
		the result is (s + (s >> from_bits)) >> from_bits.
	*/
	int convert_bit_range( int c, int from_bits, int to_bits )
	{
		const int to_max = (1 << to_bits) - 1;
		const int half_step = 1 << (from_bits - 1);
		const int scaled = half_step + c * to_max;
		return (scaled + (scaled >> from_bits)) >> from_bits;
	}

	/*
		Packs 8 bit r, g, b into one value: red rescaled to 5 bits and
		shifted to bit 11, green rescaled to 6 bits and shifted to
		bit 5, blue rescaled to 5 bits in the low bits.
	*/
	int rgb_to_565( int r, int g, int b )
	{
		const int r5 = convert_bit_range( r, 8, 5 );
		const int g6 = convert_bit_range( g, 8, 6 );
		const int b5 = convert_bit_range( b, 8, 5 );
		return (r5 << 11) | (g6 << 5) | (b5 << 0);
	}

	/*
		Unpacks a 565 color: the 5 bits at bit 11 go to *r, the 6 bits
		at bit 5 go to *g and the low 5 bits go to *b, each rescaled
		to 8 bits.
	*/
	void rgb_888_from_565( unsigned int c, int *r, int *g, int *b )
	{
		const unsigned int red5 = (c >> 11) & 31;
		const unsigned int green6 = (c >> 5) & 63;
		const unsigned int blue5 = c & 31;
		*r = convert_bit_range( red5, 5, 8 );
		*g = convert_bit_range( green6, 6, 8 );
		*b = convert_bit_range( blue5, 5, 8 );
	}

	/*
		Fits a line through the colors of 16 pixels.

		uncompressed	16 pixels, `channels` bytes apart; the first
				three bytes of each pixel are read as R, G, B
		point		receives the average color (a point on the line)
		direction	receives the (unnormalized) direction of the line

		With USE_COV_MAT set, the direction is the result of three
		un-normalized power-method steps on the covariance sums;
		otherwise it is built from the per-channel standard
		deviations, with signs taken from the covariances.
	*/
	void compute_color_line_STDEV(
			const unsigned char *const uncompressed,
			int channels,
			float point[3], float direction[3] )
	{
		const float inv_16 = 1.0f / 16.0f;
		int offset;
		float mean_r = 0.0f, mean_g = 0.0f, mean_b = 0.0f;
		float cov_rr = 0.0f, cov_gg = 0.0f, cov_bb = 0.0f;
		float cov_rg = 0.0f, cov_rb = 0.0f, cov_gb = 0.0f;
		/*	accumulate the plain sums and the sums of products	*/
		for( offset = 0; offset < 16*channels; offset += channels )
		{
			const unsigned char *px = uncompressed + offset;
			const int r = px[0];
			const int g = px[1];
			const int b = px[2];
			mean_r += r;
			mean_g += g;
			mean_b += b;
			cov_rr += r * r;
			cov_gg += g * g;
			cov_bb += b * b;
			cov_rg += r * g;
			cov_rb += r * b;
			cov_gb += g * b;
		}
		/*	sums -> averages	*/
		mean_r *= inv_16;
		mean_g *= inv_16;
		mean_b *= inv_16;
		/*	sums of products -> sums of products of (value - average)	*/
		cov_rr -= 16.0f * mean_r * mean_r;
		cov_gg -= 16.0f * mean_g * mean_g;
		cov_bb -= 16.0f * mean_b * mean_b;
		cov_rg -= 16.0f * mean_r * mean_g;
		cov_rb -= 16.0f * mean_r * mean_b;
		cov_gb -= 16.0f * mean_g * mean_b;
		/*	the average color lies on the line	*/
		point[0] = mean_r;
		point[1] = mean_g;
		point[2] = mean_b;
		#if USE_COV_MAT
		{
			/*
				Power method, idea from ryg
				(https://mollyrocket.com/forums/viewtopic.php?t=392).
				The start vector is deliberately not {1,1,1}: for a
				covariance matrix such as
					|  1 -1  0 |
					| -1  1  0 |
					|  0  0  0 |
				that start would be mapped to all zeros.  Such a
				failure is still possible, just very unlikely.
			*/
			float vr = 1.0f;
			float vg = 2.718281828f;
			float vb = 3.141592654f;
			int pass;
			/*	three multiplications by the covariance matrix	*/
			for( pass = 0; pass < 3; ++pass )
			{
				direction[0] = vr*cov_rr + vg*cov_rg + vb*cov_rb;
				direction[1] = vr*cov_rg + vg*cov_gg + vb*cov_gb;
				direction[2] = vr*cov_rb + vg*cov_gb + vb*cov_bb;
				vr = direction[0];
				vg = direction[1];
				vb = direction[2];
			}
		}
		#else
		{
			/*	standard deviation method
				(very robust, a tiny bit slower and less accurate)	*/
			/*	signs are based off green when it varies more than red	*/
			const int green_major = ( cov_gg > cov_rr );
			const int flip_rg = green_major ? 0 : 1;
			const float cov_with_b = green_major ? cov_gb : cov_rb;
			direction[0] = sqrt( cov_rr );
			direction[1] = sqrt( cov_gg );
			direction[2] = sqrt( cov_bb );
			if( cov_rg < 0.0f )
			{
				direction[flip_rg] = -direction[flip_rg];
			}
			if( cov_with_b < 0.0f )
			{
				direction[2] = -direction[2];
			}
		}
		#endif
	}

	/*
		Picks the two 565 end-point colors for a block of 16 pixels.
		The pixels are projected onto the line computed by
		compute_color_line_STDEV; the two extreme projections are
		turned back into colors, clamped to 0..255 and packed with
		rgb_to_565.

		cmax, cmin	receive the numerically larger / smaller packed
				color; both are set to 0 when `channels` is invalid
		channels	bytes per pixel in `uncompressed`, must be 3 or 4
		uncompressed	the 16 pixels

		Does nothing if cmax or cmin is NULL.
	*/
	void LSE_master_colors_max_min(
			int *cmax, int *cmin,
			int channels,
			const unsigned char *const uncompressed )
	{
		int i, j;
		/*	the master colors	*/
		int c0[3], c1[3];
		/*	the fitted line: a point on it and its direction	*/
		float avg[] = { 0.0f, 0.0f, 0.0f };
		float dir[] = { 0.0f, 0.0f, 0.0f };
		float dot_max, dot_min;
		float vec_len2;
		float dot;
		/*	error check	*/
		if( (NULL == cmax) || (NULL == cmin) )
		{
			return;
		}
		if( (channels < 3) || (channels > 4) )
		{
			/*	never hand indeterminate values back to the caller	*/
			*cmax = 0;
			*cmin = 0;
			return;
		}
		compute_color_line_STDEV( uncompressed, channels, avg, dir );
		/*	the small constant keeps this finite for a zero direction	*/
		vec_len2 = 1.0f / ( 0.00001f +
				dir[0]*dir[0] + dir[1]*dir[1] + dir[2]*dir[2] );
		/*	finding the max and min vector values	*/
		dot_max =
				(
					dir[0] * uncompressed[0] +
					dir[1] * uncompressed[1] +
					dir[2] * uncompressed[2]
				);
		dot_min = dot_max;
		for( i = 1; i < 16; ++i )
		{
			dot =
				(
					dir[0] * uncompressed[i*channels+0] +
					dir[1] * uncompressed[i*channels+1] +
					dir[2] * uncompressed[i*channels+2]
				);
			if( dot < dot_min )
			{
				dot_min = dot;
			} else if( dot > dot_max )
			{
				dot_max = dot;
			}
		}
		/*	and the offset (from the average location)	*/
		dot = dir[0]*avg[0] + dir[1]*avg[1] + dir[2]*avg[2];
		dot_min -= dot;
		dot_max -= dot;
		/*	post multiply by the scaling factor	*/
		dot_min *= vec_len2;
		dot_max *= vec_len2;
		/*	OK, build the master colors, clamped to 0..255	*/
		for( i = 0; i < 3; ++i )
		{
			/*	color 0	*/
			c0[i] = (int)(0.5f + avg[i] + dot_max * dir[i]);
			if( c0[i] < 0 )
			{
				c0[i] = 0;
			} else if( c0[i] > 255 )
			{
				c0[i] = 255;
			}
			/*	color 1	*/
			c1[i] = (int)(0.5f + avg[i] + dot_min * dir[i]);
			if( c1[i] < 0 )
			{
				c1[i] = 0;
			} else if( c1[i] > 255 )
			{
				c1[i] = 255;
			}
		}
		/*	down sample to 565 and order the two results	*/
		i = rgb_to_565( c0[0], c0[1], c0[2] );
		j = rgb_to_565( c1[0], c1[1], c1[2] );
		if( i > j )
		{
			*cmax = i;
			*cmin = j;
		} else
		{
			*cmax = j;
			*cmin = i;
		}
	}

	/*
		Compresses the color of 16 pixels into an 8 byte block:
		bytes 0-1 hold color 0 and bytes 2-3 color 1 (565, low byte
		first), bytes 4-7 hold one 2 bit index per pixel.

		channels	bytes per pixel in `uncompressed`, must be 3 or 4;
				for any other value the block is zero filled
		uncompressed	the 16 pixels, first three bytes of each are R,G,B
		compressed	receives the 8 output bytes
	*/
	void
		compress_DDS_color_block
		(
			int channels,
			const unsigned char *const uncompressed,
			unsigned char compressed[8]
		)
	{
		/*	variables	*/
		int i;
		int next_bit;
		int enc_c0 = 0, enc_c1 = 0;
		int c0[3], c1[3];
		float color_line[] = { 0.0f, 0.0f, 0.0f };
		float vec_len2 = 0.0f, dot_offset = 0.0f;
		/*	position along the line (0..3) -> stored 2 bit index	*/
		const int swizzle4[] = { 0, 2, 3, 1 };
		/*	the helper below only handles 3 or 4 channels; bail out
			instead of encoding end points it never produced	*/
		if( (channels < 3) || (channels > 4) )
		{
			for( i = 0; i < 8; ++i )
			{
				compressed[i] = 0;
			}
			return;
		}
		/*	get the master colors	*/
		LSE_master_colors_max_min( &enc_c0, &enc_c1, channels, uncompressed );
		/*	store the 565 color 0 and color 1	*/
		compressed[0] = (enc_c0 >> 0) & 255;
		compressed[1] = (enc_c0 >> 8) & 255;
		compressed[2] = (enc_c1 >> 0) & 255;
		compressed[3] = (enc_c1 >> 8) & 255;
		/*	zero out the index bytes, they are OR-ed into below	*/
		compressed[4] = 0;
		compressed[5] = 0;
		compressed[6] = 0;
		compressed[7] = 0;
		/*	reconstitute the master color vectors	*/
		rgb_888_from_565( enc_c0, &c0[0], &c0[1], &c0[2] );
		rgb_888_from_565( enc_c1, &c1[0], &c1[1], &c1[2] );
		/*	the vector from color 0 to color 1	*/
		vec_len2 = 0.0f;
		for( i = 0; i < 3; ++i )
		{
			color_line[i] = (float)(c1[i] - c0[i]);
			vec_len2 += color_line[i] * color_line[i];
		}
		/*	identical end points give a zero vector: leave it at zero	*/
		if( vec_len2 > 0.0f )
		{
			vec_len2 = 1.0f / vec_len2;
		}
		/*	pre-perform the scaling	*/
		color_line[0] *= vec_len2;
		color_line[1] *= vec_len2;
		color_line[2] *= vec_len2;
		/*	compute the offset (constant) portion of the dot product	*/
		dot_offset = color_line[0]*c0[0] + color_line[1]*c0[1] + color_line[2]*c0[2];
		/*	store the rest of the bits	*/
		next_bit = 8*4;
		for( i = 0; i < 16; ++i )
		{
			/*	place this color on the line:
				0 is at color 0, 1 is at color 1	*/
			int next_value = 0;
			float dot_product =
				color_line[0] * uncompressed[i*channels+0] +
				color_line[1] * uncompressed[i*channels+1] +
				color_line[2] * uncompressed[i*channels+2] -
				dot_offset;
			/*	map to [0,3]	*/
			next_value = (int)( dot_product * 3.0f + 0.5f );
			if( next_value > 3 )
			{
				next_value = 3;
			} else if( next_value < 0 )
			{
				next_value = 0;
			}
			/*	OK, store this value	*/
			compressed[next_bit >> 3] |= swizzle4[ next_value ] << (next_bit & 7);
			next_bit += 2;
		}
		/*	done compressing the color block	*/
	}

	/*
		Compresses the alpha of 16 pixels into an 8 byte block:
		byte 0 holds the largest alpha, byte 1 the smallest, bytes
		2-7 hold one 3 bit index per pixel.

		uncompressed	16 pixels of 4 bytes each; byte 3 of every pixel
				is its alpha
		compressed	receives the 8 output bytes
	*/
	void
		compress_DDS_alpha_block
		(
			const unsigned char *const uncompressed,
			unsigned char compressed[8]
		)
	{
		/*	variables	*/
		int i;
		int next_bit;
		int a0, a1;
		float scale_me;
		/*	position between a1 and a0 (0..7) -> stored 3 bit index	*/
		const int swizzle8[] = { 1, 7, 6, 5, 4, 3, 2, 0 };
		/*	get the alpha limits (a0 >= a1)	*/
		a0 = a1 = uncompressed[3];
		for( i = 4+3; i < 16*4; i += 4 )
		{
			if( uncompressed[i] > a0 )
			{
				a0 = uncompressed[i];
			} else if( uncompressed[i] < a1 )
			{
				a1 = uncompressed[i];
			}
		}
		/*	store those limits	*/
		compressed[0] = a0;
		compressed[1] = a1;
		/*	zero out the index bytes, they are OR-ed into below	*/
		for( i = 2; i < 8; ++i )
		{
			compressed[i] = 0;
		}
		/*	uniform alpha (a0 == a1) must not divide by zero: with a
			zero scale every pixel maps to position 0, i.e. to a1	*/
		scale_me = ( a0 > a1 ) ? ( 7.9999f / (a0 - a1) ) : 0.0f;
		/*	store all of the alpha values	*/
		next_bit = 8*2;
		for( i = 3; i < 16*4; i += 4 )
		{
			/*	convert this alpha value to a 3 bit number	*/
			int svalue;
			int value = (int)((uncompressed[i] - a1) * scale_me);
			svalue = swizzle8[ value&7 ];
			/*	OK, store this value, start with the 1st byte	*/
			compressed[next_bit >> 3] |= svalue << (next_bit & 7);
			if( (next_bit & 7) > 5 )
			{
				/*	spans 2 bytes, fill in the start of the 2nd byte	*/
				compressed[1 + (next_bit >> 3)] |= svalue >> (8 - (next_bit & 7) );
			}
			next_bit += 3;
		}
		/*	done compressing the alpha block	*/
	}