/* vim:set ts=4 sw=4 cindent ignorecase enc=gbk: */

#include "config.h"

#include <stdio.h>
#include <string.h>
#include <unzip.h>
#include <chm_lib.h>
#include <unrar.h>
#include "common/utils.h"
#include "charsets.h"
#include "display.h"
#include "html.h"
#include "text.h"
#include "buffer.h"
#include "scene.h"
#include "conf.h"
#include <DrvMemMgr.h>
#include <fs_api.h>
#include <scene_text.h>
#include <mystring.h>
#include <debugoff.h>

/* Encoding most recently detected by CheckCodeType() in the open routines of this file. */
CodeType cur_code_type;

//----------------------------------- macro definitions ---------------------------------------------
/*
 * Stand-ins for the gzip / RAR library calls: the "read" style calls expand
 * to the constant 1 and the "close" style calls expand to nothing.
 * NOTE(review): presumably these exist because the real libraries are not
 * linked on this platform - confirm before relying on the gz/RAR paths.
 */
#define gzread( x, y, z ) 1
#define gzclose( x )
#define RARReadHeader( x, y ) 1
#define RARCloseArchive( x )

//----------------------------------- global tables ----------------------------------------
/*
 * Per-byte classification table, indexed by the raw byte value.
 * Meaning of the values, as used by the code in this file:
 *   0x01  line terminator (0x00, 0x0A, 0x0D); tested with "== 1".
 *   0x02  blank (0x09 tab, 0x20 space); text_reorder() tests "& 0x03" to
 *         find bytes that need whitespace normalisation.
 *   0x04  byte that may start / continue a word run in Proc_string_depart*().
 *   0x08  '!', '%', ')', ']' and '}'; Proc_string_depart*() accepts such a
 *         byte (mask 0x0C) and ends the run when the NEXT byte carries it.
 *   0x00  everything else, including every byte >= 0x80.
 */
#if 1
byte bytetable[256] = {
//  0X00  0X01  0X02  0X03  0X04  0X05  0X06  0X07  0X08  0X09  0X0A  0X0B  0X0C  0X0D  0X0E  0X0F
	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00,	// 0x00: 0x00 NUL, 0x09 tab, 0x0a LF, 0x0d CR
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	// 0x10
	0x02, 0x08, 0x04, 0x04, 0x04, 0x08, 0x04, 0x04, 0x04, 0x08, 0x04, 0x04, 0x04, 0x00, 0x04, 0x04,	// 0x20: 0x20 space
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00,	// 0x30
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,	// 0x40
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0X08, 0x04, 0x04,	// 0x50
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,	// 0x60
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x08, 0x04, 0x00,	// 0x70
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	// 0x80
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	// 0x90
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	// 0xA0
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	// 0xB0
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	// 0xC0
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	// 0xD0
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	// 0xE0
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00	// 0xF0
};

#else

/* Alternative table (compiled out by the "#if 1" above): only the values 1 and 2 are present. */
BOOL bytetable[256] = {
	1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 0,	// 0x00: 0x00 NUL, 0x09 tab, 0x0a LF, 0x0d CR
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0x10
	2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0x20: 0x20 space
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0x30
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0x40
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0x50
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0x60
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0x70
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0x80
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0x90
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0xA0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0xB0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0xC0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0xD0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	// 0xE0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0	// 0xF0
};

#endif

//----------------------------------- external globals ----------------------------------------
extern t_conf config;
extern int use_ttf;

//-----------------------------------------------------------------------------------
/*
 * Swap the two bytes of every 16-bit unit in place (UCS-2 big-endian <->
 * little-endian).  `count` is a byte count; a trailing odd byte is left
 * untouched.
 */
static void UnicodeSwap( byte *unicode_Big, U32 count )
{
	U32 pairs = count >> 1;		/* number of complete 2-byte units */
	byte *unit = unicode_Big;

	while( pairs-- > 0 )
	{
		byte first = unit[0];

		unit[0] = unit[1];
		unit[1] = first;
		unit += 2;
	}
}

/*
 * Normalise the buffer of `txt` to little-endian UCS-2 where a conversion is
 * implemented, and record the resulting encoding in Text_encode_cur.
 *
 *  - conf_encode_ucs_bigendian: bytes are swapped in place, result is
 *    conf_encode_ucs.
 *  - conf_encode_utf8: the text is converted into a freshly allocated buffer
 *    of twice the input size, which then replaces txt->buf; result is
 *    conf_encode_ucs.
 *  - anything else: the buffer is left alone and `encode` is recorded as is.
 *
 * If the conversion buffer cannot be allocated the function returns without
 * touching txt or Text_encode_cur.
 *
 * The previous version ended the allocation-failure path with
 * "return txt->buf;" (a value returned from a void function) and carried a
 * long BOM-sniffing section after an unconditional return; that unreachable
 * section has been removed.
 */
static void text_decode(p_text txt, t_conf_encode encode)
{
	if( encode == conf_encode_ucs_bigendian )
	{
		UnicodeSwap( (byte*)txt->buf, txt->size );
		encode = conf_encode_ucs;
	}
	else if( encode == conf_encode_utf8 )
	{
		byte *buffer = malloc( txt->size << 1 );
		if( buffer == NULL )
		{
			return;
		}

		txt->size = utf8_to_ucs( buffer, txt->size<<1, (const byte*)txt->buf, txt->size );
		free( txt->buf );
		txt->buf = (char *) buffer;
		encode = conf_encode_ucs;
	}
	Text_encode_cur = encode;
}

#ifdef ENABLE_TTF
extern p_ttf ettf, cttf;
#endif

/*
 * Width in pixels of one line of text drawn with the system font.
 *
 * Scanning stops at `size` bytes or at the first line terminator
 * (bytetable value 1).  A byte above 0x80 is taken as the lead byte of a
 * two-byte character and costs DISP_FONTSIZE; any other byte costs
 * DISP_FONTSIZE / 2, a tab costing that amount config.tabstop times.
 * `wordspace` is added once per character.
 */
int text_get_string_width_sys( const byte * pos, size_t size, dword wordspace )
{
	const byte *cur = pos;
	const byte *const stop = pos + size;
	int total = 0;

	for ( ; cur < stop && bytetable[*cur] != 1; )
	{
		int rep;

		if (*cur > 0x80) {
			/* double-byte character */
			total += DISP_FONTSIZE;
			total += wordspace;
			cur += 2;
			continue;
		}

		/* single-byte character; a tab repeats the half-width cell */
		rep = 0;
		while (rep < (*cur == 0x09 ? config.tabstop : 1)) {
			total += DISP_FONTSIZE / 2;
			++rep;
		}
		total += wordspace;
		++cur;
	}

	return total;
}

/*
 * Measure the single-byte "word" that starts at `pos`.
 *
 * Bytes are consumed while they carry one of the 0x04 / 0x08 bits in
 * bytetable; the run also ends right before a following byte that carries
 * the 0x08 bit, at any byte above 0x80, or at `posend`.
 *
 * @param pos           first byte to examine
 * @param posend        one past the last readable byte
 * @param wordspace     extra pixels added after every character
 * @param letter_count  out: number of bytes in the run
 * @param word_width    out: pixel width of the run (disp_ewidth + wordspace per byte)
 * @return always 0
 *
 * Fix: the look-ahead after "++pos" is now bounded by `posend`; it used to
 * read one byte past the end when the run reached the end of the buffer.
 */
int Proc_string_depart( const char *pos, const char *posend, dword wordspace, dword *letter_count, dword *word_width )
{
	*word_width = 0;
	*letter_count = 0;
	while ( pos < posend )
	{
		byte ch = *(const byte *) pos;

		if ( ch > 0x80 || (bytetable[ch] & 0x0C) == 0 )
			break;

		*word_width += disp_ewidth[ch]+wordspace;
		*letter_count += 1;
		++pos;

		/* stop in front of a 0x08-flagged byte, but never peek past the end */
		if( pos < posend && (bytetable[*(const byte *) pos] & 0x08) )
			break;
	}
	return 0;
}

/*
 * Work out how many bytes of double-byte/ASCII text fit on one display row.
 *
 * Scanning starts at `pos` and stops at `posend`, at a line terminator
 * (bytetable value 1), or when the next item would not fit in `maxpixel`.
 * Bytes above 0x80 are treated as the lead byte of a two-byte character of
 * width `fontsize`; 0x04-flagged bytes are measured as whole words via
 * Proc_string_depart() so a word is not split unless it is wider than the
 * whole row; every other byte is measured on its own (a tab counts
 * config.tabstop times).
 *
 * @param count  out: number of bytes that fit (never reaches past posend)
 * @return pixel width accumulated for those bytes
 *
 * Fixes compared with the previous version:
 *  - a word wider than the row is split after at most its own length and
 *    after at least one byte (the old code could jump past the word or make
 *    no progress at all);
 *  - no division by zero when the width of 'a' plus wordspace is 0;
 *  - a lone lead byte at the very end no longer pushes *count past posend.
 */
int text_get_string_width(const byte *pos, const byte *posend, dword maxpixel, dword wordspace, dword * count, dword fontsize )
{
	int j;
	int width = 0;
	dword letter_count, word_width;
	/* estimated cell width used when an over-long word has to be split */
	const dword letter_unit = disp_ewidth[(byte)'a'] + wordspace;
	const dword letter_count_line = letter_unit ? maxpixel / letter_unit : 0;
	const byte *posstart = pos;

	while ( pos < posend && bytetable[*pos] != 1 )	/* 1 == line terminator */
	{
		if (*pos > 0x80)	/* two-byte character */
		{
			if ( (width+fontsize) > maxpixel )
			{
				break;
			}

			width += fontsize + wordspace;
			pos += 2;
		}
		else if( bytetable[*pos] & 0x04 )	/* start of a word */
		{
			Proc_string_depart( (const char *) pos, (const char *) posend, wordspace, &letter_count, &word_width );
			if( (width + word_width) > maxpixel )
			{
				if( word_width > maxpixel && width == 0 )
				{
					/* the word alone is wider than a row: split it */
					dword split = letter_count_line;

					if( split > letter_count )
						split = letter_count;
					if( split == 0 )
						split = 1;
					pos += split;
					width += split * letter_unit;
				}
				break;
			}
			pos += letter_count;
			width += word_width;
		}
		else	/* blank, punctuation or other single byte */
		{
			for (j = 0; j < (*pos == 0x09 ? config.tabstop : 1); ++j)
				width += disp_ewidth[*pos];

			if (width > maxpixel)
			{
				break;
			}
			width += wordspace;
			++pos;
		}
	}

	if( pos > posend )	/* truncated two-byte character at the end */
		pos = posend;

	*count = pos - posstart;	/* bytes consumed */

	return width;
}

/*
 * Split the byte-oriented text in txt->buf into display rows.
 *
 * Rows are stored in blocks of 1024 entries (txt->rows[block][index]); at
 * most 1024 blocks, i.e. 1024 * 1024 rows, are produced.  Each row records
 * its start pointer and byte count, the count including the line terminator
 * that ended it (CR LF counts as one terminator).
 *
 * @param rowpixels  usable row width in pixels
 * @param wordspace  extra pixels between characters
 * @param ttf_mode   not used by this function
 * @return true on success, false when a row block cannot be allocated
 */
extern BOOL text_format(p_text txt, dword rowpixels, dword wordspace, BOOL ttf_mode)
{
	byte *cursor = (byte*)txt->buf;
	byte *limit = cursor + txt->size;
	dword block;

	/* throw away the row blocks of any previous layout */
	txt->row_count = 0;
	for (block = 0; block < 1024; ++block)
	{
		if (txt->rows[block] == NULL)
			continue;
		free((void *) txt->rows[block]);
		txt->rows[block] = NULL;
	}

	block = 0;
	while (txt->row_count < 1024 * 1024 && cursor < limit)
	{
		dword used = 0;
		byte *line_start = cursor;
		p_textrow row;

		/* every 1024th row opens a new block */
		if ((txt->row_count % 1024) == 0)
		{
			block = txt->row_count >> 10;
			if ( txt->rows[block] != NULL )
			{
				free( txt->rows[block] );
				txt->rows[block] = NULL;
			}
			txt->rows[block] = (p_textrow) malloc( 1024 * sizeof(t_textrow));
			if ( txt->rows[block] == NULL )
			{
				return false;
			}
		}

		row = &txt->rows[block][txt->row_count & 0x3FF];
		row->start = cursor;

		/* take as many bytes as fit on the row */
		text_get_string_width( cursor, limit, rowpixels, wordspace, &used, DISP_BOOK_FONTSIZE );
		cursor += used;

		/* swallow the terminator that ended the row, if one follows */
		if (cursor + 1 < limit && bytetable[*cursor] == 1)
		{
			cursor += (cursor[0] == '\r' && cursor[1] == '\n') ? 2 : 1;
		}

		row->count = cursor - line_start;
		txt->row_count++;

		/* a terminator in the very last byte ends the layout */
		if (cursor + 1 == limit && bytetable[*cursor] == 1)
		{
			break;
		}
	}
	return true;
}

/*
 * Split little-endian UCS-2 text in txt->buf into display rows.
 *
 * Same row storage as text_format(): blocks of 1024 rows, at most 1024
 * blocks.  A leading FF FE byte-order mark is skipped.  Row widths come
 * from CB_text_get_string_width(); the terminators recognised afterwards
 * are 0D 00 0A 00 (DOS), 0A 00 (Unix) and 0D 00 (Mac).
 *
 * @param rowpixels  usable row width in pixels
 * @param wordspace  extra pixels between characters
 * @param ttf_mode   not used by this function
 * @return true on success, false when a row block cannot be allocated
 *
 * Fixes: the BOM probe and the four-byte DOS terminator probe are now
 * bounds-checked; they used to read pos[1] on a short buffer and
 * pos[2]/pos[3] when only two bytes were left.
 *
 * NOTE(review): the terminator test looks at bytetable[*pos] only, i.e. at
 * the low byte of the UCS-2 unit.  If CB_text_get_string_width() stops at a
 * unit such as U+010A, none of the patterns below match and the row makes
 * no progress - confirm how that helper treats such units.
 */
extern BOOL text_format_ucs(p_text txt, dword rowpixels, dword wordspace, BOOL ttf_mode)
{
	byte *pos = (byte *) txt->buf, *posend = pos + txt->size;
	dword curs;
	txt->row_count = 0;
	for (curs = 0; curs < 1024; ++curs)
	{
		if (txt->rows[curs] != NULL)
		{
			free((void *) txt->rows[curs]);
			txt->rows[curs] = NULL;
		}
	}

	if( posend - pos >= 2 && pos[0] == 0xFF && pos[1] == 0xFE )
		pos += 2;	/* skip UNICODE BOM: FF FE */

	curs = 0;
	while (txt->row_count < 1024 * 1024 && pos < posend)
	{
		/* every 1024th row opens a new block */
		if ((txt->row_count % 1024) == 0)
		{
			curs = txt->row_count >> 10;
			if ( txt->rows[curs] != NULL )
			{
				free( txt->rows[curs] );
				txt->rows[curs] = NULL;
			}
			if ( (txt->rows[curs] = (p_textrow) malloc( 1024 * sizeof(t_textrow))) == NULL )
			{
				return false;
			}
		}
		txt->rows[curs][txt->row_count & 0x3FF].start = pos;
		byte *startp = pos;

		{
			dword count = 0;

			CB_text_get_string_width( pos, posend, rowpixels, wordspace, &count, DISP_BOOK_FONTSIZE );
			pos += count;
		}

		if (pos + 1 < posend && bytetable[*pos] == 1)
		{
			if( pos + 3 < posend
				&& pos[0] == 0x0D && pos[1] == 0x00 && pos[2] == 0x0A && pos[3] == 0x00 )	/* DOS */
			{
				pos += 4;
			}
			else if( pos[0] == 0x0A && pos[1] == 0x00 )	/* Unix */
			{
				pos += 2;
			}
			else if( pos[0] == 0x0D && pos[1] == 0x00 )	/* Mac */
			{
				pos += 2;
			}
		}

		/* bytes occupied by this row, terminator included */
		txt->rows[curs][txt->row_count & 0x3FF].count = pos - startp;

		txt->row_count++;
		if (pos + 1 == posend && bytetable[*pos] == 1)
		{
			break;
		}
	}
	return true;
}

/*
 * Measure the ASCII "word" that starts at `pos` inside UTF-8 text.
 *
 * Bytes are consumed while they carry one of the 0x04 / 0x08 bits in
 * bytetable; the run also ends right before a following byte that carries
 * the 0x08 bit, at any byte above 0x80, or at `posend`.
 *
 * @param letter_count  out: number of bytes in the run
 * @param word_width    out: pixel width of the run (disp_ewidth + wordspace per byte)
 * @return always 0
 *
 * Fix: the look-ahead after "++pos" is now bounded by `posend`; it used to
 * read one byte past the end when the run reached the end of the buffer.
 */
int Proc_string_depart_utf8( const byte *pos, const byte *posend, dword wordspace, dword *letter_count, dword *word_width )
{
	*word_width = 0;
	*letter_count = 0;
	while ( pos < posend )
	{
		byte ch = *pos;

		if ( ch > 0x80 || (bytetable[ch] & 0x0C) == 0 )
			break;

		*word_width += disp_ewidth[ch]+wordspace;
		*letter_count += 1;
		++pos;

		/* stop in front of a 0x08-flagged byte, but never peek past the end */
		if( pos < posend && (bytetable[*pos] & 0x08) )
			break;
	}
	return 0;
}

/*
 * Work out how many bytes of UTF-8 text fit on one display row.
 *
 * Scanning starts at `pos` and stops at `posend`, at a line terminator
 * (bytetable value 1), or when the next item would not fit in `maxpixel`.
 * Bytes >= 0xC2 start a multi-byte character whose length comes from
 * GetUTF8char() and whose width is `fontsize`; bytes below 0x80 are handled
 * like in text_get_string_width() (whole words via
 * Proc_string_depart_utf8(), tabs counting config.tabstop times); a stray
 * byte in 0x80..0xC1 is consumed and ends the row.
 *
 * @param count  out: number of bytes that fit (never reaches past posend)
 * @return pixel width accumulated for those bytes
 *
 * Fixes compared with the previous version:
 *  - a word wider than the row is split after at most its own length and
 *    after at least one byte;
 *  - no division by zero when the width of 'a' plus wordspace is 0;
 *  - a GetUTF8char() result of 0 is treated like an error so the scan always
 *    advances;
 *  - *count is clamped to the end of the buffer.
 */
int text_get_string_width_utf8(const byte *pos, const byte *posend, dword maxpixel, dword wordspace, dword * count, dword fontsize )
{
	int j;
	int width = 0, len;
	dword letter_count, word_width;
	/* estimated cell width used when an over-long word has to be split */
	const dword letter_unit = disp_ewidth[(byte)'a'] + wordspace;
	const dword letter_count_line = letter_unit ? maxpixel / letter_unit : 0;
	const byte *posstart = pos;

	while ( pos < posend && bytetable[*pos] != 1 )	/* 1 == line terminator */
	{
		if (*pos >= 0xc2)	/* lead byte of a multi-byte character */
		{
			if ( (width+fontsize) > maxpixel )
			{
				break;
			}
			len = GetUTF8char( pos, (int)(posend-pos) );
			if( len <= 0 )
			{
				/* malformed sequence: swallow the byte and end the row */
				pos++;
				break;
			}

			width += fontsize + wordspace;
			pos += len;
		}
		else if( *pos < 0x80 )
		{
			if( bytetable[*pos] & 0x04 )	/* start of a word */
			{
				Proc_string_depart_utf8( pos, posend, wordspace, &letter_count, &word_width );
				if( (width + word_width) > maxpixel )
				{
					if( word_width > maxpixel && width == 0 )
					{
						/* the word alone is wider than a row: split it */
						dword split = letter_count_line;

						if( split > letter_count )
							split = letter_count;
						if( split == 0 )
							split = 1;
						pos += split;
						width += split * letter_unit;
					}
					break;
				}
				pos += letter_count;
				width += word_width;
			}
			else	/* blank, punctuation or other single byte */
			{
				for (j = 0; j < (*pos == 0x09 ? config.tabstop : 1); ++j)
					width += disp_ewidth[*pos];

				if (width > maxpixel)
				{
					break;
				}
				width += wordspace;
				++pos;
			}
		}
		else	/* stray continuation byte (0x80..0xC1) */
		{
			pos++;
			break;
		}
	}

	if( pos > posend )
		pos = posend;

	*count = pos - posstart;	/* bytes consumed */

	return width;
}

/*
 * Split UTF-8 text in txt->buf into display rows.
 *
 * Same row storage as text_format(): blocks of 1024 rows, at most 1024
 * blocks.  A leading EF BB BF byte-order mark is skipped.  Row widths come
 * from text_get_string_width_utf8(); CR LF, LF and CR are accepted as row
 * terminators and are included in the row's byte count.
 *
 * @param rowpixels  usable row width in pixels
 * @param wordspace  extra pixels between characters
 * @param ttf_mode   not used by this function
 * @return true on success, false when a row block cannot be allocated
 *
 * Fix: the BOM probe is only done when at least three bytes are present; it
 * used to read pos[1] and pos[2] unconditionally.
 */
extern BOOL text_format_utf8(p_text txt, dword rowpixels, dword wordspace, BOOL ttf_mode)
{
	byte *pos = (byte *) txt->buf, *posend = pos + txt->size;
	dword curs;
	dword count = 0;

	txt->row_count = 0;
	for (curs = 0; curs < 1024; ++curs)
	{
		if (txt->rows[curs] != NULL)
		{
			free((void *) txt->rows[curs]);
			txt->rows[curs] = NULL;
		}
	}

	if( posend - pos >= 3 && pos[0] == 0xEF && pos[1] == 0xBB && pos[2] == 0xBF )
		pos += 3;	/* skip UTF8 BOM: EF BB BF */

	curs = 0;
	while (txt->row_count < 1024 * 1024 && pos < posend)
	{
		/* every 1024th row opens a new block */
		if ((txt->row_count % 1024) == 0)
		{
			curs = txt->row_count >> 10;
			if ( txt->rows[curs] != NULL )
			{
				free( txt->rows[curs] );
				txt->rows[curs] = NULL;
			}
			if ( (txt->rows[curs] = (p_textrow) malloc( 1024 * sizeof(t_textrow))) == NULL )
			{
				return false;
			}
		}
		txt->rows[curs][txt->row_count & 0x3FF].start = pos;
		byte *startp = pos;

		text_get_string_width_utf8( pos, posend, rowpixels, wordspace, &count, DISP_BOOK_FONTSIZE );
		pos += count;

		if (pos + 1 < posend && bytetable[*pos] == 1)
		{
			if( pos[0] == 0x0D && pos[1] == 0x0A )	/* DOS */
			{
				pos += 2;
			}
			else if( pos[0] == 0x0A )	/* Unix */
			{
				pos += 1;
			}
			else if( pos[0] == 0x0D )	/* Mac */
			{
				pos += 1;
			}
		}

		/* bytes occupied by this row, terminator included */
		txt->rows[curs][txt->row_count & 0x3FF].count = pos - startp;

		txt->row_count++;
		if (pos + 1 == posend && bytetable[*pos] == 1)
		{
			break;
		}
	}
	return true;
}

/*
 * Paragraph-join sensitivity.  A line is considered "short" - and therefore
 * a real paragraph end - when it holds fewer than avg - avg / min_ratio
 * bytes, avg being the value returned by getTxtAvgLength().
 */
int min_ratio = 20;

/**
 * Average line length of a text buffer.
 * @param txtBuf text data (need not be NUL terminated)
 * @param txtLen number of bytes in txtBuf
 * @return number of bytes that are neither '\r' nor '\n', divided by the
 *         number of '\n' bytes; 0 when the text contains no '\n'
 */
static size_t getTxtAvgLength(char *txtBuf, size_t txtLen)
{
	size_t payload = 0, lines = 0;
	size_t i;

	for (i = 0; i < txtLen; i++) {
		if (txtBuf[i] == '\n')
			lines++;
		else if (txtBuf[i] != '\r')
			payload++;
	}
	return lines != 0 ? payload / lines : 0;
}

/*
 * Shared worker of the two join variants: copy `len` bytes from `src` to
 * `dst`, dropping the line break after every line that is not "short" so
 * hard-wrapped lines flow together.  A kept line break is written as
 * "\r\n" when the '\n' was preceded by '\r', as "\n" otherwise; every other
 * '\r' is dropped.
 *
 * `dst` may be the same buffer as `src`: the output never gets ahead of the
 * input, and the one look-behind (src[in - 1]) happens before anything is
 * written for the current byte.
 *
 * Returns the number of bytes written (<= len).
 */
static size_t paragraph_join_copy(const char *src, size_t len, char *dst, size_t avgLength)
{
	size_t in, out = 0;
	size_t lineSize = 0;	/* payload bytes seen on the current line */

	for (in = 0; in < len; in++) {
		char c = src[in];

		if (c == '\n') {
			if (lineSize < avgLength - avgLength * 1.0 / min_ratio) {
				/* short line: genuine paragraph end, keep the break */
				if (in > 0 && src[in - 1] == '\r')
					dst[out++] = '\r';
				dst[out++] = '\n';
			}
			/* otherwise: join this line with the next one */
			lineSize = 0;
		} else if (c != '\r') {
			lineSize++;
			dst[out++] = c;
		}
	}
	return out;
}

/**
 * Join hard-wrapped lines into paragraphs without allocating memory.
 * @param txtbuf address of the pointer to the text; the text is rewritten
 *               in place, the pointer itself is not changed
 * @param txtlen size of the text in bytes
 * @return new size of the text, or 0 when there is nothing to process
 * @note Rewritten as a single in-place compaction pass.  The previous
 *       memmove-based version dereferenced *txtbuf before the NULL check,
 *       read src[-1], copied one byte too many, skipped bytes after every
 *       removal and kept scanning stale data past the shrunken end.  The
 *       result is now identical to text_paragraph_join_alloc_memory().
 */
static size_t text_paragraph_join_without_memory(char **txtbuf, size_t txtlen)
{
	size_t avgLength;

	if (txtlen == 0 || txtbuf == NULL || *txtbuf == NULL)
		return 0;

	/* must be measured before the buffer is modified */
	avgLength = getTxtAvgLength(*txtbuf, txtlen);

	return paragraph_join_copy(*txtbuf, txtlen, *txtbuf, avgLength);
}

/**
 * Join hard-wrapped lines into paragraphs, writing into a new buffer.
 * @param txtbuf address of the pointer to the text; on success the old
 *               buffer is freed and *txtbuf points to a new malloc'ed one
 * @param txtlen size of the text in bytes
 * @return new size of the text, or 0 when there is nothing to process
 * @note *txtbuf must have been obtained from malloc.  When the new buffer
 *       cannot be allocated the in-place variant is used instead.
 * @note Fixes: a '\n' in the very first byte no longer reads src[-1]; the
 *       new buffer has two spare bytes, both set to 0, after the data, the
 *       same padding text_open() puts after a freshly read file.
 */
static size_t text_paragraph_join_alloc_memory(char **txtbuf, size_t txtlen)
{
	size_t avgLength, out;
	char *dst;

	if (txtlen == 0 || txtbuf == NULL || *txtbuf == NULL)
		return 0;

	avgLength = getTxtAvgLength(*txtbuf, txtlen);

	if ((dst = (char *) malloc(txtlen + 2)) == NULL) {
		return text_paragraph_join_without_memory(txtbuf, txtlen);
	}

	out = paragraph_join_copy(*txtbuf, txtlen, dst, avgLength);
	dst[out] = 0;
	dst[out + 1] = 0;

	free(*txtbuf);
	*txtbuf = dst;
	return out;
}

/*
 * Normalise whitespace and line breaks of byte-oriented text in place.
 *
 *  - a tab becomes a space; up to three spaces directly after it are kept,
 *    any further spaces are dropped;
 *  - a run of spaces is cut down to two, or to four when the run starts
 *    right after a line break in the output;
 *  - a run of '\r' / '\n' becomes one '\n', or two when the run contained
 *    more than two '\n';
 *  - NUL bytes are removed.
 *
 * @param string text buffer, rewritten in place
 * @param size   number of bytes in the buffer
 * @return new size of the text (never larger than `size`)
 *
 * Fixes:
 *  - every look-ahead at *ctxt is bounded by the end of the buffer;
 *  - "does this space start a line" is decided from the output written so
 *    far; the old test looked at the input two bytes back, a position that
 *    may already have been overwritten by the compaction and that holds
 *    '\r' rather than '\n' for CR-terminated input;
 *  - a default case keeps the loop advancing should bytetable ever flag a
 *    byte that has no case here.
 */
static dword text_reorder(char *string, dword size)
{
	int i;
	char *wtxt = string, *ctxt = string, *etxt = string + size;

	while (ctxt < etxt) {
		/* copy ordinary bytes straight through */
		while (ctxt < etxt && ( bytetable[*(byte *) ctxt] & 0x03 ) == 0)
			*wtxt++ = *ctxt++;
		if (ctxt >= etxt)
			break;
		switch (*ctxt) {
			case '\t':
				*wtxt++ = ' ';
				ctxt++;
				for (i = 0; i < 3; i++)
					if (ctxt < etxt && *ctxt == ' ') {
						*wtxt++ = ' ';
						ctxt++;
					}
				while (ctxt < etxt && *ctxt == ' ')
					ctxt++;
				break;
			case ' ':
				*wtxt++ = ' ';
				ctxt++;
				/* indentation at the start of a line may keep two more spaces */
				if (wtxt - string >= 2 && wtxt[-2] == '\n')
					for (i = 0; i < 2; i++)
						if (ctxt < etxt && *ctxt == ' ') {
							*wtxt++ = ' ';
							ctxt++;
						}
				if (ctxt < etxt && *ctxt == ' ') {
					*wtxt++ = ' ';
					ctxt++;
				}
				while (ctxt < etxt && *ctxt == ' ')
					ctxt++;
				break;
			case '\r':
			case '\n':
				/* i counts the '\n' bytes of the run */
				i = ((*ctxt == '\n') ? 1 : 0);
				*wtxt++ = '\n';
				ctxt++;
				while (ctxt < etxt && (*ctxt == '\r' || *ctxt == '\n')) {
					i += ((*ctxt == '\n') ? 1 : 0);
					ctxt++;
				}
				if (i > 2)
					*wtxt++ = '\n';
				break;
			case 0:
				ctxt++;
				break;
			default:
				*wtxt++ = *ctxt++;
				break;
		}
	}
	return wtxt - string;
}

/*
 * Load a text or HTML file from the file system and lay it out in rows.
 *
 * Steps: read the whole file into txt->buf (two NUL bytes are kept after
 * the data), detect the encoding with CheckCodeType(), byte-swap big-endian
 * UCS-2, strip HTML (UCS-2 HTML is converted to UTF-8 first), optionally
 * tidy whitespace and join paragraphs, then format with the routine that
 * matches the encoding.  Unlike text_open_in_zip(), UTF-8 text is kept as
 * UTF-8 and formatted by text_format_utf8().
 *
 * @param filename  path of the file
 * @param ft        file type; fs_filetype_html enables HTML stripping
 * @param rowpixels usable row width in pixels
 * @param wordspace extra pixels between characters
 * @param encode    overwritten by the detected encoding
 * @param reorder   tidy whitespace and join paragraphs
 * @return the text object (release with text_close()), or NULL on failure.
 *         An empty or unreadable file, and a file whose encoding is
 *         reported as CODETYPE_DEFAULT, yield an object without rows.
 *
 * Fixes:
 *  - the number of bytes actually delivered by fread() becomes the text
 *    size, so a short read no longer leaves uninitialised bytes in the text;
 *  - a failing ftell() is no longer stored as the text size;
 *  - the byte-wise reorder / paragraph join is skipped for UCS-2 data:
 *    text_reorder() removes every 0x00 byte, which destroys UCS-2 text.
 */
extern p_text text_open( const char *filename, t_fs_filetype ft, dword rowpixels, dword wordspace, t_conf_encode encode, BOOL reorder )
{
	p_text txt = NULL;
	FILE *fp = NULL;
	long fsize;
	size_t got;
	int len;
	byte *utf8 = NULL;

	Text_encode_cur = conf_encode_default;

	txt = (p_text) malloc( sizeof(t_text) );
	if (txt == NULL)
	{
		return NULL;
	}
	memset( txt, 0x00, sizeof(t_text) );
	if( ( fp = fopen( filename, "r" ) ) == NULL )
	{
		text_close(txt);
		return NULL;
	}

	STRCPY_S(txt->filename, filename);

	fseek( fp, 0, SEEK_END );
	fsize = ftell( fp );
	if( fsize <= 0 )
	{
		/* empty file or ftell failure: hand back an empty text */
		fclose( fp );
		return txt;
	}

	/* two spare bytes so the data can be NUL terminated, also as UCS-2 */
	if ((txt->buf = (char *) malloc( (size_t) fsize + 2)) == NULL)
	{
		fclose( fp );
		text_close(txt);
		return NULL;
	}

	fseek( fp, 0, SEEK_SET );
	got = fread( txt->buf, 1, (size_t) fsize, fp );
	fclose( fp );

	txt->size = got;
	txt->buf[got] = 0;
	txt->buf[got + 1] = 0;
	if( got == 0 )
	{
		return txt;
	}

	cur_code_type = CheckCodeType( (const byte*)txt->buf, txt->size );

	if( cur_code_type == CODETYPE_DEFAULT )
	{
		return txt;
	}

	encode = cur_code_type;

	if( encode == conf_encode_ucs_bigendian )
	{
		UnicodeSwap( (byte*)txt->buf, txt->size );
		encode = conf_encode_ucs;
	}

	if (ft == fs_filetype_html)
	{
		if( encode == conf_encode_ucs )
		{
			/* the HTML stripper works on bytes: go through UTF-8 */
			len = GetUnicode2Utf8Len( (const byte *)txt->buf, txt->size );
			utf8 = malloc( len + 1 );
			if( utf8 == NULL )
			{
				text_close( txt );
				return NULL;
			}
			txt->size = Unicode2Utf8( utf8, len, (const byte *)txt->buf, txt->size );
			free( txt->buf );
			txt->buf = (char *) utf8;
			encode = conf_encode_utf8;
		}
		txt->size = html_to_text(txt->buf, txt->size, true);
	}

	Text_encode_cur = encode;

	if (reorder && Text_encode_cur != conf_encode_ucs)
	{
		txt->size = text_reorder(txt->buf, txt->size);
		txt->size = text_paragraph_join_alloc_memory(&txt->buf, txt->size);
	}

	if( Text_encode_cur == conf_encode_utf8 )
	{
		if (!text_format_utf8(txt, rowpixels, wordspace, use_ttf))
		{
			text_close(txt);
			return NULL;
		}
	}
	else if( Text_encode_cur == conf_encode_ucs )
	{
		if (!text_format_ucs(txt, rowpixels, wordspace, use_ttf))
		{
			text_close(txt);
			return NULL;
		}
	}
	else
	{
		if (!text_format(txt, rowpixels, wordspace, use_ttf))
		{
			text_close(txt);
			return NULL;
		}
	}

	return txt;
}

/*
 * Placeholder for opening a file as a hex dump: not implemented, both
 * arguments are ignored and NULL is always returned.
 */
extern p_text text_open_binary(const char *filename, BOOL vert)
{
	(void) filename;
	(void) vert;

	return NULL;
}

/*
 * Open a text stored in a gzip file.
 *
 * The gzopen() call is disabled in this file, so the handle is always NULL
 * and the function currently always returns NULL.  The remainder is the
 * intended flow once a real handle is available: read everything into a
 * growable buffer, decode, optionally strip HTML / reorder, then format.
 *
 * @return the text object, or NULL on failure (at present: always NULL)
 *
 * Fix: the growable buffer is released when appending to it fails; that
 * path used to leak it.
 */
extern p_text text_open_in_gz(const char *gzfile, const char *filename,
							  t_fs_filetype ft, dword rowpixels,
							  dword wordspace, t_conf_encode encode,
							  BOOL reorder)
{
	p_text txt = (p_text) calloc(1, sizeof(t_text));

	if (txt == NULL)
		return NULL;
	/* disabled: gzFile unzf = gzopen(gzfile, "rb"); */
	gzFile unzf = NULL;

	if (unzf == NULL) {
		text_close(txt);
		return NULL;
	}
	STRCPY_S(txt->filename, filename);
	int len;

	buffer *b = buffer_init();
	char tempbuf[BUFSIZ];

	while ((len = gzread(unzf, tempbuf, BUFSIZ)) > 0) {
		if (buffer_append_memory(b, tempbuf, len) < 0) {
			buffer_free(b);
			text_close(txt);
			gzclose(unzf);
			return NULL;
		}
	}
	if (len < 0) {
		buffer_free(b);
		text_close(txt);
		gzclose(unzf);
		return NULL;
	}
	gzclose(unzf);

	/* take ownership of the collected bytes */
	txt->size = b->used;
	txt->buf = buffer_free_weak(b);

	text_decode(txt, encode);
	if (ft == fs_filetype_html)
		txt->size = html_to_text(txt->buf, txt->size, true);
	if (reorder) {
		txt->size = text_reorder(txt->buf, txt->size);
		txt->size = text_paragraph_join_alloc_memory(&txt->buf, txt->size);
	}

	if (!text_format(txt, rowpixels, wordspace, use_ttf)) {
		text_close(txt);
		return NULL;
	}
	return txt;
}

/*
 * Open a file inside a zip archive as a hex dump.
 *
 * The member is read completely and rendered 16 bytes per row:
 *  - vert:  "OFFSET: " + 8 hex bytes + ' ' + 8 hex bytes      (43 chars)
 *  - else:  "OFFSET: " + 8 groups of "XXXX " + 16 ASCII chars  (66 chars)
 * Positions belonging to bytes beyond the end of the data are blanked.
 * Rows are stored in blocks of 1024 like in text_format().  txt->size keeps
 * the number of raw bytes read from the archive.
 *
 * ft, rowpixels, wordspace, encode and reorder are not used.
 *
 * @return the text object (release with text_close()), or NULL on failure
 *
 * Fixes:
 *  - a negative result of unzReadCurrentFile() is treated as an error
 *    instead of being stored as the size;
 *  - the dump buffer has one spare byte: in the 43-column layout sprintf()
 *    writes 43 characters plus a NUL, which overflowed on the last row;
 *  - each row is formatted from a zero-padded 16-byte copy, so the last row
 *    no longer reads up to 15 bytes past the end of the data;
 *  - members needing more than 1024 * 1024 rows are rejected instead of
 *    writing past txt->rows[].
 */
extern p_text text_open_binary_in_zip(const char *zipfile, const char *filename,
									  t_fs_filetype ft, dword rowpixels,
									  dword wordspace, t_conf_encode encode,
									  BOOL reorder, BOOL vert)
{
	p_text txt = (p_text) calloc(1, sizeof(t_text));

	if (txt == NULL)
		return NULL;
	unzFile unzf = unzOpen(zipfile);

	if (unzf == NULL) {
		text_close(txt);
		return NULL;
	}
	if (unzLocateFile(unzf, filename, 0) != UNZ_OK
		|| unzOpenCurrentFile(unzf) != UNZ_OK) {
		text_close(txt);
		unzClose(unzf);
		return NULL;
	}
	STRCPY_S(txt->filename, filename);
	unz_file_info info;

	if (unzGetCurrentFileInfo(unzf, &info, NULL, 0, NULL, 0, NULL, 0) != UNZ_OK) {
		text_close(txt);
		unzCloseCurrentFile(unzf);
		unzClose(unzf);
		return NULL;
	}
	txt->size = info.uncompressed_size;
	if ((txt->buf = (char *) calloc(1, txt->size)) == NULL) {
		text_close(txt);
		unzCloseCurrentFile(unzf);
		unzClose(unzf);
		return NULL;
	}
	int got = unzReadCurrentFile(unzf, txt->buf, txt->size);

	unzCloseCurrentFile(unzf);
	unzClose(unzf);
	if (got < 0) {
		text_close(txt);
		return NULL;
	}
	txt->size = got;

	byte *tmpbuf = (byte *) txt->buf;	/* raw data; txt->buf becomes the dump */
	dword bpr = (vert ? 43 : 66);		/* characters per dump row */
	dword total = txt->size;
	dword nrows = (total + 15) / 16;

	if (nrows > 1024 * 1024) {
		/* txt->buf still owns the raw data here */
		text_close(txt);
		return NULL;
	}
	/* +1: room for the NUL sprintf() appends after the last row */
	if ((txt->buf = (char *) calloc(1, nrows * bpr + 1)) == NULL) {
		free(tmpbuf);
		text_close(txt);
		return NULL;
	}
	dword curs = 0;

	txt->row_count = nrows;
	dword i;

	for (i = 0; i < nrows; i++) {
		byte cbuf[16];
		dword avail = total - i * 16;

		/* zero-padded copy of this row's source bytes */
		if (avail > 16)
			avail = 16;
		memset(cbuf, 0, sizeof(cbuf));
		memcpy(cbuf, tmpbuf + i * 16, avail);

		if ((i % 1024) == 0) {
			curs = i >> 10;
			if ((txt->rows[curs] =
				 (p_textrow) calloc(1024, sizeof(t_textrow))) == NULL) {
				free((void *) tmpbuf);
				text_close(txt);
				return NULL;
			}
		}
		txt->rows[curs][i & 0x3FF].start = &txt->buf[bpr * i];
		txt->rows[curs][i & 0x3FF].count = bpr;
		if (vert) {
			sprintf(&txt->buf[bpr * i],
					"%08X: %02X%02X%02X%02X%02X%02X%02X%02X %02X%02X%02X%02X%02X%02X%02X%02X",
					(unsigned int) i * 0x10, cbuf[0], cbuf[1], cbuf[2], cbuf[3],
					cbuf[4], cbuf[5], cbuf[6], cbuf[7], cbuf[8], cbuf[9],
					cbuf[10], cbuf[11], cbuf[12], cbuf[13], cbuf[14], cbuf[15]);
			if ((i + 1) * 16 > total) {
				/* blank the hex digits of bytes past the end */
				dword padding = (i + 1) * 16 - total;

				if (padding < 9)
					memset(&txt->buf[bpr * i + bpr - padding * 2], 0x20,
						   padding * 2);
				else
					memset(&txt->buf[bpr * i + bpr - 1 - padding * 2], 0x20,
						   padding * 2 + 1);
			}
		} else {
			sprintf(&txt->buf[bpr * i],
					"%08X: %02X%02X %02X%02X %02X%02X %02X%02X %02X%02X %02X%02X %02X%02X %02X%02X ",
					(unsigned int) i * 0x10, cbuf[0], cbuf[1], cbuf[2], cbuf[3],
					cbuf[4], cbuf[5], cbuf[6], cbuf[7], cbuf[8], cbuf[9],
					cbuf[10], cbuf[11], cbuf[12], cbuf[13], cbuf[14], cbuf[15]);
			dword j;

			/* ASCII column: printable bytes as is, everything else as '.' */
			for (j = 0; j < 16; j++)
				txt->buf[bpr * i + 40 + 10 + j] = (cbuf[j] > 0x1F
												   && cbuf[j] <
												   0x7F) ? cbuf[j] : '.';
			if ((i + 1) * 16 > total) {
				/* blank both columns for bytes past the end */
				dword padding = (i + 1) * 16 - total;

				memset(&txt->buf[bpr * i + bpr - padding], 0x20, padding);
				if ((padding & 1) > 0)
					memset(&txt->buf[bpr * i + 40 + 10 - padding / 2 * 5 - 3],
						   0x20, padding / 2 * 5 + 3);
				else
					memset(&txt->buf[bpr * i + 40 + 10 - padding / 2 * 5], 0x20,
						   padding / 2 * 5);
			}
		}
	}
	free((void *) tmpbuf);
	return txt;
}
#if 0
/*
 * Disabled (compiled out by "#if 0"): build a text object from a block of
 * memory.  The data is copied into a new buffer, decoded with
 * text_decode(), optionally HTML-stripped and reordered, then formatted
 * with text_format().  Returns NULL when data is NULL, size is 0, or an
 * allocation / the formatting fails.
 */
extern p_text text_open_in_raw(const char *filename, const unsigned char *data,
							   size_t size, t_fs_filetype ft, dword rowpixels,
							   dword wordspace, t_conf_encode encode,
							   BOOL reorder)
{
	if (data == NULL || size == 0) {
		return NULL;
	}

	p_text txt = (p_text) calloc(1, sizeof(t_text));

	if (txt == NULL)
		return NULL;

	STRCPY_S(txt->filename, filename);
	txt->size = size;

	if ((txt->buf = (char *) calloc(1, txt->size)) == NULL) {
		text_close(txt);
		return NULL;
	}

	memcpy(txt->buf, data, txt->size);

	text_decode(txt, encode);
	if (ft == fs_filetype_html)
		txt->size = html_to_text(txt->buf, txt->size, true);
	if (reorder) {
		txt->size = text_reorder(txt->buf, txt->size);
		txt->size = text_paragraph_join_alloc_memory(&txt->buf, txt->size);
	}
	if (!text_format(txt, rowpixels, wordspace, use_ttf)) {
		text_close(txt);
		return NULL;
	}
	return txt;
}
#endif
/*
 * Load a text or HTML member of a zip archive and lay it out in rows.
 *
 * Steps: read the member into txt->buf, detect the encoding with
 * CheckCodeType(), byte-swap big-endian UCS-2, strip HTML (UCS-2 HTML is
 * converted to UTF-8 first), optionally tidy whitespace and join
 * paragraphs, convert UTF-8 to UCS-2, then format with text_format_ucs()
 * or text_format().
 *
 * @param zipfile   path of the archive
 * @param filename  name of the member inside the archive
 * @param ft        file type; fs_filetype_html enables HTML stripping
 * @param rowpixels usable row width in pixels
 * @param wordspace extra pixels between characters
 * @param encode    overwritten by the detected encoding
 * @param reorder   tidy whitespace and join paragraphs
 * @return the text object (release with text_close()), or NULL on failure.
 *         A member whose encoding is reported as CODETYPE_DEFAULT yields an
 *         object without rows.
 *
 * Fixes:
 *  - when the UCS-2 buffer cannot be allocated the function used to execute
 *    "return txt->buf;", handing the caller a char buffer as if it were a
 *    text object (and leaking the object); it now cleans up and returns NULL;
 *  - the read buffer has two zeroed spare bytes after the data, like the
 *    one text_open() uses;
 *  - a negative result of unzReadCurrentFile() is treated as an error;
 *  - the byte-wise reorder / paragraph join now runs before the UTF-8 to
 *    UCS-2 conversion and is skipped for data that is already UCS-2:
 *    text_reorder() removes every 0x00 byte, which destroys UCS-2 text.
 */
extern p_text text_open_in_zip(const char *zipfile, const char *filename,
							   t_fs_filetype ft, dword rowpixels,
							   dword wordspace, t_conf_encode encode,
							   BOOL reorder)
{

	p_text txt = (p_text) calloc(1, sizeof(t_text));
	int len;
	int got;
	byte *utf8 = NULL;
	U16 *ucs = NULL;

	if (txt == NULL)
		return NULL;
	unzFile unzf = unzOpen(zipfile);

	if (unzf == NULL)
	{
		text_close(txt);
		return NULL;
	}

	if ( unzLocateFile(unzf, filename, 0) != UNZ_OK || unzOpenCurrentFile(unzf) != UNZ_OK )
	{
		text_close(txt);
		unzClose(unzf);
		return NULL;
	}

	STRCPY_S(txt->filename, filename);
	unz_file_info info;

	if (unzGetCurrentFileInfo(unzf, &info, NULL, 0, NULL, 0, NULL, 0) != UNZ_OK)
	{
		text_close(txt);
		unzCloseCurrentFile(unzf);
		unzClose(unzf);
		return NULL;
	}
	txt->size = info.uncompressed_size;
	/* calloc zero-fills, so the two spare bytes act as NUL padding */
	if ((txt->buf = (char *) calloc(1, txt->size + 2)) == NULL)
	{
		text_close(txt);
		unzCloseCurrentFile(unzf);
		unzClose(unzf);
		return NULL;
	}
	got = unzReadCurrentFile(unzf, txt->buf, txt->size);
	unzCloseCurrentFile(unzf);
	unzClose(unzf);
	if (got < 0)
	{
		text_close(txt);
		return NULL;
	}
	txt->size = got;

	cur_code_type = CheckCodeType( (const byte*)txt->buf, txt->size );

	if( cur_code_type == CODETYPE_DEFAULT )
	{
		return txt;
	}

	encode = cur_code_type;

	if( encode == conf_encode_ucs_bigendian )
	{
		UnicodeSwap( (byte*)txt->buf, txt->size );
		encode = conf_encode_ucs;
	}

	if (ft == fs_filetype_html)
	{
		if( encode == conf_encode_ucs )
		{
			/* the HTML stripper works on bytes: go through UTF-8 */
			len = GetUnicode2Utf8Len( (const byte *)txt->buf, txt->size );
			utf8 = malloc( len + 1 );
			if( utf8 == NULL )
			{
				text_close( txt );
				return NULL;
			}
			txt->size = Unicode2Utf8( utf8, len, (const byte *)txt->buf, txt->size );
			free( txt->buf );
			txt->buf = (char *) utf8;
			encode = conf_encode_utf8;
		}
		txt->size = html_to_text(txt->buf, txt->size, true);
	}

	/* byte-oriented clean-up must happen while the text is still bytes */
	if (reorder && encode != conf_encode_ucs)
	{
		txt->size = text_reorder(txt->buf, txt->size);
		txt->size = text_paragraph_join_alloc_memory(&txt->buf, txt->size);
	}

	if( encode == conf_encode_utf8 )
	{
		len = GetUTF8ToUnicodeLen( (const byte *)txt->buf, txt->size );
		ucs = memalign( 2, len );
		if( ucs == NULL )
		{
			text_close( txt );
			return NULL;
		}
		txt->size = utf8_to_ucs_Fast( ucs, len, (const byte*)txt->buf, txt->size );
		free( txt->buf );
		txt->buf = (char*)ucs;
		encode = conf_encode_ucs;
	}
	Text_encode_cur = encode;

	if( Text_encode_cur == conf_encode_ucs )
	{
		if (!text_format_ucs(txt, rowpixels, wordspace, use_ttf))
		{
			text_close(txt);
			return NULL;
		}
	}
	else
	{
		if (!text_format(txt, rowpixels, wordspace, use_ttf))
		{
			text_close(txt);
			return NULL;
		}
	}

	return txt;
}

/*
 * Release a text object: the text buffer, every allocated row block
 * (1024 slots are checked) and finally the object itself.
 * Passing NULL is allowed and does nothing.
 */
extern void text_close(p_text fstext)
{
	dword block;

	if (fstext == NULL)
		return;

	/* text data */
	if (fstext->buf != NULL)
		free((void *) fstext->buf);

	/* row blocks */
	for (block = 0; block < 1024; block++)
	{
		if (fstext->rows[block] == NULL)
			continue;
		free((void *) fstext->rows[block]);
	}

	/* the object itself */
	free( (void*)fstext );
}

/**
 * Open a text that lives either directly in a directory or inside an archive.
 * @param filename  path of the file, or name of the member inside the archive
 * @param archname  path of the archive; required unless where is scene_in_dir
 * @param filetype  type of the text file
 * @param rowpixels usable row width in pixels
 * @param wordspace extra pixels between characters
 * @param encode    text encoding, passed on to the opener
 * @param reorder   tidy whitespace and join paragraphs
 * @param where     location of the file (scene_in_dir, scene_in_zip, ...)
 * @param vertread  display orientation; not used at present
 * @return the text object
 * - NULL on failure and for every combination that is not handled:
 *   ".gz" files and files of unknown type in a directory, non txt/html
 *   members of a zip, and anything inside chm or rar archives
 */
extern p_text text_open_archive(const char *filename,
								const char *archname,
								t_fs_filetype filetype,
								dword rowpixels,
								dword wordspace,
								t_conf_encode encode,
								BOOL reorder, int where, int vertread)
{
	p_text result = NULL;
	const char *ext;

	if (filename == NULL)
		return NULL;

	if (where != scene_in_dir && (archname == NULL || archname[0] == '\0'))
		return NULL;

	ext = utils_fileext(filename);

	if (where == scene_in_dir)
	{
		if (ext != NULL && stricmp(ext, "gz") == 0)
		{
			/* gzip files: opener (text_open_in_gz) is not called */
		}
		else if (filetype != fs_filetype_unknown)
		{
			result = text_open(filename, filetype, rowpixels, wordspace, encode, reorder);
		}
		/* unknown type: the hex-dump opener (text_open_binary) is not called */
	}
	else if (where == scene_in_zip)
	{
		if (filetype == fs_filetype_txt || filetype == fs_filetype_html)
		{
			result = text_open_in_zip(archname, filename,
									  filetype, rowpixels,
									  wordspace, encode, reorder);
		}
		/* other types: text_open_binary_in_zip is not called */
	}
	/* scene_in_chm, scene_in_rar and anything else: nothing to open */

	return result;
}

/*
 * Read a whole file into memory.
 *
 * @param filename   path of the file
 * @param ppTxtData  in/out: when *ppTxtData is NULL a buffer of file size + 2
 *                   bytes is allocated with malloc and stored here (the
 *                   caller frees it); otherwise the data is written into the
 *                   buffer supplied by the caller.
 *                   NOTE(review): the capacity of a caller-supplied buffer
 *                   cannot be checked here - it must hold size + 2 bytes.
 * @param len        out: number of bytes read
 * @return 0 on success, -1 on failure (NULL argument, file cannot be
 *         opened, empty file, allocation or read failure)
 *
 * Two NUL bytes are stored after the data.
 *
 * Fixes: the FILE handle is closed on every failure path after fopen() (it
 * used to leak); the result of fread() is checked and becomes *len; a
 * buffer allocated here is released again on failure; NULL out-parameters
 * are rejected instead of being dereferenced.
 */
int text_read_sys( const char *filename, char ** ppTxtData, int *len )
{
	FILE *fp;
	char *data;
	int allocated = 0;
	long fsize;
	size_t got;

	if( filename == NULL || ppTxtData == NULL || len == NULL )
		return -1;

	fp = fopen( filename, "r" );
	if( !fp )
		return -1;

	data = *ppTxtData;

	if( fseek( fp, 0, SEEK_END ) != 0 )
		goto fail;

	fsize = ftell( fp );
	*len = (int) fsize;
	if( fsize <= 0 )
		goto fail;

	if( data == NULL )
	{
		data = malloc( (size_t) fsize + 2 );
		if( data == NULL )
			goto fail;
		allocated = 1;
	}

	if( fseek( fp, 0, SEEK_SET ) != 0 )
		goto fail;

	got = fread( data, 1, (size_t) fsize, fp );
	if( got == 0 )
		goto fail;

	fclose( fp );

	data[got] = 0x00;
	data[got + 1] = 0x00;
	*len = (int) got;
	*ppTxtData = data;
	return 0;

fail:
	if( allocated )
		free( data );
	fclose( fp );
	return -1;
}
