Contains Unicode string handling functions for STB usage. More...

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <ctype.h>
#include <techtype.h>
#include <dbgfuncs.h>
#include "asciimap.h"
#include "stbheap.h"
#include "stbuni.h"
#include "stbhuffman.h"

Classes
struct	S_LANG_CODE_ENTRY

Macros
#define	UTF16_HEADER_VALUE 0x11

#define	UCS2_HEADER_VALUE 0x14

#define	UTF8_HEADER_VALUE 0x15

#define	UNICODE_HEADER_POS 0

#define	LENGTH_LOOP_LIMIT INVALID_UNICODE_CHAR

#define	STRINGS_EQUAL 0

#define	FIRST_STRING_GREATER 1

#define	SECOND_STRING_GREATER -1

#define	MAX_NUMBER_DIGITS 11

#define	UNI_PERCENT_CHAR 0x0025

#define	UNI_SMALL_D_CHAR 0x0064

#define	UNI_SMALL_H_CHAR 0x0068

#define	UNI_SMALL_U_CHAR 0x0075

#define	UNI_SMALL_L_CHAR 0x006C

#define	UNI_SMALL_S_CHAR 0x0073

#define	UNI_SMALL_X_CHAR 0x0078

#define	UNI_LARGE_X_CHAR 0x0058

#define	UNI_ZERO_CHAR 0x0030

#define	UNI_NINE_CHAR 0x0039

#define	UNI_SUR_HIGH_START 0xD800

#define	UNI_SUR_HIGH_END 0xDBFF

#define	UNI_SUR_LOW_START 0xDC00

#define	UNI_SUR_LOW_END 0xDFFF

#define	UNI_REPLACEMENT_CHAR 0x0000FFFD

#define	UNI_SMALL_L_SMALL_D_CHARS ((UNI_SMALL_L_CHAR << 16) | UNI_SMALL_D_CHAR)

#define	UNI_SMALL_L_SMALL_U_CHARS ((UNI_SMALL_L_CHAR << 16) | UNI_SMALL_U_CHAR)

#define	UNI_SMALL_L_SMALL_X_CHARS ((UNI_SMALL_L_CHAR << 16) | UNI_SMALL_X_CHAR)

#define	UNI_SMALL_L_LARGE_X_CHARS ((UNI_SMALL_L_CHAR << 16) | UNI_LARGE_X_CHAR)

#define	UNI_SMALL_H_SMALL_D_CHARS ((UNI_SMALL_H_CHAR << 16) | UNI_SMALL_D_CHAR)

#define	UNI_SMALL_H_SMALL_U_CHARS ((UNI_SMALL_H_CHAR << 16) | UNI_SMALL_U_CHAR)

#define	UNI_SMALL_H_SMALL_X_CHARS ((UNI_SMALL_H_CHAR << 16) | UNI_SMALL_X_CHAR)

#define	UNI_SMALL_H_LARGE_X_CHARS ((UNI_SMALL_H_CHAR << 16) | UNI_LARGE_X_CHAR)

#define	MAX_DECODE_BUFFER_SIZE 255 /* Buffer size used for decoding compressed strings */

#define	MAX_NUM_FORMAT_SPEC_STR_SIZE 6 /* max size of numeric format specifier string e.g. "%011ld" */

#define	MAX_NUM_WIDTH_DIGITS

Functions
U32BIT	STB_UnicodeStringLen (U8BIT *string_ptr)
	Determines the length, in characters, of the given unicode string by searching for NULL. Count ignores the unicode header value. More...

BOOLEAN	STB_IsUnicodeStringReversed (U8BIT *string_ptr)
	Checks to see if the supplied string is unicode and if it is reversed (arabic) More...

BOOLEAN	STB_IsUnicodeString (U8BIT *string_ptr)
	Tests for unicode string. More...

BOOLEAN	STB_IsNormalString (U8BIT *string_ptr)
	Tests for normal ascii string. More...

U32BIT	STB_GetNumBytesInString (U8BIT *string_ptr)
	Determines the no of bytes of the given string. More...

U8BIT *	STB_SetUnicodeStringChar (U8BIT *string_ptr, U16BIT char_id, U16BIT code)
	Takes a string and changes the requested location to a new value. This request may involve appending to the string in which case the string is extended (always extended by one character, independent of char_id). More...

U8BIT *	STB_DeleteUnicodeStringChar (U8BIT *string_ptr, U16BIT char_id)
	Takes a string and removes the requested location, shuffling any following data down (thus removing gap) More...

U32BIT	STB_GetUnicodeStringChar (U8BIT *string_ptr, U16BIT char_id)
	Retrieves the unicode value pointed to by char_id within the given string. If an invalid request occurs (ie char_id is beyond string limit) then 0 is returned. More...

U8BIT *	STB_ConcatUnicodeStrings (U8BIT *string1_ptr, U8BIT *string2_ptr)
	Appends the contents of string2_ptr to string1_ptr and returns a pointer to the newly created string. More...

U8BIT *	STB_UnicodeStringTokenise (U8BIT *string, U8BIT **save_ptr)
	Divides the (space separated) string up into individual words and returns them one per call. More...

U8BIT *	STB_UnicodeStrStr (U8BIT *str1, U8BIT *str2, BOOLEAN ignore_case)
	Finds the first occurrence of str2 in str1 and returns a pointer to the substring (as per strstr) More...

S8BIT	STB_CompareUnicodeStrings (U8BIT *string1_ptr, U8BIT *string2_ptr, BOOLEAN exact_match, BOOLEAN ignore_case)
	Compares the contents of the two given unicode strings and returns the status (as per strcmp) More...

U8BIT *	STB_ConvertStringToUnicode (U8BIT *string, BOOLEAN *reverse_dir, U16BIT *nchar, BOOLEAN strip_DVB_cntrl_char, U32BIT lang_code)
	Converts the specified DVB coded string into a unicode string, counting the number of characters and checking for right-to-left characters as it goes. More...

U8BIT *	STB_ConvertStringToUTF8 (U8BIT *string, U16BIT *nchar, BOOLEAN strip_DVB_cntrl_char, U32BIT lang_code)
	Converts the given DVB coded string into a UTF-8 unicode string. The returned string will be preceded by the DVB byte, 0x15, indicating the string is UTF-8 format. The returned string should be freed using STB_ReleaseUnicodeString. More...

void	STB_ReleaseUnicodeString (U8BIT *string)
	Releases the specified unicode string, freeing associated heap resources. More...

U8BIT *	STB_ConvertUTF16toUTF8 (U8BIT *src, U32BIT *outlen)
	Converts the given string from UTF-16 to UTF-8 and returns a new string. The returned string should be freed using STB_ReleaseUnicodeString. More...

BOOLEAN	STB_IsStringEmpty (U8BIT *string_ptr)
	Checks for a string of only spaces. More...

void	STB_SetDefaultAsciiTable (U8BIT table)
	Sets default ascii table to be used, if not overridden by the table index at the start of a string. More...

S8BIT	STB_CompareStringsIgnoreCase (U8BIT *string1_ptr, U8BIT *string2_ptr)
	Compares the contents of the two given ASCII strings and returns the status (as per strcmp) but ignores case. More...

U8BIT *	STB_FormatUnicodeString (BOOLEAN strip_DVB_cntrl_char, BOOLEAN *reverse_dir, const U8BIT *const format_ptr,...)
	Unicode version of sprintf. More...

U8BIT *	STB_UnicodeInsertString (U8BIT *src_str, U16BIT insert_pos, U8BIT *insert_str, BOOLEAN replace_char)
	Creates a new string by inserting one string into another at a given position, with the option of replacing the char at the given position. Strings can be passed as DVB or unicode, but output will always be unicode and the resulting string must be freed. More...

U8BIT *	STB_UnicodeStripControlChars (U8BIT *string_ptr)
	Strips the DVB control characters from a string that's already in UTF-8 or UTF-16 format. The control chars that are stripped are DVB emphasis on/off and DVB CR/LF. The input string isn't changed and the returned string must be freed by calling STB_ReleaseUnicodeString. More...

Detailed Description

Contains Unicode string handling functions for STB usage.

Date: 31/05/2001

Macro Definition Documentation

#define MAX_NUM_WIDTH_DIGITS

Value:

3 /* max number of digits to specify number width in numeric format specifier e.g. "011" in "%011ld" */

Function Documentation

S8BIT STB_CompareStringsIgnoreCase	(	U8BIT *	string1_ptr,
		U8BIT *	string2_ptr
	)

Compares the contents of the two given ASCII strings and returns the status (as per strcmp) but ignores case.

Parameters

string1_ptr	Pointer to the 'master' string
string2_ptr	Pointer to the 'slave' string

Returns: Result of the comparison. 0 if equal, +ve if string1_ptr > string2_ptr, -ve if string1_ptr < string2_ptr.

S8BIT STB_CompareUnicodeStrings	(	U8BIT *	string1_ptr,
		U8BIT *	string2_ptr,
		BOOLEAN	exact_match,
		BOOLEAN	ignore_case
	)

Compares the contents of the two given unicode strings and returns the status (as per strcmp)

Parameters

string1_ptr	Pointer to the 'master' string
string2_ptr	Pointer to the 'slave' string
exact_match	If TRUE, and the strings are the same up to the end of one of them, the lengths of the strings must also be the same for the strings to be equal
ignore_case	If TRUE, case is ignored when comparing chars, if appropriate.

Returns: Result of the comparison. 0 if equal, +ve if string1_ptr > string2_ptr, -ve if string1_ptr < string2_ptr.

U8BIT* STB_ConcatUnicodeStrings	(	U8BIT *	string1_ptr,
		U8BIT *	string2_ptr
	)

Appends the contents of string2_ptr to string1_ptr and returns a pointer to the newly created string.

Parameters

string1_ptr	Pointer to the first string, and hence first part of concatenated string.
string2_ptr	Pointer to the second string and hence last part of concatenated string.

Returns: Pointer to the concatenated string or NULL if failed

U8BIT* STB_ConvertStringToUnicode	(	U8BIT *	string,
		BOOLEAN *	reverse_dir,
		U16BIT *	nchar,
		BOOLEAN	strip_DVB_cntrl_char,
		U32BIT	lang_code
	)

Converts the specified DVB coded string into a unicode string, counting the number of characters and checking for right-to-left characters as it goes.

Parameters

string	- pointer to the string to be converted
reverse_dir	- reverse print direction (passed by ref and set by this function)
nchar	- number of characters (passed by ref and set by this function)
strip_DVB_cntrl_char	- True if all DVB control chars are to be removed

Returns: A unicode string or NULL. NULL indicates error or NULL string pointer.

U8BIT* STB_ConvertStringToUTF8	(	U8BIT *	string,
		U16BIT *	nchar,
		BOOLEAN	strip_DVB_cntrl_char,
		U32BIT	lang_code
	)

Converts the given DVB coded string into a UTF-8 unicode string. The returned string will be preceded by the DVB byte, 0x15, indicating the string is UTF-8 format. The returned string should be freed using STB_ReleaseUnicodeString.

Parameters

string	- DVB string to be converted
nchar	- number of characters, not bytes, in the returned string
strip_DVB_cntrl_char	- TRUE if DVB control character codes aren't to be included in the converted string
lang_code	- language code of the string, which may affect the ETSI defined character code table used when doing the conversion. If the code is 0 then the default table will be used.

Returns: UTF-8 format string

U8BIT* STB_ConvertUTF16toUTF8	(	U8BIT *	src,
		U32BIT *	outlen
	)

Converts the given string from UTF-16 to UTF-8 and returns a new string. The returned string should be freed using STB_ReleaseUnicodeString.

Parameters

src	- UTF-16 string to be converted
outlen	- number of bytes in the returned string

Returns: UTF-8 format string

U8BIT* STB_DeleteUnicodeStringChar	(	U8BIT *	string_ptr,
		U16BIT	char_id
	)

Takes a string and removes the requested location, shuffling any following data down (thus removing gap)

Parameters

string_ptr	Pointer to the string to be updated
char_id	Character to be changed

Returns: Pointer to the updated string or unchanged string if invalid request made.

U8BIT* STB_FormatUnicodeString	(	BOOLEAN	strip_DVB_cntrl_char,
		BOOLEAN *	reverse_dir,
		const U8BIT *const	format_ptr,
			...
	)

Unicode version of sprintf.

Parameters

strip_DVB_cntrl_char	True if all DVB control chars are to be removed
reverse_dir	reverse print direction (out)
format_ptr	Takes a format string of any format, and recognises the following tokens: hu (U8BIT), u (U16BIT), lu (U32BIT), hd (S8BIT), d (S16BIT), ld (S32BIT), hx (U8BIT), x (U16BIT), lx (U32BIT), s (U8BIT*) for 8-bit Ascii or Unicode strings, %% (to output a % character). For the decimal and hexadecimal number tokens there is also limited support for number/precision flags up to a maximum width of 11 digits (e.g. %1 to %11 to pad the number with leading spaces, %01 to %011 to pad the number with leading zeroes). The maximum width of the number/precision flag is 3 characters.
...	Parameters to be substituted into the format string.

Returns: NULL if there is any error, otherwise a newly allocated unicode string. Note: The string must be freed with STB_ReleaseUnicodeString.

U32BIT STB_GetNumBytesInString ( U8BIT * string_ptr )

Determines the no of bytes of the given string.

Parameters

string_ptr - Pointer to the U8BIT string.

Returns: Returns no of bytes of string including headers and terminators.

U32BIT STB_GetUnicodeStringChar	(	U8BIT *	string_ptr,
		U16BIT	char_id
	)

Retrieves the unicode value pointed to by char_id within the given string. If an invalid request occurs (ie char_id is beyond string limit) then 0 is returned.

Parameters

string_ptr	Pointer to the string to be searched
char_id	Character to be retrieved

Returns: unicode value contained at the requested string position (0xFFFF if invalid position or string).

BOOLEAN STB_IsNormalString ( U8BIT * string_ptr )

Tests for normal ascii string.

Parameters

U8BIT* string_ptr - string to test if normal ascii

Returns: BOOLEAN is_normal

BOOLEAN STB_IsStringEmpty ( U8BIT * string_ptr )

Checks for a string of only spaces.

Parameters

string_ptr - pointer to the string to be checked.

Returns: TRUE if empty, FALSE otherwise.

BOOLEAN STB_IsUnicodeString ( U8BIT * string_ptr )

Tests for unicode string.

Parameters

U8BIT* string_ptr - string to test if unicode

Returns: BOOLEAN is_unicode

BOOLEAN STB_IsUnicodeStringReversed ( U8BIT * string_ptr )

Checks to see if the supplied string is unicode and if it is reversed (arabic)

Parameters

U8BIT* string_ptr - string to test if unicode

Returns: BOOLEAN is_reversed TRUE if reversed

void STB_ReleaseUnicodeString ( U8BIT * string )

Releases the specified unicode string, freeing associated heap resources.

Parameters

string - pointer to the unicode string to be released. NB - This will have been supplied by a previous call to STB_ConvertStringToUnicode()!

void STB_SetDefaultAsciiTable ( U8BIT table )

Sets default ascii table to be used, if not overridden by the table index at the start of a string.

Parameters

table - index of the table to be used (0 to 15), as defined in ETSI 300 468, Annex A, table A.3

U8BIT* STB_SetUnicodeStringChar	(	U8BIT *	string_ptr,
		U16BIT	char_id,
		U16BIT	code
	)

Takes a string and changes the requested location to a new value. This request may involve appending to the string in which case the string is extended (always extended by one character, independent of char_id).

Parameters

string_ptr	Pointer to the string to be updated
char_id	Character to be changed
code	Unicode value to be inserted at position indicated by char_id

Returns: Pointer to the updated string or unchanged string if invalid request made.

U8BIT* STB_UnicodeInsertString	(	U8BIT *	src_str,
		U16BIT	insert_pos,
		U8BIT *	insert_str,
		BOOLEAN	replace_char
	)

Creates a new string by inserting one string into another at a given position, with the option of replacing the char at the given position. Strings can be passed as DVB or unicode, but output will always be unicode and the resulting string must be freed.

Parameters

src_str	- string into which the insertion will be made
insert_pos	- position in the source string to make the insertion, it will be after this position
insert_str	- string to be inserted
replace_char	- TRUE if the char at the insertion point is to be replaced by the insertion string

Returns: new string with text inserted

U32BIT STB_UnicodeStringLen ( U8BIT * string_ptr )

Determines the length, in characters, of the given unicode string by searching for NULL. Count ignores the unicode header value.

Parameters

string_ptr - Pointer to the U8BIT string.

Returns: Returns length of string

U8BIT* STB_UnicodeStringTokenise	(	U8BIT *	string,
		U8BIT **	save_ptr
	)

Divides the (space separated) string up into individual words and returns them one per call.

Parameters

string	The string to be tokenised (will be modified by this function)
save_ptr	The caller's variable in which the current pointer can be saved

Returns: Pointer to the next token in the string, or NULL if no more

U8BIT* STB_UnicodeStripControlChars ( U8BIT * string_ptr )

Strips the DVB control characters from a string that's already in UTF-8 or UTF-16 format. The control chars that are stripped are DVB emphasis on/off and DVB CR/LF. The input string isn't changed and the returned string must be freed by calling STB_ReleaseUnicodeString.

Parameters

string_ptr UTF-8 or UTF-16 string from which the control chars are to be stripped

Returns: new string in the same format as the input string

U8BIT* STB_UnicodeStrStr	(	U8BIT *	str1,
		U8BIT *	str2,
		BOOLEAN	ignore_case
	)

Finds the first occurrence of str2 in str1 and returns a pointer to the substring (as per strstr)

Parameters

str1	String being searched
str2	String being searched for
ignore_case	If TRUE, ignores case when comparing characters

Returns: Pointer to the first occurrence of substring in str1, or NULL if not found

Classes

Macros

Functions

Detailed Description

Macro Definition Documentation

Function Documentation