artefaktur
software engineer &        architecture

 
 
 
 

struct StringUtf8Utils

Modules   Namespace List   Class Hierarchy   Alphabetical List   Compound List   Namespace Members   Compound Members   Related Pages  

acdk::lang::StringUtf8Utils Struct Reference

#include <StringUtf8Utils.h>

List of all members.


Detailed Description

Internal Helper class to handle UTF8 strings.


Public Types

enum  UtfStreamType { StreamIsAscii, StreamIsUtf8, StreamIsError }

Static Public Member Functions

void incUtfPtr (const char *&ptr, const char *endptr)
void decUtfPtr (const char *&ptr, const char *beginptr)
size_t utfDiff (const char *end, const char *begin)
bool isAscii (const char *begin, const char *end)
ucchar fetchWideChar (const char *&begin, const char *end)
ucchar toWideChar (const char *begin, const char *end)
int getByteLength (const char *it)
int utflength (ucchar ch)
 returns the number of bytes a Unicode character needs

int utflength (const ucchar *begin, const ucchar *end)
 returns the number of bytes a unicode string needs

int utflength (const ucchar *begin)
UtfStreamType validUtf8Stream (const byte *begin, const byte *end, bool throwOnFail=true)
 check if the character byte stream is a valid UTF8 stream.

int uclength (const ucchar *ch)
 returns the length of a 0-terminated Unicode character string

int uc2length (const uc2char *ch)
int uc4length (const uc4char *ch)
 returns the length of a 0-terminated uc4char stream

int writeUcToUtf8 (byte *&it, byte *end, ucchar ucc)
 writes a Unicode character into the given char buffer

int writeUcToUtf8 (byte *&it, byte *end, const ucchar *ucbegin, const ucchar *ucend)
 writes the given unicode character range into 8bit char range

ucchar * uc4touc (ucchar *buffer, const uc4char *source, int length=-1)
 converts a uc4 character stream to a ucchar stream.

ucchar * wcchartouc (ucchar *buffer, const wchar_t *source, int length=-1)
size_t stringlength (const char *str)
size_t stringlength (const ucchar *str)
size_t stringlength (const uc4char *str)
template<typename ToCharType, typename FromCharType> ToCharType * convertTo (ToCharType *buffer, const FromCharType *source, int length=-1)

Member Enumeration Documentation

enum acdk::lang::StringUtf8Utils::UtfStreamType
 

Enumeration values:
StreamIsAscii 
StreamIsUtf8 
StreamIsError 

Member Function Documentation

template<typename ToCharType, typename FromCharType>
ToCharType* acdk::lang::StringUtf8Utils::convertTo ToCharType *  buffer,
const FromCharType *  source,
int  length = -1
[inline, static]
 

void acdk::lang::StringUtf8Utils::decUtfPtr const char *&  ptr,
const char *  beginptr
[static]
 

ucchar acdk::lang::StringUtf8Utils::fetchWideChar const char *&  begin,
const char *  end
[static]
 

int acdk::lang::StringUtf8Utils::getByteLength const char *  it  )  [static]
 

void acdk::lang::StringUtf8Utils::incUtfPtr const char *&  ptr,
const char *  endptr
[static]
 

bool acdk::lang::StringUtf8Utils::isAscii const char *  begin,
const char *  end
[static]
 

size_t acdk::lang::StringUtf8Utils::stringlength const uc4char *  str  )  [inline, static]
 

size_t acdk::lang::StringUtf8Utils::stringlength const ucchar *  str  )  [inline, static]
 

size_t acdk::lang::StringUtf8Utils::stringlength const char *  str  )  [inline, static]
 

ucchar acdk::lang::StringUtf8Utils::toWideChar const char *  begin,
const char *  end
[inline, static]
 

int acdk::lang::StringUtf8Utils::uc2length const uc2char *  ch  )  [inline, static]
 

int acdk::lang::StringUtf8Utils::uc4length const uc4char *  ch  )  [static]
 

returns the length of a 0-terminated uc4char stream

ucchar* acdk::lang::StringUtf8Utils::uc4touc ucchar *  buffer,
const uc4char *  source,
int  length = -1
[static]
 

converts a uc4 character stream to a ucchar stream.

uc4 chars that cannot be mapped are mapped to 0xFFFF

buffer must have enough space to hold all length + 1 chars

Parameters:
buffer target to write
source 0 terminated uc4char stream
length length of source, -1 if 0 terminated uc4 string
Returns:
just the buffer

int acdk::lang::StringUtf8Utils::uclength const ucchar *  ch  )  [static]
 

returns the length of a 0-terminated Unicode character string

size_t acdk::lang::StringUtf8Utils::utfDiff const char *  end,
const char *  begin
[static]
 

int acdk::lang::StringUtf8Utils::utflength const ucchar *  begin  )  [inline, static]
 

int acdk::lang::StringUtf8Utils::utflength const ucchar *  begin,
const ucchar *  end
[static]
 

returns the number of bytes a unicode string needs

int acdk::lang::StringUtf8Utils::utflength ucchar  ch  )  [static]
 

returns the number of bytes a Unicode character needs

UtfStreamType acdk::lang::StringUtf8Utils::validUtf8Stream const byte *  begin,
const byte *  end,
bool  throwOnFail = true
[static]
 

check if the character byte stream is a valid UTF8 stream.

If throwOnFail is true, a UTFDataFormatException will be thrown on failure.

ucchar* acdk::lang::StringUtf8Utils::wcchartouc ucchar *  buffer,
const wchar_t *  source,
int  length = -1
[static]
 

int acdk::lang::StringUtf8Utils::writeUcToUtf8 byte *&  it,
byte *  end,
const ucchar *  ucbegin,
const ucchar *  ucend
[static]
 

writes the given unicode character range into 8bit char range

Parameters:
it start position to write; it is modified and points to the next write position after this method call
end capacity position; if 0, no checking for end is done
Returns:
0 if OK, -1 on error, a value > 0 if more byte chars are needed to encode up to end

int acdk::lang::StringUtf8Utils::writeUcToUtf8 byte *&  it,
byte *  end,
ucchar  ucc
[static]
 

writes a Unicode character into the given char buffer

Parameters:
it start position to write; it is modified and points to the next write position after this method call
end capacity position; if 0, no checking for end is done
Returns:
0 if OK, -1 on error, a value > 0 if more chars are needed to encode the given Unicode character up to end. If the return value is != 0, the position of it is not changed.
 
Last modified 2005-05-08 18:32 by SYSTEM By Artefaktur, Ing. Bureau Kommer