123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254 |
- #ifndef LLVM_SUPPORT_CONVERTUTF_H
- #define LLVM_SUPPORT_CONVERTUTF_H
- #include <stddef.h> /* ptrdiff_t */
/* Code-unit and boolean aliases used by every routine declared in this
 * header. They are plain typedefs (not C++ `using`) because the header is
 * also parsed as C -- see the __cplusplus guards around the prototypes.
 * NOTE(review): the names assume unsigned int is at least 32 bits and
 * unsigned short at least 16 bits on every supported target; nothing here
 * checks that. */
typedef unsigned int   UTF32;   /* one UTF-32 code unit */
typedef unsigned short UTF16;   /* one UTF-16 code unit */
typedef unsigned char  UTF8;    /* one UTF-8 code unit (a single byte) */
typedef unsigned char  Boolean; /* result type of the isLegalUTF8* predicates; presumably 0 or 1 */
/* Unicode limits and sentinel values.
 *
 * Each cast-style macro is wrapped in an outer pair of parentheses so that it
 * expands to a single primary expression. Without them, `sizeof UNI_MAX_BMP`
 * is parsed as `sizeof(UTF32)` followed by a stray integer literal, and any
 * postfix operator applied to the macro would bind to the literal rather than
 * to the cast result. The values themselves are unchanged. */
#define UNI_REPLACEMENT_CHAR ((UTF32)0x0000FFFD) /* U+FFFD REPLACEMENT CHARACTER */
#define UNI_MAX_BMP          ((UTF32)0x0000FFFF) /* last code point of the Basic Multilingual Plane */
#define UNI_MAX_UTF16        ((UTF32)0x0010FFFF) /* highest code point reachable through UTF-16 surrogate pairs */
#define UNI_MAX_UTF32        ((UTF32)0x7FFFFFFF) /* largest 31-bit value */
#define UNI_MAX_LEGAL_UTF32  ((UTF32)0x0010FFFF) /* U+10FFFF, top of the Unicode code space */

/* Upper bound on the number of UTF-8 bytes used to encode one code point. */
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4

/* U+FEFF as a UTF-16 code unit, and the same code unit with its two bytes
 * exchanged (what a reader sees when the stream has the opposite byte order). */
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE  0xFEFF
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
/* Status code returned by the Convert* routines and by
 * llvm::convertUTF8Sequence. The numeric values are spelled out but are the
 * same ones the compiler would assign implicitly (0..3).
 * NOTE(review): apart from sourceExhausted, the meanings below are inferred
 * from the enumerator names -- confirm against the implementation. */
typedef enum {
  conversionOK    = 0, /* presumably: the whole input was converted */
  sourceExhausted = 1, /* input ended before a complete sequence was available
                          (this is what convertUTF8Sequence returns for an empty
                          or truncated sequence) */
  targetExhausted = 2, /* presumably: the output buffer was too small */
  sourceIllegal   = 3  /* presumably: the input contained a malformed sequence */
} ConversionResult;
/* Mode selector passed as the last argument of every Convert* routine.
 * NOTE(review): the names suggest that strict mode rejects questionable
 * input while lenient mode tolerates or repairs it; the exact policy lives in
 * the implementation, which is not visible from this header. */
typedef enum {
  strictConversion  = 0,
  lenientConversion = 1
} ConversionFlags;
- #ifdef __cplusplus
- extern "C" {
- #endif
- ConversionResult ConvertUTF8toUTF16 (
- const UTF8** sourceStart, const UTF8* sourceEnd,
- UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
- ConversionResult ConvertUTF8toUTF32 (
- const UTF8** sourceStart, const UTF8* sourceEnd,
- UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
- ConversionResult ConvertUTF16toUTF8 (
- const UTF16** sourceStart, const UTF16* sourceEnd,
- UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
- ConversionResult ConvertUTF32toUTF8 (
- const UTF32** sourceStart, const UTF32* sourceEnd,
- UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
- ConversionResult ConvertUTF16toUTF32 (
- const UTF16** sourceStart, const UTF16* sourceEnd,
- UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
- ConversionResult ConvertUTF32toUTF16 (
- const UTF32** sourceStart, const UTF32* sourceEnd,
- UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
- Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
- Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
- unsigned getNumBytesForUTF8(UTF8 firstByte);
-
- int getUTF8StringLength(const UTF8* utf8);
- #ifdef __cplusplus
- }
- #include <vector>
- #include <string>
- namespace llvm {
- bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
- char *&ResultPtr, const UTF8 *&ErrorPtr);
- bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
- static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
- const UTF8 *sourceEnd,
- UTF32 *target,
- ConversionFlags flags) {
- if (*source == sourceEnd)
- return sourceExhausted;
- unsigned size = getNumBytesForUTF8(**source);
- if ((ptrdiff_t)size > sourceEnd - *source)
- return sourceExhausted;
- return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags);
- }
- bool hasUTF16ByteOrderMark(const char* SrcBytes, size_t len);
- bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out);
- }
- #endif
- #endif
|