SNICUH缝纫机乐队 悟空传机

 59.1
C API: 8-bit Unicode handling macros.
#include "unicode/umachine.h"
#include "unicode/utf.h"
#define U8_COUNT_TRAIL_BYTES(leadByte)
 Counts the trail bytes for a UTF-8 lead byte.
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte)   (((leadByte)>=0xc0)+((leadByte)>=0xe0)+((leadByte)>=0xf0))
 Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes)   ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
 Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
#define U8_IS_SINGLE(c)   (((c)&0x80)==0)
 Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
#define U8_IS_LEAD(c)   ((uint8_t)((c)-0xc0)<0x3e)
 Is this code unit (byte) a UTF-8 lead byte?
#define U8_IS_TRAIL(c)   (((c)&0xc0)==0x80)
 Is this code unit (byte) a UTF-8 trail byte?
#define U8_LENGTH(c)
 How many code units (bytes) are used for the UTF-8 encoding of this Unicode code point?
#define U8_MAX_LENGTH   4
 The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
#define (s,
 Get a code point from a string at a random-access offset, without changing the offset.
#define (s,
 Get a code point from a string at a random-access offset, without changing the offset.
#define (s,
 Get a code point from a string at a random-access offset, without changing the offset.
#define (s,
 Get a code point from a string at a code point boundary offset, and advance the offset to the next code point boundary.
#define (s,
 Get a code point from a string at a code point boundary offset, and advance the offset to the next code point boundary.
#define (s,
 Get a code point from a string at a code point boundary offset, and advance the offset to the next code point boundary.
#define (s,
 Append a code point to a string, overwriting 1 to 4 bytes.
#define (s,
 Append a code point to a string, overwriting 1 to 4 bytes.
#define (s,
 Advance the string offset from one code point boundary to the next.
#define (s,
 Advance the string offset from one code point boundary to the next.
#define (s,
 Advance the string offset from one code point boundary to the n-th next one, i.e., move forward by n code points.
#define (s,
 Advance the string offset from one code point boundary to the n-th next one, i.e., move forward by n code points.
#define (s,
 Adjust a random-access offset to a code point boundary at the start of a code point.
#define (s,
 Adjust a random-access offset to a code point boundary at the start of a code point.
#define (s,
 Move the string offset from one code point boundary to the previous one and get the code point between them.
#define (s,
 Move the string offset from one code point boundary to the previous one and get the code point between them.
#define (s,
 Move the string offset from one code point boundary to the previous one and get the code point between them.
#define (s,
 Move the string offset from one code point boundary to the previous one.
#define (s,
 Move the string offset from one code point boundary to the previous one.
#define (s,
 Move the string offset from one code point boundary to the n-th one before it, i.e., move backward by n code points.
#define (s,
 Move the string offset from one code point boundary to the n-th one before it, i.e., move backward by n code points.
#define (s,
 Adjust a random-access offset to a code point boundary after a code point.
#define (s,
 Adjust a random-access offset to a code point boundary after a code point.
UChar32 utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict)
 Function for handling "next code point" with error-checking.
int32_t utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError)
 Function for handling "append code point" with error-checking.
UChar32 utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict)
 Function for handling "previous code point" with error-checking.
int32_t utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i)
 Function for handling "skip backward one code point" with error-checking.
C API: 8-bit Unicode handling macros.
This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
For more information see utf.h and the ICU User Guide Strings chapter (http://userguide.icu-project.org/strings).
Usage: ICU coding guidelines for if() statements should be followed when using these macros. Compound statements (curly braces {}) must be used for if-else-while... bodies and all macro statements should be terminated with semicolon.
Definition in file utf8.h.
#define U8_APPEND(s, i, capacity, c, isError) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
        (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
        (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } else { \
        (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \
    } \
}
int32_t utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError): Function for handling "append code point" with error-checking.
Append a code point to a string, overwriting 1 to 4 bytes.
The offset points to the current end of the string contents and is advanced (post-increment). "Safe" macro, checks for a valid code point. If a non-ASCII code point is written, checks for sufficient space in the string. If the code point is not valid or trail bytes do not fit, then isError is set to TRUE.
Parameters
sconst uint8_t * string buffer
iint32_t string offset, must be i<capacity
capacityint32_t size of the string buffer
cUChar32 code point to append
isErroroutput UBool set to TRUE if an error occurs, otherwise not modified
Definition at line
U8_APPEND_UNSAFE
#define U8_APPEND_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}
Append a code point to a string, overwriting 1 to 4 bytes.
The offset points to the current end of the string contents and is advanced (post-increment). "Unsafe" macro, assumes a valid code point and sufficient space in the string. Otherwise, the result is undefined.
Parameters
sconst uint8_t * string buffer
istring offset
ccode point to append
Definition at line
#define U8_BACK_1(s, start, i) { \
    if(U8_IS_TRAIL((s)[--(i)])) { \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
}
#define U8_IS_TRAIL(c): Is this code unit (byte) a UTF-8 trail byte? (Definition: utf8.h)
int32_t utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i): Function for handling "skip backward one code point" with error-checking.
Move the string offset from one code point boundary to the previous one.
(Pre-decrementing backward iteration.) The input offset may be the same as the string length. "Safe" macro, checks for illegal sequences and for string boundaries.
Parameters
sconst uint8_t * string
startint32_t starting string offset (usually 0)
iint32_t string offset, must be start<i
Definition at line
U8_BACK_1_UNSAFE
#define U8_BACK_1_UNSAFE(s, i) { \
    while(U8_IS_TRAIL((s)[--(i)])) {} \
}
#define U8_IS_TRAIL(c): Is this code unit (byte) a UTF-8 trail byte? (Definition: utf8.h)
Move the string offset from one code point boundary to the previous one.
(Pre-decrementing backward iteration.) The input offset may be the same as the string length. "Unsafe" macro, assumes well-formed UTF-8.
Parameters
sconst uint8_t * string
istring offset
Definition at line
#define U8_BACK_N(s, start, i, n) { \
    int32_t __N=(n); \
    while(__N>0 && (i)>(start)) { \
        U8_BACK_1(s, start, i); \
        --__N; \
    } \
}
Move the string offset from one code point boundary to the n-th one before it, i.e., move backward by n code points.
(Pre-decrementing backward iteration.) The input offset may be the same as the string length. "Safe" macro, checks for illegal sequences and for string boundaries.
Parameters
sconst uint8_t * string
startint32_t index of the start of the string
iint32_t string offset, must be start<i
nnumber of code points to skip
Definition at line
U8_BACK_N_UNSAFE
#define U8_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        U8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}
Move the string offset from one code point boundary to the n-th one before it, i.e., move backward by n code points.
(Pre-decrementing backward iteration.) The input offset may be the same as the string length. "Unsafe" macro, assumes well-formed UTF-8.
Parameters
sconst uint8_t * string
istring offset
nnumber of code points to skip
Definition at line
U8_COUNT_TRAIL_BYTES
#define U8_COUNT_TRAIL_BYTES(leadByte)
Value:((uint8_t)(leadByte)<0xf0 ? \
    ((uint8_t)(leadByte)>=0xc0)+((uint8_t)(leadByte)>=0xe0) : \
    (uint8_t)(leadByte)<0xfe ? 3+((uint8_t)(leadByte)>=0xf8)+((uint8_t)(leadByte)>=0xfc) : 0)
Counts the trail bytes for a UTF-8 lead byte.
Returns 0 for 0..0xbf as well as for 0xfe and 0xff.
This is internal since it is not meant to be called directly however it is called by public macros in this file and thus must remain stable.
Note: Beginning with ICU 50, the implementation uses a multi-condition expression which was shown in 2012 (on x86-64) to compile to fast, branch-free code. leadByte is evaluated multiple times.
The pre-ICU 50 implementation used the exported array utf8_countTrailBytes: #define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[leadByte]). With that implementation, leadByte was evaluated exactly once.
Parameters
leadByteThe first byte of a UTF-8 sequence. Must be 0..0xff.
Do not use. This API is for internal use only.
Definition at line
U8_COUNT_TRAIL_BYTES_UNSAFE
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte)   (((leadByte)>=0xc0)+((leadByte)>=0xe0)+((leadByte)>=0xf0))
Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
The maximum supported lead byte is 0xf4 corresponding to U+10FFFF. leadByte might be evaluated multiple times.
This is internal since it is not meant to be called directly however it is called by public macros in this file and thus must remain stable.
Parameters
leadByteThe first byte of a UTF-8 sequence. Must be 0..0xff.
Do not use. This API is for internal use only.
Definition at line
#define U8_FWD_1(s, i, length) { \
    uint8_t __b=(uint8_t)(s)[(i)++]; \
    if(U8_IS_LEAD(__b)) { \
        uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
        if((i)+__count>(length) && (length)>=0) { \
            __count=(uint8_t)((length)-(i)); \
        } \
        while(__count>0 && U8_IS_TRAIL((s)[i])) { \
            ++(i); \
            --__count; \
        } \
    } \
}
#define U8_IS_LEAD(c): Is this code unit (byte) a UTF-8 lead byte? (Definition: utf8.h)
#define U8_COUNT_TRAIL_BYTES(leadByte): Counts the trail bytes for a UTF-8 lead byte. (Definition: utf8.h)
#define U8_IS_TRAIL(c): Is this code unit (byte) a UTF-8 trail byte? (Definition: utf8.h)
Advance the string offset from one code point boundary to the next.
(Post-incrementing iteration.) "Safe" macro, checks for illegal sequences and for string boundaries.
The length can be negative for a NUL-terminated string.
Parameters
sconst uint8_t * string
iint32_t string offset, must be i<length
lengthint32_t string length
Definition at line
U8_FWD_1_UNSAFE
#define U8_FWD_1_UNSAFE(s, i) { \
    (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((uint8_t)(s)[i]); \
}
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte): Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence. (Definition: utf8.h)
Advance the string offset from one code point boundary to the next.
(Post-incrementing iteration.) "Unsafe" macro, assumes well-formed UTF-8.
Parameters
sconst uint8_t * string
istring offset
Definition at line
#define U8_FWD_N(s, i, length, n) { \
    int32_t __N=(n); \
    while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
        U8_FWD_1(s, i, length); \
        --__N; \
    } \
}
Advance the string offset from one code point boundary to the n-th next one, i.e., move forward by n code points.
(Post-incrementing iteration.) "Safe" macro, checks for illegal sequences and for string boundaries.
The length can be negative for a NUL-terminated string.
Parameters
sconst uint8_t * string
iint32_t string offset, must be i<length
lengthint32_t string length
nnumber of code points to skip
Definition at line
U8_FWD_N_UNSAFE
#define U8_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        U8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}
Advance the string offset from one code point boundary to the n-th next one, i.e., move forward by n code points.
(Post-incrementing iteration.) "Unsafe" macro, assumes well-formed UTF-8.
Parameters
sconst uint8_t * string
istring offset
nnumber of code points to skip
Definition at line
#define U8_GET(s, start, i, length, c) { \
    int32_t _u8_get_index=(i); \
    U8_SET_CP_START(s, start, _u8_get_index); \
    U8_NEXT(s, _u8_get_index, length, c); \
}
Get a code point from a string at a random-access offset, without changing the offset.
The offset may point to either the lead byte or one of the trail bytes for a code point, in which case the macro will read all of the bytes for the code point.
The length can be negative for a NUL-terminated string.
If the offset points to an illegal UTF-8 byte sequence, then c is set to a negative value. Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
Parameters
sconst uint8_t * string
startint32_t starting string offset
iint32_t string offset, must be start<=i<length
lengthint32_t string length
coutput UChar32 variable, set to <0 in case of an error
Definition at line
U8_GET_OR_FFFD
#define U8_GET_OR_FFFD(s, start, i, length, c) { \
    int32_t _u8_get_index=(i); \
    U8_SET_CP_START(s, start, _u8_get_index); \
    U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
}
Get a code point from a string at a random-access offset, without changing the offset.
The offset may point to either the lead byte or one of the trail bytes for a code point, in which case the macro will read all of the bytes for the code point.
The length can be negative for a NUL-terminated string.
If the offset points to an illegal UTF-8 byte sequence, then c is set to U+FFFD. Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
This macro does not distinguish between a real U+FFFD in the text and U+FFFD returned for an ill-formed sequence. Use U8_GET() if that distinction is important.
Parameters
sconst uint8_t * string
startint32_t starting string offset
iint32_t string offset, must be start<=i<length
lengthint32_t string length
coutput UChar32 variable, set to U+FFFD in case of an error
Definition at line
U8_GET_UNSAFE
#define U8_GET_UNSAFE(s, i, c) { \
    int32_t _u8_get_unsafe_index=(int32_t)(i); \
    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
    U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
}
Get a code point from a string at a random-access offset, without changing the offset.
The offset may point to either the lead byte or one of the trail bytes for a code point, in which case the macro will read all of the bytes for the code point. The result is undefined if the offset points to an illegal UTF-8 byte sequence. Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
Parameters
sconst uint8_t * string
istring offset
coutput UChar32 variable
Definition at line
U8_IS_LEAD
#define U8_IS_LEAD(c)   ((uint8_t)((c)-0xc0)<0x3e)
Is this code unit (byte) a UTF-8 lead byte?
Parameters
c8-bit code unit (byte)
ReturnsTRUE or FALSE
Definition at line
U8_IS_SINGLE
#define U8_IS_SINGLE(c)
   (((c)&0x80)==0)
Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
Parameters
c8-bit code unit (byte)
ReturnsTRUE or FALSE
Definition at line
U8_IS_TRAIL
#define U8_IS_TRAIL(c)
   (((c)&0xc0)==0x80)
Is this code unit (byte) a UTF-8 trail byte?
Parameters
c8-bit code unit (byte)
ReturnsTRUE or FALSE
Definition at line
#define U8_LENGTH(c)
Value:((uint32_t)(c)<=0x7f ? 1 : \
    ((uint32_t)(c)<=0x7ff ? 2 : \
        ((uint32_t)(c)<=0xd7ff ? 3 : \
            ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
                ((uint32_t)(c)<=0xffff ? 3 : 4)\
            ) \
        ) \
    ) \
)
How many code units (bytes) are used for the UTF-8 encoding of this Unicode code point?
Parameters
c32-bit code point
Returns1..4, or 0 if c is a surrogate or not a Unicode code point
Definition at line
U8_MASK_LEAD_BYTE
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes)   ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
This is internal since it is not meant to be called directly however it is called by public macros in this file and thus must remain stable. Do not use. This API is for internal use only.
Definition at line
U8_MAX_LENGTH
#define U8_MAX_LENGTH   4
The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
Definition at line
#define U8_NEXT(s, i, length, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
    if((c)>=0x80) { \
        uint8_t __t1, __t2; \
        if( /* handle U+1000..U+CFFF inline */ \
            (0xe0<(c) && (c)<=0xec) && \
            (((i)+1)<(length) || (length)<0) && \
            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
            (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
        ) { \
            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
            (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
            (i)+=2; \
        } else if( /* handle U+0080..U+07FF inline */ \
            ((c)<0xe0 && (c)>=0xc2) && \
            ((i)!=(length)) && \
            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
        ) { \
            (c)=(((c)&0x1f)<<6)|__t1; \
            ++(i); \
        } else { \
            /* function call for "complicated" and error cases */ \
            (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \
        } \
    } \
}
UChar32 utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict): Function for handling "next code point" with error-checking.
uint16_t UChar: The base type for UTF-16 code units and pointers. (Definition: umachine.h)
Get a code point from a string at a code point boundary offset, and advance the offset to the next code point boundary.
(Post-incrementing forward iteration.) "Safe" macro, checks for illegal sequences and for string boundaries.
The length can be negative for a NUL-terminated string.
The offset may point to the lead byte of a multi-byte sequence, in which case the macro will read the whole sequence. If the offset points to a trail byte or an illegal UTF-8 sequence, then c is set to a negative value.
Parameters
sconst uint8_t * string
iint32_t string offset, must be i<length
lengthint32_t string length
coutput UChar32 variable, set to <0 in case of an error
Definition at line
U8_NEXT_OR_FFFD
#define U8_NEXT_OR_FFFD(s, i, length, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
    if((c)>=0x80) { \
        uint8_t __t1, __t2; \
        if( /* handle U+1000..U+CFFF inline */ \
            (0xe0<(c) && (c)<=0xec) && \
            (((i)+1)<(length) || (length)<0) && \
            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
            (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
        ) { \
            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
            (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
            (i)+=2; \
        } else if( /* handle U+0080..U+07FF inline */ \
            ((c)<0xe0 && (c)>=0xc2) && \
            ((i)!=(length)) && \
            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
        ) { \
            (c)=(((c)&0x1f)<<6)|__t1; \
            ++(i); \
        } else { \
            /* function call for "complicated" and error cases */ \
            (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \
        } \
    } \
}
UChar32 utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict): Function for handling "next code point" with error-checking.
uint16_t UChar: The base type for UTF-16 code units and pointers. (Definition: umachine.h)
Get a code point from a string at a code point boundary offset, and advance the offset to the next code point boundary.
(Post-incrementing forward iteration.) "Safe" macro, checks for illegal sequences and for string boundaries.
The length can be negative for a NUL-terminated string.
The offset may point to the lead byte of a multi-byte sequence, in which case the macro will read the whole sequence. If the offset points to a trail byte or an illegal UTF-8 sequence, then c is set to U+FFFD.
This macro does not distinguish between a real U+FFFD in the text and U+FFFD returned for an ill-formed sequence. Use U8_NEXT() if that distinction is important.
Parameters
sconst uint8_t * string
iint32_t string offset, must be i<length
lengthint32_t string length
coutput UChar32 variable, set to U+FFFD in case of an error
Definition at line
U8_NEXT_UNSAFE
#define U8_NEXT_UNSAFE(s, i, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
    if((c)>=0x80) { \
        if((c)<0xe0) { \
            (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
        } else if((c)<0xf0) { \
            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
            (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
            (i)+=2; \
        } else { \
            (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
            (i)+=3; \
        } \
    } \
}
uint16_t UChar: The base type for UTF-16 code units and pointers. (Definition: umachine.h)
Get a code point from a string at a code point boundary offset, and advance the offset to the next code point boundary.
(Post-incrementing forward iteration.) "Unsafe" macro, assumes well-formed UTF-8.
The offset may point to the lead byte of a multi-byte sequence, in which case the macro will read the whole sequence. The result is undefined if the offset points to a trail byte or an illegal UTF-8 sequence.
Parameters
sconst uint8_t * string
istring offset
coutput UChar32 variable
Definition at line
#define U8_PREV(s, start, i, c) { \
    (c)=(uint8_t)(s)[--(i)]; \
    if((c)>=0x80) { \
        (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
    } \
}
UChar32 utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict): Function for handling "previous code point" with error-checking.
Move the string offset from one code point boundary to the previous one and get the code point between them.
(Pre-decrementing backward iteration.) "Safe" macro, checks for illegal sequences and for string boundaries.
The input offset may be the same as the string length. If the offset is behind a multi-byte sequence, then the macro will read the whole sequence. If the offset is behind a lead byte, then that itself will be returned as the code point. If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
Parameters
sconst uint8_t * string
startint32_t starting string offset (usually 0)
iint32_t string offset, must be start<i
coutput UChar32 variable, set to <0 in case of an error
Definition at line
U8_PREV_OR_FFFD
#define U8_PREV_OR_FFFD(s, start, i, c) { \
    (c)=(uint8_t)(s)[--(i)]; \
    if((c)>=0x80) { \
        (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \
    } \
}
UChar32 utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict): Function for handling "previous code point" with error-checking.
Move the string offset from one code point boundary to the previous one and get the code point between them.
(Pre-decrementing backward iteration.) "Safe" macro, checks for illegal sequences and for string boundaries.
The input offset may be the same as the string length. If the offset is behind a multi-byte sequence, then the macro will read the whole sequence. If the offset is behind a lead byte, then that itself will be returned as the code point. If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
This macro does not distinguish between a real U+FFFD in the text and U+FFFD returned for an ill-formed sequence. Use U8_PREV() if that distinction is important.
Parameters
sconst uint8_t * string
startint32_t starting string offset (usually 0)
iint32_t string offset, must be start<i
coutput UChar32 variable, set to U+FFFD in case of an error
Definition at line
U8_PREV_UNSAFE
#define U8_PREV_UNSAFE(s, i, c) { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(U8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(uint8_t)(s)[--(i)]; \
            if(__b>=0xc0) { \
                U8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
}
#define U8_IS_TRAIL(c): Is this code unit (byte) a UTF-8 trail byte? (Definition: utf8.h)
int32_t UChar32: Define UChar32 as a type for single Unicode code points. (Definition: umachine.h)
Move the string offset from one code point boundary to the previous one and get the code point between them.
(Pre-decrementing backward iteration.) "Unsafe" macro, assumes well-formed UTF-8.
The input offset may be the same as the string length. If the offset is behind a multi-byte sequence, then the macro will read the whole sequence. If the offset is behind a lead byte, then that itself will be returned as the code point. The result is undefined if the offset is behind an illegal UTF-8 sequence.
Parameters
sconst uint8_t * string
istring offset
coutput UChar32 variable
Definition at line
U8_SET_CP_LIMIT
#define U8_SET_CP_LIMIT(s, start, i, length) { \
    if((start)<(i) && ((i)<(length) || (length)<0)) { \
        U8_BACK_1(s, start, i); \
        U8_FWD_1(s, i, length); \
    } \
}
Adjust a random-access offset to a code point boundary after a code point.
If the offset is behind a partial multi-byte sequence, then the offset is incremented to behind the whole sequence. Otherwise, it is not modified. The input offset may be the same as the string length. "Safe" macro, checks for illegal sequences and for string boundaries.
The length can be negative for a NUL-terminated string.
Parameters
sconst uint8_t * string
startint32_t starting string offset (usually 0)
iint32_t string offset, must be start<=i<=length
lengthint32_t string length
Definition at line
U8_SET_CP_LIMIT_UNSAFE
#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \
    U8_BACK_1_UNSAFE(s, i); \
    U8_FWD_1_UNSAFE(s, i); \
}
Adjust a random-access offset to a code point boundary after a code point.
If the offset is behind a partial multi-byte sequence, then the offset is incremented to behind the whole sequence. Otherwise, it is not modified. The input offset may be the same as the string length. "Unsafe" macro, assumes well-formed UTF-8.
Parameters
sconst uint8_t * string
istring offset
Definition at line
U8_SET_CP_START
#define U8_SET_CP_START(s, start, i) { \
    if(U8_IS_TRAIL((s)[(i)])) { \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
}
#define U8_IS_TRAIL(c): Is this code unit (byte) a UTF-8 trail byte? (Definition: utf8.h)
int32_t utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i): Function for handling "skip backward one code point" with error-checking.
Adjust a random-access offset to a code point boundary at the start of a code point.
If the offset points to a UTF-8 trail byte, then the offset is moved backward to the corresponding lead byte. Otherwise, it is not modified. "Safe" macro, checks for illegal sequences and for string boundaries.
Parameters
sconst uint8_t * string
startint32_t starting string offset (usually 0)
iint32_t string offset, must be start<=i
Definition at line
U8_SET_CP_START_UNSAFE
#define U8_SET_CP_START_UNSAFE(s, i) { \
    while(U8_IS_TRAIL((s)[i])) { --(i); } \
}
#define U8_IS_TRAIL(c): Is this code unit (byte) a UTF-8 trail byte? (Definition: utf8.h)
Adjust a random-access offset to a code point boundary at the start of a code point.
If the offset points to a UTF-8 trail byte, then the offset is moved backward to the corresponding lead byte. Otherwise, it is not modified. "Unsafe" macro, assumes well-formed UTF-8.
Parameters
sconst uint8_t * string
istring offset
Definition at line
utf8_appendCharSafeBody()
int32_t utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError)
Function for handling "append code point" with error-checking.
This is internal since it is not meant to be called directly however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this file and thus must remain stable, and should not be hidden when other internal functions are hidden (otherwise public macros would fail to compile). Do not use. This API is for internal use only.
utf8_back1SafeBody()
int32_t utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i)
Function for handling "skip backward one code point" with error-checking.
This is internal since it is not meant to be called directly however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this file and thus must remain stable, and should not be hidden when other internal functions are hidden (otherwise public macros would fail to compile). Do not use. This API is for internal use only.
utf8_nextCharSafeBody()
UChar32 utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict)
Function for handling "next code point" with error-checking.
This is internal since it is not meant to be called directly however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this file and thus must remain stable, and should not be hidden when other internal functions are hidden (otherwise public macros would fail to compile). Do not use. This API is for internal use only.
utf8_prevCharSafeBody()
UChar32 utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict)
Function for handling "previous code point" with error-checking.
This is internal since it is not meant to be called directly however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this file and thus must remain stable, and should not be hidden when other internal functions are hidden (otherwise public macros would fail to compile). Do not use. This API is for internal use only.

我要回帖

更多关于 全自动缝纫机 的文章

 

随机推荐