本文整理汇总了C++中UnicodeSet类的典型用法代码示例。如果您正苦于以下问题：C++ UnicodeSet类的具体用法？C++ UnicodeSet怎么用？C++ UnicodeSet使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了UnicodeSet类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: uscript_getCode
void SpoofImpl::addScriptChars(const char *locale, UnicodeSet *allowedChars, UErrorCode &status) {
UScriptCode scripts[30];
int32_t numScripts = uscript_getCode(locale, scripts, sizeof(scripts)/sizeof(UScriptCode), &status);
if (U_FAILURE(status)) {
return;
}
if (status == U_USING_DEFAULT_WARNING) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
UnicodeSet tmpSet;
int32_t i;
for (i=0; i<numScripts; i++) {
tmpSet.applyIntPropertyValue(UCHAR_SCRIPT, scripts[i], status);
allowedChars->addAll(tmpSet);
}
}
开发者ID:0x4d52,项目名称:JavaScriptCore-X,代码行数:18,代码来源:uspoof_impl.cpp
示例2: parseName
/**
 * Records the character name and, if present, the "correction" name alias
 * for a single code point.
 *
 * Does nothing unless newValues contains UCHAR_NAME or PPUCD_NAME_ALIAS.
 * Slot 0 of the names/lengths arrays holds the character name and slot 3 the
 * correction alias; slots 1 and 2 are left empty. Each name that is found is
 * passed to parseName(), and the arrays are then handed to addLine().
 *
 * @param props      Properties of the code point (props.start==props.end is asserted).
 * @param newValues  Set of property enums that changed for this code point.
 * @param errorCode  ICU error code; the function is a no-op if it is already a failure.
 */
void
NamesPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
                            UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    if(!(newValues.contains(UCHAR_NAME) || newValues.contains(PPUCD_NAME_ALIAS))) {
        return;
    }

    U_ASSERT(props.start==props.end);

    const char *names[4]={ NULL, NULL, NULL, NULL };
    int16_t lengths[4]={ 0, 0, 0, 0 };

    /* slot 0: the character name */
    if(props.name!=NULL) {
        names[0]=props.name;
        lengths[0]=(int16_t)uprv_strlen(props.name);
        parseName(names[0], lengths[0]);
    }

    /*
     * slot 3: only "correction" aliases are used for now, from Unicode 6.1
     * NameAliases.txt with 3 fields per line.
     * TODO: Work on ticket #8963 to deal with multiple type:alias pairs per character.
     */
    CharString buffer;  // owns the alias copy when it has to be cut at a comma
    const char *corr=NULL;
    if(props.nameAlias!=NULL) {
        corr=uprv_strstr(props.nameAlias, "correction=");
    }
    if(corr!=NULL) {
        corr+=11;  // skip "correction="
        const char *limit=uprv_strchr(corr, ',');
        if(limit==NULL) {
            // The alias runs to the end of the string; use it in place.
            names[3]=corr;
            lengths[3]=(int16_t)uprv_strlen(corr);
        } else {
            // Another pair follows; copy just this alias so it is NUL-terminated.
            buffer.append(corr, limit-corr, errorCode);
            names[3]=buffer.data();
            lengths[3]=(int16_t)(limit-corr);
        }
        parseName(names[3], lengths[3]);
    }

    addLine(props.start, names, lengths, LENGTHOF(names));
}
开发者ID:icu-project,项目名称:icu-tools,代码行数:44,代码来源:namespropsbuilder.cpp
示例3: errln
/**
 * Checks that UnicodeSet rejects a malformed pattern, both through
 * applyPattern() on an existing set and through the pattern constructor.
 * Also checks that a default-constructed set is empty.
 */
void TransliteratorErrorTest::TestUnicodeSetErrors() {
    // Malformed on purpose: the outer '[' is never closed.
    UnicodeString badPattern="[[:L:]-[0x0300-0x0400]";
    UnicodeSet set;
    UErrorCode status = U_ZERO_ERROR;

    if (!set.isEmpty()) {
        errln("FAIL: The default ctor of UnicodeSet created a non-empty object.");
    }

    set.applyPattern(badPattern, status);
    if (U_SUCCESS(status)) {
        errln("FAIL: Applied a bad pattern to the UnicodeSet object okay.");
    }

    // Reset the status so the constructor is judged on its own result.
    status = U_ZERO_ERROR;
    UnicodeSet *set1 = new UnicodeSet(badPattern, status);
    if (U_SUCCESS(status)) {
        errln("FAIL: Created a UnicodeSet based on bad patterns.");
    }
    delete set1;
}
开发者ID:winlibs,项目名称:icu4c,代码行数:20,代码来源:trnserr.cpp
示例4: uspoof_setAllowedUnicodeSet
/**
 * Replaces the spoof checker's allowed-character set with a frozen clone of
 * chars and turns on the USPOOF_CHAR_LIMIT check.
 *
 * @param sc      Spoof checker; validated through SpoofImpl::validateThis().
 * @param chars   Set to copy. A NULL or bogus set yields U_ILLEGAL_ARGUMENT_ERROR.
 * @param status  ICU error code. Set to U_MEMORY_ALLOCATION_ERROR if the clone
 *                cannot be created or comes back bogus.
 *
 * On any error the checker's existing allowed set is left untouched.
 */
U_CAPI void U_EXPORT2
uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
    SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return;
    }
    // Reject a missing set up front instead of dereferencing NULL below.
    if (chars == NULL || chars->isBogus()) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    UnicodeSet *clonedSet = static_cast<UnicodeSet *>(chars->clone());
    if (clonedSet == NULL || clonedSet->isBogus()) {
        // A bogus clone is still a live heap object; release it so this
        // error path does not leak (delete on NULL is a no-op).
        delete clonedSet;
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    clonedSet->freeze();
    delete This->fAllowedCharsSet;
    This->fAllowedCharsSet = clonedSet;
    This->fChecks |= USPOOF_CHAR_LIMIT;
}
开发者ID:BrunoReX,项目名称:palemoon,代码行数:20,代码来源:uspoof.cpp
示例5: span
/**
 * Reference span implementation: walks the UTF-16 string one code point at a
 * time and stops at the first code point whose membership in `set` differs
 * from `tf`.
 *
 * @param set     Set to test each code point against.
 * @param s       UTF-16 text.
 * @param length  Number of UChars in s.
 * @param tf      Membership value that keeps the span going.
 * @return Index of the first code point that ends the span, or the index
 *         reached when the text is exhausted.
 */
static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) {
    int32_t pos=0;
    while(pos<length) {
        int32_t next=pos;
        UChar32 cp;
        U16_NEXT(s, next, length, cp);
        if(tf!=set.contains(cp)) {
            // The span ends before this code point.
            return pos;
        }
        pos=next;
    }
    return pos;
}
开发者ID:LittoCats,项目名称:OT_4010D,代码行数:11,代码来源:unisetperf.cpp
示例6: fprintf
void
BiDiPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode) || newValues.containsNone(relevantProps)) { return; }
UChar32 start=props.start;
UChar32 end=props.end;
// The runtime code relies on this invariant for returning both bmg and bpb
// from the same data.
int32_t bpt=props.getIntProp(UCHAR_BIDI_PAIRED_BRACKET_TYPE);
if(!(bpt==0 ? props.bpb==U_SENTINEL : props.bpb==props.bmg)) {
fprintf(stderr,
"genprops error: invariant not true: "
"if(bpt==None) then bpb=<none> else bpb=bmg\n");
return;
}
int32_t delta=encodeBidiMirroringGlyph(start, end, props.bmg, errorCode);
uint32_t value=(uint32_t)delta<<UBIDI_MIRROR_DELTA_SHIFT;
if(props.binProps[UCHAR_BIDI_MIRRORED]) {
value|=U_MASK(UBIDI_IS_MIRRORED_SHIFT);
}
if(props.binProps[UCHAR_BIDI_CONTROL]) {
value|=U_MASK(UBIDI_BIDI_CONTROL_SHIFT);
}
if(props.binProps[UCHAR_JOIN_CONTROL]) {
value|=U_MASK(UBIDI_JOIN_CONTROL_SHIFT);
}
value|=(uint32_t)bpt<<UBIDI_BPT_SHIFT;
value|=(uint32_t)props.getIntProp(UCHAR_JOINING_TYPE)<<UBIDI_JT_SHIFT;
value|=(uint32_t)props.getIntProp(UCHAR_BIDI_CLASS);
utrie2_setRange32(pTrie, start, end, value, TRUE, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genprops error: BiDiPropsBuilder utrie2_setRange32() failed - %s\n",
u_errorName(errorCode));
return;
}
// Store Joining_Group values from vector column 1 in simple byte arrays.
int32_t jg=props.getIntProp(UCHAR_JOINING_GROUP);
for(UChar32 c=start; c<=end; ++c) {
int32_t jgStart;
if(MIN_JG_START<=c && c<MAX_JG_LIMIT) {
jgArray[c-MIN_JG_START]=(uint8_t)jg;
} else if(MIN_JG_START2<=c && c<MAX_JG_LIMIT2) {
jgArray2[c-MIN_JG_START2]=(uint8_t)jg;
} else if(jg!=U_JG_NO_JOINING_GROUP) {
fprintf(stderr, "genprops error: Joining_Group for out-of-range code points U+%04lx..U+%04lx\n",
(long)start, (long)end);
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
}
}
开发者ID:icu-project,项目名称:icu-tools,代码行数:54,代码来源:bidipropsbuilder.cpp
示例7: fprintf
void
PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return; }
scx.clear();
CharString scString;
for(;;) {
const char *scs;
const char *scLimit=strchr(s, ' ');
if(scLimit!=NULL) {
scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data();
if(U_FAILURE(errorCode)) { return; }
} else {
scs=s;
}
int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
if(script==UCHAR_INVALID_CODE) {
fprintf(stderr,
"error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
scs, (long)lineNumber);
errorCode=U_PARSE_ERROR;
return;
} else if(scx.contains(script)) {
fprintf(stderr,
"error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
scs, (long)lineNumber);
errorCode=U_PARSE_ERROR;
return;
} else {
scx.add(script);
}
if(scLimit!=NULL) {
s=scLimit+1;
} else {
break;
}
}
if(scx.isEmpty()) {
fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
errorCode=U_PARSE_ERROR;
}
}
开发者ID:icu-project,项目名称:icu4c,代码行数:41,代码来源:ppucd.cpp
示例8: getUnderflowLabel
/**
 * Fills bucketList_ from labels_.
 *
 * The list starts with an underflow bucket, then gets one normal bucket per
 * label, and ends with an overflow bucket. Between two adjacent labels whose
 * script sets (ignoring IGNORE_SCRIPTS) do not overlap, an inflow bucket is
 * inserted if the overflow comparison string of the previous label sorts
 * before the current label under collatorPrimaryOnly_.
 *
 * @param status  ICU error code, passed through to every helper.
 */
void AlphabeticIndex::buildBucketList(UErrorCode &status) {
    UnicodeString labelStr = getUnderflowLabel();

    Bucket *b = new Bucket(labelStr, *EMPTY_STRING, U_ALPHAINDEX_UNDERFLOW, status);
    bucketList_->addElement(b, status);

    // Build up the list, adding underflow, additions, overflow
    // insert infix labels as needed, using \uFFFF.
    // NOTE(review): assumes labels_ holds at least one label; elementAt(0)
    // is dereferenced without a check -- confirm against callers.
    const UnicodeString *last = static_cast<UnicodeString *>(labels_->elementAt(0));
    b = new Bucket(*last, *last, U_ALPHAINDEX_NORMAL, status);
    bucketList_->addElement(b, status);

    UnicodeSet lastSet;
    UnicodeSet set;
    AlphabeticIndex::getScriptSet(lastSet, *last, status);
    lastSet.removeAll(*IGNORE_SCRIPTS);

    for (int i = 1; i < labels_->size(); ++i) {
        UnicodeString *current = static_cast<UnicodeString *>(labels_->elementAt(i));
        getScriptSet(set, *current, status);
        set.removeAll(*IGNORE_SCRIPTS);
        if (lastSet.containsNone(set)) {
            // check for adjacent
            const UnicodeString &overflowComparisonString = getOverflowComparisonString(*last, status);
            if (collatorPrimaryOnly_->compare(overflowComparisonString, *current) < 0) {
                labelStr = getInflowLabel();
                b = new Bucket(labelStr, overflowComparisonString, U_ALPHAINDEX_INFLOW, status);
                bucketList_->addElement(b, status);
                // Do not advance i here. It indexes labels_, not bucketList_,
                // so bumping it would drop the label that follows `current`
                // from the index.
            }
        }
        b = new Bucket(*current, *current, U_ALPHAINDEX_NORMAL, status);
        bucketList_->addElement(b, status);
        last = current;
        lastSet = set;
    }

    // final overflow bucket
    const UnicodeString &limitString = getOverflowComparisonString(*last, status);
    b = new Bucket(getOverflowLabel(), limitString, U_ALPHAINDEX_OVERFLOW, status);
    bucketList_->addElement(b, status);
}
开发者ID:0omega,项目名称:platform_external_icu4c,代码行数:41,代码来源:alphaindex.cpp
示例9: SpanBackUTF8
SpanBackUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
// Verify that the frozen set is equal to the unfrozen one.
UnicodeSet set;
char utf8[4];
UChar32 c;
int32_t length;
for(c=0; c<=0x10ffff; ++c) {
if(c==0xd800) {
c=0xe000;
}
length=0;
U8_APPEND_UNSAFE(utf8, length, c);
if(testcase.set.spanBackUTF8(utf8, length, USET_SPAN_CONTAINED)==0) {
set.add(c);
}
}
if(set!=testcase.set) {
fprintf(stderr, "error: frozen set != original!\n");
}
}
开发者ID:LittoCats,项目名称:OT_4010D,代码行数:21,代码来源:unisetperf.cpp
示例10: addReplacementSetTo
/**
 * Adds to toUnionTo every character this replacer can emit.
 *
 * Walks `output` one code point at a time. A code point that
 * data->lookupReplacer() maps to a replacer contributes that replacer's own
 * replacement set; any other code point is added directly.
 */
void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
    int32_t pos = 0;
    while (pos < output.length()) {
        const UChar32 cp = output.char32At(pos);
        UnicodeReplacer* nested = data->lookupReplacer(cp);
        if (nested != NULL) {
            nested->addReplacementSetTo(toUnionTo);
        } else {
            toUnionTo.add(cp);
        }
        // Step by one or two code units depending on the code point.
        pos += UTF_CHAR_LENGTH(cp);
    }
}
开发者ID:LittoCats,项目名称:OT_4010D,代码行数:12,代码来源:strrepl.cpp
示例11: addMatchSetTo
/**
 * Implement UnicodeMatcher.
 *
 * Adds to toUnionTo every character this matcher can match. Walks `pattern`
 * one code point at a time. A code point that data->lookupMatcher() maps to
 * a matcher contributes that matcher's own match set; any other code point
 * is added directly.
 */
void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
    int32_t pos = 0;
    while (pos < pattern.length()) {
        const UChar32 cp = pattern.char32At(pos);
        const UnicodeMatcher* nested = data->lookupMatcher(cp);
        if (nested != NULL) {
            nested->addMatchSetTo(toUnionTo);
        } else {
            toUnionTo.add(cp);
        }
        // Step by one or two code units depending on the code point.
        pos += UTF_CHAR_LENGTH(cp);
    }
}
开发者ID:0x4d52,项目名称:JavaScriptCore-X,代码行数:15,代码来源:strmatch.cpp
示例12: dest
/**
 * Builds a vector of the script-boundary strings defined by the collator.
 *
 * The boundaries are the contractions starting with U+FDD1 reported by
 * collatorPrimaryOnly_->internalAddContractions(). Only those whose second
 * code point is a Letter or unassigned (Cn) are kept.
 *
 * @param status  ICU error code. Set to U_UNSUPPORTED_ERROR if the collator
 *                reports no such contractions, or U_MEMORY_ALLOCATION_ERROR
 *                if a string cannot be allocated.
 * @return A new UVector of owned UnicodeString objects (the caller adopts
 *         it), or NULL on any failure.
 */
UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }
    LocalPointer<UVector> dest(new UVector(status), status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    dest->setDeleter(uprv_deleteUObject);
    // Fetch the script-first-primary contractions which are defined in the root collator.
    // They all start with U+FDD1.
    UnicodeSet set;
    collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (set.isEmpty()) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }
    UnicodeSetIterator iter(set);
    while (iter.next()) {
        const UnicodeString &boundary = iter.getString();
        uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
        if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
            // Ignore boundaries for the special reordering groups.
            // Take only those for "real scripts" (where the sample character is a Letter,
            // and the one for unassigned implicit weights (Cn).
            continue;
        }
        UnicodeString *s = new UnicodeString(boundary);
        if (s == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        dest->addElement(s, status);
        if (U_FAILURE(status)) {
            // The vector did not take the element, so it is still ours to
            // free. Bail out so a failure status is never paired with a
            // non-NULL result; `dest` releases the partial vector.
            delete s;
            return NULL;
        }
    }
    return dest.orphan();
}
开发者ID:DavidCai1993,项目名称:node,代码行数:39,代码来源:alphaindex.cpp
示例13: DictionaryBreakEngine
/**
 * Creates a dictionary-based break engine for Korean or for Chinese/Japanese.
 *
 * Initialises the four script sets from patterns. If that succeeded, the
 * characters handled by the engine become the Hangul set for kKorean, or the
 * union of Han, Katakana and Hiragana plus U+FF70 and U+30FC otherwise.
 *
 * @param adoptDictionary  Dictionary stored in fDictionary.
 * @param type             Language type selecting the character set.
 * @param status           ICU error code; on failure setCharacters() is not called.
 */
CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
: DictionaryBreakEngine(1 << UBRK_WORD), fDictionary(adoptDictionary) {
    // Korean dictionary only includes Hangul syllables
    fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
    fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
    fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
    fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
    if (U_FAILURE(status)) {
        return;
    }

    // Korean and Japanese/Chinese are handled with different dictionaries.
    if (type == kKorean) {
        setCharacters(fHangulWordSet);
        return;
    }

    // Chinese and Japanese
    UnicodeSet cjSet;
    cjSet.addAll(fHanWordSet).addAll(fKatakanaWordSet).addAll(fHiraganaWordSet);
    cjSet.add(0xFF70).add(0x30FC);
    setCharacters(cjSet);
}
开发者ID:alfintatorkace,项目名称:osx-10.9-opensource,代码行数:23,代码来源:dictbe.cpp
示例14: assertTrue
/**
 * Verifies that the static parsing sets cover the symbols of every available
 * locale.
 *
 * First checks that the lenient comma/period sets are supersets of the
 * strict ones. Then, for each locale's DecimalFormatSymbols, asserts that
 * the decimal separator, grouping separator, plus, minus, percent, per-mille
 * and infinity symbols are members of the corresponding set.
 */
void StaticUnicodeSetsTest::testSetCoverage() {
    UErrorCode status = U_ZERO_ERROR;

    // Lenient comma/period should be supersets of strict comma/period;
    // it also makes the coverage logic cheaper.
    assertTrue(
            "COMMA should be superset of STRICT_COMMA",
            get(unisets::COMMA)->containsAll(*get(unisets::STRICT_COMMA)));
    assertTrue(
            "PERIOD should be superset of STRICT_PERIOD",
            get(unisets::PERIOD)->containsAll(*get(unisets::STRICT_PERIOD)));

    UnicodeSet decimals;
    decimals.addAll(*get(unisets::STRICT_COMMA));
    decimals.addAll(*get(unisets::STRICT_PERIOD));
    decimals.freeze();
    UnicodeSet grouping;
    grouping.addAll(decimals);
    grouping.addAll(*get(unisets::OTHER_GROUPING_SEPARATORS));
    // Freeze the set that was just built; `decimals` is already frozen above.
    grouping.freeze();

    const UnicodeSet &plusSign = *get(unisets::PLUS_SIGN);
    const UnicodeSet &minusSign = *get(unisets::MINUS_SIGN);
    const UnicodeSet &percent = *get(unisets::PERCENT_SIGN);
    const UnicodeSet &permille = *get(unisets::PERMILLE_SIGN);
    const UnicodeSet &infinity = *get(unisets::INFINITY_KEY);

    int32_t localeCount;
    const Locale* allAvailableLocales = Locale::getAvailableLocales(localeCount);
    for (int32_t i = 0; i < localeCount; i++) {
        Locale locale = allAvailableLocales[i];
        DecimalFormatSymbols dfs(locale, status);
        UnicodeString localeName;
        locale.getDisplayName(localeName);
        assertSuccess(UnicodeString("Making DFS for ") + localeName, status);

// Stringizes the set variable's name so it is passed to assertInSet alongside the set.
#define ASSERT_IN_SET(name, foo) assertInSet(localeName, UnicodeString("" #name ""), name, foo)
        ASSERT_IN_SET(decimals, dfs.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol));
        ASSERT_IN_SET(grouping, dfs.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol));
        ASSERT_IN_SET(plusSign, dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol));
        ASSERT_IN_SET(minusSign, dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol));
        ASSERT_IN_SET(percent, dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol));
        ASSERT_IN_SET(permille, dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol));
        ASSERT_IN_SET(infinity, dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol));
    }
}
开发者ID:winlibs,项目名称:icu4c,代码行数:46,代码来源:static_unisets_test.cpp
示例15: clear
/**
* Parse the pattern from the given RuleCharacterIterator. The
* iterator is advanced over the parsed pattern.
* @param chars iterator over the pattern characters. Upon return
* it will be advanced to the first character after the parsed
* pattern, or the end of the iteration if all characters are
* parsed.
* @param symbols symbol table to use to parse and dereference
* variables, or null if none.
* @param rebuiltPat the pattern that was parsed, rebuilt or
* copied from the input pattern, as appropriate.
* @param options a bit mask of zero or more of the following:
* IGNORE_SPACE, CASE.
*/
void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
const SymbolTable* symbols,
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
UErrorCode& ec) {
if (U_FAILURE(ec)) return;
// Syntax characters: [ ] ^ - & { }
// Recognized special forms for chars, sets: c-c s-s s&s
int32_t opts = RuleCharacterIterator::PARSE_VARIABLES |
RuleCharacterIterator::PARSE_ESCAPES;
if ((options & USET_IGNORE_SPACE) != 0) {
opts |= RuleCharacterIterator::SKIP_WHITESPACE;
}
UnicodeString patLocal, buf;
UBool usePat = FALSE;
UnicodeSetPointer scratch;
RuleCharacterIterator::Pos backup;
// mode: 0=before [, 1=between [...], 2=after ]
// lastItem: 0=none, 1=char, 2=set
int8_t lastItem = 0, mode = 0;
UChar32 lastChar = 0;
UChar op = 0;
UBool invert = FALSE;
clear();
while (mode != 2 && !chars.atEnd()) {
U_ASSERT((lastItem == 0 && op == 0) ||
(lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
(lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
op == INTERSECTION /*'&'*/)));
UChar32 c = 0;
UBool literal = FALSE;
UnicodeSet* nested = 0; // alias - do not delete
// -------- Check for property pattern
// setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
int8_t setMode = 0;
if (resemblesPropertyPattern(chars, opts)) {
setMode = 2;
}
// -------- Parse '[' of opening delimiter OR nested set.
// If there is a nested set, use `setMode' to define how
// the set should be parsed. If the '[' is part of the
// opening delimiter for this pattern, parse special
// strings "[", "[^", "[-", and "[^-". Check for stand-in
// characters representing a nested set in the symbol
// table.
else {
// Prepare to backup if necessary
chars.getPos(backup);
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
if (c == 0x5B /*'['*/ && !literal) {
if (mode == 1) {
chars.setPos(backup); // backup
setMode = 1;
} else {
// Handle opening '[' delimiter
mode = 1;
patLocal.append((UChar) 0x5B /*'['*/);
chars.getPos(backup); // prepare to backup
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
if (c == 0x5E /*'^'*/ && !literal) {
invert = TRUE;
patLocal.append((UChar) 0x5E /*'^'*/);
chars.getPos(backup); // prepare to backup
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
}
// Fall through to handle special leading '-';
// otherwise restart loop for nested [], \p{}, etc.
if (c == HYPHEN /*'-'*/) {
//.........这里部分代码省略.........
开发者ID:ThomasWo,项目名称:proto-quic,代码行数:101,代码来源:uniset_props.cpp
示例16: main
//.........这里部分代码省略.........
NULL, // dest,
0, // destCapacity,
wordSourceC,
wordFileSize,
&status);
if (status != U_BUFFER_OVERFLOW_ERROR) {
fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
exit(status);
};
status = U_ZERO_ERROR;
UChar *wordSourceU = new UChar[destCap+1];
ucnv_toUChars(conv,
wordSourceU, // dest,
destCap+1,
wordSourceC,
wordFileSize,
&status);
if (U_FAILURE(status)) {
fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
exit(status);
};
ucnv_close(conv);
// Get rid of the original file buffer
delete[] wordBufferC;
// Create a MutableTrieDictionary, and loop through all the lines, inserting
// words.
// First, pick a median character.
UChar *current = wordSourceU + (destCap/2);
UChar uc = *current++;
UnicodeSet breaks;
breaks.add(0x000A); // Line Feed
breaks.add(0x000D); // Carriage Return
breaks.add(0x2028); // Line Separator
breaks.add(0x2029); // Paragraph Separator
do {
// Look for line break
while (uc && !breaks.contains(uc)) {
uc = *current++;
}
// Now skip to first non-line-break
while (uc && breaks.contains(uc)) {
uc = *current++;
}
}
while (uc && (breaks.contains(uc) || u_isspace(uc)));
mtd = new MutableTrieDictionary(uc, status);
if (U_FAILURE(status)) {
fprintf(stderr, "new MutableTrieDictionary: ICU Error \"%s\"\n", u_errorName(status));
exit(status);
}
// Now add the words. Words are non-space characters at the beginning of
// lines, and must be at least one UChar. If a word has an associated value,
// the value should follow the word on the same line after a tab character.
current = wordSourceU;
UChar *candidate = current;
uc = *current++;
int32_t length = 0;
int count = 0;
开发者ID:AutomationConsultant,项目名称:perch-webrtc,代码行数:67,代码来源:genctd.cpp
示例17: defined
//---------------------------------------------------------------------
//
// dump Output the compiled form of the pattern.
// Debugging function only.
//
//---------------------------------------------------------------------
void RegexPattern::dumpOp(int32_t index) const {
(void)index; // Suppress warnings in non-debug build.
#if defined(REGEX_DEBUG)
static const char * const opNames[] = {URX_OPCODE_NAMES};
int32_t op = fCompiledPat->elementAti(index);
int32_t val = URX_VAL(op);
int32_t type = URX_TYPE(op);
int32_t pinnedType = type;
if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
pinnedType = 0;
}
printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
switch (type) {
case URX_NOP:
case URX_DOTANY:
case URX_DOTANY_ALL:
case URX_FAIL:
case URX_CARET:
case URX_DOLLAR:
case URX_BACKSLASH_G:
case URX_BACKSLASH_X:
case URX_END:
case URX_DOLLAR_M:
case URX_CARET_M:
// Types with no operand field of interest.
break;
case URX_RESERVED_OP:
case URX_START_CAPTURE:
case URX_END_CAPTURE:
case URX_STATE_SAVE:
case URX_JMP:
case URX_JMP_SAV:
case URX_JMP_SAV_X:
case URX_BACKSLASH_B:
case URX_BACKSLASH_BU:
case URX_BACKSLASH_D:
case URX_BACKSLASH_Z:
case URX_STRING_LEN:
case URX_CTR_INIT:
case URX_CTR_INIT_NG:
case URX_CTR_LOOP:
case URX_CTR_LOOP_NG:
case URX_RELOC_OPRND:
case URX_STO_SP:
case URX_LD_SP:
case URX_BACKREF:
case URX_STO_INP_LOC:
case URX_JMPX:
case URX_LA_START:
case URX_LA_END:
case URX_BACKREF_I:
case URX_LB_START:
case URX_LB_CONT:
case URX_LB_END:
case URX_LBN_CONT:
case URX_LBN_END:
case URX_LOOP_C:
case URX_LOOP_DOT_I:
case URX_BACKSLASH_H:
case URX_BACKSLASH_R:
case URX_BACKSLASH_V:
// types with an integer operand field.
printf("%d", val);
break;
case URX_ONECHAR:
case URX_ONECHAR_I:
printf("%c", val<256?val:'?');
break;
case URX_STRING:
case URX_STRING_I:
{
int32_t lengthOp = fCompiledPat->elementAti(index+1);
U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
int32_t length = URX_VAL(lengthOp);
int32_t i;
for (i=val; i<val+length; i++) {
UChar c = fLiteralText[i];
if (c < 32 || c >= 256) {c = '.';}
printf("%c", c);
}
}
break;
case URX_SETREF:
case URX_LOOP_SR_I:
{
UnicodeString s;
UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
set->toPattern(s, TRUE);
for (int32_t i=0; i<s.length(); i++) {
//.........这里部分代码省略.........
开发者ID:Acorld,项目名称:WinObjC-Heading,代码行数:101,代码来源:repattrn.cpp
示例18: AlphabeticIndex
//
// APITest. Invoke every function at least once, and check that it does something.
// Does not attempt to check complete functionality.
//
void AlphabeticIndexTest::APITest() {
//
// Simple constructor and destructor, getBucketCount()
//
UErrorCode status = U_ZERO_ERROR;
int32_t lc = 0;
int32_t i = 0;
AlphabeticIndex *index = new AlphabeticIndex(Locale::getEnglish(), status);
TEST_CHECK_STATUS;
lc = index->getBucketCount(status);
TEST_CHECK_STATUS;
TEST_ASSERT(28 == lc); // 26 letters plus two under/overflow labels.
//printf("getBucketCount() == %d\n", lc);
delete index;
// Constructor from a Collator
//
status = U_ZERO_ERROR;
RuleBasedCollator *coll = dynamic_cast<RuleBasedCollator *>(
Collator::createInstance(Locale::getGerman(), status));
TEST_CHECK_STATUS;
TEST_ASSERT(coll != NULL);
index = new AlphabeticIndex(coll, status);
TEST_CHECK_STATUS;
TEST_ASSERT(coll == &index->getCollator());
assertEquals("only the underflow label in an index built from a collator",
1, index->getBucketCount(status));
TEST_CHECK_STATUS;
delete index;
// addLabels()
status = U_ZERO_ERROR;
index = new AlphabeticIndex(Locale::getEnglish(), status);
TEST_CHECK_STATUS;
UnicodeSet additions;
additions.add((UChar32)0x410).add((UChar32)0x415); // A couple of Cyrillic letters
index->addLabels(additions, status);
TEST_CHECK_STATUS;
lc = index->getBucketCount(status);
TEST_CHECK_STATUS;
assertEquals("underflow, A-Z, inflow, 2 Cyrillic, overflow",
31, index->getBucketCount(status));
// std::cout << lc << std::endl;
delete index;
// addLabels(Locale)
status = U_ZERO_ERROR;
index = new AlphabeticIndex(Locale::getEnglish(), status);
TEST_CHECK_STATUS;
AlphabeticIndex &aip = index->addLabels(Locale::getJapanese(), status);
TEST_ASSERT(&aip == index);
TEST_CHECK_STATUS;
lc = index->getBucketCount(status);
TEST_CHECK_STATUS;
TEST_ASSERT(35 < lc); // Japanese should add a bunch. Don't rely on the exact value.
delete index;
// GetCollator(), Get under/in/over flow labels
status = U_ZERO_ERROR;
index = new AlphabeticIndex(Locale::getGerman(), status);
TEST_CHECK_STATUS;
Collator *germanCol = Collator::createInstance(Locale::getGerman(), status);
TEST_CHECK_STATUS;
const RuleBasedCollator &indexCol = index->getCollator();
TEST_ASSERT(*germanCol == indexCol);
delete germanCol;
UnicodeString ELLIPSIS; ELLIPSIS.append((UChar32)0x2026);
UnicodeString s = index->getUnderflowLabel();
TEST_ASSERT(ELLIPSIS == s);
s = index->getOverflowLabel();
TEST_ASSERT(ELLIPSIS == s);
s = index->getInflowLabel();
TEST_ASSERT(ELLIPSIS == s);
index->setOverflowLabel(UNICODE_STRING_SIMPLE("O"), status);
index->setUnderflowLabel(UNICODE_STRING_SIMPLE("U"), status).setInflowLabel(UNICODE_STRING_SIMPLE("I"), status);
s = index->getUnderflowLabel();
TEST_ASSERT(UNICODE_STRING_SIMPLE("U") == s);
s = index->getOverflowLabel();
TEST_ASSERT(UNICODE_STRING_SIMPLE("O") == s);
s = index->getInflowLabel();
TEST_ASSERT(UNICODE_STRING_SIMPLE("I") == s);
delete index;
const UnicodeString adam = UNICODE_STRING_SIMPLE("Adam");
//.........这里部分代码省略.........
开发者ID:CoherentLabs,项目名称:CoherentWebCoreDependencies,代码行数:101,代码来源:alphaindextst.cpp
示例19: logln
void CanonicalIteratorTest::TestBasic() {
UErrorCode status = U_ZERO_ERROR;
static const char * const testArray[][2] = {
{"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, "
"A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, "
"\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, "
"\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"},
{"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"},
{"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"},
};
#if 0
// This is not interesting for C/C++ as the data is already built beforehand
// check build
UnicodeSet ss = CanonicalIterator.getSafeStart();
logln("Safe Start: " + ss.toPattern(true));
ss = CanonicalIterator.getStarts('a');
expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
+ "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
);
#endif
// check permute
// NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
Hashtable *permutations = new Hashtable(FALSE, status);
permutations->setValueDeleter(uhash_deleteUnicodeString);
UnicodeString toPermute("ABC");
CanonicalIterator::permute(toPermute, FALSE, permutations, status);
logln("testing permutation");
expectEqual("Simple permutation ", "", collectionToString(permutations), "ABC, ACB, BAC, BCA, CAB, CBA");
delete permutations;
// try samples
logln("testing samples");
Hashtable *set = new Hashtable(FALSE, status);
set->setValueDeleter(uhash_deleteUnicodeString);
int32_t i = 0;
CanonicalIterator it("", status);
if(U_SUCCESS(status)) {
for (i = 0; i < ARRAY_LENGTH(testArray); ++i) {
//logln("Results for: " + name.transliterate(testArray[i]));
UnicodeString testStr = CharsToUnicodeString(testArray[i][0]);
it.setSource(testStr, status);
set->removeAll();
for (;;) {
//UnicodeString *result = new UnicodeString(it.next());
UnicodeString result(it.next());
if (result.isBogus()) {
break;
}
set->put(result, new UnicodeString(result), status); // Add result to the table
//logln(++counter + ": " + hex.transliterate(result));
//logln(" = " + name.transliterate(result));
}
expectEqual(i + ": ", testStr, collectionToString(set), CharsToUnicodeString(testArray[i][1]));
}
} else {
errln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status));
}
delete set;
}
开发者ID:Andproject,项目名称:platform_external_icu4c,代码行数:70,代码来源:canittst.cpp
示例20: iter
/*
* Find missing case mapping relationships and add mappings for case closure.
* This function starts from an "original" code point and recursively
* finds its case mappings and the case mappings of where it maps to.
*
* The recursion depth is capped at 3 nested calls of this function.
* In each call, the current code point is c, and the function enumerates
* all of c's simple (single-code point) case mappings.
* prev is the code point that case-mapped to c.
* prev2 is the code point that case-mapped to prev.
*
* The initial function call has prev2<0, prev<0, and c==orig
* (marking no code points).
* It enumerates c's case mappings and recurses without further action.
*
* The second-level function call has prev2<0, prev==orig, and c is
* the destination code point of one of prev's case mappings.
* The function checks if any of c's case mappings go back to orig
* and adds a closure mapping if not.
* In other words, it turns a case mapping relationship of
* orig->c
* into
* orig<->c
*
* The third-level function call has prev2==orig, prev>=0, and c is
* the destination code point of one of prev's case mappings.
* (And prev is the destination of one of prev2's case mappings.)
* The function checks if any of c's case mappings go back to orig
* and adds a closure mapping if not.
* In other words, it turns case mapping relationships of
* orig->prev->c or orig->prev<->c
* into
* orig->prev->c->orig or orig->prev<->c->orig
* etc.
* (Graphically, this closes a triangle.)
*
* With repeated application on all code points until no more closure mappings
* are added, all case equivalence groups get complete mappings.
* That is, in each group of code points with case relationships
* each code point will in the end have some mapping to each other
* code point in the group.
*
* @return TRUE if a closure mapping was added
*/
UBool
CasePropsBuilder::addClosure(UChar32 orig, UChar32 prev2, UChar32 prev, UChar32 c, uint32_t value,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return FALSE; }
UChar32 next;
UBool someMappingsAdded=FALSE;
if(c!=orig) {
/* get the properties for c */
value=utrie2_get32(pTrie, c);
}
/* else if c==orig then c's value was passed in */
if(value&UCASE_EXCEPTION) {
UnicodeSet set;
ExcProps &ep=*excProps[value>>UGENCASE_EXC_SHIFT];
UniProps &p=ep.props;
/*
* marker for whether any of c's mappings goes to orig
* c==orig: prevent adding a closure mapping when getting orig's own, direct mappings
*/
UBool mapsToOrig=(UBool)(c==orig);
/* collect c's case mapping destinations in set[] */
if((next=p.suc)>=0 && next!=c) {
set.add(next);
}
if((next=p.slc)>=0 && next!=c) {
set.add(next);
}
if(p.suc!=(next=p.stc) && next!=c) {
set.add(next);
}
if((next=p.scf)>=0 && next!=c) {
set.add(next);
}
/* add c's current closure mappings to set */
set.addAll(ep.closure);
/* process all code points to which c case-maps */
UnicodeSetIterator iter(set);
while(iter.next()) {
next=iter.getCodepoint(); /* next!=c */
if(next==orig) {
mapsToOrig=TRUE; /* remember that we map to orig */
} else if(prev2<0 && next!=prev) {
/*
* recurse unless
* we have reached maximum depth (prev2>=0) or
* this is a mapping to one of the previous code points (orig, prev, c)
*/
//.........这里部分代码省略.........
开发者ID:icu-project,项目名称:icu-tools,代码行数:101,代码来源:casepropsbuilder.cpp
注:本文中的UnicodeSet类示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论