• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

C++ UnicodeSet类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了C++中UnicodeSet的典型用法代码示例。如果您正苦于以下问题:C++ UnicodeSet类的具体用法?C++ UnicodeSet怎么用?C++ UnicodeSet使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了UnicodeSet类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: uscript_getCode

void SpoofImpl::addScriptChars(const char *locale, UnicodeSet *allowedChars, UErrorCode &status) {
    UScriptCode scripts[30];

    int32_t numScripts = uscript_getCode(locale, scripts, sizeof(scripts)/sizeof(UScriptCode), &status);
    if (U_FAILURE(status)) {
        return;
    }
    if (status == U_USING_DEFAULT_WARNING) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    UnicodeSet tmpSet;
    int32_t    i;
    for (i=0; i<numScripts; i++) {
        tmpSet.applyIntPropertyValue(UCHAR_SCRIPT, scripts[i], status);
        allowedChars->addAll(tmpSet);
    }
}
开发者ID:0x4d52,项目名称:JavaScriptCore-X,代码行数:18,代码来源:uspoof_impl.cpp


示例2: parseName

void
NamesPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
                            UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    if(!newValues.contains(UCHAR_NAME) && !newValues.contains(PPUCD_NAME_ALIAS)) {
        return;
    }

    U_ASSERT(props.start==props.end);

    const char *names[4]={ NULL, NULL, NULL, NULL };
    int16_t lengths[4]={ 0, 0, 0, 0 };

    /* get the character name */
    if(props.name!=NULL) {
        names[0]=props.name;
        lengths[0]=(int16_t)uprv_strlen(props.name);
        parseName(names[0], lengths[0]);
    }

    CharString buffer;
    if(props.nameAlias!=NULL) {
        /*
         * Only use "correction" aliases for now, from Unicode 6.1 NameAliases.txt with 3 fields per line.
         * TODO: Work on ticket #8963 to deal with multiple type:alias pairs per character.
         */
        const char *corr=uprv_strstr(props.nameAlias, "correction=");
        if(corr!=NULL) {
            corr+=11;  // skip "correction="
            const char *limit=uprv_strchr(corr, ',');
            if(limit!=NULL) {
                buffer.append(corr, limit-corr, errorCode);
                names[3]=buffer.data();
                lengths[3]=(int16_t)(limit-corr);
            } else {
                names[3]=corr;
                lengths[3]=(int16_t)uprv_strlen(corr);
            }
            parseName(names[3], lengths[3]);
        }
    }

    addLine(props.start, names, lengths, LENGTHOF(names));
}
开发者ID:icu-project,项目名称:icu-tools,代码行数:44,代码来源:namespropsbuilder.cpp


示例3: errln

/**
 * Checks UnicodeSet error reporting:
 *  - a default-constructed set is empty,
 *  - applyPattern() with badPattern leaves a failure in the status,
 *  - the pattern constructor with badPattern leaves a failure in the status.
 * Each violated expectation is reported through errln().
 */
void TransliteratorErrorTest::TestUnicodeSetErrors() {
    UnicodeString badPattern="[[:L:]-[0x0300-0x0400]";
    UnicodeSet set;
    UErrorCode status = U_ZERO_ERROR;

    if (!set.isEmpty()) {
        errln("FAIL: The default ctor of UnicodeSet created a non-empty object.");
    }
    set.applyPattern(badPattern, status);
    if (U_SUCCESS(status)) {
        errln("FAIL: Applied a bad pattern to the UnicodeSet object okay.");
    }

    // Start from a clean status so the constructor's outcome is tested on its own.
    status = U_ZERO_ERROR;
    UnicodeSet *set1 = new UnicodeSet(badPattern, status);
    if (U_SUCCESS(status)) {
        errln("FAIL: Created a UnicodeSet based on bad patterns.");
    }
    delete set1;
}
开发者ID:winlibs,项目名称:icu4c,代码行数:20,代码来源:trnserr.cpp


示例4: uspoof_setAllowedUnicodeSet

/**
 * Restricts the characters accepted by a spoof checker to the given set.
 *
 * The checker stores its own frozen clone of chars, releases the previously
 * stored set, and turns on the USPOOF_CHAR_LIMIT check.
 *
 * @param sc      the spoof checker; validated through SpoofImpl::validateThis().
 * @param chars   the allowed characters. A NULL or bogus set is rejected with
 *                U_ILLEGAL_ARGUMENT_ERROR.
 * @param status  ICU error code. Set to U_MEMORY_ALLOCATION_ERROR when the
 *                set cannot be cloned; the checker is left unchanged in
 *                that case.
 */
U_CAPI void U_EXPORT2
uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
    SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return;
    }
    if (chars == NULL || chars->isBogus()) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    UnicodeSet *clonedSet = static_cast<UnicodeSet *>(chars->clone());
    if (clonedSet == NULL || clonedSet->isBogus()) {
        // The clone may exist but be bogus; free it so it does not leak.
        // Deleting NULL is a no-op.
        delete clonedSet;
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    clonedSet->freeze();
    // Swap in the new set only after it has been built successfully.
    delete This->fAllowedCharsSet;
    This->fAllowedCharsSet = clonedSet;
    This->fChecks |= USPOOF_CHAR_LIMIT;
}
开发者ID:BrunoReX,项目名称:palemoon,代码行数:20,代码来源:uspoof.cpp


示例5: span

 /**
  * Scans the UTF-16 text s[0..length) code point by code point and returns
  * the offset of the first code point whose membership in set differs from tf.
  * Returns the offset reached when the scan runs to the end of the text,
  * and 0 when length is not positive.
  */
 static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) {
     int32_t pos=0;
     while(pos<length) {
         int32_t next=pos;
         UChar32 cp;
         U16_NEXT(s, next, length, cp);
         if(set.contains(cp)!=tf) {
             return pos;
         }
         pos=next;
     }
     return pos;
 }
开发者ID:LittoCats,项目名称:OT_4010D,代码行数:11,代码来源:unisetperf.cpp


示例6: fprintf

void
BiDiPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
                           UErrorCode &errorCode) {
    if(U_FAILURE(errorCode) || newValues.containsNone(relevantProps)) { return; }

    UChar32 start=props.start;
    UChar32 end=props.end;

    // The runtime code relies on this invariant for returning both bmg and bpb
    // from the same data.
    int32_t bpt=props.getIntProp(UCHAR_BIDI_PAIRED_BRACKET_TYPE);
    if(!(bpt==0 ? props.bpb==U_SENTINEL : props.bpb==props.bmg)) {
        fprintf(stderr,
                "genprops error: invariant not true: "
                "if(bpt==None) then bpb=<none> else bpb=bmg\n");
        return;
    }
    int32_t delta=encodeBidiMirroringGlyph(start, end, props.bmg, errorCode);
    uint32_t value=(uint32_t)delta<<UBIDI_MIRROR_DELTA_SHIFT;
    if(props.binProps[UCHAR_BIDI_MIRRORED]) {
        value|=U_MASK(UBIDI_IS_MIRRORED_SHIFT);
    }
    if(props.binProps[UCHAR_BIDI_CONTROL]) {
        value|=U_MASK(UBIDI_BIDI_CONTROL_SHIFT);
    }
    if(props.binProps[UCHAR_JOIN_CONTROL]) {
        value|=U_MASK(UBIDI_JOIN_CONTROL_SHIFT);
    }
    value|=(uint32_t)bpt<<UBIDI_BPT_SHIFT;
    value|=(uint32_t)props.getIntProp(UCHAR_JOINING_TYPE)<<UBIDI_JT_SHIFT;
    value|=(uint32_t)props.getIntProp(UCHAR_BIDI_CLASS);
    utrie2_setRange32(pTrie, start, end, value, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: BiDiPropsBuilder utrie2_setRange32() failed - %s\n",
                u_errorName(errorCode));
        return;
    }

    // Store Joining_Group values from vector column 1 in simple byte arrays.
    int32_t jg=props.getIntProp(UCHAR_JOINING_GROUP);
    for(UChar32 c=start; c<=end; ++c) {
        int32_t jgStart;
        if(MIN_JG_START<=c && c<MAX_JG_LIMIT) {
            jgArray[c-MIN_JG_START]=(uint8_t)jg;
        } else if(MIN_JG_START2<=c && c<MAX_JG_LIMIT2) {
            jgArray2[c-MIN_JG_START2]=(uint8_t)jg;
        } else if(jg!=U_JG_NO_JOINING_GROUP) {
            fprintf(stderr, "genprops error: Joining_Group for out-of-range code points U+%04lx..U+%04lx\n",
                    (long)start, (long)end);
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
    }
}
开发者ID:icu-project,项目名称:icu-tools,代码行数:54,代码来源:bidipropsbuilder.cpp


示例7: fprintf

void
PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    scx.clear();
    CharString scString;
    for(;;) {
        const char *scs;
        const char *scLimit=strchr(s, ' ');
        if(scLimit!=NULL) {
            scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data();
            if(U_FAILURE(errorCode)) { return; }
        } else {
            scs=s;
        }
        int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
        if(script==UCHAR_INVALID_CODE) {
            fprintf(stderr,
                    "error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
                    scs, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
            return;
        } else if(scx.contains(script)) {
            fprintf(stderr,
                    "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
                    scs, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
            return;
        } else {
            scx.add(script);
        }
        if(scLimit!=NULL) {
            s=scLimit+1;
        } else {
            break;
        }
    }
    if(scx.isEmpty()) {
        fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
        errorCode=U_PARSE_ERROR;
    }
}
开发者ID:icu-project,项目名称:icu4c,代码行数:41,代码来源:ppucd.cpp


示例8: getUnderflowLabel

/**
 * Fills bucketList_ with one Bucket per index label.
 *
 * Order of the buckets added: the underflow bucket, one normal bucket per
 * entry of labels_, and finally the overflow bucket. An inflow bucket is
 * inserted before a label when its script set (minus IGNORE_SCRIPTS) has
 * nothing in common with the previous label's and the collator orders the
 * previous label's overflow comparison string before the current label.
 *
 * @param status ICU error code, passed through to every Bucket constructor
 *               and addElement() call. It is not checked inside this function.
 */
void AlphabeticIndex::buildBucketList(UErrorCode &status) {
    // Underflow bucket comes first.
    UnicodeString labelStr = getUnderflowLabel();
    Bucket *b = new Bucket(labelStr, *EMPTY_STRING, U_ALPHAINDEX_UNDERFLOW, status);
    bucketList_->addElement(b, status);

    // Build up the list, adding underflow, additions, overflow
    // insert infix labels as needed, using \uFFFF.
    // NOTE(review): element 0 of labels_ is read unconditionally — presumably
    // the caller guarantees labels_ is non-empty; confirm.
    const UnicodeString *last = static_cast<UnicodeString *>(labels_->elementAt(0));
    b = new Bucket(*last, *last, U_ALPHAINDEX_NORMAL, status);
    bucketList_->addElement(b, status);

    // lastSet: script set of the previous label; set: script set of the current one.
    UnicodeSet lastSet;
    UnicodeSet set;
    AlphabeticIndex::getScriptSet(lastSet, *last, status);
    lastSet.removeAll(*IGNORE_SCRIPTS);

    for (int i = 1; i < labels_->size(); ++i) {
        UnicodeString *current = static_cast<UnicodeString *>(labels_->elementAt(i));
        getScriptSet(set, *current, status);
        set.removeAll(*IGNORE_SCRIPTS);
        if (lastSet.containsNone(set)) {
            // check for adjacent
            const UnicodeString &overflowComparisonString = getOverflowComparisonString(*last, status);
            if (collatorPrimaryOnly_->compare(overflowComparisonString, *current) < 0) {
                labelStr = getInflowLabel();
                b = new Bucket(labelStr, overflowComparisonString, U_ALPHAINDEX_INFLOW, status);
                bucketList_->addElement(b, status);
                // NOTE(review): together with the loop's own ++i this increment
                // makes the next entry of labels_ be skipped — confirm that is intended.
                i++;
                // NOTE(review): the same assignment is repeated at the end of the
                // loop body, so this one looks redundant.
                lastSet = set;
            }
        }
        // Normal bucket for the current label.
        b = new Bucket(*current, *current, U_ALPHAINDEX_NORMAL, status);
        bucketList_->addElement(b, status);
        last = current;
        lastSet = set;
    }
    // Final overflow bucket, bounded below by the last label's overflow comparison string.
    const UnicodeString &limitString = getOverflowComparisonString(*last, status);
    b = new Bucket(getOverflowLabel(), limitString, U_ALPHAINDEX_OVERFLOW, status);
    bucketList_->addElement(b, status);
}
开发者ID:0omega,项目名称:platform_external_icu4c,代码行数:41,代码来源:alphaindex.cpp


示例9: SpanBackUTF8

    SpanBackUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
        // Verify that the frozen set is equal to the unfrozen one.
        UnicodeSet set;
        char utf8[4];
        UChar32 c;
        int32_t length;

        for(c=0; c<=0x10ffff; ++c) {
            if(c==0xd800) {
                c=0xe000;
            }
            length=0;
            U8_APPEND_UNSAFE(utf8, length, c);
            if(testcase.set.spanBackUTF8(utf8, length, USET_SPAN_CONTAINED)==0) {
                set.add(c);
            }
        }
        if(set!=testcase.set) {
            fprintf(stderr, "error: frozen set != original!\n");
        }
    }
开发者ID:LittoCats,项目名称:OT_4010D,代码行数:21,代码来源:unisetperf.cpp


示例10: addReplacementSetTo

/**
 * Adds to toUnionTo every character this replacer can produce.
 *
 * Walks the output string one code point at a time. A code point for which
 * data->lookupReplacer() returns a replacer contributes that replacer's
 * replacement set; any other code point is added itself.
 */
void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
    int32_t pos = 0;
    while (pos < output.length()) {
        UChar32 cp = output.char32At(pos);
        UnicodeReplacer* nested = data->lookupReplacer(cp);
        if (nested != NULL) {
            nested->addReplacementSetTo(toUnionTo);
        } else {
            toUnionTo.add(cp);
        }
        pos += UTF_CHAR_LENGTH(cp);
    }
}
开发者ID:LittoCats,项目名称:OT_4010D,代码行数:12,代码来源:strrepl.cpp


示例11: addMatchSetTo

/**
 * Implement UnicodeMatcher.
 *
 * Adds to toUnionTo every character this matcher can match. Walks the
 * pattern one code point at a time. A code point for which
 * data->lookupMatcher() returns a matcher contributes that matcher's match
 * set; any other code point is added itself.
 */
void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
    int32_t pos = 0;
    while (pos < pattern.length()) {
        UChar32 cp = pattern.char32At(pos);
        const UnicodeMatcher* nested = data->lookupMatcher(cp);
        if (nested != NULL) {
            nested->addMatchSetTo(toUnionTo);
        } else {
            toUnionTo.add(cp);
        }
        pos += UTF_CHAR_LENGTH(cp);
    }
}
开发者ID:0x4d52,项目名称:JavaScriptCore-X,代码行数:15,代码来源:strmatch.cpp


示例12: dest

/**
 * Returns a new vector holding copies of the script-boundary strings found
 * among the collator's contractions that start with U+FDD1.
 *
 * Only boundaries whose second code point is a Letter or unassigned (Cn)
 * are kept. The vector deletes its elements through uprv_deleteUObject.
 *
 * @param status ICU error code. Set to U_UNSUPPORTED_ERROR when the collator
 *               yields no such contractions, and to U_MEMORY_ALLOCATION_ERROR
 *               when a string copy cannot be allocated.
 * @return the vector, given up by this function (dest.orphan()), or NULL on failure.
 */
UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
    if (U_FAILURE(status)) { return NULL; }

    LocalPointer<UVector> result(new UVector(status), status);
    if (U_FAILURE(status)) { return NULL; }
    result->setDeleter(uprv_deleteUObject);

    // Fetch the script-first-primary contractions which are defined in the root collator.
    // They all start with U+FDD1.
    UnicodeSet contractions;
    collatorPrimaryOnly_->internalAddContractions(0xFDD1, contractions, status);
    if (U_FAILURE(status)) { return NULL; }
    if (contractions.isEmpty()) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }

    for (UnicodeSetIterator it(contractions); it.next();) {
        const UnicodeString &boundary = it.getString();
        // Classify the boundary by the general category of its sample
        // character, the code point at index 1.
        const uint32_t sampleMask = U_GET_GC_MASK(boundary.char32At(1));
        // Take only boundaries for "real scripts" (sample character is a Letter)
        // and the one for unassigned implicit weights (Cn); the special
        // reordering groups are ignored.
        if ((sampleMask & (U_GC_L_MASK | U_GC_CN_MASK)) != 0) {
            UnicodeString *copy = new UnicodeString(boundary);
            if (copy == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return NULL;
            }
            result->addElement(copy, status);
        }
    }
    return result.orphan();
}
开发者ID:DavidCai1993,项目名称:node,代码行数:39,代码来源:alphaindex.cpp


示例13: DictionaryBreakEngine

/**
 * Creates a dictionary break engine for Korean or for Chinese/Japanese text.
 *
 * The four word sets (Hangul, Han, Katakana, Hiragana) are always built.
 * If that succeeds, the engine's character set becomes the Hangul set for
 * kKorean, or otherwise the union of the Han, Katakana and Hiragana sets
 * plus U+FF70 and U+30FC.
 *
 * @param adoptDictionary  dictionary stored in fDictionary.
 * @param type             language type that selects the character set.
 * @param status           ICU error code; on failure no characters are set.
 */
CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
: DictionaryBreakEngine(1 << UBRK_WORD), fDictionary(adoptDictionary) {
    // Korean dictionary only includes Hangul syllables
    fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
    fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
    fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
    fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);

    if (U_FAILURE(status)) {
        return;
    }

    // handle Korean and Japanese/Chinese using different dictionaries
    if (type == kKorean) {
        setCharacters(fHangulWordSet);
        return;
    }

    // Chinese and Japanese: all three scripts plus the two prolonged sound
    // marks (halfwidth U+FF70 and fullwidth U+30FC).
    UnicodeSet chineseJapanese;
    chineseJapanese.addAll(fHanWordSet)
                   .addAll(fKatakanaWordSet)
                   .addAll(fHiraganaWordSet)
                   .add(0xFF70)
                   .add(0x30FC);
    setCharacters(chineseJapanese);
}
开发者ID:alfintatorkace,项目名称:osx-10.9-opensource,代码行数:23,代码来源:dictbe.cpp


示例14: assertTrue

/**
 * Checks that the static number-parsing sets cover the symbols of every
 * available locale.
 *
 * First asserts that COMMA and PERIOD are supersets of their strict
 * versions. Then, for each locale returned by Locale::getAvailableLocales(),
 * asserts through assertInSet() that the decimal separator, grouping
 * separator, plus sign, minus sign, percent, per-mille and infinity symbols
 * are in the corresponding sets.
 */
void StaticUnicodeSetsTest::testSetCoverage() {
    UErrorCode status = U_ZERO_ERROR;

    // Lenient comma/period should be supersets of strict comma/period;
    // it also makes the coverage logic cheaper.
    assertTrue(
            "COMMA should be superset of STRICT_COMMA",
            get(unisets::COMMA)->containsAll(*get(unisets::STRICT_COMMA)));
    assertTrue(
            "PERIOD should be superset of STRICT_PERIOD",
            get(unisets::PERIOD)->containsAll(*get(unisets::STRICT_PERIOD)));

    // decimals = strict comma + strict period.
    UnicodeSet decimals;
    decimals.addAll(*get(unisets::STRICT_COMMA));
    decimals.addAll(*get(unisets::STRICT_PERIOD));
    decimals.freeze();
    // grouping = decimals + the other grouping separators.
    UnicodeSet grouping;
    grouping.addAll(decimals);
    grouping.addAll(*get(unisets::OTHER_GROUPING_SEPARATORS));
    grouping.freeze();  // was a second decimals.freeze(), which left grouping unfrozen

    const UnicodeSet &plusSign = *get(unisets::PLUS_SIGN);
    const UnicodeSet &minusSign = *get(unisets::MINUS_SIGN);
    const UnicodeSet &percent = *get(unisets::PERCENT_SIGN);
    const UnicodeSet &permille = *get(unisets::PERMILLE_SIGN);
    const UnicodeSet &infinity = *get(unisets::INFINITY_KEY);

    int32_t localeCount;
    const Locale* allAvailableLocales = Locale::getAvailableLocales(localeCount);
    for (int32_t i = 0; i < localeCount; i++) {
        // Bind by reference; a Locale copy per iteration is not needed.
        const Locale &locale = allAvailableLocales[i];
        DecimalFormatSymbols dfs(locale, status);
        UnicodeString localeName;
        locale.getDisplayName(localeName);
        assertSuccess(UnicodeString("Making DFS for ") + localeName, status);

        // The set variable's own name is stringized and passed to assertInSet().
#define ASSERT_IN_SET(name, foo) assertInSet(localeName, UnicodeString("" #name ""), name, foo)
        ASSERT_IN_SET(decimals, dfs.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol));
        ASSERT_IN_SET(grouping, dfs.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol));
        ASSERT_IN_SET(plusSign, dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol));
        ASSERT_IN_SET(minusSign, dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol));
        ASSERT_IN_SET(percent, dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol));
        ASSERT_IN_SET(permille, dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol));
        ASSERT_IN_SET(infinity, dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol));
        // Keep the helper macro from leaking into the rest of the translation unit.
#undef ASSERT_IN_SET
    }
}
开发者ID:winlibs,项目名称:icu4c,代码行数:46,代码来源:static_unisets_test.cpp


示例15: clear

/**
 * Parse the pattern from the given RuleCharacterIterator.  The
 * iterator is advanced over the parsed pattern.
 * @param chars iterator over the pattern characters.  Upon return
 * it will be advanced to the first character after the parsed
 * pattern, or the end of the iteration if all characters are
 * parsed.
 * @param symbols symbol table to use to parse and dereference
 * variables, or null if none.
 * @param rebuiltPat the pattern that was parsed, rebuilt or
 * copied from the input pattern, as appropriate.
 * @param options a bit mask of zero or more of the following:
 * IGNORE_SPACE, CASE.
 */
void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
                              const SymbolTable* symbols,
                              UnicodeString& rebuiltPat,
                              uint32_t options,
                              UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
                              UErrorCode& ec) {
    if (U_FAILURE(ec)) return;

    // Syntax characters: [ ] ^ - & { }

    // Recognized special forms for chars, sets: c-c s-s s&s

    int32_t opts = RuleCharacterIterator::PARSE_VARIABLES |
                   RuleCharacterIterator::PARSE_ESCAPES;
    if ((options & USET_IGNORE_SPACE) != 0) {
        opts |= RuleCharacterIterator::SKIP_WHITESPACE;
    }

    UnicodeString patLocal, buf;
    UBool usePat = FALSE;
    UnicodeSetPointer scratch;
    RuleCharacterIterator::Pos backup;

    // mode: 0=before [, 1=between [...], 2=after ]
    // lastItem: 0=none, 1=char, 2=set
    int8_t lastItem = 0, mode = 0;
    UChar32 lastChar = 0;
    UChar op = 0;

    UBool invert = FALSE;

    clear();

    while (mode != 2 && !chars.atEnd()) {
        U_ASSERT((lastItem == 0 && op == 0) ||
                 (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
                 (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
                                    op == INTERSECTION /*'&'*/)));

        UChar32 c = 0;
        UBool literal = FALSE;
        UnicodeSet* nested = 0; // alias - do not delete

        // -------- Check for property pattern

        // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
        int8_t setMode = 0;
        if (resemblesPropertyPattern(chars, opts)) {
            setMode = 2;
        }

        // -------- Parse '[' of opening delimiter OR nested set.
        // If there is a nested set, use `setMode' to define how
        // the set should be parsed.  If the '[' is part of the
        // opening delimiter for this pattern, parse special
        // strings "[", "[^", "[-", and "[^-".  Check for stand-in
        // characters representing a nested set in the symbol
        // table.

        else {
            // Prepare to backup if necessary
            chars.getPos(backup);
            c = chars.next(opts, literal, ec);
            if (U_FAILURE(ec)) return;

            if (c == 0x5B /*'['*/ && !literal) {
                if (mode == 1) {
                    chars.setPos(backup); // backup
                    setMode = 1;
                } else {
                    // Handle opening '[' delimiter
                    mode = 1;
                    patLocal.append((UChar) 0x5B /*'['*/);
                    chars.getPos(backup); // prepare to backup
                    c = chars.next(opts, literal, ec); 
                    if (U_FAILURE(ec)) return;
                    if (c == 0x5E /*'^'*/ && !literal) {
                        invert = TRUE;
                        patLocal.append((UChar) 0x5E /*'^'*/);
                        chars.getPos(backup); // prepare to backup
                        c = chars.next(opts, literal, ec);
                        if (U_FAILURE(ec)) return;
                    }
                    // Fall through to handle special leading '-';
                    // otherwise restart loop for nested [], \p{}, etc.
                    if (c == HYPHEN /*'-'*/) {
//.........这里部分代码省略.........
开发者ID:ThomasWo,项目名称:proto-quic,代码行数:101,代码来源:uniset_props.cpp


示例16: main


//.........这里部分代码省略.........
                           NULL,           //  dest,
                           0,              //  destCapacity,
                           wordSourceC,
                           wordFileSize,
                           &status);
        if (status != U_BUFFER_OVERFLOW_ERROR) {
            fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        };
    
        status = U_ZERO_ERROR;
        UChar *wordSourceU = new UChar[destCap+1];
        ucnv_toUChars(conv,
                      wordSourceU,     //  dest,
                      destCap+1,
                      wordSourceC,
                      wordFileSize,
                      &status);
        if (U_FAILURE(status)) {
            fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        };
        ucnv_close(conv);
    
        // Get rid of the original file buffer
        delete[] wordBufferC;
    
        // Create a MutableTrieDictionary, and loop through all the lines, inserting
        // words.
    
        // First, pick a median character.
        UChar *current = wordSourceU + (destCap/2);
        UChar uc = *current++;
        UnicodeSet breaks;
        breaks.add(0x000A);     // Line Feed
        breaks.add(0x000D);     // Carriage Return
        breaks.add(0x2028);     // Line Separator
        breaks.add(0x2029);     // Paragraph Separator
    
        do { 
            // Look for line break
            while (uc && !breaks.contains(uc)) {
                uc = *current++;
            }
            // Now skip to first non-line-break
            while (uc && breaks.contains(uc)) {
                uc = *current++;
            }
        }
        while (uc && (breaks.contains(uc) || u_isspace(uc)));
    
        mtd = new MutableTrieDictionary(uc, status);
        
        if (U_FAILURE(status)) {
            fprintf(stderr, "new MutableTrieDictionary: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        }
        
        // Now add the words. Words are non-space characters at the beginning of
        // lines, and must be at least one UChar. If a word has an associated value,
        // the value should follow the word on the same line after a tab character.
        current = wordSourceU;
        UChar *candidate = current;
        uc = *current++;
        int32_t length = 0;
        int count = 0;
开发者ID:AutomationConsultant,项目名称:perch-webrtc,代码行数:67,代码来源:genctd.cpp


示例17: defined

//---------------------------------------------------------------------
//
//   dump    Output the compiled form of the pattern.
//           Debugging function only.
//
//---------------------------------------------------------------------
void   RegexPattern::dumpOp(int32_t index) const {
    (void)index;  // Suppress warnings in non-debug build.
#if defined(REGEX_DEBUG)
    static const char * const opNames[] = {URX_OPCODE_NAMES};
    int32_t op          = fCompiledPat->elementAti(index);
    int32_t val         = URX_VAL(op);
    int32_t type        = URX_TYPE(op);
    int32_t pinnedType  = type;
    if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
        pinnedType = 0;
    }

    printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
    switch (type) {
    case URX_NOP:
    case URX_DOTANY:
    case URX_DOTANY_ALL:
    case URX_FAIL:
    case URX_CARET:
    case URX_DOLLAR:
    case URX_BACKSLASH_G:
    case URX_BACKSLASH_X:
    case URX_END:
    case URX_DOLLAR_M:
    case URX_CARET_M:
        // Types with no operand field of interest.
        break;

    case URX_RESERVED_OP:
    case URX_START_CAPTURE:
    case URX_END_CAPTURE:
    case URX_STATE_SAVE:
    case URX_JMP:
    case URX_JMP_SAV:
    case URX_JMP_SAV_X:
    case URX_BACKSLASH_B:
    case URX_BACKSLASH_BU:
    case URX_BACKSLASH_D:
    case URX_BACKSLASH_Z:
    case URX_STRING_LEN:
    case URX_CTR_INIT:
    case URX_CTR_INIT_NG:
    case URX_CTR_LOOP:
    case URX_CTR_LOOP_NG:
    case URX_RELOC_OPRND:
    case URX_STO_SP:
    case URX_LD_SP:
    case URX_BACKREF:
    case URX_STO_INP_LOC:
    case URX_JMPX:
    case URX_LA_START:
    case URX_LA_END:
    case URX_BACKREF_I:
    case URX_LB_START:
    case URX_LB_CONT:
    case URX_LB_END:
    case URX_LBN_CONT:
    case URX_LBN_END:
    case URX_LOOP_C:
    case URX_LOOP_DOT_I:
    case URX_BACKSLASH_H:
    case URX_BACKSLASH_R:
    case URX_BACKSLASH_V:
        // types with an integer operand field.
        printf("%d", val);
        break;

    case URX_ONECHAR:
    case URX_ONECHAR_I:
        printf("%c", val<256?val:'?');
        break;

    case URX_STRING:
    case URX_STRING_I:
        {
            int32_t lengthOp       = fCompiledPat->elementAti(index+1);
            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
            int32_t length = URX_VAL(lengthOp);
            int32_t i;
            for (i=val; i<val+length; i++) {
                UChar c = fLiteralText[i];
                if (c < 32 || c >= 256) {c = '.';}
                printf("%c", c);
            }
        }
        break;

    case URX_SETREF:
    case URX_LOOP_SR_I:
        {
            UnicodeString s;
            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
            set->toPattern(s, TRUE);
            for (int32_t i=0; i<s.length(); i++) {
//.........这里部分代码省略.........
开发者ID:Acorld,项目名称:WinObjC-Heading,代码行数:101,代码来源:repattrn.cpp


示例18: AlphabeticIndex

//
//  APITest.   Invoke every function at least once, and check that it does something.
//             Does not attempt to check complete functionality.
//
void AlphabeticIndexTest::APITest() {
    //
    //  Simple constructor and destructor,  getBucketCount()
    //
    UErrorCode status = U_ZERO_ERROR;
    int32_t lc = 0;
    int32_t i  = 0;
    AlphabeticIndex *index = new AlphabeticIndex(Locale::getEnglish(), status);
    TEST_CHECK_STATUS;
    lc = index->getBucketCount(status);
    TEST_CHECK_STATUS;
    TEST_ASSERT(28 == lc);    // 26 letters plus two under/overflow labels.
    //printf("getBucketCount() == %d\n", lc);
    delete index;

    // Constructor from a Collator
    //
    status = U_ZERO_ERROR;
    RuleBasedCollator *coll = dynamic_cast<RuleBasedCollator *>(
        Collator::createInstance(Locale::getGerman(), status));
    TEST_CHECK_STATUS;
    TEST_ASSERT(coll != NULL);
    index = new AlphabeticIndex(coll, status);
    TEST_CHECK_STATUS;
    TEST_ASSERT(coll == &index->getCollator());
    assertEquals("only the underflow label in an index built from a collator",
                 1, index->getBucketCount(status));
    TEST_CHECK_STATUS;
    delete index;
    

    // addLabels()

    status = U_ZERO_ERROR;
    index = new AlphabeticIndex(Locale::getEnglish(), status);
    TEST_CHECK_STATUS;
    UnicodeSet additions;
    additions.add((UChar32)0x410).add((UChar32)0x415);   // A couple of Cyrillic letters
    index->addLabels(additions, status);
    TEST_CHECK_STATUS;
    lc = index->getBucketCount(status);
    TEST_CHECK_STATUS;
    assertEquals("underflow, A-Z, inflow, 2 Cyrillic, overflow",
                 31, index->getBucketCount(status));
    // std::cout << lc << std::endl;
    delete index;


    // addLabels(Locale)

    status = U_ZERO_ERROR;
    index = new AlphabeticIndex(Locale::getEnglish(), status);
    TEST_CHECK_STATUS;
    AlphabeticIndex &aip = index->addLabels(Locale::getJapanese(), status);
    TEST_ASSERT(&aip == index);
    TEST_CHECK_STATUS;
    lc = index->getBucketCount(status);
    TEST_CHECK_STATUS;
    TEST_ASSERT(35 < lc);  // Japanese should add a bunch.  Don't rely on the exact value.
    delete index;

    // GetCollator(),  Get under/in/over flow labels

    status = U_ZERO_ERROR;
    index = new AlphabeticIndex(Locale::getGerman(), status);
    TEST_CHECK_STATUS;
    Collator *germanCol = Collator::createInstance(Locale::getGerman(), status);
    TEST_CHECK_STATUS;
    const RuleBasedCollator &indexCol = index->getCollator();
    TEST_ASSERT(*germanCol == indexCol);
    delete germanCol;

    UnicodeString ELLIPSIS;  ELLIPSIS.append((UChar32)0x2026);
    UnicodeString s = index->getUnderflowLabel();
    TEST_ASSERT(ELLIPSIS == s);
    s = index->getOverflowLabel();
    TEST_ASSERT(ELLIPSIS == s);
    s = index->getInflowLabel();
    TEST_ASSERT(ELLIPSIS == s);
    index->setOverflowLabel(UNICODE_STRING_SIMPLE("O"), status);
    index->setUnderflowLabel(UNICODE_STRING_SIMPLE("U"), status).setInflowLabel(UNICODE_STRING_SIMPLE("I"), status);
    s = index->getUnderflowLabel();
    TEST_ASSERT(UNICODE_STRING_SIMPLE("U") == s);
    s = index->getOverflowLabel();
    TEST_ASSERT(UNICODE_STRING_SIMPLE("O") == s);
    s = index->getInflowLabel();
    TEST_ASSERT(UNICODE_STRING_SIMPLE("I") == s);




    delete index;



    const UnicodeString adam = UNICODE_STRING_SIMPLE("Adam");
//.........这里部分代码省略.........
开发者ID:CoherentLabs,项目名称:CoherentWebCoreDependencies,代码行数:101,代码来源:alphaindextst.cpp


示例19: logln

void CanonicalIteratorTest::TestBasic() {

    UErrorCode status = U_ZERO_ERROR;

    static const char * const testArray[][2] = {
        {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, "
            "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, "
            "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, "
            "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"},
        {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"},
        {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"},
    };
    
#if 0
    // This is not interesting for C/C++ as the data is already built beforehand
    // check build
    UnicodeSet ss = CanonicalIterator.getSafeStart();
    logln("Safe Start: " + ss.toPattern(true));
    ss = CanonicalIterator.getStarts('a');
    expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
        new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
        + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
            );
#endif

    // check permute
    // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!

    Hashtable *permutations = new Hashtable(FALSE, status);
    permutations->setValueDeleter(uhash_deleteUnicodeString);
    UnicodeString toPermute("ABC");

    CanonicalIterator::permute(toPermute, FALSE, permutations, status);

    logln("testing permutation");
  
    expectEqual("Simple permutation ", "", collectionToString(permutations), "ABC, ACB, BAC, BCA, CAB, CBA");

    delete permutations;
    
    // try samples
    logln("testing samples");
    Hashtable *set = new Hashtable(FALSE, status);
    set->setValueDeleter(uhash_deleteUnicodeString);
    int32_t i = 0;
    CanonicalIterator it("", status);
    if(U_SUCCESS(status)) {
      for (i = 0; i < ARRAY_LENGTH(testArray); ++i) {
          //logln("Results for: " + name.transliterate(testArray[i]));
          UnicodeString testStr = CharsToUnicodeString(testArray[i][0]);
          it.setSource(testStr, status);
          set->removeAll();
          for (;;) {
              //UnicodeString *result = new UnicodeString(it.next());
              UnicodeString result(it.next());
              if (result.isBogus()) {
                  break;
              }
              set->put(result, new UnicodeString(result), status); // Add result to the table
              //logln(++counter + ": " + hex.transliterate(result));
              //logln(" = " + name.transliterate(result));
          }
          expectEqual(i + ": ", testStr, collectionToString(set), CharsToUnicodeString(testArray[i][1]));

      }
    } else {
      errln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status));
    }
    delete set;
}
开发者ID:Andproject,项目名称:platform_external_icu4c,代码行数:70,代码来源:canittst.cpp


示例20: iter

/*
 * Find missing case mapping relationships and add mappings for case closure.
 * This function starts from an "original" code point and recursively
 * finds its case mappings and the case mappings of where it maps to.
 *
 * The recursion depth is capped at 3 nested calls of this function.
 * In each call, the current code point is c, and the function enumerates
 * all of c's simple (single-code point) case mappings.
 * prev is the code point that case-mapped to c.
 * prev2 is the code point that case-mapped to prev.
 *
 * The initial function call has prev2<0, prev<0, and c==orig
 * (marking no code points).
 * It enumerates c's case mappings and recurses without further action.
 *
 * The second-level function call has prev2<0, prev==orig, and c is
 * the destination code point of one of prev's case mappings.
 * The function checks if any of c's case mappings go back to orig
 * and adds a closure mapping if not.
 * In other words, it turns a case mapping relationship of
 *   orig->c
 * into
 *   orig<->c
 *
 * The third-level function call has prev2==orig, prev>=0, and c is
 * the destination code point of one of prev's case mappings.
 * (And prev is the destination of one of prev2's case mappings.)
 * The function checks if any of c's case mappings go back to orig
 * and adds a closure mapping if not.
 * In other words, it turns case mapping relationships of
 *   orig->prev->c or orig->prev<->c
 * into
 *   orig->prev->c->orig or orig->prev<->c->orig
 * etc.
 * (Graphically, this closes a triangle.)
 *
 * With repeated application on all code points until no more closure mappings
 * are added, all case equivalence groups get complete mappings.
 * That is, in each group of code points with case relationships
 * each code point will in the end have some mapping to each other
 * code point in the group.
 *
 * @return TRUE if a closure mapping was added
 */
UBool
CasePropsBuilder::addClosure(UChar32 orig, UChar32 prev2, UChar32 prev, UChar32 c, uint32_t value,
                             UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return FALSE; }

    UChar32 next;
    UBool someMappingsAdded=FALSE;

    if(c!=orig) {
        /* get the properties for c */
        value=utrie2_get32(pTrie, c);
    }
    /* else if c==orig then c's value was passed in */

    if(value&UCASE_EXCEPTION) {
        UnicodeSet set;

        ExcProps &ep=*excProps[value>>UGENCASE_EXC_SHIFT];
        UniProps &p=ep.props;

        /*
         * marker for whether any of c's mappings goes to orig
         * c==orig: prevent adding a closure mapping when getting orig's own, direct mappings
         */
        UBool mapsToOrig=(UBool)(c==orig);

        /* collect c's case mapping destinations in set[] */
        if((next=p.suc)>=0 && next!=c) {
            set.add(next);
        }
        if((next=p.slc)>=0 && next!=c) {
            set.add(next);
        }
        if(p.suc!=(next=p.stc) && next!=c) {
            set.add(next);
        }
        if((next=p.scf)>=0 && next!=c) {
            set.add(next);
        }

        /* add c's current closure mappings to set */
        set.addAll(ep.closure);

        /* process all code points to which c case-maps */
        UnicodeSetIterator iter(set);
        while(iter.next()) {
            next=iter.getCodepoint(); /* next!=c */

            if(next==orig) {
                mapsToOrig=TRUE; /* remember that we map to orig */
            } else if(prev2<0 && next!=prev) {
                /*
                 * recurse unless
                 * we have reached maximum depth (prev2>=0) or
                 * this is a mapping to one of the previous code points (orig, prev, c)
                 */
//.........这里部分代码省略.........
开发者ID:icu-project,项目名称:icu-tools,代码行数:101,代码来源:casepropsbuilder.cpp



注:本文中的 UnicodeSet 类示例由纯净天空整理自 GitHub、MSDocs 等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。

<

鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
C++ UnicodeString类代码示例发布时间:2022-05-31
下一篇:
C++ Unicode类代码示例发布时间:2022-05-31
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap