本文整理汇总了C++中THashSet类的典型用法代码示例。如果您正苦于以下问题:C++ THashSet类的具体用法?C++ THashSet怎么用?C++ THashSet使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了THashSet类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: CompareUsingShingles
/// Compares every pair of quotes that share a shingle bucket and passes each
/// distinct pair to AddEdgeIfSimilar() exactly once.
///
/// @param Shingles  Map from shingle signature to the set of quote ids that
///                  contain that shingle.
///
/// Progress is logged every 100 buckets. On completion the total number of
/// pairwise combinations over all buckets ("COMPARES") and the number of
/// distinct pairs actually compared ("REAL COMPARES") are written to stderr.
void QuoteGraph::CompareUsingShingles(THash<TMd5Sig, TIntSet>& Shingles) {
  // 64-bit on purpose: a bucket of n quotes contributes n*(n-1)/2 pairs, which
  // no longer fits a 32-bit int once a bucket holds a few tens of thousands
  // of quotes.
  long long Count = 0;
  int RealCount = 0;
  TVec<TMd5Sig> ShingleKeys;
  Shingles.GetKeyV(ShingleKeys);
  // Pairs that were already compared, stored once as (smaller id, larger id)
  // so a single lookup covers both orientations.
  THashSet<TIntPr> EdgeCache;
  for (int i = 0; i < ShingleKeys.Len(); i++) {
    if (i % 100 == 0) {
      Err("Processed %d out of %d shingles, count = %lld\n", i, ShingleKeys.Len(), Count);
    }
    // The key was just read from this hash, so the lookup cannot miss; bind by
    // reference instead of copying the bucket.
    // NOTE(review): assumes AddEdgeIfSimilar() does not modify Shingles - confirm.
    const TIntSet& Bucket = Shingles.GetDat(ShingleKeys[i]);
    for (TIntSet::TIter Quote1 = Bucket.BegI(); Quote1 < Bucket.EndI(); Quote1++) {
      TIntSet::TIter Quote2 = Quote1;
      Quote2++;
      for (; Quote2 < Bucket.EndI(); Quote2++) {
        const int Id1 = Quote1.GetKey();
        const int Id2 = Quote2.GetKey();
        const TIntPr Edge = (Id1 < Id2) ? TIntPr(Id1, Id2) : TIntPr(Id2, Id1);
        if (!EdgeCache.IsKey(Edge)) {
          EdgeCache.AddKey(Edge);
          RealCount++;
          AddEdgeIfSimilar(Quote1.GetKey(), Quote2.GetKey());
        }
      }
    }
    const long long BucketLen = Bucket.Len();
    Count += BucketLen * (BucketLen - 1) / 2;
  }
  fprintf(stderr, "NUMBER OF COMPARES: %lld\n", Count);
  fprintf(stderr, "NUMBER OF REAL COMPARES: %d\n", RealCount);
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:32,代码来源:quotegraph.cpp
示例2: LoadDyNetGraphV
// DyNetML format: loads all the networks in the file.
// Returns one TNGraph per <network> start tag. Node ids are the key ids handed
// out by a single string set shared by all networks, so an identical
// source/target label maps to the same node id in every returned graph.
TVec<PNGraph> LoadDyNetGraphV(const TStr& FNm) {
  TVec<PNGraph> Graphs;
  THashSet<TStr> LabelSet;
  TXmlLx Lexer(TFIn::New(FNm), xspTruncate);
  while (Lexer.GetSym() != xsyEof) {
    const bool IsNetworkTag = (Lexer.Sym == xsySTag) && (Lexer.TagNm == "network");
    if (!IsNetworkTag) { continue; }
    PNGraph Graph = TNGraph::New();
    Graphs.Add(Graph);
    // Consume the run of <link> symbols that follows the network tag.
    for (Lexer.GetSym(); Lexer.TagNm == "link"; Lexer.GetSym()) {
      TStr Str1, Val1, Str2, Val2;
      Lexer.GetArg(0, Str1, Val1);
      Lexer.GetArg(1, Str2, Val2);
      IAssert(Str1=="source" && Str2=="target");
      // AddKey() hands back the key id, for new and for existing labels alike.
      const int SrcNId = LabelSet.AddKey(Val1);
      const int DstNId = LabelSet.AddKey(Val2);
      if (!Graph->IsNode(SrcNId)) { Graph->AddNode(SrcNId); }
      if (!Graph->IsNode(DstNId)) { Graph->AddNode(DstNId); }
      Graph->AddEdge(SrcNId, DstNId);
    }
  }
  return Graphs;
}
开发者ID:pikma,项目名称:Snap,代码行数:26,代码来源:gio.cpp
示例3: CompareUsingMinHash
// I embarrassingly don't know how templating works.
/// Compares every pair of quotes that fall into the same bucket of any band
/// and passes each distinct pair to AddEdgeIfSimilar() exactly once.
///
/// @param BucketsVector  One hash per band, mapping a band signature to the
///                       set of quote ids that produced it.
///
/// On completion the total number of pairwise combinations over all buckets
/// ("COMPARES") and the number of distinct pairs actually compared
/// ("REAL COMPARES") are written to stderr.
void QuoteGraph::CompareUsingMinHash(TVec<THash<TMd5Sig, TIntSet> >& BucketsVector) {
  // Pairs that were already compared, stored once as (smaller id, larger id)
  // so a single lookup covers both orientations.
  THashSet<TIntPr> EdgeCache;
  // 64-bit on purpose: a bucket of n quotes contributes n*(n-1)/2 pairs, which
  // can exceed the range of a 32-bit int.
  long long Count = 0;
  int RealCount = 0;
  Err("Beginning edge creation step...\n");
  for (int i = 0; i < BucketsVector.Len(); i++) {
    Err("Processing band signature %d of %d - %d signatures\n", i+1, BucketsVector.Len(), BucketsVector[i].Len());
    TVec<TMd5Sig> Buckets;
    BucketsVector[i].GetKeyV(Buckets);
    for (int b = 0; b < Buckets.Len(); b++) {
      // The key was just read from this hash, so the lookup cannot miss; bind
      // by reference instead of copying the bucket.
      // NOTE(review): assumes AddEdgeIfSimilar() does not modify BucketsVector - confirm.
      const TIntSet& Bucket = BucketsVector[i].GetDat(Buckets[b]);
      const long long BucketLen = Bucket.Len();
      Count += BucketLen * (BucketLen - 1) / 2;
      for (TIntSet::TIter Quote1 = Bucket.BegI(); Quote1 < Bucket.EndI(); Quote1++) {
        TIntSet::TIter Quote2 = Quote1;
        Quote2++;
        for (; Quote2 < Bucket.EndI(); Quote2++) {
          const int Id1 = Quote1.GetKey();
          const int Id2 = Quote2.GetKey();
          const TIntPr Edge = (Id1 < Id2) ? TIntPr(Id1, Id2) : TIntPr(Id2, Id1);
          if (!EdgeCache.IsKey(Edge)) {
            EdgeCache.AddKey(Edge);
            RealCount++;
            AddEdgeIfSimilar(Quote1.GetKey(), Quote2.GetKey());
          }
        }
      }
    }
  }
  fprintf(stderr, "NUMBER OF COMPARES: %lld\n", Count);
  fprintf(stderr, "NUMBER OF REAL COMPARES: %d\n", RealCount);
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:32,代码来源:quotegraph.cpp
示例4: GetAllCandidatePairs
/// Returns the data vectors of every pair of ids that share a bucket in at
/// least one band. Each pair is stored as (smaller id, larger id) while
/// collecting, so it appears once in the result however many buckets it shares.
TVec<TPair<TFltV, TFltV> > TLSHash::GetAllCandidatePairs() {
  typedef TPair<TInt, TInt> TIdPr;
  THashSet<TIdPr> IdPairSet;
  for (int Band = 0; Band < Bands; Band++) {
    TVec<TIntV> Buckets;
    SigBucketVHV[Band].GetDatV(Buckets);
    for (int b = 0; b < Buckets.Len(); b++) {
      const TIntV& Ids = Buckets[b];
      const int NIds = Ids.Len();
      for (int Lo = 0; Lo < NIds; Lo++) {
        for (int Hi = Lo + 1; Hi < NIds; Hi++) {
          const int IdA = Ids[Lo];
          const int IdB = Ids[Hi];
          if (IdA > IdB) {
            IdPairSet.AddKey(TIdPr(IdB, IdA));
          } else {
            IdPairSet.AddKey(TIdPr(IdA, IdB));
          }
        }
      }
    }
  }
  // Resolve the collected id pairs to the stored data vectors.
  TVec<TPair<TFltV, TFltV> > Result;
  for (int KeyId = IdPairSet.FFirstKeyId(); IdPairSet.FNextKeyId(KeyId); ) {
    TIdPr IdPr = IdPairSet[KeyId];
    Result.Add(TPair<TFltV, TFltV>(DataV[IdPr.GetVal1()], DataV[IdPr.GetVal2()]));
  }
  return Result;
}
开发者ID:EDzhangjianyu,项目名称:snap,代码行数:31,代码来源:lsh.cpp
示例5: AddTreeCtrs
static void AddTreeCtrs(const TTrainData& data,
const TSplitTree& currentTree,
TFold* fold,
TLearnContext* ctx,
TStatsFromPrevTree* statsFromPrevTree,
TCandidateList* candList) {
using TSeenProjHash = THashSet<TProjection>;
TSeenProjHash seenProj;
// greedy construction
TProjection binAndOneHotFeaturesTree;
binAndOneHotFeaturesTree.BinFeatures = currentTree.GetBinFeatures();
binAndOneHotFeaturesTree.OneHotFeatures = currentTree.GetOneHotFeatures();
seenProj.insert(binAndOneHotFeaturesTree);
for (const auto& ctrSplit : currentTree.GetCtrSplits()) {
seenProj.insert(ctrSplit.Projection);
}
TSeenProjHash addedProjHash;
for (const auto& baseProj : seenProj) {
if (baseProj.IsEmpty()) {
continue;
}
for (int cf = 0; cf < data.AllFeatures.CatFeatures.ysize(); ++cf) {
if (data.AllFeatures.CatFeatures[cf].empty() ||
data.AllFeatures.IsOneHot[cf] ||
ctx->Rand.GenRandReal1() > ctx->Params.ObliviousTreeOptions->Rsm) {
continue;
}
TProjection proj = baseProj;
proj.AddCatFeature(cf);
if (proj.IsRedundant() || proj.GetFullProjectionLength() > ctx->Params.CatFeatureParams->MaxTensorComplexity) {
continue;
}
if (addedProjHash.has(proj)) {
continue;
}
addedProjHash.insert(proj);
AddCtrsToCandList(*fold, *ctx, proj, candList);
fold->GetCtrRef(proj);
}
}
THashSet<TSplitCandidate> candidatesToErase;
for (auto& splitCandidate : statsFromPrevTree->Stats) {
if (splitCandidate.first.Type == ESplitType::OnlineCtr) {
if (!addedProjHash.has(splitCandidate.first.Ctr.Projection)) {
candidatesToErase.insert(splitCandidate.first);
}
}
}
for (const auto& splitCandidate : candidatesToErase) {
statsFromPrevTree->Stats.erase(splitCandidate);
}
}
开发者ID:iamnik13,项目名称:catboost,代码行数:60,代码来源:greedy_tensor_search.cpp
示例6: HashShinglesOfClusters
/// Shingles by words.
/// For every cluster id in ClusterIds, hashes the cluster's word shingles and
/// appends the cluster id to the id list of each shingle it contains.
///
/// @param QuoteBase            Quote store passed through to GetHashedShinglesOfCluster().
/// @param ClusterBase          Source of the clusters named by ClusterIds.
/// @param ClusterIds           Ids of the clusters to process, in order.
/// @param ShingleLen           Shingle length, passed through to GetHashedShinglesOfCluster().
/// @param ShingleToClusterIds  In/out: shingle signature -> ids of the clusters containing it.
void LSH::HashShinglesOfClusters(TQuoteBase *QuoteBase,
    TClusterBase *ClusterBase, TIntV& ClusterIds, TInt ShingleLen,
    THash<TMd5Sig, TIntV>& ShingleToClusterIds) {
  Err("Hashing shingles of clusters...\n");
  for (int i = 0; i < ClusterIds.Len(); i++) {
    if (i % 1000 == 0) {
      fprintf(stderr, "%d out of %d completed\n", i, ClusterIds.Len());
    }
    TCluster C;
    ClusterBase->GetCluster(ClusterIds[i], C);
    // Put x-word shingles into a hash set; x is specified by ShingleLen.
    THashSet<TMd5Sig> CHashedShingles;
    GetHashedShinglesOfCluster(QuoteBase, C, ShingleLen, CHashedShingles);
    for (THashSet<TMd5Sig>::TIter Hash = CHashedShingles.BegI();
        Hash < CHashedShingles.EndI(); Hash++) {
      // AddDat(Key) yields a reference to the stored vector (an empty one is
      // created the first time a shingle is seen), so the id is appended in
      // place instead of copying the whole vector out and back in.
      ShingleToClusterIds.AddDat(*Hash).Add(ClusterIds[i]);
    }
  }
  Err("Done hashing!\n");
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:28,代码来源:lsh.cpp
示例7: GetIntersection
void TCliqueOverlap::GetIntersection(const THashSet<TInt>& A, const THashSet<TInt>& B, THashSet<TInt>& C) {
if (A.Len() < B.Len()) {
for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++)
if (B.IsKey(it.GetKey())) C.AddKey(it.GetKey());
} else {
for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++)
if (A.IsKey(it.GetKey())) C.AddKey(it.GetKey());
}
}
开发者ID:Aleyasen,项目名称:Alaki,代码行数:9,代码来源:cliques.cpp
示例8: GetNodeIdWithMaxDeg
int TCliqueOverlap::GetNodeIdWithMaxDeg(const THashSet<TInt>& Set) const{
int id = -1;
int maxDeg = -1;
//
for (THashSetKeyI<TInt> it=Set.BegI(); it<Set.EndI(); it++) {
int nId = it.GetKey();
int deg = m_G->GetNI(nId).GetDeg();
if (maxDeg < deg) { maxDeg=deg; id=nId; }
}
return id;
}
开发者ID:Aleyasen,项目名称:Alaki,代码行数:11,代码来源:cliques.cpp
示例9: main
int main(int argc, char *argv[]) {
TStr BaseString = "/lfs/1/tmp/curis/week/QBDB.bin";
TFIn BaseFile(BaseString);
TQuoteBase *QB = new TQuoteBase;
TDocBase *DB = new TDocBase;
QB->Load(BaseFile);
DB->Load(BaseFile);
TIntV QuoteIds;
QB->GetAllQuoteIds(QuoteIds);
int NumQuotes = QuoteIds.Len();
THash<TInt, TStrSet> PeakCounts;
for (int i = 0; i < NumQuotes; i++) {
TQuote CurQuote;
if (QB->GetQuote(QuoteIds[i], CurQuote)) {
TVec<TSecTm> Peaks;
CurQuote.GetPeaks(DB, Peaks);
TStr QuoteString;
CurQuote.GetParsedContentString(QuoteString);
TStrSet StringSet;
if (PeakCounts.IsKey(Peaks.Len())) {
StringSet = PeakCounts.GetDat(Peaks.Len());
}
StringSet.AddKey(QuoteString);
PeakCounts.AddDat(Peaks.Len(), StringSet);
}
}
TIntV PeakCountKeys;
PeakCounts.GetKeyV(PeakCountKeys);
PeakCountKeys.Sort(true);
for (int i = 0; i < PeakCountKeys.Len(); i++) {
TStrSet CurSet = PeakCounts.GetDat(PeakCountKeys[i]);
if (CurSet.Len() > 0) {
printf("QUOTES WITH %d PEAKS\n", PeakCountKeys[i].Val);
printf("#########################################\n");
THashSet<TStr> StringSet = PeakCounts.GetDat(PeakCountKeys[i]);
for (THashSet<TStr>::TIter l = StringSet.BegI(); l < StringSet.EndI(); l++) {
printf("%s\n", l.GetKey().CStr());
}
printf("\n");
}
}
delete QB;
delete DB;
return 0;
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:48,代码来源:analyzepeaks.cpp
示例10: GetHashedShinglesOfCluster
/// Adds to HashedShingles the signature of every run of ShingleLen consecutive
/// words in every quote of cluster C. Words come from the quote's content
/// string after punctuation removal and a whitespace split; the words of a
/// shingle are joined with single spaces before hashing.
void LSH::GetHashedShinglesOfCluster(TQuoteBase *QuoteBase, TCluster& C,
    TInt ShingleLen, THashSet<TMd5Sig>& HashedShingles) {
  TIntV QuoteIds;
  C.GetQuoteIds(QuoteIds);
  for (int q = 0; q < QuoteIds.Len(); q++) {
    TQuote Quote;
    QuoteBase->GetQuote(QuoteIds[q], Quote);

    TStr RawContent;
    Quote.GetContentString(RawContent);
    TStr CleanContent;
    TStringUtil::RemovePunctuation(RawContent, CleanContent);
    TStrV Words;
    CleanContent.SplitOnWs(Words);

    // Last word index at which a full shingle still fits.
    const int LastStart = Words.Len() - ShingleLen;
    for (int Start = 0; Start <= LastStart; Start++) {
      TStr Shingle;
      for (int w = 0; w < ShingleLen; w++) {
        if (w != 0) {
          Shingle += " ";
        }
        Shingle += Words[Start + w];
      }
      HashedShingles.AddKey(TMd5Sig(Shingle));
    }
  }
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:26,代码来源:lsh.cpp
示例11: MakeSlashdotSignNet
/// Builds a signed network from a tab-separated relation file and writes it
/// to OutFNm as an edge list.
///
/// Every input line that is neither empty nor starts with '#' is lower-cased
/// and split on tabs: field 0 is the source user, field 1 the relation
/// ("friends" gives sign 1, "foes" gives sign -1, "fans" lines are skipped,
/// anything else triggers Fail), and each further field is a destination user.
/// Self-loops and already-present edges are not added.
///
/// @param InFNm   Input file name.
/// @param OutFNm  Output edge-list file name; also passed to PrintGraphStatTable().
/// @param Desc    Optional description written into the output header.
/// @param NIdSet  Name -> node id mapping (the key id of the name); taken by
///                value, so additions made here are not visible to the caller.
void MakeSlashdotSignNet(const TStr InFNm, TStr OutFNm, TStr Desc, THashSet<TChA> NIdSet) {
  TChA LnStr;
  TVec<char *> WrdV;
  int Sign = 0;
  TPt<TNodeEDatNet<TInt, TInt> > Net = TNodeEDatNet<TInt, TInt>::New();
  int i = 0;  // number of destination fields processed
  for (TFIn FIn(InFNm); FIn.GetNextLn(LnStr); ) {
    if (LnStr.Empty() || LnStr[0]=='#') { continue; }
    LnStr.ToLc();
    TStrUtil::SplitOnCh(LnStr, WrdV, '\t', false);
    // A line without a relation field is as malformed as one with an unknown
    // relation; fail the same way instead of reading past the end of WrdV.
    if (WrdV.Len() < 2) { Fail; }
    if (strcmp(WrdV[1], "friends")==0) { Sign = 1; }
    else if (strcmp(WrdV[1], "fans")==0) { continue; } // skip (fans are in-friends)
    else if (strcmp(WrdV[1], "foes")==0) { Sign = -1; } else { Fail; }
    const int SrcNId = NIdSet.AddKey(WrdV[0]);
    if (! Net->IsNode(SrcNId)) {
      Net->AddNode(SrcNId); }
    for (int e = 2; e < WrdV.Len(); e++) {
      const int DstNId = NIdSet.AddKey(WrdV[e]);
      i ++ ;
      if ((SrcNId != DstNId) && ! Net->IsEdge(SrcNId, DstNId)) {
        if (! Net->IsNode(DstNId))
          Net->AddNode(DstNId);
        Net->AddEdge(SrcNId, DstNId, Sign);
      }
    }
  }
  TSnap::PrintInfo(Net, "Slashdot (" + TInt::GetStr(i) + ")");
  // copied from gio.h - line 111
  FILE *F = fopen(OutFNm.CStr(), "wt");
  if (F == NULL) {
    // fopen() returns NULL on failure; writing through it would crash, so
    // report the problem and skip the edge-list dump.
    fprintf(stderr, "Cannot open %s for writing, edge list not saved\n", OutFNm.CStr());
  } else {
    fprintf(F, "# Directed graph: %s\n", OutFNm.CStr());
    if (! Desc.Empty())
      fprintf(F, "# %s\n", (Desc).CStr());
    fprintf(F, "# Nodes: %d Edges: %d\n", Net->GetNodes(), Net->GetEdges());
    fprintf(F, "# UserId\tGroupId\tSign\n");
    for (TNodeEDatNet<TInt,TInt>::TEdgeI ei = Net->BegEI(); ei < Net->EndEI(); ei++) {
      fprintf(F, "%d\t%d\t%d\n", ei.GetSrcNId(), ei.GetDstNId(), ei()());
    }
    fclose(F);
  }
  PrintGraphStatTable(Net, OutFNm, Desc);
}
开发者ID:SherlockYang,项目名称:Archive,代码行数:45,代码来源:mkdatasets.cpp
示例12: GetMaximalCliques
void TCliqueOverlap::GetMaximalCliques(const PUNGraph& G, int MinMaxCliqueSize, TVec<TIntV>& MaxCliques) {
if (G->GetNodes() == 0) return;
//
m_G = G;
m_minMaxCliqueSize = MinMaxCliqueSize;
m_maxCliques =& MaxCliques;
m_Q.Clr();
//
THashSet<TInt> SUBG;
THashSet<TInt> CAND;
for (TUNGraph::TNodeI NI=m_G->BegNI(); NI<m_G->EndNI(); NI++) {
TInt nId = NI.GetId();
SUBG.AddKey(nId);
CAND.AddKey(nId);
}
//
Expand(SUBG, CAND);
}
开发者ID:Aleyasen,项目名称:Alaki,代码行数:18,代码来源:cliques.cpp
示例13: Intersection
int TCliqueOverlap::Intersection(const THashSet<TInt>& A, const THashSet<TInt>& B) {
int n = 0;
if (A.Len() < B.Len()) {
for (THashSetKeyI<TInt> it=A.BegI(); it<A.EndI(); it++)
if (B.IsKey(it.GetKey())) n++;
} else {
for (THashSetKeyI<TInt> it=B.BegI(); it<B.EndI(); it++)
if (A.IsKey(it.GetKey())) n++;
}
return n;
}
开发者ID:Aleyasen,项目名称:Alaki,代码行数:11,代码来源:cliques.cpp
示例14: MaxNbrsInCANDNodeId
int TCliqueOverlap::MaxNbrsInCANDNodeId(const THashSet<TInt>& SUBG, const THashSet<TInt>& CAND) const{
int id = -1;
int maxIntersection = -1;
//
for (THashSetKeyI<TInt> it=SUBG.BegI(); it<SUBG.EndI(); it++) {
int nId = it.GetKey();
TUNGraph::TNodeI nIt = m_G->GetNI(nId);
int deg = nIt.GetDeg();
//
int curIntersection = 0;
for (int i=0; i<deg; i++) {
int nbrId = nIt.GetNbrNId(i);
if (CAND.IsKey(nbrId)) curIntersection++;
}
//
if (maxIntersection < curIntersection) { maxIntersection=curIntersection; id=nId; }
}
return id;
}
开发者ID:Aleyasen,项目名称:Alaki,代码行数:19,代码来源:cliques.cpp
示例15: GetCandidates
/// Returns the stored data vectors whose ids share a bucket with Datum in at
/// least one band. Ids are gathered in a set first, so a vector that matches
/// in several bands is returned once.
TVec<TFltV> TLSHash::GetCandidates(TFltV Datum) {
  THashSet<TInt> IdSet;
  for (int Band = 0; Band < Bands; Band++) {
    const TInt Sig = ComputeSignature(Datum, Band);
    THash<TInt, TIntV>& BucketH = SigBucketVHV[Band];
    if (BucketH.IsKey(Sig)) {
      IdSet.AddKeyV(BucketH.GetDat(Sig));
    }
  }
  // Resolve the collected ids to the stored data vectors.
  TVec<TFltV> Result;
  for (int KeyId = IdSet.FFirstKeyId(); IdSet.FNextKeyId(KeyId); ) {
    const int Id = IdSet[KeyId];
    Result.Add(DataV[Id]);
  }
  return Result;
}
开发者ID:EDzhangjianyu,项目名称:snap,代码行数:20,代码来源:lsh.cpp
示例16: WordHashing
/// Adds the signature of every parsed-content word of every quote in the quote
/// base to Shingles. Logs progress every 1000 quotes and the final set size.
void LSH::WordHashing(TQuoteBase* QuoteBase, THashSet<TMd5Sig>& Shingles) {
  Err("Hashing shingles using words...\n");
  TIntV QuoteIds;
  QuoteBase->GetAllQuoteIds(QuoteIds);
  const int NumQuotes = QuoteIds.Len();
  for (int q = 0; q < NumQuotes; q++) {
    if (q % 1000 == 0) {
      Err("%d out of %d completed\n", q, NumQuotes);
    }
    TQuote Quote;
    QuoteBase->GetQuote(QuoteIds[q], Quote);
    TStrV Words;
    Quote.GetParsedContent(Words);
    for (int w = 0; w < Words.Len(); w++) {
      Shingles.AddKey(TMd5Sig(Words[w]));
    }
  }
  Err("Done with word hashing! Number of shingles: %d\n", Shingles.Len());
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:22,代码来源:lsh.cpp
示例17: ComputeSignatures
/// Appends NumSignatures values to the signature vector of every shingle.
///
/// In each round the shingle keys are shuffled and every shingle receives its
/// position in the shuffled order as the next signature value.
///
/// @param Shingles       The set of shingle signatures to permute.
/// @param Signatures     In/out: shingle -> list of positions, one per round.
/// @param NumSignatures  Number of rounds; nothing happens when it is < 1.
void LSH::ComputeSignatures(THashSet<TMd5Sig>& Shingles,
    THash<TMd5Sig, TIntV>& Signatures, int NumSignatures) {
  if (NumSignatures < 1) {
    return;
  }
  TRnd RandomGenerator; // TODO: make this "more random" by incorporating time
  const int NumShingles = Shingles.Len();
  for (int i = 0; i < NumSignatures; ++i) {
    // Create new signature: start from the set's key order each round, then
    // shuffle with the shared generator.
    TVec<TMd5Sig> Shuffle;
    Shingles.GetKeyV(Shuffle);
    Shuffle.Shuffle(RandomGenerator);
    for (int j = 0; j < NumShingles; j++) {
      // AddDat(Key) yields a reference to the stored vector (an empty one is
      // created for a new shingle), so the position is appended in place
      // instead of copying the vector out and back in on every step.
      Signatures.AddDat(Shuffle[j]).Add(j);
    }
  }
  Err("Computed %d signatures!\n", NumSignatures);
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:22,代码来源:lsh.cpp
示例18: BigMain
void BigMain(int argc, char* argv[]) {
TExeTm ExeTm;
Env = TEnv(argc, argv, TNotify::StdNotify);
Env.PrepArgs("QuotesApp");
const TStr ToDo = Env.GetIfArgPrefixStr("-do:", "", "To do").GetLc();
if (Env.IsEndOfRun()) {
printf("To do:\n");
printf(" MkDataset : Make memes dataset (extract quotes and save txt)\n");
printf(" ExtractSubset : Extract a subset of memes containing particular words\n");
printf(" MemesToQtBs : Load memes dataset and create quote base\n");
printf(" MkClustNet : Build cluster network from the quote base\n");
return;
}
#pragma region mkdataset
// extract quotes and links and make them into a single file
if (ToDo == "mkdataset") {
const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "files.txt", "Spinn3r input files (one file per line)");
const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "Spinn3r-dataset.txt", "Output file");
const int MinQtWrdLen = Env.GetIfArgPrefixInt("-w:", 3, "Minimum quote word length");
const TStr UrlFNm = Env.GetIfArgPrefixStr("-u:", "", "Seen url set (THashSet<TMd5Sig>) file name");
const bool UrlOnlyOnce = Env.GetIfArgPrefixBool("-q:", true, "Only keep unique Urls");
//// parse directly from Spinn3r
TStr Spinn3rFNm;
THashSet<TMd5Sig> SeenUrlSet;
if (UrlOnlyOnce && ! UrlFNm.Empty()) { // keep track of already seen urls (so that there are no duplicate urls)
TFIn FIn(UrlFNm); SeenUrlSet.Load(FIn);
}
FILE *F = fopen(OutFNm.CStr(), "wt");
TFIn FIn(InFNm);
int Items=0;
for (int f=0; FIn.GetNextLn(Spinn3rFNm); f++) {
TQuoteExtractor QE(Spinn3rFNm.ToTrunc());
printf("Processing %02d: %s [%s]\n", f+1, Spinn3rFNm.CStr(), TExeTm::GetCurTm());
fflush(stdout);
for (int item = 0; QE.Next(); item++) {
const TMd5Sig PostMd5(QE.PostUrlStr);
if (QE.QuoteV.Empty() && QE.LinkV.Empty()) { continue; } // no quotes, no links
if (UrlOnlyOnce) {
if (SeenUrlSet.IsKey(PostMd5)) { continue; }
SeenUrlSet.AddKey(PostMd5);
}
fprintf(F, "P\t%s\n", QE.PostUrlStr.CStr());
//if (QE.PubTm > TSecTm(2008,8,30) || QE.PubTm < TSecTm(2008,7,25)) { printf("%s\n", QE.PubTm.GetStr().CStr()); }
fprintf(F, "T\t%s\n", QE.PubTm.GetYmdTmStr().CStr());
for (int q = 0; q < QE.QuoteV.Len(); q++) {
if (TStrUtil::CountWords(QE.QuoteV[q]) >= MinQtWrdLen) {
fprintf(F, "Q\t%s\n", QE.QuoteV[q].CStr()); }
}
for (int l = 0; l < QE.LinkV.Len(); l++) {
fprintf(F, "L\t%s\n", QE.LinkV[l].CStr()); }
fprintf(F, "\n");
if (item>0 && item % Kilo(100) == 0) {
QE.DumpStat(); QE.ExeTm.Tick(); }
Items++;
}
printf("file done. Total %d all posts, %d all items\n", SeenUrlSet.Len(), Items);
fflush(stdout);
}
printf("all done. Saving %d post urls\n", SeenUrlSet.Len()); fflush(stdout);
if (! SeenUrlSet.Empty()) {
TFOut FOut(OutFNm.GetFMid()+".SeenUrlSet");
SeenUrlSet.Save(FOut);
}
fclose(F);
}
#pragma endregion mkdataset
#pragma region extractsubset
// save posts with memes containing particular words
else if (ToDo == "extractsubset") {
const TStr InFNmWc = Env.GetIfArgPrefixStr("-i:", "memes_*.rar", "Input file prefix");
const bool IsInFNmWc = Env.GetIfArgPrefixBool("-w:", true, "Input is wildcard (else a file with list of input files)");
const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "memes-subset.txt", "Output memes file");
const TStr WordsFNm = Env.GetIfArgPrefixStr("-p:", "phrases-in.txt", "Phrases that memes have to contain");
TChAV CatchMemeV;// = TStr::GetV("great depression", "economic meltdown", "recession had bottomed out", "green shoots", "slow recovery", "gradual recovery");
printf("Loading %s\n", WordsFNm.CStr());
{ TFIn FIn(WordsFNm);
for (TStr Ln; FIn.GetNextLn(Ln); ) {
printf(" %s\n", Ln.GetLc().CStr());
CatchMemeV.Add(Ln.GetLc()); }
}
printf("%d strings loaded\n", CatchMemeV.Len());
TFOut FOut(OutFNm);
TMemesDataLoader Memes(InFNmWc, IsInFNmWc);
for (int posts = 0, nsave=0; Memes.LoadNext(); posts++) {
bool DoSave = false;
for (int m = 0; m < Memes.MemeV.Len(); m++) {
for (int i = 0; i < CatchMemeV.Len(); i++) {
if (Memes.MemeV[m].SearchStr(CatchMemeV[i]) != -1) {
DoSave=true; break; }
}
if (DoSave) { break; }
}
if (DoSave) { Memes.SaveTxt(FOut); nsave++; }
if (posts % Mega(1) == 0) {
printf("%dm posts, %d saved\n", posts/Mega(1), nsave);
FOut.Flush();
}
}
//.........这里部分代码省略.........
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:101,代码来源:memeclust.cpp
示例19: WarmUpRendering
//----------------------------------------------------------------------------
// Submits this subtree once, and then a cube with every material found in
// the subtree, through pRenderer, using a temporary camera placed one bound
// radius along +Z from the centre of the world bound and looking along -Z.
void Node::WarmUpRendering(Renderer* pRenderer)
{
#ifndef WIRE_WII // Wii does not need to warm up by submitting draw calls
	WIRE_ASSERT(pRenderer);

	UpdateGS(0, true, false);

	// Camera frame: positioned relative to the world bound, looking down -Z.
	Vector3F eye = WorldBound->GetCenter();
	eye.Z() += WorldBound->GetRadius();
	Vector3F lookDir = -Vector3F::UNIT_Z;
	Vector3F upDir = Vector3F::UNIT_Y;
	Vector3F rightDir = lookDir.Cross(upDir);

	CameraPtr spCamera = WIRE_NEW Camera;
	spCamera->SetFrame(eye, lookDir, upDir, rightDir);

	Float fov = 60.0F;
	Float aspect = 2;
	Float zNear = 0.1F;
	Float zFar = WorldBound->GetRadius() * 2.0F;
	spCamera->SetFrustum(fov, aspect, zNear, zFar);

	CullerSorting culler;
	culler.SetCamera(spCamera);
	culler.ComputeVisibleSet(this);

	pRenderer->PreDraw(spCamera);

	// draw scene to warm up batching buffers
	pRenderer->Draw(culler.GetVisibleSets());

	// collect and draw all materials separately so none will be missed
	// by CULL_ALWAYS or Switch/LOD nodes.
	THashSet<Material*> usedMaterials;
	TStack<Node*> pending(1000);
	pending.Push(this);
	while (!pending.IsEmpty())
	{
		Node* pCurrent = NULL;
		pending.Pop(pCurrent);

		RenderObject* pObject = pCurrent->GetRenderObject();
		if (pObject && pObject->GetMaterial())
		{
			usedMaterials.Insert(pObject->GetMaterial());
		}

		// Only children that are themselves Nodes are traversed further.
		for (UInt i = 0; i < pCurrent->GetQuantity(); i++)
		{
			Node* pChildNode = DynamicCast<Node>(pCurrent->GetChild(i));
			if (pChildNode)
			{
				pending.Push(pChildNode);
			}
		}
	}

	RenderObjectPtr spCube = StandardMesh::CreateCube24(4, pRenderer->
		GetMaxTextureStages(), true);
	THashSet<Material*>::Iterator materialIt(&usedMaterials);
	Transformation cubeTransform;
	cubeTransform.SetTranslate(eye - Vector3F(0, 0, 3));

	Material** ppMaterial = materialIt.GetFirst();
	while (ppMaterial)
	{
		spCube->SetMaterial(*ppMaterial);
		pRenderer->Draw(spCube, cubeTransform);
		ppMaterial = materialIt.GetNext();
	}

	pRenderer->PostDraw();
#endif
}
开发者ID:cbalderrama,项目名称:wire3d,代码行数:70,代码来源:WireNode.cpp
示例20: MinHash
/// Fills the per-band buckets used for min-hash candidate generation.
///
/// Computes NumBands * BandSize signature values per shingle, appends NumBands
/// empty bucket hashes to SignatureBandBuckets, and then, for every quote with
/// fewer than WordWindow parsed words, derives one band signature per band
/// (the minimum signature value over the quote's words for each position in
/// the band, joined with "-") and adds the quote id to the bucket keyed by the
/// hash of that band signature. Quotes with WordWindow or more words are not
/// bucketed.
///
/// @param QB                    Quote base providing the id -> quote map.
/// @param Shingles              Set of word shingles, as filled by WordHashing().
/// @param SignatureBandBuckets  Out: one hash per band, band signature -> quote ids.
void LSH::MinHash(TQuoteBase *QB, THashSet<TMd5Sig>& Shingles,
    TVec<THash<TMd5Sig, TIntSet> >& SignatureBandBuckets) {
  Err("Creating buckets...\n");
  THash<TMd5Sig, TIntV> Signatures;
  ComputeSignatures(Shingles, Signatures, NumBands * BandSize);

  // bucket creation
  for (int i = 0; i < NumBands; ++i) {
    SignatureBandBuckets.Add(THash<TMd5Sig, TIntSet>());
  }

  // bucket filling
  const int NumShingles = Shingles.Len();
  THash<TInt, TQuote> Quotes;
  QB->GetIdToTQuotes(Quotes);
  TQuote Q;
  for (THash<TInt, TQuote>::TIter CurI = Quotes.BegI(); CurI < Quotes.EndI(); CurI++) {
    Q = CurI.GetDat();
    TStrV Content;
    Q.GetParsedContent(Content);
    const int ContentLen = Content.Len();
    // Only quotes shorter than WordWindow are bucketed, so skip the others
    // before paying for the per-word signature lookups and copies.
    if (!(ContentLen < WordWindow)) {
      continue;
    }
    const TInt Id = Q.GetId();

    // signature for quote: one signature vector per word
    TVec<TIntV> Signature;
    for (int k = 0; k < ContentLen; k++) {
      const TMd5Sig ShingleMd5(Content[k]);
      Signature.Add(Signatures.GetDat(ShingleMd5));
    }

    // place in bucket
    for (int i = 0; i < NumBands; ++i) {
      TStr Sig;
      for (int j = 0; j < BandSize; ++j) {
        const int CurSig = i * BandSize + j;
        // Positions are below NumShingles, so it serves as the initial
        // "larger than anything" value.
        TInt MinVal = NumShingles;
        for (int k = 0; k < ContentLen; k++) {
          if (Signature[k][CurSig] < MinVal) {
            MinVal = Signature[k][CurSig];
          }
        }
        Sig += MinVal.GetStr() + "-";
      }
      const TMd5Sig SigMd5(Sig);
      // AddDat(Key) yields a reference to the stored set (an empty one is
      // created for a new band signature), so the id is inserted in place
      // instead of copying the whole bucket out and back in.
      SignatureBandBuckets[i].AddDat(SigMd5).AddKey(Id);
    }
  }
  Err("Minhash step complete!\n");
}
开发者ID:snap-stanford,项目名称:curis-2012,代码行数:66,代码来源:lsh.cpp
注:本文中的THashSet类示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论