本文整理汇总了C++中TIntFltH类的典型用法代码示例。如果您正苦于以下问题：C++ TIntFltH类的具体用法？C++ TIntFltH怎么用？C++ TIntFltH使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了TIntFltH类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: GetWeightedPageRankMP1
/// Computes weighted PageRank scores for an attributed network using OpenMP loops.
///
/// @param Graph   Network whose edges carry the float attribute @p Attr.
/// @param PRankH  Output table; it is re-generated and receives one score per node id.
/// @param Attr    Name of the float edge attribute used as the edge weight.
/// @param C       Factor multiplied into the rank propagated along in-edges.
/// @param Eps     Iteration stops once the summed absolute change drops below this value.
/// @param MaxIter Upper bound on the number of iterations.
/// @return -1 when @p Attr is not a float edge attribute of @p Graph, 0 otherwise.
int GetWeightedPageRankMP1(const PNEANet Graph, TIntFltH& PRankH, const TStr& Attr, const double& C, const double& Eps, const int& MaxIter) {
  if (!Graph->IsFltAttrE(Attr)) return -1;

  // Edge weights plus the per-node total outgoing weight (indexed by node id).
  TFltV Weights = Graph->GetFltAttrVecE(Attr);
  int mxid = Graph->GetMxNId();
  TFltV OutWeights(mxid);
  Graph->GetWeightOutEdgesV(OutWeights, Weights);

  const int NNodes = Graph->GetNodes();
  TVec<TNEANet::TNodeI> NV;
  PRankH.Gen(NNodes);
  // NV[j] and PRankH[j] refer to the same node because both are filled in the same order.
  for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    NV.Add(NI);
    PRankH.AddDat(NI.GetId(), 1.0/NNodes);
  }

  TFltV TmpV(NNodes);
  for (int iter = 0; iter < MaxIter; iter++) {
    #pragma omp parallel for schedule(dynamic,10000)
    for (int j = 0; j < NNodes; j++) {
      TNEANet::TNodeI NI = NV[j];
      TmpV[j] = 0;
      for (int e = 0; e < NI.GetInDeg(); e++) {
        const int InNId = NI.GetInNId(e);
        const TFlt OutWeight = OutWeights[InNId];
        int EId = Graph->GetEId(InNId, NI.GetId());
        const TFlt Weight = Weights[Graph->GetFltKeyIdE(EId)];
        if (OutWeight > 0) {
          TmpV[j] += PRankH.GetDat(InNId) * Weight / OutWeight;
        }
      }
      TmpV[j] = C*TmpV[j]; // Berkhin (the correct way of doing it)
      //TmpV[j] = C*TmpV[j] + (1.0-C)*OneOver; // iGraph
    }

    double diff = 0, sum = 0;
    #pragma omp parallel for reduction(+:sum) schedule(dynamic,10000)
    for (int i = 0; i < TmpV.Len(); i++) { sum += TmpV[i]; }
    const double Leaked = (1.0-sum) / double(NNodes);

    #pragma omp parallel for reduction(+:diff) schedule(dynamic,10000)
    for (int i = 0; i < PRankH.Len(); i++) { // re-insert leaked PageRank
      // Declared inside the loop so that every thread has its own copy; a single
      // variable declared outside the parallel region would be shared and raced on.
      const double NewVal = TmpV[i] + Leaked; // Berkhin
      //NewVal = TmpV[i] / sum; // iGraph
      diff += fabs(NewVal-PRankH[i]);
      PRankH[i] = NewVal;
    }
    if (diff < Eps) { break; }
  }
  return 0;
}
开发者ID:sramas15,项目名称:snapr,代码行数:59,代码来源:centr.cpp
示例2: LikelihoodForRow
double TAGMFast::LikelihoodForRow(const int UID, const TIntFltH& FU) {
double L = 0.0;
TFltV HOSumFV; //adjust for Fv of v hold out
if (HOVIDSV[UID].Len() > 0) {
HOSumFV.Gen(SumFV.Len());
for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
for (int c = 0; c < SumFV.Len(); c++) {
HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
}
}
}
TUNGraph::TNodeI NI = G->GetNI(UID);
if (DoParallel && NI.GetDeg() > 10) {
#pragma omp parallel for schedule(static, 1)
for (int e = 0; e < NI.GetDeg(); e++) {
int v = NI.GetNbrNId(e);
if (v == UID) { continue; }
if (HOVIDSV[UID].IsKey(v)) { continue; }
double LU = log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]);
#pragma omp atomic
L += LU;
}
for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
double LU = NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat();
L -= LU;
}
} else {
for (int e = 0; e < NI.GetDeg(); e++) {
int v = NI.GetNbrNId(e);
if (v == UID) { continue; }
if (HOVIDSV[UID].IsKey(v)) { continue; }
L += log (1.0 - Prediction(FU, F[v])) + NegWgt * DotProduct(FU, F[v]);
}
for (TIntFltH::TIter HI = FU.BegI(); HI < FU.EndI(); HI++) {
double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[HI.GetKey()].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
L -= NegWgt * (SumFV[HI.GetKey()] - HOSum - GetCom(UID, HI.GetKey())) * HI.GetDat();
}
}
//add regularization
if (RegCoef > 0.0) { //L1
L -= RegCoef * Sum(FU);
}
if (RegCoef < 0.0) { //L2
L += RegCoef * Norm2(FU);
}
return L;
}
开发者ID:alwayskidd,项目名称:snap,代码行数:51,代码来源:agmfast.cpp
示例3: GetEigenVectorCentr
void GetEigenVectorCentr(const PUNGraph& Graph, TIntFltH& EigenH, const double& Eps, const int& MaxIter) {
const int NNodes = Graph->GetNodes();
EigenH.Gen(NNodes);
for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
EigenH.AddDat(NI.GetId(), 1.0/NNodes);
IAssert(NI.GetId() == EigenH.GetKey(EigenH.Len()-1));
}
TFltV TmpV(NNodes);
double diff = TFlt::Mx;
for (int iter = 0; iter < MaxIter; iter++) {
int j = 0;
for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
TmpV[j] = 0;
for (int e = 0; e < NI.GetOutDeg(); e++) {
TmpV[j] += EigenH.GetDat(NI.GetOutNId(e)); }
}
double sum = 0;
for (int i = 0; i < TmpV.Len(); i++) {
EigenH[i] = TmpV[i];
sum += EigenH[i];
}
for (int i = 0; i < EigenH.Len(); i++) {
EigenH[i] /= sum; }
if (fabs(diff-sum) < Eps) { break; }
//printf("\tdiff:%f\tsum:%f\n", fabs(diff-sum), sum);
diff = sum;
}
}
开发者ID:Networks-Learning,项目名称:infopath,代码行数:28,代码来源:centr.cpp
示例4: TEST
// Checks the hash-table overload of GetNodeClustCf on the three test graph types.
TEST(triad, TestGetNodeClustCfVector) {
  TIntFltH ClustCfByNId;

  // Undirected graph (TUNGraph)
  PUNGraph UndirGraph = TriadGetTestTUNGraph();
  TSnap::GetNodeClustCf(UndirGraph, ClustCfByNId);
  for (int NId = 0; NId < UndirGraph->GetNodes(); NId++) {
    const double Cf = ClustCfByNId.GetDat(NId);
    VerifyNodeClustCf(NId, Cf);
  }

  // Directed graph (TNGraph): should be treated as TUNGraph for calculations
  PNGraph DirGraph = TriadGetTestTNGraph();
  ClustCfByNId.Clr();
  TSnap::GetNodeClustCf(DirGraph, ClustCfByNId);
  for (int NId = 0; NId < DirGraph->GetNodes(); NId++) {
    const double Cf = ClustCfByNId.GetDat(NId);
    VerifyNodeClustCf(NId, Cf);
  }

  // Directed multigraph (TNEGraph): should be treated as TUNGraph for calculations
  PNEGraph MultiGraph = TriadGetTestTNEGraph();
  ClustCfByNId.Clr();
  TSnap::GetNodeClustCf(MultiGraph, ClustCfByNId);
  for (int NId = 0; NId < MultiGraph->GetNodes(); NId++) {
    const double Cf = ClustCfByNId.GetDat(NId);
    VerifyNodeClustCf(NId, Cf);
  }
}
开发者ID:Antobiotics,项目名称:snap,代码行数:32,代码来源:test-triad.cpp
示例5: getPageRankFromVect
/// Samples (source, destination) node pairs and writes their PageRank scores,
/// computed on the sub-graph returned by getFourHopGraph(), to a file.
///
/// A pair is accepted only when both ids are nodes of @p graph and the value
/// returned by GetShortPath() for the pair is 3 or 4. For every accepted pair
/// one tab-separated line "src  srcPR  dst  dstPR" is written to @p fileName
/// and echoed to stdout; each PR value is the PageRank score multiplied by the
/// number of nodes in the sub-graph.
///
/// @param graph    Directed input graph.
/// @param srcIds   Candidate source node ids (taken by value; shuffled locally).
/// @param dstIds   Candidate destination node ids (taken by value; shuffled locally).
/// @param sampleSz Number of accepted pairs to produce.
/// @param fileName Path of the output file.
///
/// NOTE(review): the loop only advances on an accepted pair, so it does not
/// terminate if no candidate pair can satisfy the path-length condition.
void getPageRankFromVect(const PNGraph& graph, std::vector<int> srcIds, std::vector<int> dstIds, int sampleSz, char* fileName) {
  std::random_shuffle(srcIds.begin(), srcIds.end());
  std::random_shuffle(dstIds.begin(), dstIds.end());

  std::ofstream outputFile;
  outputFile.open(fileName);

  // rand() % size() below is undefined behaviour (division by zero) for an
  // empty candidate list, so there is nothing to sample in that case.
  if (srcIds.empty() || dstIds.empty()) {
    outputFile.close();
    return;
  }

  for (int i = 0; i < sampleSz; ) {
    int srcNodeId = srcIds[rand() % srcIds.size()];
    int dstNodeId = dstIds[rand() % dstIds.size()];
    if (!graph->IsNode(srcNodeId)) continue;
    if (!graph->IsNode(dstNodeId)) continue;

    int shortPath = TSnap::GetShortPath(graph, srcNodeId, dstNodeId, true);
    if (shortPath > 4 || shortPath <= 2) continue;

    PNGraph subgraph = getFourHopGraph(graph, srcNodeId, dstNodeId);
    TIntFltH pageRankScores;
    TSnap::GetPageRank(subgraph, pageRankScores);

    int numOfNodesInSubGraph = subgraph->GetNodes();
    double normalizedSrcPR = pageRankScores.GetDat(srcNodeId) * numOfNodesInSubGraph;
    double normalizedDstPR = pageRankScores.GetDat(dstNodeId) * numOfNodesInSubGraph;

    char buffer[100];
    printf("%d, %d\n", i, numOfNodesInSubGraph);
    // snprintf bounds the write: "%f" of a large double can need far more than
    // the 100 bytes available, which would overflow the buffer with sprintf.
    snprintf(buffer, sizeof(buffer), "%d\t%f\t%d\t%f", srcNodeId, normalizedSrcPR, dstNodeId, normalizedDstPR);
    std::cout << buffer << std::endl;
    outputFile << buffer << std::endl;
    ++i;
  }
  outputFile.close();
}
开发者ID:ziyan,项目名称:unfollow,代码行数:38,代码来源:experiments.cpp
示例6: GetStepSizeByLineSearch
double TAGMFast::GetStepSizeByLineSearch(const int UID, const TIntFltH& DeltaV, const TIntFltH& GradV, const double& Alpha, const double& Beta, const int MaxIter) {
double StepSize = 1.0;
double InitLikelihood = LikelihoodForRow(UID);
TIntFltH NewVarV(DeltaV.Len());
for(int iter = 0; iter < MaxIter; iter++) {
for (int i = 0; i < DeltaV.Len(); i++){
int CID = DeltaV.GetKey(i);
double NewVal = GetCom(UID, CID) + StepSize * DeltaV.GetDat(CID);
if (NewVal < MinVal) { NewVal = MinVal; }
if (NewVal > MaxVal) { NewVal = MaxVal; }
NewVarV.AddDat(CID, NewVal);
}
if (LikelihoodForRow(UID, NewVarV) < InitLikelihood + Alpha * StepSize * DotProduct(GradV, DeltaV)) {
StepSize *= Beta;
} else {
break;
}
if (iter == MaxIter - 1) {
StepSize = 0.0;
break;
}
}
return StepSize;
}
开发者ID:alwayskidd,项目名称:snap,代码行数:24,代码来源:agmfast.cpp
示例7: GetEigenVectorCentr
void GetEigenVectorCentr(const PUNGraph& Graph, TIntFltH& NIdEigenH, const double& Eps, const int& MaxIter) {
const int NNodes = Graph->GetNodes();
NIdEigenH.Gen(NNodes);
// initialize vector values
for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
NIdEigenH.AddDat(NI.GetId(), 1.0 / NNodes);
IAssert(NI.GetId() == NIdEigenH.GetKey(NIdEigenH.Len() - 1));
}
TFltV TmpV(NNodes);
for (int iter = 0; iter < MaxIter; iter++) {
int j = 0;
// add neighbor values
for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
TmpV[j] = 0;
for (int e = 0; e < NI.GetOutDeg(); e++) {
TmpV[j] += NIdEigenH.GetDat(NI.GetOutNId(e));
}
}
// normalize
double sum = 0;
for (int i = 0; i < TmpV.Len(); i++) {
sum += (TmpV[i] * TmpV[i]);
}
sum = sqrt(sum);
for (int i = 0; i < TmpV.Len(); i++) {
TmpV[i] /= sum;
}
// compute difference
double diff = 0.0;
j = 0;
for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
diff += fabs(NIdEigenH.GetDat(NI.GetId()) - TmpV[j]);
}
// set new values
j = 0;
for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
NIdEigenH.AddDat(NI.GetId(), TmpV[j]);
}
if (diff < Eps) {
break;
}
}
}
开发者ID:Austindeadhead,项目名称:qminer,代码行数:47,代码来源:centr.cpp
示例8: GetGiniCoefficient
// Computes GINI coefficient of egonet as a subset of the parent graph (edges into and out of the egonet ARE considered)
//
// DegH maps node id -> degree value; NIdV lists the node ids to include.
// Every id in NIdV is looked up in DegH with GetDat().
// Returns 2*sum((i+1)*x_i) / (n*sum(x_i)) - (n+1)/n over the sorted values x_i,
// or 0.0 when NIdV is empty or all selected values sum to zero (the formula
// would otherwise divide by zero and yield NaN).
double TSnap::GetGiniCoefficient(const TIntFltH DegH, const TIntV NIdV) {
  const int n = NIdV.Len();
  if (n == 0) { return 0.0; }

  // Filled with Add() rather than Gen(): don't use Gen() and Sort() on the same object (!)
  TFltV DegV;
  for (TIntV::TIter VI = NIdV.BegI(); VI < NIdV.EndI(); VI++) {
    DegV.Add(DegH.GetDat(VI->Val)); // might need to change this (in / out / undirected)
  }
  DegV.Sort();

  double numerator = 0.0, denominator = 0.0;
  for (int i = 0; i < DegV.Len(); i++) {
    numerator += (i + 1)*DegV[i];
    denominator += DegV[i];
  }
  if (denominator == 0.0) { return 0.0; }

  return(double(2*numerator) / double(n*denominator) - double(n + 1) / double(n));
}
开发者ID:huisaddison,项目名称:snap,代码行数:19,代码来源:wstats.cpp
示例9: main
/// Command-line tool: loads a directed edge list ("-i:"), runs a fixed number
/// of "inverse PageRank" iterations (rank flows from a node's out-neighbors,
/// each divided by that neighbor's in-degree) and writes one score per node to
/// the output file ("-o:").
///
/// This variant does not redistribute leaked rank and does not test for
/// convergence; it always runs MaxIter iterations.
///
/// @return 0 on completion, 1 if the output file cannot be opened.
int main(int argc, char* argv[]) {
  Env = TEnv(argc, argv, TNotify::StdNotify);
  Env.PrepArgs(TStr::Fmt("Inverse PageRank. Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
  TExeTm ExeTm;
  Try
    const TStr Iput = Env.GetIfArgPrefixStr("-i:", "Input.txt", "Input File" );
    const TStr Oput = Env.GetIfArgPrefixStr("-o:", "Output.txt", "Output File");

    // The input is read by LoadEdgeList() below, so no separate input handle is opened.
    FILE* fpO = fopen(Oput.CStr(), "w");
    if (fpO == NULL) {
      fprintf(stderr, "Cannot open output file %s\n", Oput.CStr());
      return 1;
    }

    const double C = 0.85;
    const int MaxIter = 50;

    PNGraph Graph = TSnap::LoadEdgeList< PNGraph > (Iput);
    fprintf(fpO, "\nNodes: %d, Edges: %d\n\n", Graph->GetNodes(), Graph->GetEdges());

    const int NNodes = Graph->GetNodes();
    const double OneOver = (double) 1.0 / (double) NNodes;

    // Uniform start; PRankH[j] is the j-th node in iteration order.
    TIntFltH PRankH;
    PRankH.Gen(NNodes);
    for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++)
      PRankH.AddDat(NI.GetId(), OneOver);

    TFltV TmpV(NNodes);
    for (int iter = 0; iter < MaxIter; iter++) {
      int j = 0;
      for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
        TmpV[j] = 0;
        for (int e = 0; e < NI.GetOutDeg(); e++) {
          const int OutNId = NI.GetOutNId(e);
          const int InDeg = Graph->GetNI(OutNId).GetInDeg();
          if (InDeg > 0)
            TmpV[j] += PRankH.GetDat(OutNId) / InDeg;
        }
        TmpV[j] = C * TmpV[j];
      }
      for (int i = 0; i < PRankH.Len(); i++)
        PRankH[i] = TmpV[i];
    }

    fprintf(fpO, "Node ID\t\tInverse PageRank\n");
    for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){
      int Id = NI.GetId();
      double ipr = PRankH.GetDat(Id);
      fprintf(fpO, "%d\t\t\t%.5lf\n", Id, ipr);
    }
    // Close explicitly so the handle is released and buffered output is flushed.
    fclose(fpO);
  Catch
  printf("\nRun Time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
  return 0;
}
开发者ID:SITZ,项目名称:OSN-Ranking-Algorithms,代码行数:69,代码来源:ipagerank.cpp
示例10: GenCascade
void TNetInfBs::GenCascade(TCascade& C, const int& TModel, const double &window, TIntPrIntH& EdgesUsed, const double& delta,
const double& std_waiting_time, const double& std_beta) {
TIntFltH InfectedNIdH; TIntH InfectedBy;
double GlobalTime; int StartNId;
double alpha, beta;
if (GroundTruth->GetNodes() == 0)
return;
while (C.Len() < 2) {
C.Clr();
InfectedNIdH.Clr();
InfectedBy.Clr();
GlobalTime = 0;
StartNId = GroundTruth->GetRndNId();
InfectedNIdH.AddDat(StartNId) = GlobalTime;
while (true) {
// sort by time & get the oldest node that did not run infection
InfectedNIdH.SortByDat(true);
const int& NId = InfectedNIdH.BegI().GetKey();
GlobalTime = InfectedNIdH.BegI().GetDat();
// all the nodes has run infection
if (GlobalTime >= window)
break;
// add current oldest node to the network and set its time
C.Add(NId, GlobalTime);
// run infection from the current oldest node
const TNGraph::TNodeI NI = GroundTruth->GetNI(NId);
for (int e = 0; e < NI.GetOutDeg(); e++) {
const int DstNId = NI.GetOutNId(e);
beta = Betas.GetDat(TIntPr(NId, DstNId));
// flip biased coin (set by beta)
if (TInt::Rnd.GetUniDev() > beta+std_beta*TFlt::Rnd.GetNrmDev())
continue;
alpha = Alphas.GetDat(TIntPr(NId, DstNId));
// not infecting the parent
if (InfectedBy.IsKey(NId) && InfectedBy.GetDat(NId).Val == DstNId)
continue;
double sigmaT;
switch (TModel) {
case 0:
// exponential with alpha parameter
sigmaT = TInt::Rnd.GetExpDev(alpha);
break;
case 1:
// power-law with alpha parameter
sigmaT = TInt::Rnd.GetPowerDev(alpha);
while (sigmaT < delta) { sigmaT = TInt::Rnd.GetPowerDev(alpha); }
break;
case 2:
// rayleigh with alpha parameter
sigmaT = TInt::Rnd.GetRayleigh(1/sqrt(alpha));
break;
default:
sigmaT = 1;
break;
}
// avoid negative time diffs in case of noise
if (std_waiting_time > 0)
sigmaT = TFlt::GetMx(0.0, sigmaT + std_waiting_time*TFlt::Rnd.GetNrmDev());
double t1 = GlobalTime + sigmaT;
if (InfectedNIdH.IsKey(DstNId)) {
double t2 = InfectedNIdH.GetDat(DstNId);
if (t2 > t1 && t2 != window) {
InfectedNIdH.GetDat(DstNId) = t1;
InfectedBy.GetDat(DstNId) = NId;
}
} else {
InfectedNIdH.AddDat(DstNId) = t1;
InfectedBy.AddDat(DstNId) = NId;
}
}
// we cannot delete key (otherwise, we cannot sort), so we assign a big time (window cut-off)
InfectedNIdH.GetDat(NId) = window;
}
}
C.Sort();
for (TIntH::TIter EI = InfectedBy.BegI(); EI < InfectedBy.EndI(); EI++) {
TIntPr Edge(EI.GetDat().Val, EI.GetKey().Val);
if (!EdgesUsed.IsKey(Edge)) EdgesUsed.AddDat(Edge) = 0;
EdgesUsed.GetDat(Edge) += 1;
//.........这里部分代码省略.........
开发者ID:blizzardwj,项目名称:ML_netinf,代码行数:101,代码来源:cascinf.cpp
示例11: main
int main(int argc, char* argv[]) {
setbuf(stdout, NULL); // disables the buffer so that print statements are not buffered and display immediately (?)
Env = TEnv(argc, argv, TNotify::StdNotify);
Env.PrepArgs(TStr::Fmt("Node centrality. build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
TExeTm ExeTm;
Try
const TStr InFNm = Env.GetIfArgPrefixStr("-i:", "", "input network");
const TStr OutFNm = Env.GetIfArgPrefixStr("-o:", "", "output prefix (filename extensions added)");
const TStr BseFNm = OutFNm.RightOfLast('/');
const int k = Env.GetIfArgPrefixInt("-k:", 1, "depth of weighted degree distributions (1 / 2 / ...)");
const bool c = Env.GetIfArgPrefixBool("-c:", false, "collate centralities into matrix (T / F)");
// Load graph and create directed and undirected graphs (pointer to the same memory)
printf("\nLoading %s...", InFNm.CStr());
PFltWNGraph WGraph = TSnap::LoadFltWEdgeList<TWNGraph>(InFNm);
printf(" DONE\n");
printf(" nodes: %d\n", WGraph->GetNodes());
printf(" edges: %d\n", WGraph->GetEdges());
printf(" time elapsed: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
// Declare variables
TIntFltVH FirstWDegVH;
TIntFltVH kWInDegVH, kWOutDegVH, kWDegVH;
TIntFltVH WDegCentrVH, WEigCentrVH;
TFltV WEigDiffV;
TIntFltH WPgRH;
double WPgRDiff;
TFltWNGraph::TNodeI NI;
TFltV::TIter VI;
// CENTRALITY (computations)
// Weighted first degree distributions
printf("\nComputing weighted degree distributions...");
TSnap::GetWDegVH(WGraph, FirstWDegVH);
printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
// 1:k degree distributions
printf("Computing egonet degrees for k = 1 to %d (in / out / undirected)\n", k);
TSnap::TFixedMemorykWDeg<TFlt, TWNGraph> FixedMemorykWDeg(WGraph, k);
printf(" ...");
FixedMemorykWDeg.GetkWInDegSeqH(kWInDegVH);
printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
printf(" ...");
FixedMemorykWDeg.GetkWOutDegSeqH(kWOutDegVH);
printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
printf(" ...");
FixedMemorykWDeg.GetkWDegSeqH(kWDegVH);
printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
// Centrality measures
printf("Computing weighted degree centrality...");
TSnap::GetWDegreeCentrVH(WGraph, WDegCentrVH, 0.5);
printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
printf("Computing weighted eigenvector centrality...");
WEigDiffV = TSnap::GetWEigenVectorCentrVH<TFlt>(WGraph, WEigCentrVH, 1e-4, 1000);
printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
printf(" convergence differences (in / out / undirected)\n");
printf(" %f\n", double(WEigDiffV[0]));
printf(" %f\n", double(WEigDiffV[1]));
printf(" %f\n", double(WEigDiffV[2]));
printf("Computing weighted PageRank centrality...");
WPgRDiff = TSnap::GetWPageRank<TFlt>(WGraph, WPgRH, 0.85, 1e-4, 1000);
printf(" DONE (time elapsed: %s (%s))\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
printf(" convergence difference: %f\n", double(WPgRDiff));
// OUTPUTTING (mostly verbose printing statements, don't get scared)
if (c) {
printf("\nSaving %s.wcentr...", BseFNm.CStr());
const TStr AggFNm = TStr::Fmt("%s.wcentr", OutFNm.CStr());
FILE *F = fopen(AggFNm.CStr(), "wt");
fprintf(F,"# Node centrality distributions on the directed / undirected graph (as applicable)\n");
fprintf(F,"# Nodes: %d\tEdges: %d\n", WGraph->GetNodes(), WGraph->GetEdges());
fprintf(F,"# NodeId\tWInDegCentr\tWOutDegCentr\tWDegCentr\tWInEigCentr\tWOutEigCentr\tWEigCentr\tWPgRCentr\n");
for (NI = WGraph->BegNI(); NI < WGraph->EndNI(); NI++) {
const int NId = NI.GetId(); fprintf(F, "%d", NId);
const TFltV WDegCentrV = WDegCentrVH.GetDat(NId);
for (VI = WDegCentrV.BegI(); VI < WDegCentrV.EndI(); VI++) { fprintf(F, "\t%f", VI->Val); }
const TFltV WEigCentrV = WEigCentrVH.GetDat(NId);
for (VI = WEigCentrV.BegI(); VI < WEigCentrV.EndI(); VI++) { fprintf(F, "\t%f", VI->Val); }
const double WPgRCentr = WPgRH.GetDat(NId); fprintf(F, "\t%f", WPgRCentr);
fprintf(F, "\n");
}
printf(" DONE\n");
} else {
printf("\nSaving %s.wdeg.centr...", BseFNm.CStr());
//.........这里部分代码省略.........
开发者ID:bpark738,项目名称:snap,代码行数:101,代码来源:wcentrality.cpp
示例12: GetWeightedPageRankMP2
/// Computes weighted PageRank scores for an attributed network using OpenMP,
/// keeping the working scores in a vector indexed by node id.
///
/// @param Graph   Network whose edges carry the float attribute @p Attr.
/// @param PRankH  Output table; it is re-generated and receives one score per node id.
/// @param Attr    Name of the float edge attribute used as the edge weight.
/// @param C       Factor multiplied into the rank propagated along in-edges.
/// @param Eps     Iteration stops once the summed absolute change drops below this value.
/// @param MaxIter Upper bound on the number of iterations.
/// @return -1 when @p Attr is not a float edge attribute of @p Graph, 0 otherwise.
int GetWeightedPageRankMP2(const PNEANet Graph, TIntFltH& PRankH, const TStr& Attr, const double& C, const double& Eps, const int& MaxIter) {
  if (!Graph->IsFltAttrE(Attr)) return -1;

  const int NNodes = Graph->GetNodes();
  TVec<TNEANet::TNodeI> NV;
  PRankH.Gen(NNodes);

  // Must start below every valid id: it was previously read uninitialized.
  int MxId = -1;
  for (TNEANet::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
    NV.Add(NI);
    PRankH.AddDat(NI.GetId(), 1.0/NNodes);
    int Id = NI.GetId();
    if (Id > MxId) {
      MxId = Id;
    }
  }

  // Both vectors are indexed directly by node id.
  TFltV PRankV(MxId+1);
  TFltV OutWeights(MxId+1);
  TFltV Weights = Graph->GetFltAttrVecE(Attr);

  #pragma omp parallel for schedule(dynamic,10000)
  for (int j = 0; j < NNodes; j++) {
    TNEANet::TNodeI NI = NV[j];
    int Id = NI.GetId();
    OutWeights[Id] = Graph->GetWeightOutEdges(NI, Attr);
    // 1.0/NNodes, not 1/NNodes: integer division gave 0 for every graph with more than one node.
    PRankV[Id] = 1.0/NNodes;
  }

  TFltV TmpV(NNodes);
  for (int iter = 0; iter < MaxIter; iter++) {
    #pragma omp parallel for schedule(dynamic,10000)
    for (int j = 0; j < NNodes; j++) {
      TNEANet::TNodeI NI = NV[j];
      TFlt Tmp = 0;
      for (int e = 0; e < NI.GetInDeg(); e++) {
        const int InNId = NI.GetInNId(e);
        const TFlt OutWeight = OutWeights[InNId];
        int EId = Graph->GetEId(InNId, NI.GetId());
        const TFlt Weight = Weights[Graph->GetFltKeyIdE(EId)];
        if (OutWeight > 0) {
          // Read the working vector: PRankH is only written after the last
          // iteration, so reading it here would reuse the initial scores forever.
          Tmp += PRankV[InNId] * Weight / OutWeight;
        }
      }
      TmpV[j] = C*Tmp; // Berkhin (the correct way of doing it)
      //TmpV[j] = C*TmpV[j] + (1.0-C)*OneOver; // iGraph
    }

    double sum = 0;
    #pragma omp parallel for reduction(+:sum) schedule(dynamic,10000)
    for (int i = 0; i < TmpV.Len(); i++) { sum += TmpV[i]; }
    const double Leaked = (1.0-sum) / double(NNodes);

    double diff = 0;
    #pragma omp parallel for reduction(+:diff) schedule(dynamic,10000)
    for (int i = 0; i < NNodes; i++) {
      TNEANet::TNodeI NI = NV[i];
      double NewVal = TmpV[i] + Leaked; // Berkhin
      //NewVal = TmpV[i] / sum; // iGraph
      int Id = NI.GetId();
      diff += fabs(NewVal-PRankV[Id]);
      PRankV[Id] = NewVal;
    }
    if (diff < Eps) { break; }
  }

  // Publish the final scores; PRankH[i] belongs to NV[i] because both were filled in the same order.
  #pragma omp parallel for schedule(dynamic,10000)
  for (int i = 0; i < NNodes; i++) {
    TNEANet::TNodeI NI = NV[i];
    PRankH[i] = PRankV[NI.GetId()];
  }
  return 0;
}
开发者ID:sramas15,项目名称:snapr,代码行数:80,代码来源:centr.cpp
示例13: MLEGradAscentParallel
int TAGMFast::MLEGradAscentParallel(const double& Thres, const int& MaxIter, const int ChunkNum, const int ChunkSize, const TStr PlotNm, const double StepAlpha, const double StepBeta) {
//parallel
time_t InitTime = time(NULL);
uint64 StartTm = TSecTm::GetCurTm().GetAbsSecs();
TExeTm ExeTm, CheckTm;
double PrevL = Likelihood(true);
TIntFltPrV IterLV;
int PrevIter = 0;
int iter = 0;
TIntV NIdxV(F.Len(), 0);
for (int i = 0; i < F.Len(); i++) { NIdxV.Add(i); }
TIntV NIDOPTV(F.Len()); //check if a node needs optimization or not 1: does not require optimization
NIDOPTV.PutAll(0);
TVec<TIntFltH> NewF(ChunkNum * ChunkSize);
TIntV NewNIDV(ChunkNum * ChunkSize);
for (iter = 0; iter < MaxIter; iter++) {
NIdxV.Clr(false);
for (int i = 0; i < F.Len(); i++) {
if (NIDOPTV[i] == 0) { NIdxV.Add(i); }
}
IAssert (NIdxV.Len() <= F.Len());
NIdxV.Shuffle(Rnd);
// compute gradient for chunk of nodes
#pragma omp parallel for schedule(static, 1)
for (int TIdx = 0; TIdx < ChunkNum; TIdx++) {
TIntFltH GradV;
for (int ui = TIdx * ChunkSize; ui < (TIdx + 1) * ChunkSize; ui++) {
NewNIDV[ui] = -1;
if (ui > NIdxV.Len()) { continue; }
int u = NIdxV[ui]; //
//find set of candidate c (we only need to consider c to which a neighbor of u belongs to)
TUNGraph::TNodeI UI = G->GetNI(u);
TIntSet CIDSet(5 * UI.GetDeg());
TIntFltH CurFU = F[u];
for (int e = 0; e < UI.GetDeg(); e++) {
if (HOVIDSV[u].IsKey(UI.GetNbrNId(e))) { continue; }
TIntFltH& NbhCIDH = F[UI.GetNbrNId(e)];
for (TIntFltH::TIter CI = NbhCIDH.BegI(); CI < NbhCIDH.EndI(); CI++) {
CIDSet.AddKey(CI.GetKey());
}
}
if (CIDSet.Empty()) {
CurFU.Clr();
}
else {
for (TIntFltH::TIter CI = CurFU.BegI(); CI < CurFU.EndI(); CI++) { //remove the community membership which U does not share with its neighbors
if (! CIDSet.IsKey(CI.GetKey())) {
CurFU.DelIfKey(CI.GetKey());
}
}
GradientForRow(u, GradV, CIDSet);
if (Norm2(GradV) < 1e-4) { NIDOPTV[u] = 1; continue; }
double LearnRate = GetStepSizeByLineSearch(u, GradV, GradV, StepAlpha, StepBeta, 5);
if (LearnRate <= 1e-5) { NewNIDV[ui] = -2; continue; }
for (int ci = 0; ci < GradV.Len(); ci++) {
int CID = GradV.GetKey(ci);
double Change = LearnRate * GradV.GetDat(CID);
double NewFuc = CurFU.IsKey(CID)? CurFU.GetDat(CID) + Change : Change;
if (NewFuc <= 0.0) {
CurFU.DelIfKey(CID);
} else {
CurFU.AddDat(CID) = NewFuc;
}
}
CurFU.Defrag();
}
//store changes
NewF[ui] = CurFU;
NewNIDV[ui] = u;
}
}
int NumNoChangeGrad = 0;
int NumNoChangeStepSize = 0;
for (int ui = 0; ui < NewNIDV.Len(); ui++) {
int NewNID = NewNIDV[ui];
if (NewNID == -1) { NumNoChangeGrad++; continue; }
if (NewNID == -2) { NumNoChangeStepSize++; continue; }
for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
SumFV[CI.GetKey()] -= CI.GetDat();
}
}
#pragma omp parallel for
for (int ui = 0; ui < NewNIDV.Len(); ui++) {
int NewNID = NewNIDV[ui];
if (NewNID < 0) { continue; }
F[NewNID] = NewF[ui];
}
for (int ui = 0; ui < NewNIDV.Len(); ui++) {
int NewNID = NewNIDV[ui];
if (NewNID < 0) { continue; }
for (TIntFltH::TIter CI = F[NewNID].BegI(); CI < F[NewNID].EndI(); CI++) {
SumFV[CI.GetKey()] += CI.GetDat();
}
}
// update the nodes who are optimal
for (int ui = 0; ui < NewNIDV.Len(); ui++) {
int NewNID = NewNIDV[ui];
if (NewNID < 0) { continue; }
TUNGraph::TNodeI UI = G->GetNI(NewNID);
NIDOPTV[NewNID] = 0;
//.........这里部分代码省略.........
开发者ID:alwayskidd,项目名称:snap,代码行数:101,代码来源:agmfast.cpp
示例14: main
int main(int argc, char* argv[]) {
Env = TEnv(argc, argv, TNotify::StdNotify);
Env.PrepArgs(TStr::Fmt("Trust Rank. Build: %s, %s. Time: %s", __TIME__, __DATE__, TExeTm::GetCurTm()));
TExeTm ExeTm;
Try
const TStr Gnod = Env.GetIfArgPrefixStr("-g:", "Gnode.txt", "Good Nodes");
const TStr Bnod = Env.GetIfArgPrefixStr("-b:", "Bnode.txt", "Bad Nodes" );
const TStr Iput = Env.GetIfArgPrefixStr("-i:", "Input.txt", "Input File");
const TStr Oput = Env.GetIfArgPrefixStr("-o:", "Output.txt", "Output File");
const double C = 0.85;
const int MaxIter = 50;
const double Eps = 1e-9;
FILE* fpO = fopen(Oput.CStr(), "w");
PNGraph Graph = TSnap::LoadEdgeList< PNGraph > (Iput);
fprintf(fpO, "\nNodes: %d, Edges: %d\n\n", Graph->GetNodes(), Graph->GetEdges());
const int NNodes = Graph->GetNodes();
TIntFltH TRankH;
TRankH.Gen(NNodes);
int maxNId = 0, NId = 0, ret = 0;
for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++)
maxNId = max(maxNId, NI.GetId());
TFltV initialTrustScore(maxNId + 1);
for (int i = 0; i < initialTrustScore.Len(); i++)
initialTrustScore[i] = 0.5;
FILE* fpI = fopen(Gnod.CStr(), "r");
while (true) {
ret = fscanf(fpI, "%d", &NId);
if (ret == EOF) break;
if (Graph->IsNode(NId))
initialTrustScore[NId] = 1.0;
}
fclose(fpI);
fpI = fopen(Bnod.CStr(), "r");
while (true) {
ret = fscanf(fpI, "%d", &NId);
if (ret == EOF) break;
if (Graph->IsNode(NId))
initialTrustScore[NId] = 0.0;
}
fclose(fpI);
double Tot = 0.0;
for(int i = 0; i < initialTrustScore.Len(); i++)
Tot += initialTrustScore[i];
for(int i = 0; i < initialTrustScore.Len(); i++)
initialTrustScore[i] /= Tot;
for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++)
TRankH.AddDat( NI.GetId(), initialTrustScore[NI.GetId()] );
TFltV TmpV(NNodes);
for (int iter = 0; iter < MaxIter; iter++) {
int j = 0;
for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++, j++) {
TmpV[j] = 0;
for (int e = 0; e < NI.GetOutDeg(); e++) {
const int OutNId = NI.GetOutNId(e);
const int InDeg = Graph->GetNI(InNId).GetInDeg();
if (InDeg > 0)
TmpV[j] += (double) TRankH.GetDat(OutNId) / (double) InDeg;
}
TmpV[j] = C * TmpV[j] + (1.0 - C) * initialTrustScore[NI.GetId()];
}
for (int i = 0; i < TRankH.Len(); i++)
TRankH[i] = TmpV[i];
}
fprintf(fpO, "Node ID\t\tTrustRank\n");
for (TNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++){
int Id = NI.GetId();
double tr = TRankH.GetDat(Id);
fprintf(fpO, "%d\t\t\t%.5lf\n", Id, tr);
}
fclose(fpO);
Catch
printf("\nRun Time: %s (%s)\n", ExeTm.GetTmStr(), TSecTm::GetCurTm().GetTmStr().CStr());
return 0;
}
开发者ID:SITZ,项目名称:OSN-Ranking-Algorithms,代码行数:86,代码来源:itrustrank.cpp
示例15: GetBetweennessCentr
// Accumulates node and/or edge betweenness scores from breadth-first traversals
// rooted at every node id listed in BtwNIdV.
//
// Graph      - undirected input graph.
// BtwNIdV    - ids of the nodes used as traversal roots.
// NodeBtwH   - output (node id -> score); cleared and filled only if DoNodeCent is true.
// DoNodeCent - whether node scores are computed.
// EdgeBtwH   - output ((smaller id, larger id) -> score); cleared and filled only if DoEdgeCent is true.
// DoEdgeCent - whether edge scores are computed.
//
// NOTE(review): the structure (shortest-path counts, predecessor lists and a
// reverse-order dependency accumulation) looks like Brandes' algorithm - confirm.
void GetBetweennessCentr(const PUNGraph& Graph, const TIntV& BtwNIdV, TIntFltH& NodeBtwH, const bool& DoNodeCent, TIntPrFltH& EdgeBtwH, const bool& DoEdgeCent) {
if (DoNodeCent) { NodeBtwH.Clr(); }
if (DoEdgeCent) { EdgeBtwH.Clr(); }
const int nodes = Graph->GetNodes();
// S records nodes in the order they are taken from Q; it is unwound in the accumulation phase.
TIntS S(nodes);
// Q holds the nodes still to be expanded by the traversal.
TIntQ Q(nodes);
TIntIntVH P(nodes); // one vector for every node (predecessors on shortest paths)
// delta: accumulated dependency per node; sigma: shortest-path count; d: distance from the root (-1 = unseen).
TIntFltH delta(nodes);
TIntH sigma(nodes), d(nodes);
// init: create one entry per node in every table so later index-based resets cover all nodes
for (TUNGraph::TNodeI NI = Graph->BegNI(); NI < Graph->EndNI(); NI++) {
if (DoNodeCent) {
NodeBtwH.AddDat(NI.GetId(), 0);
}
if (DoEdgeCent) {
for (int e = 0; e < NI.GetOutDeg(); e++) {
// each undirected edge is registered once, keyed with the smaller id first
if (NI.GetId() < NI.GetOutNId(e)) {
EdgeBtwH.AddDat(TIntPr(NI.GetId(), NI.GetOutNId(e)), 0);
}
}
}
sigma.AddDat(NI.GetId(), 0);
d.AddDat(NI.GetId(), -1);
P.AddDat(NI.GetId(), TIntV());
delta.AddDat(NI.GetId(), 0);
}
// calc betweenness: one traversal per root node
for (int k = 0; k < BtwNIdV.Len(); k++) {
const TUNGraph::TNodeI NI = Graph->GetNI(BtwNIdV[k]);
// reset the per-root state
for (int i = 0; i < sigma.Len(); i++) {
sigma[i] = 0; d[i] = -1; delta[i] = 0; P[i].Clr(false);
}
S.Clr(false);
Q.Clr(false);
// the root has one path to itself at distance 0
sigma.AddDat(NI.GetId(), 1);
d.AddDat(NI.GetId(), 0);
Q.Push(NI.GetId());
// forward phase: compute distances, path counts and predecessor lists
while (!Q.Empty()) {
const int v = Q.Top(); Q.Pop();
const TUNGraph::TNodeI NI2 = Graph->GetNI(v);
S.Push(v);
const int VDat = d.GetDat(v);
for (int e = 0; e < NI2.GetOutDeg(); e++) {
const int w = NI2.GetOutNId(e);
if (d.GetDat(w) < 0) { // find w for the first time
Q.Push(w);
d.AddDat(w, VDat + 1);
}
// shortest path to w via v ?
if (d.GetDat(w) == VDat + 1) {
sigma.AddDat(w) += sigma.GetDat(v);
P.GetDat(w).Add(v);
}
}
}
// backward phase: unwind S and push each node's dependency to its predecessors
while (!S.Empty()) {
const int w = S.Top();
const double SigmaW = sigma.GetDat(w);
const double DeltaW = delta.GetDat(w);
const TIntV NIdV = P.GetDat(w);
S.Pop();
for (int i = 0; i < NIdV.Len(); i++) {
const int nid = NIdV[i];
// share of w's dependency attributed to predecessor nid
const double c = (sigma.GetDat(nid)*1.0 / SigmaW) * (1 + DeltaW);
delta.AddDat(nid) += c;
if (DoEdgeCent) {
EdgeBtwH.AddDat(TIntPr(TMath::Mn(nid, w), TMath::Mx(nid, w))) += c;
}
}
// the root itself gets no contribution; every other node receives half of its dependency
if (DoNodeCent && w != NI.GetId()) {
NodeBtwH.AddDat(w) += delta.GetDat(w) / 2.0;
}
}
}
}
开发者ID:Austindeadhead,项目名称:qminer,代码行数:76,代码来源:centr.cpp
示例16: GradientForRow
void TAGMFast::GradientForRow(const int UID, TIntFltH& GradU, const TIntSet& CIDSet) {
GradU.Gen(CIDSet.Len());
TFltV HOSumFV; //adjust for Fv of v hold out
if (HOVIDSV[UID].Len() > 0) {
HOSumFV.Gen(SumFV.Len());
for (int e = 0; e < HOVIDSV[UID].Len(); e++) {
for (int c = 0; c < SumFV.Len(); c++) {
HOSumFV[c] += GetCom(HOVIDSV[UID][e], c);
}
}
}
TUNGraph::TNodeI NI = G->GetNI(UID);
int Deg = NI.GetDeg();
TFltV PredV(Deg), GradV(CIDSet.Len());
TIntV CIDV(CIDSet.Len());
if (DoParallel && Deg + CIDSet.Len() > 10) {
#pragma omp parallel for schedule(static, 1)
for (int e = 0; e < Deg; e++) {
if (NI.GetNbrNId(e) == UID) { continue; }
if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; }
PredV[e] = Prediction(UID, NI.GetNbrNId(e));
}
#pragma omp parallel for schedule(static, 1)
for (int c = 0; c < CIDSet.Len(); c++) {
int CID = CIDSet.GetKey(c);
double Val = 0.0;
for (int e = 0; e < Deg; e++) {
int VID = NI.GetNbrNId(e);
if (VID == UID) { continue; }
if (HOVIDSV[UID].IsKey(VID)) { continue; }
Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID);
}
double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID));
CIDV[c] = CID;
GradV[c] = Val;
}
}
else {
for (int e = 0; e < Deg; e++) {
if (NI.GetNbrNId(e) == UID) { continue; }
if (HOVIDSV[UID].IsKey(NI.GetNbrNId(e))) { continue; }
PredV[e] = Prediction(UID, NI.GetNbrNId(e));
}
for (int c = 0; c < CIDSet.Len(); c++) {
int CID = CIDSet.GetKey(c);
double Val = 0.0;
for (int e = 0; e < Deg; e++) {
int VID = NI.GetNbrNId(e);
if (VID == UID) { continue; }
if (HOVIDSV[UID].IsKey(VID)) { continue; }
Val += PredV[e] * GetCom(VID, CID) / (1.0 - PredV[e]) + NegWgt * GetCom(VID, CID);
}
double HOSum = HOVIDSV[UID].Len() > 0? HOSumFV[CID].Val: 0.0;//subtract Hold out pairs only if hold out pairs exist
Val -= NegWgt * (SumFV[CID] - HOSum - GetCom(UID, CID));
CIDV[c] = CID;
GradV[c] = Val;
}
}
//add regularization
if (RegCoef > 0.0) { //L1
for (int c = 0; c < GradV.Len(); c++) {
GradV[c] -= RegCoef;
}
}
if (RegCoef < 0.0) { //L2
for (int c = 0; c < GradV.Len(); c++) {
GradV[c] += 2 * RegCoef * GetCom(UID, CIDV[c]);
}
}
for (int c = 0; c < GradV.Len(); c++) {
if (GetCom(UID, CIDV[c]) == 0.0 && GradV[c] < 0.0) { continue; }
if (fabs(GradV[c]) < 0.0001) { continue; }
GradU.AddDat(CIDV[c], GradV[c]);
}
for (int c = 0; c < GradU.Len(); c++) {
if (GradU[c] >= 10) { GradU[c] = 10; }
if (GradU[c] <= -10) { GradU[c] = -10; }
IAssert(GradU[c] >= -10);
}
}
开发者ID:alwayskidd,项目名称:snap,代码行数:88,代码来源:agmfast.cpp
示例17: Eval
/////////////////////////////////////////////////
// NIST-score
double TEvalScoreNist::Eval(const PTransCorpus& TransCorpus, const TIntV& _SentIdV) {
// check if the corpus has translations
IAssert(TransCorpus->IsTrans());
// ngram counts (cliped and full)
TIntH ClipCountNGramH, CountNGramH;
// ngram info score
TIntFltH NGramInfoH;
// candidate and effective reference length
double FullTransLen = 0.0, FullRefLen = 0.0;
// iterate over sentences
TIntV SentIdV = _SentIdV;
if (SentIdV.Empty()) { TransCorpus->GetSentIdV(SentIdV); }
const int Sents = SentIdV.Len();
for (int SentIdN = 0; SentIdN < Sents; SentIdN++) {
const int SentId = SentIdV[SentIdN];
// tokenize translation
TIntV TransWIdV; Parse(TransCorpus->GetTransStr(SentId), TransWIdV);
TIntH TransNGramH; GetNGramH(TransWIdV, MxNGramLen, TransNGramH);
TIntH FreeTransNGramH = TransNGramH; // number of non-matched ngrams
// counters for getting the closest length of reference sentences
const int TransLen = TransWIdV.Len(); int RefLenSum = 0;
// go over reference translations and count ngram matches
TStrV RefTransStrV = TransCorpus->GetRefTransStrV(SentId);
// we assume that there is at least one reference translation
IAssert(!RefTransStrV.Empty());
for (int RefN = 0; RefN < RefTransStrV.Len(); RefN++) {
// parse reference translation sentence
TIntV RefWIdV; Parse(RefTransStrV[RefN], RefWIdV);
TIntH RefNGramH; GetNGramH(RefWIdV, MxNGramLen, RefNGramH);
// check for matches
int TransNGramKeyId = TransNGramH.FFirstKeyId();
while(TransNGramH.FNextKeyId(TransNGramKeyId)) {
const int NGramId = TransNGramH.GetKey(TransNGramKeyId);
const int FreeTransNGrams = FreeTransNGramH(NGramId);
if (RefNGramH.IsKey(NGramId) && (FreeTransNGrams>0)) {
// ngram match and still some free ngrams left to clip
const int RefNGrams = RefNGramH(NGramId);
FreeTransNGramH(NGramId) = TInt::GetMx(0, FreeTransNGrams - RefNGrams);
}
}
// check the length difference
const int RefLen = RefWIdV.Len();
RefLenSum += RefLen;
}
// count ngrams
int TransNGramKeyId = TransNGramH.FFirstKeyId();
while(TransNGramH.FNextKeyId(TransNGramKeyId)) {
// get ngram
const int NGramId = TransNGramH.GetKey(TransNGramKeyId);
IAssert(NGramId != -1);
// check if two hash tables are aligned (should be...)
const int FreeNGramId = FreeTransNGramH.GetKey(TransNGramKeyId);
IAssert(NGramId == FreeNGramId);
// get ngram count and clip-count
const int Count = TransNGramH[TransNGramKeyId];
const int ClipCount = Count - FreeTransNGramH[TransNGramKeyId];
// add ngram to the coprus ngram counts
CountNGramH.AddDat(NGramId) += Count;
ClipCountNGramH.AddDat(NGramId) += ClipCount;
}
// count length
FullTransLen += double(TransLen);
FullRefLen += double(RefLenSum) / double(RefTransStrV.Len());
}
// calculate ngram info scores
int CountKeyId = CountNGramH.FFirstKeyId();
while (CountNGramH.FNextKeyId(CountKeyId)) {
// get the n-gram
const int NGramId = CountNGramH.GetKey(CountKeyId);
TIntV NGram = GetNGram(NGramId);
// prepare counts
if (NGram.Len() == 1) {
// n-gram is a word
const int WordCount = CountNGramH[CountKeyId];
const double NGramInfoScore = TMath::Log2(FullTransLen / double(WordCount));
NGramInfoH.AddDat(NGramId, NGramInfoScore);
} else {
// more then one word in the n-gram
// get a n-gram with removed last element
TIntV N1Gram = NGram; N1Gram.DelLast();
const int N1GramId = NGramH.GetKeyId(N1Gram);
// get the counts
const int NGramCount = CountNGramH(NGramId);
const int N1GramCount = CountNGramH(N1GramId);
// get the score
const double NGramInfoScore = TMath::Log2(double(N1GramCount) / double(NGramCount));
NGramInfoH.AddDat(NGramId, NGramInfoScore);
}
}
// calcualte ngram precisions
TFltV ClipCountV(MxNGramLen); ClipCountV.PutAll(0);
|
请发表评论