本文整理汇总了Golang中go-glue.googlecode.com/hg/rltools/discrete.Action类型的典型用法代码示例。如果您正苦于以下问题：Golang Action类型的具体用法？Golang Action怎么用？Golang Action使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。
在下文中一共展示了Action类型的19个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Golang代码示例。
示例1: Update
// Update returns the reward belief obtained after observing reward r for
// taking action a in state s.
//
// If the reward for (s, a) is already marked known, the receiver itself is
// returned. Otherwise the receiver is copied, (s, a) is recorded as known
// with reward r, and the SeenRewards/Counts table is updated: when r has
// been seen before its count is incremented, otherwise r is prepended with
// a count of 1. Total and countKnown are each incremented by one.
func (this *CRPReward) Update(s discrete.State, a discrete.Action, r float64) (next RewardBelief) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[index] {
		return this
	}

	ndr := new(CRPReward)
	*ndr = *this
	// The struct copy also copies any chooser that Next cached on the
	// receiver. That chooser was built from the receiver's Counts and Total,
	// which are about to change, so clear it and let Next rebuild it lazily.
	ndr.chooser = nil

	ndr.Known = make([]bool, len(this.Known))
	copy(ndr.Known, this.Known)
	ndr.R = make([]float64, len(this.R))
	copy(ndr.R, this.R)
	ndr.Known[index] = true
	ndr.R[index] = r
	ndr.countKnown++

	// Look for r among the rewards already seen. The search index refers to
	// the receiver's slices, and the copies below keep the same layout, so
	// the incremented count always belongs to the matching reward.
	seenAt := -1
	for i, sr := range this.SeenRewards {
		if sr == r {
			seenAt = i
			break
		}
	}
	if seenAt >= 0 {
		ndr.SeenRewards = make([]float64, len(this.SeenRewards))
		copy(ndr.SeenRewards, this.SeenRewards)
		ndr.Counts = make([]uint64, len(this.Counts))
		copy(ndr.Counts, this.Counts)
		ndr.Counts[seenAt]++
	} else {
		// First time r is observed: it becomes a new entry at the front.
		ndr.SeenRewards = append([]float64{r}, this.SeenRewards...)
		ndr.Counts = append([]uint64{1}, this.Counts...)
	}

	ndr.Total++
	return ndr
}
开发者ID:postfix,项目名称:rlbayes,代码行数:35,代码来源:reward.go
示例2: Next
// Next samples a reward for taking action a in state s.
//
// A (s, a) pair marked known yields its recorded reward. Otherwise an index
// is drawn from the chooser, which is built on first use from Counts scaled
// by 1/(Total+Alpha) and cached on the receiver. An index equal to
// len(SeenRewards) selects a fresh value from BaseSampler; any other index
// selects the corresponding previously seen reward.
func (this *CRPReward) Next(s discrete.State, a discrete.Action) (r float64) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[index] {
		return this.R[index]
	}

	if this.chooser == nil {
		switch size := len(this.Counts); size {
		case 0:
			// Nothing seen yet: index 0 == len(SeenRewards), i.e. the base sampler.
			this.chooser = func() int64 { return 0 }
		default:
			normalizer := 1.0 / (float64(this.Total) + this.Alpha)
			weights := make([]float64, size)
			for i, count := range this.Counts {
				weights[i] = float64(count) * normalizer
			}
			this.chooser = stat.Choice(weights)
		}
	}

	pick := int(this.chooser())
	if pick != len(this.SeenRewards) {
		return this.SeenRewards[pick]
	}
	return this.BaseSampler()
}
开发者ID:postfix,项目名称:rlbayes,代码行数:29,代码来源:reward.go
示例3: Update
// Update returns the transition belief obtained after observing the
// transition from s to n under action a.
//
// The per-(s, a) entry is created on the receiver if it does not exist yet.
// When ForgetThreshold is non-zero and the entry has already reached it, the
// receiver is returned unchanged. Otherwise a new FDMTransition is built that
// shares every entry except the updated one; an entry whose visit count lands
// exactly on ForgetThreshold has ForgetPrior applied. The hash is adjusted by
// swapping the old entry's hashcode for the new one's.
func (this *FDMTransition) Update(s discrete.State, a discrete.Action, n discrete.State) (next TransitionBelief) {
	outcome := this.bg.NextToOutcome(s, n)
	idx := s.Hashcode() + a.Hashcode()*this.bg.NumStates

	current := this.sas[idx]
	if current == nil {
		current = NewDirSA(this.bg.Alpha)
		this.sas[idx] = current
	}

	threshold := this.bg.ForgetThreshold
	if threshold != 0 && current.visits >= threshold {
		return this
	}

	successor := &FDMTransition{bg: this.bg}
	successor.sas = make([]*DirSA, len(this.sas))
	copy(successor.sas, this.sas)
	successor.sas[idx] = current.Update(outcome)
	if successor.sas[idx].visits == threshold {
		successor.sas[idx].ForgetPrior(this.bg.Alpha)
	}

	oldHash := this.sas[idx].Hashcode()
	newHash := successor.sas[idx].Hashcode()
	successor.hash = this.hash - oldHash + newHash
	return successor
}
开发者ID:postfix,项目名称:rlbayes,代码行数:28,代码来源:transition.go
示例4: Next
// Next samples a successor state for taking action a in state s.
//
// The action's hashcode is decoded into two integers: the index of the object
// acted upon and the per-object action. Only that object's state is
// resampled (via ObjFDM); the remaining object states are copied from s, and
// the result is re-encoded with GetState.
func (this *FObjTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	decoded := this.bg.Task.Act.Ints.Values(a.Hashcode())
	objIndex := decoded[0]
	objAction := discrete.Action(decoded[1])

	current := this.bg.GetObjs(s)
	successor := make([]discrete.State, len(current))
	copy(successor, current)
	successor[objIndex] = this.ObjFDM.Next(current[objIndex], objAction)

	return this.bg.GetState(successor)
}
开发者ID:postfix,项目名称:rlbayes,代码行数:9,代码来源:fobj.go
示例5: UpdatePosterior
// UpdatePosterior returns a copy of the posterior in which the histogram for
// (s, a) in stateData has been incremented for outcome o.
//
// stateData, clusterData and C are duplicated so the receiver is left
// untouched; only stateData is modified here.
func (this *Posterior) UpdatePosterior(s discrete.State, a discrete.Action, o discrete.State) (next *Posterior) {
	updated := *this

	updated.stateData = make([]SAHist, len(this.stateData))
	copy(updated.stateData, this.stateData)
	updated.clusterData = make([]SAHist, len(this.clusterData))
	copy(updated.clusterData, this.clusterData)
	updated.C = this.C.Copy()

	idx := s.Hashcode()*this.bg.NumActions + a.Hashcode()
	updated.stateData[idx] = updated.stateData[idx].Incr(this.bg.NumOutcomes, o)

	return &updated
}
开发者ID:postfix,项目名称:rlbayes,代码行数:10,代码来源:cluster.go
示例6: Next
// Next samples a successor state for taking action a in state s.
//
// The per-(s, a) entry is created on the receiver (with the configured Alpha)
// the first time it is needed; an outcome drawn from it is then mapped to a
// successor state with OutcomeToNext.
func (this *FDMTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	idx := s.Hashcode() + a.Hashcode()*this.bg.NumStates
	if this.sas[idx] == nil {
		this.sas[idx] = NewDirSA(this.bg.Alpha)
	}
	outcome := this.sas[idx].Next()
	return this.bg.OutcomeToNext(s, outcome)
}
开发者ID:postfix,项目名称:rlbayes,代码行数:10,代码来源:transition.go
示例7: Next
// Next applies action to a copy of the oracle and returns that copy together
// with the reward reported by the environment step.
//
// The action's hashcode is decoded into integer action values and wrapped in
// an rlglue action. The copy gets its own Cans slice, records the terminal
// flag returned by EnvStep, and is rehashed before being returned.
// NOTE(review): Env is copied by plain struct assignment; whether the copy
// shares environment state with the receiver depends on Env's type — confirm.
func (this *Oracle) Next(action discrete.Action) (o discrete.Oracle, r float64) {
	ints := this.Task.Act.Ints.Values(action.Hashcode())
	glueAction := rlglue.NewAction(ints, []float64{}, []byte{})

	successor := new(Oracle)
	*successor = *this
	successor.Cans = make([]Can, len(this.Cans))
	copy(successor.Cans, this.Cans)

	_, r, successor.isTerminal = successor.Env.EnvStep(glueAction)
	successor.rehash()

	return successor, r
}
开发者ID:skelterjohn,项目名称:rlenv,代码行数:11,代码来源:paint.go
示例8: Update
// Update returns the transition belief obtained after observing the
// transition from s to n under action a.
//
// The action's hashcode is decoded into the index of the object acted upon
// and the per-object action. Only that object's before/after states are fed
// to ObjFDM.Update, whose result must be a *FDMTransition (the type assertion
// panics otherwise). The receiver is not modified.
func (this *FObjTransition) Update(s discrete.State, a discrete.Action, n discrete.State) (next TransitionBelief) {
	updated := *this

	decoded := this.bg.Task.Act.Ints.Values(a.Hashcode())
	objIndex, objAction := decoded[0], discrete.Action(decoded[1])

	before := this.bg.GetObjs(s)
	after := this.bg.GetObjs(n)

	objBelief := this.ObjFDM.Update(before[objIndex], objAction, after[objIndex])
	updated.ObjFDM = objBelief.(*FDMTransition)

	return &updated
}
开发者ID:postfix,项目名称:rlbayes,代码行数:11,代码来源:fobj.go
示例9: Next
// Next samples whether taking action a in state s is terminal.
//
// A (s, a) pair marked known yields its recorded value. Otherwise the result
// is true when a uniform draw falls below Alpha/(Alpha+Beta).
func (this *BetaTerminal) Next(s discrete.State, a discrete.Action) (t bool) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[index] {
		return this.Term[index]
	}
	threshold := this.Alpha / (this.Alpha + this.Beta)
	return stat.NextUniform() < threshold
}
开发者ID:postfix,项目名称:rlbayes,代码行数:12,代码来源:terminal.go
示例10: Update
// Update returns a new CountKnown in which the visit count for (s, a) is one
// higher than in the receiver. The visits slice is duplicated, so the
// receiver is left unchanged.
func (this *CountKnown) Update(s discrete.State, a discrete.Action) (next KnownBelief) {
	visits := make([]int, len(this.visits))
	copy(visits, this.visits)

	successor := &CountKnown{
		numStates: this.numStates,
		visits:    visits,
		threshold: this.threshold,
	}
	idx := s.Hashcode() + successor.numStates*a.Hashcode()
	successor.visits[idx]++

	return successor
}
开发者ID:postfix,项目名称:rlbayes,代码行数:13,代码来源:known.go
示例11: Next
// Next samples a successor state for taking action a in state s.
//
// The state is mapped to its cluster through C, and the cluster's histogram
// for a is added element-wise to a copy of the Beta weights. The weights are
// normalised to sum to one, an outcome index is drawn from them, and the
// outcome is converted to a successor state with OutcomeToNext.
func (this *Posterior) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	cluster := uint64(this.C.Get(int(s)))
	ck := cluster*this.bg.NumActions + a.Hashcode()
	hist := this.clusterData[ck]

	fhist := append([]float64{}, this.bg.Beta...)
	for i, count := range hist {
		fhist[i] += float64(count)
	}

	// Normalise over every weight, not only the indices present in hist:
	// hist may be shorter than Beta (presumably empty for a cluster with no
	// observations — verify against SAHist), and dividing by a partial sum
	// would yield weights that do not sum to one (or Inf/NaN when it is zero).
	total := 0.0
	for _, w := range fhist {
		total += w
	}
	for i := range fhist {
		fhist[i] /= total
	}

	o := discrete.State(stat.NextChoice(fhist))
	return this.bg.OutcomeToNext(s, o)
}
开发者ID:postfix,项目名称:rlbayes,代码行数:17,代码来源:cluster.go
示例12: Update
// Update returns a copy of the learner after observing value o for taking
// action a in state s.
//
// Two histograms are incremented at the index of o within myRange: the one
// keyed by (s, a) in history, and the one keyed by (mapped state, a) in
// mappedHistory, where the mapped state comes from cutting s's values with
// parents and indexing the result in cutRanges. mappedLoglihood is adjusted
// by adding the mapped histogram's LogFactorAlpha before the increment and
// subtracting it afterwards. The hash is advanced by k << oi.
func (this *DepLearner) Update(s discrete.State, a discrete.Action, o int32) (next *DepLearner) {
	k := a.Hashcode() + this.bg.numActions*s.Hashcode()

	updated := new(DepLearner)
	*updated = *this

	oi := this.bg.myRange.Index(o)
	updated.history = make([]Histogram, len(this.history))
	copy(updated.history, this.history)
	updated.history[k] = updated.history[k].Incr(oi)

	stateValues := updated.bg.stateValues[s]
	mappedValues := updated.parents.CutValues(stateValues)
	mappedState := updated.cutRanges.Index(mappedValues)
	mk := a.Hashcode() + this.bg.numActions*mappedState

	updated.mappedHistory = make([]Histogram, len(this.mappedHistory))
	copy(updated.mappedHistory, this.mappedHistory)

	// Kept as two separate floating-point steps around the increment.
	updated.mappedLoglihood += updated.mappedHistory[mk].LogFactorAlpha(this.bg.cfg.Alpha)
	updated.mappedHistory[mk] = updated.mappedHistory[mk].Incr(oi)
	updated.mappedLoglihood -= updated.mappedHistory[mk].LogFactorAlpha(this.bg.cfg.Alpha)

	updated.hash += k << oi
	return updated
}
开发者ID:postfix,项目名称:rlbayes,代码行数:18,代码来源:deplearner.go
示例13: Next
// Next samples a value for taking action a in state s.
//
// The state is reduced to its mapped state (parents.CutValues followed by
// cutRanges.Index) and the mapped histogram for (mapped state, a) is turned
// into log-weights: log(Alpha + count) while the histogram's sum is below M,
// plain log(count) otherwise. An index is drawn from those log-weights and
// translated back to a value through myRange.
func (this *DepLearner) Next(s discrete.State, a discrete.Action) (o int32) {
	stateValues := this.bg.stateValues[s]
	mappedValues := this.parents.CutValues(stateValues)
	mappedState := this.cutRanges.Index(mappedValues)
	mk := a.Hashcode() + this.bg.numActions*mappedState

	hist := this.mappedHistory[mk]
	logWeights := make([]float64, len(hist))
	if hist.Sum() < this.bg.cfg.M {
		for i, count := range hist {
			logWeights[i] = math.Log(this.bg.cfg.Alpha + float64(count))
		}
	} else {
		for i, count := range hist {
			logWeights[i] = math.Log(float64(count))
		}
	}

	choice := uint64(stat.NextLogChoice(logWeights))
	return this.bg.myRange.Value(choice)
}
开发者ID:postfix,项目名称:rlbayes,代码行数:19,代码来源:deplearner.go
示例14: Update
// Update returns the terminal belief obtained after observing terminal flag
// t for taking action a in state s.
//
// If (s, a) is already marked known the receiver is returned. Otherwise a
// copy with its own Known and Term slices records the observation, and Alpha
// (when t is true) or Beta (when t is false) is incremented by one.
func (this *BetaTerminal) Update(s discrete.State, a discrete.Action, t bool) (next TerminalBelief) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[index] {
		return this
	}

	updated := new(BetaTerminal)
	*updated = *this

	updated.Known = make([]bool, len(this.Known))
	copy(updated.Known, this.Known)
	updated.Term = make([]bool, len(this.Term))
	copy(updated.Term, this.Term)

	updated.Known[index] = true
	updated.Term[index] = t

	switch {
	case t:
		updated.Alpha++
	default:
		updated.Beta++
	}
	return updated
}
开发者ID:postfix,项目名称:rlbayes,代码行数:20,代码来源:terminal.go
示例15: Update
// Update returns the transition belief obtained after observing the
// transition from s to n under action a.
//
// Once the total for (s, a) has reached M the receiver is returned unchanged.
// Otherwise a copy is made in which each learner is updated with its own
// component of n's state values, the hash is recomputed from the learners'
// hashcodes (each shifted left by its index), and the total for (s, a) is
// incremented.
func (this *Belief) Update(s discrete.State, a discrete.Action, n discrete.State) (nextBelief bayes.TransitionBelief) {
	k := a.Hashcode() + s.Hashcode()*this.bg.numActions
	if this.totals[k] >= this.bg.cfg.M {
		return this
	}

	nextValues := this.bg.stateValues[n]

	updated := new(Belief)
	*updated = *this
	updated.hash = 0
	updated.learners = make([]*DepLearner, len(this.learners))
	copy(updated.learners, this.learners)
	for child, learner := range this.learners {
		refreshed := learner.Update(s, a, nextValues[child])
		updated.learners[child] = refreshed
		updated.hash += refreshed.Hashcode() << uint(child)
	}

	updated.totals = make([]uint64, len(this.totals))
	copy(updated.totals, this.totals)
	updated.totals[k]++

	return updated
}
开发者ID:postfix,项目名称:rlbayes,代码行数:20,代码来源:belief.go
示例16: getIndexAction
// getIndexAction converts a discrete action index into an rlglue action: the
// index's hashcode is decoded into integer action values by the task's action
// spec, and the double and char parts are left empty.
func (this *BFS3Agent) getIndexAction(index discrete.Action) (act rlglue.Action) {
	ints := this.task.Act.Ints.Values(index.Hashcode())
	act = rlglue.NewAction(ints, []float64{}, []byte{})
	return
}
开发者ID:skelterjohn,项目名称:rlalg,代码行数:3,代码来源:bfs3.go
示例17: Known
// Known reports whether the visit count recorded for (s, a) has reached the
// threshold.
func (this *CountKnown) Known(s discrete.State, a discrete.Action) (known bool) {
	count := this.visits[s.Hashcode()+this.numStates*a.Hashcode()]
	known = count >= this.threshold
	return
}
开发者ID:postfix,项目名称:rlbayes,代码行数:4,代码来源:known.go
示例18: R
// R returns the reward stored for taking action a in state s. Entries are
// indexed by s.Hashcode() + a.Hashcode()*maxStates.
func (this *SysMDP) R(s discrete.State, a discrete.Action) float64 {
	return this.r[s.Hashcode()+a.Hashcode()*this.maxStates]
}
开发者ID:skelterjohn,项目名称:rlenv,代码行数:4,代码来源:mdp.go
示例19: T
// T returns the value stored for the transition from s to n under action a.
// The (s, a) row is found at s.Hashcode() + a.Hashcode()*maxStates and is
// then indexed by n.
func (this *SysMDP) T(s discrete.State, a discrete.Action, n discrete.State) float64 {
	row := this.t[s.Hashcode()+a.Hashcode()*this.maxStates]
	return row[n]
}
开发者ID:skelterjohn,项目名称:rlenv,代码行数:4,代码来源:mdp.go
注：本文中的go-glue.googlecode.com/hg/rltools/discrete.Action类型示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论