Java EnvironmentOutcome Class Code Examples


This article collects and summarizes typical usage examples of the Java class burlap.mdp.singleagent.environment.EnvironmentOutcome. If you are wondering what EnvironmentOutcome is for, how to use it, or where to find usage examples, the curated class examples below may help.



The EnvironmentOutcome class belongs to the burlap.mdp.singleagent.environment package. Twenty code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
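Before the examples, here is a minimal sketch (not taken from any of the projects below) of how an EnvironmentOutcome is typically constructed and read. It assumes only the constructor and public fields (o, a, op, r, terminated) that appear throughout the examples, plus the standard BURLAP import paths for State and Action; the helper class and method names are hypothetical.

import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;

public class EnvironmentOutcomeSketch {

    // Wraps one environment step: previous state o, action a, next state op,
    // reward r, and whether op is a terminal state.
    public static EnvironmentOutcome record(State previous, Action action,
                                            State next, double reward, boolean terminal) {
        return new EnvironmentOutcome(previous, action, next, reward, terminal);
    }

    // Reads the public fields back, e.g. for logging or replay-memory storage.
    public static String describe(EnvironmentOutcome eo) {
        return "a=" + eo.a.actionName() + " r=" + eo.r + " terminal=" + eo.terminated;
    }
}

The same pattern — construct an outcome after one step, then read o, a, op, r, and terminated — recurs in every example below, whether the outcome comes from a live environment, a sampled model, or a replay memory.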

Example 1: addExperience

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public void addExperience(EnvironmentOutcome eo) {
    // If this is the first frame of the episode, add the o frame.
    if (currentFrameHistory.historyLength == 0) {
        currentFrameHistory = addFrame(((ALEState)eo.o).getScreen());
    }

    // If this experience ends in a terminal state,
    // the terminal frame will never be used, so don't add it.
    FrameHistory op;
    if (eo.terminated) {
        op = new FrameHistory(currentFrameHistory.index, 0);
    } else {
        op = addFrame(((ALEState)eo.op).getScreen());
    }

    experiences[next] = new FrameExperience(currentFrameHistory, actionSet.map(eo.a), op, eo.r, eo.terminated);
    next = (next+1) % experiences.length;
    size = Math.min(size+1, experiences.length);

    currentFrameHistory = op;
}
 
Developer ID: h2r, Project: burlap_caffe, Lines of code: 23, Source: FrameExperienceMemory.java


Example 2: updateQFunction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public void updateQFunction(List<EnvironmentOutcome> samples) {

    // fill up experience replay
    if (runningRandomPolicy) {
        if (totalSteps >= replayStartSize) {
            System.out.println("Replay sufficiently filled. Beginning training...");

            setLearningPolicy(trainingPolicy);
            runningRandomPolicy = false;

            // reset stale update timer
            this.stepsSinceStale = 1;
        }

        return;
    }

    // only update every updateFreq steps
    if (totalSteps % updateFreq == 0) {
        ((DQN)vfa).updateQFunction(samples, (DQN)staleVfa);
    }
}
 
Developer ID: h2r, Project: burlap_caffe, Lines of code: 24, Source: DeepQLearner.java


Example 3: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public EnvironmentOutcome executeAction(Action a) {
	State startState = this.currentObservation();
	
	ActionController ac = this.actionControllerMap.get(a.actionName());
	int delay = ac.executeAction(a);
	if (delay > 0) {
		try {
			Thread.sleep(delay);
		} catch(InterruptedException e) {
			e.printStackTrace();
		}
	}
	
	State finalState = this.currentObservation();
	
	this.lastReward = this.rewardFunction.reward(startState, a, finalState);
	
	EnvironmentOutcome eo = new EnvironmentOutcome(startState, a, finalState, this.lastReward, this.isInTerminalState());
	
	return eo;
}
 
Developer ID: h2r, Project: burlapcraft, Lines of code: 23, Source: MinecraftEnvironment.java


Example 4: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public EnvironmentOutcome executeAction(Action ga) {

	State prevState = this.currentState;
	synchronized(this.nextAction){
		this.nextAction.val = ga;
		this.nextAction.notifyAll();
	}


	synchronized(this.nextState){
		while(this.nextState.val == null){
			try{
				nextState.wait();
			} catch(InterruptedException ex){
				ex.printStackTrace();
			}
		}
		this.nextState.val = null;
	}

	EnvironmentOutcome eo = new EnvironmentOutcome(prevState, ga, this.currentState, this.lastReward, this.curStateIsTerminal);

	return eo;
}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 26, Source: LearningAgentToSGAgentInterface.java


Example 5: collectDataFrom

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public SARSData collectDataFrom(State s, SampleModel model, int maxSteps, SARSData intoDataset) {
	
	if(intoDataset == null){
		intoDataset = new SARSData();
	}
	
	State curState = s;
	int nsteps = 0;
	boolean terminated = model.terminal(s);
	while(!terminated && nsteps < maxSteps){
		
		List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, curState);
		Action ga = gas.get(RandomFactory.getMapped(0).nextInt(gas.size()));
		EnvironmentOutcome eo = model.sample(curState, ga);
		intoDataset.add(curState, ga, eo.r, eo.op);
		curState = eo.op;
		terminated = eo.terminated;
		nsteps++;
		
	}
	
	
	return intoDataset;
	
}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 27, Source: SARSCollector.java


Example 6: computeF

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
public double computeF(PrioritizedSearchNode parentNode, Action generatingAction, HashableState successorState, EnvironmentOutcome eo) {
	double cumR = 0.;
	int d = 0;
	if(parentNode != null){
		double pCumR = cumulatedRewardMap.get(parentNode.s);
		cumR = pCumR + eo.r;
		
		int pD = depthMap.get(parentNode.s);
		if(!(generatingAction instanceof Option)){
			d = pD + 1;
		}
		else{
			d = pD + ((EnvironmentOptionOutcome)eo).numSteps();
		}
	}
	
	double H  = heuristic.h(successorState.s());
	lastComputedCumR = cumR;
	lastComputedDepth = d;
	double weightedE = this.epsilon * this.epsilonWeight(d);
	double F = cumR + ((1. + weightedE)*H);
	
	return F;
}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 25, Source: DynamicWeightedAStar.java


Example 7: transitions

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public List<TransitionProb> transitions(State s, Action a) {

	if(!(this.stateModel instanceof FullStateModel)){
		throw new RuntimeException("Factored Model cannot enumerate transition distribution, because the state model does not implement FullStateModel");
	}

	List<StateTransitionProb> stps = ((FullStateModel)this.stateModel).stateTransitions(s, a);
	List<TransitionProb> tps = new ArrayList<TransitionProb>(stps.size());
	for(StateTransitionProb stp : stps){
		double r = this.rf.reward(s, a, stp.s);
		boolean t = this.tf.isTerminal(stp.s);
		TransitionProb tp = new TransitionProb(stp.p, new EnvironmentOutcome(s, a, stp.s, r, t));
		tps.add(tp);
	}

	return tps;
}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 19, Source: FactoredModel.java


Example 8: actUntilTerminalOrMaxSteps

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
/**
 * Causes the agent to act for some fixed number of steps. The agent's belief is automatically
 * updated by this method using the specified {@link BeliefUpdate}.
 * The agent's action selection for the current belief state is defined by
 * the {@link #getAction(burlap.mdp.singleagent.pomdp.beliefstate.BeliefState)} method. The observation, action, and reward
 * sequence is saved in an {@link Episode} object and returned.
 * @param maxSteps the maximum number of steps to take in the environment
 * @return an {@link Episode} that records the observation, action, and reward sequence.
 */
public Episode actUntilTerminalOrMaxSteps(int maxSteps){
	Episode ea = new Episode();
	ea.initializeInState(this.environment.currentObservation());
	int c = 0;
	while(!this.environment.isInTerminalState() && c < maxSteps){
		Action ga = this.getAction(this.curBelief);
		EnvironmentOutcome eo = environment.executeAction(ga);
		ea.transition(ga, eo.op, eo.r);

		//update our belief
		this.curBelief = this.updater.update(this.curBelief, eo.op, eo.a);
		
		c++;
		
	}
	
	return ea;
}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 28, Source: BeliefAgent.java


Example 9: sampleExperiences

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public List<EnvironmentOutcome> sampleExperiences(int n) {
    List<FrameExperience> samples = sampleFrameExperiences(n);

    List<EnvironmentOutcome> sampleOutcomes = new ArrayList<>(samples.size());
    for (FrameExperience exp : samples) {
        sampleOutcomes.add(new EnvironmentOutcome(exp.o, actionSet.get(exp.a), exp.op, exp.r, exp.terminated));
    }

    return sampleOutcomes;
}
 
Developer ID: h2r, Project: burlap_caffe, Lines of code: 12, Source: FrameExperienceMemory.java


Example 10: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
public EnvironmentOutcome executeAction(ALEAction a) {
    // save start state
    State startState = currentState;

    // perform action
    boolean closed = io.act(a.aleCode);
    if (closed) {
        // the FIFO stream was closed
        throw new RuntimeException("ALE FIFO stream closed");
    }

    // Obtain the edu.brown.cs.burlap.screen matrix
    Mat screen = io.getScreen();

    // Get RLData
    RLData rlData = io.getRLData();

    // Update Environment State
    lastReward = rlData.reward;
    isTerminal = rlData.isTerminal;
    currentState = new ALEState(screen);

    if (terminateOnEndLife) {
        if (rlData.isTerminal) {
            isTerminal = true;
            currentLives = 0;
        } else if (rlData.lives != currentLives) {
            isTerminal = true;
            currentLives = rlData.lives;
        }
    } else {
        isTerminal = rlData.isTerminal;
        currentLives = rlData.lives;
    }

    return new EnvironmentOutcome(startState, a, currentState, lastReward, isTerminal);
}
 
Developer ID: h2r, Project: burlap_ale, Lines of code: 38, Source: ALEEnvironment.java


Example 11: runLearningEpisode

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
	//initialize our episode object with the initial state of the environment
	Episode e = new Episode(env.currentObservation());

	//behave until a terminal state or max steps is reached
	State curState = env.currentObservation();
	int steps = 0;
	while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

		//select an action
		Action a = this.learningPolicy.action(curState);

		//take the action and observe outcome
		EnvironmentOutcome eo = env.executeAction(a);

		//record result
		e.transition(eo);

		//get the max Q value of the resulting state if it's not terminal, 0 otherwise
		double maxQ = eo.terminated ? 0. : this.value(eo.op);

		//update the old Q-value
		QValue oldQ = this.storedQ(curState, a);
		oldQ.q = oldQ.q + this.learningRate * (eo.r + this.gamma * maxQ - oldQ.q);


		//update state pointer to next environment state observed
		curState = eo.op;
		steps++;

	}

	return e;
}
 
Developer ID: jmacglashan, Project: burlap_examples, Lines of code: 36, Source: QLTutorial.java


Example 12: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
/**
     * Executes the specified action in this environment
     *
     * @param a the Action that is to be performed in this environment.
     * @return the resulting observation and reward transition from applying the given GroundedAction in this environment.
     */
    @Override
    public EnvironmentOutcome executeAction(Action a) {

        ShodanStateOil currentState =  currentObservation();


        if(a.actionName().equals(ACTION_OPEN))
            shodan.setOpen(true);
        else {
            assert a.actionName().equals(ACTION_CLOSE);
            shodan.setOpen(false);
        }
        // run the model for another 30 days
        for(int day=0; day<30; day++)
            state.schedule.step(state);

        /*
        System.out.println(a.actionName() + "  " + state.getFishers().get(0).getRegulation().allowedAtSea(null,state) +
                                   "   " + state.getMap().getPorts().iterator().next().getGasPricePerLiter()
        );
*/

        ShodanStateOil newState =  currentObservation();


        return new EnvironmentOutcome(
                currentState,
                a,
                newState,
                lastReward(),
                isInTerminalState()
        );

    }
 
Developer ID: CarrKnight, Project: POSEIDON, Lines of code: 41, Source: ShodanEnvironment.java


Example 13: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public EnvironmentOutcome executeAction(Action a) {

	State startState = this.currentObservation();

	ActionPublisher ap = this.actionPublishers.get(a.actionName());
	if(ap == null){
		throw new RuntimeException("AbstractRosEnvironment has no ActionPublisher available to handle action " + a.toString());
	}

	int delay = ap.publishAction(a);
	if(delay > 0){
		try {
			Thread.sleep(delay);
		} catch (InterruptedException e) {
			e.printStackTrace();
		}
	}

	State finalState = this.currentObservation();

	this.lastReward = this.getMostRecentRewardSignal(startState, a, finalState);

	EnvironmentOutcome eo = new EnvironmentOutcome(startState, a, finalState, this.lastReward, this.isInTerminalState());

	if(this.isInTerminalState()){
		this.handleEnterTerminalState();
	}

	return eo;
}
 
Developer ID: h2r, Project: burlap_rosbridge, Lines of code: 32, Source: AbstractRosEnvironment.java


Example 14: sample

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public EnvironmentOutcome sample(State s, Action a) {
	if(!(a instanceof Option)){
		return model.sample(s, a);
	}

	Option o = (Option)a;

	SimulatedEnvironment env = new SimulatedEnvironment(model, s);
	return o.control(env, discount);
}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 12, Source: BFSMarkovOptionModel.java


Example 15: control

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
	Random rand = RandomFactory.getMapped(0);
	State initial = env.currentObservation();
	State cur = initial;

	Episode episode = new Episode(cur);
	Episode history = new Episode(cur);
	double roll;
	double pT;
	int nsteps = 0;
	double r = 0.;
	double cd = 1.;
	do{
		Action a = o.policy(cur, history);
		EnvironmentOutcome eo = env.executeAction(a);
		nsteps++;
		r += cd*eo.r;
		cur = eo.op;
		cd *= discount;


		history.transition(a, eo.op, eo.r);

		AnnotatedAction annotatedAction = new AnnotatedAction(a, o.toString() + "(" + nsteps + ")");
		episode.transition(annotatedAction, eo.op, r);


		pT = o.probabilityOfTermination(eo.op, history);
		roll = rand.nextDouble();

	}while(roll > pT && !env.isInTerminalState());

	EnvironmentOptionOutcome eoo = new EnvironmentOptionOutcome(initial, o, cur, r, env.isInTerminalState(), discount, episode);

	return eoo;

}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 38, Source: Option.java


Example 16: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public EnvironmentOutcome executeAction(burlap.mdp.core.action.Action ga) {

	if(this.curState == null){
		this.blockUntilStateReceived();
	}

	if(!(ga instanceof RLGlueDomain.RLGlueActionType)){
		throw new RuntimeException("RLGlueEnvironment cannot execute actions that are not instances of RLGlueDomain.RLGlueSpecification.");
	}

	State prevState = this.curState;

	int actionId = ((RLGlueDomain.RLGlueActionType)ga).getInd();
	synchronized (nextAction) {
		this.nextStateReference.val = null;
		this.nextAction.val = actionId;
		this.nextAction.notifyAll();
	}

	DPrint.cl(debugCode, "Set action (" + this.nextAction.val + ")");

	State toRet;
	synchronized (this.nextStateReference) {
		while(this.nextStateReference.val == null){
			try{
				DPrint.cl(debugCode, "Waiting for state from RLGlue Server...");
				nextStateReference.wait();
			} catch(InterruptedException ex){
				ex.printStackTrace();
			}
		}
		toRet = this.curState;
		this.nextStateReference.val = null;
	}

	EnvironmentOutcome eo = new EnvironmentOutcome(prevState, ga, toRet, this.lastReward, this.curStateIsTerminal);

	return eo;
}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 41, Source: RLGlueAgent.java


Example 17: FixedSizeMemory

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
/**
 * Initializes with the size of the memory and whether the most recent memory should always be included
 * in the returned results from the sampling memory.
 * @param size the number of experiences to store
 * @param alwaysIncludeMostRecent if true, then the result of {@link #sampleExperiences(int)} will always include the most recent experience, with the remaining n-1 samples drawn uniformly at random.
 *                                   If false, then it is a pure random sample with replacement.
 */
public FixedSizeMemory(int size, boolean alwaysIncludeMostRecent) {
	if(size < 1){
		throw new RuntimeException("FixedSizeMemory requires memory size > 0; was request size of " + size);
	}
	this.alwaysIncludeMostRecent = alwaysIncludeMostRecent;
	this.memory = new EnvironmentOutcome[size];
}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 15, Source: FixedSizeMemory.java


Example 18: updateModel

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public void updateModel(EnvironmentOutcome eo) {
	
	HashableState sh = this.hashingFactory.hashState(eo.o);
	HashableState shp = this.hashingFactory.hashState(eo.op);
	
	if(eo.terminated){
		this.terminalStates.add(shp);
	}
	
	StateActionNode san = this.getOrCreateActionNode(sh, eo.a);
	san.update(eo.r, shp);

}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 15, Source: TabularModel.java


Example 19: transitions

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public List<TransitionProb> transitions(State s, Action a) {
	List<TransitionProb> tps = sourceModel.transitions(s, a);
	for(TransitionProb tp : tps){
		EnvironmentOutcome eo = tp.eo;
		this.modifyEO(eo);
	}
	return tps;
}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 10, Source: RMaxModel.java


Example 20: modifyEO

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
protected void modifyEO(EnvironmentOutcome eo){
	double oldPotential = potentialFunction.potentialValue(eo.o);
	double nextPotential = 0.;
	if(!eo.terminated){
		nextPotential = potentialFunction.potentialValue(eo.op);
	}
	double bonus = gamma * nextPotential - oldPotential;
	eo.r = eo.r + bonus;

	if(!KWIKModel.Helper.stateTransitionsModeled(this, actionsTypes, eo.o)){
		eo.terminated = true;
	}
}
 
Developer ID: jmacglashan, Project: burlap, Lines of code: 14, Source: RMaxModel.java



Note: The burlap.mdp.singleagent.environment.EnvironmentOutcome examples in this article were collected from source code and documentation hosted on platforms such as GitHub and MSDocs. The code snippets come from open-source projects contributed by their developers, and copyright of the source code remains with the original authors. Please consult each project's license before distributing or using the code, and do not reproduce this article without permission.

