
Python experiments.EpisodicExperiment Class Code Examples


This article collects typical usage examples of the EpisodicExperiment class from pybrain.rl.experiments in Python. If you are unsure what EpisodicExperiment does, how to use it, or where to find working examples, the selected class code examples below should help.



Listed below are 20 code examples of the EpisodicExperiment class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
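Nearly all of the examples below share the same skeleton: construct an episodic task and an agent, couple them with EpisodicExperiment(task, agent), and then alternate experiment.doEpisodes(n) with agent.learn() and agent.reset(). The minimal sketch below illustrates that loop on the cart-pole problem used in several of the examples; the import paths for the cart-pole environment, balance task, and NFQ learner are assumptions based on the usual PyBrain package layout, since the excerpts omit their imports.

# Minimal sketch of the common EpisodicExperiment loop (PyBrain, Python 2 era code).
# NOTE: the cart-pole / NFQ import paths below are assumed from the standard
# PyBrain layout; the excerpts in this article do not show their imports.
from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask
from pybrain.rl.learners.valuebased import ActionValueNetwork, NFQ
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment

env = CartPoleEnvironment()
task = DiscreteBalanceTask(env, 100)          # episodic task, 100 steps per episode
module = ActionValueNetwork(4, 3)             # 4 continuous state inputs, 3 discrete actions
agent = LearningAgent(module, NFQ())          # value-based learner that trains the network

experiment = EpisodicExperiment(task, agent)  # couples the task with the agent
for _ in range(20):
    rewards = experiment.doEpisodes(10)       # run 10 episodes; returns per-episode reward lists
    agent.learn()                             # fit the value network to the collected experience
    agent.reset()                             # clear the agent's episode history

The optimization-based examples (PGPE, CMAES, GA, HillClimber) replace the LearningAgent and learner pair with an OptimizationAgent wrapping a network from buildNetwork; in that setup EpisodicExperiment drives the optimizer itself, so doEpisodes() alone is enough and no explicit agent.learn() call appears.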

Example 1: __init__

class BaggerBot:
	def __init__(self, host, port, net=None):
		self.conn = ServerConnection(host, port)
		self.env = self.conn.env
		self.conn.join()
		self.task = SurviveTask(self.env, self.conn)
		self.net = buildNetwork(self.env.outdim, 4, self.env.indim, outclass=TanhLayer)
		self.agent = OptimizationAgent(self.net, PGPE())
		self.experiment = EpisodicExperiment(self.task, self.agent)

	def wait_connected(self):
		self.conn.wait_connected()

	def train(self):
		'''
		Infinitely play the game. Figure out the next move(s), parse incoming
		data, discard all that, do stupid stuff and die :)
		'''
		while self.env.in_game:
			# Ask to be spawned
			logging.info('Requesting spawn...')
			self.conn.send_spawn()
			while not self.env.playing:
				self.conn.parse_pregame()
			while self.env.playing:
				self.experiment.doEpisodes(100)
Developer: Remboooo, Project: LoBotomy, Lines: 26, Source: baggerbot.py


Example 2: train

def train():

    # Make the environment
    environment = TwentyFortyEightEnvironment()

    # Store the environment as the task
    task = environment

    # Set up the Neural Network
    neuralNet = buildNetwork(task.nSenses, HIDDEN_NODES, task.nActions)

    # Use a Genetic Algorithm as the Trainer
    trainer = GA( populationSize=20, topProportion=0.2, elitism=False
                , eliteProportion=0.25, mutationProb=0.1
                , mutationStdDev=0.2, tournament=False
                , tournamentSize=2 )

    agent = OptimizationAgent(neuralNet, trainer)

    # Set up an experiment
    experiment = EpisodicExperiment(task, agent)

    # Train the network
    meanScores = []
    print "Starting HillClimberNN"
    for i in xrange(LEARNING_EPOCHS):
        experiment.doEpisodes(GAMES_PER_EPOCH)
        print "Training Iteration", i, "With mean score ", task.meanScore, "Max block achieved ", environment.maxGameBlock
        environment.maxGameBlock = 0
        meanScores.append(task.meanScore)

    params = {"learningEpochs": LEARNING_EPOCHS, "gamesPerEpoch": GAMES_PER_EPOCH, "hiddenNodes": HIDDEN_NODES }
    return meanScores, params, experiment
Developer: Aggregates, Project: MI_HW2, Lines: 33, Source: hillclimberNN.py


Example 3: train

	def train(self, episodes, maxSteps):
 	
		avgReward = 0

		# set up environment and task
		self.env = InfoMaxEnv(self.objectNames, self.actionNames, self.numCategories)
		self.task = InfoMaxTask(self.env, maxSteps=maxSteps, \
					do_decay_beliefs = True, uniformInitialBeliefs = True)

		# create neural net and learning agent
		self.params = buildNetwork(self.task.outdim, self.task.indim, \
						bias=True, outclass=SoftmaxLayer)

		if self._PGPE:
			self.agent = OptimizationAgent(self.params, PGPE(minimize=False,verbose=False))
		elif self._CMAES:
			self.agent = OptimizationAgent(self.params, CMAES(minimize=False,verbose=False))

		# init and perform experiment
		exp = EpisodicExperiment(self.task, self.agent)

		for i in range(episodes):        
			exp.doEpisodes(1)
			avgReward += self.task.getTotalReward()
			print "reward episode ",i,self.task.getTotalReward()

		# print initial info
		print "\naverage reward over training = ",avgReward/episodes

		# save trained network
		self._saveWeights()
Developer: Kenkoko, Project: ua-ros-pkg, Lines: 31, Source: agent.py


Example 4: train

def train():

    # Make the environment
    environment = TwentyFortyEightEnvironment()

    # The task is the game this time
    task = environment

    # Make the reinforcement learning agent (use a network because inputs are continuous)
    network = ActionValueNetwork(task.nSenses, task.nActions)

    # Use Q learning for updating the table (NFQ is for networks)
    learner = NFQ()
    learner.gamma = GAMMA

    agent = LearningAgent(network, learner)

    # Set up an experiment
    experiment = EpisodicExperiment(task, agent)

    # Train the Learner
    meanScores = []
    for i in xrange(LEARNING_EPOCHS):
        experiment.doEpisodes(GAMES_PER_EPOCH)
        print "Iteration ", i, " With mean score ", task.meanScore, "Max block achieved ", environment.maxGameBlock
        meanScores.append(task.meanScore)
        agent.learn()
        agent.reset()

    params = {"learningEpochs": LEARNING_EPOCHS, "gamesPerEpoch": GAMES_PER_EPOCH, "gamma": GAMMA }
    return meanScores, params, agent
Developer: Aggregates, Project: MI_HW2, Lines: 31, Source: RLNFQ.py


Example 5: main

def main():
    """
    The task represents one full simulation. Therefore it is episodic.
    Each episode calls performAction after passing getObservation to the agent.
    Once isFinished is true, the reward is returned and one simulation is done.

    The net is the neural network. It has 7 input nodes, a hidden layer of 5
    nodes, and 2 output nodes. It is a feed-forward network using sigmoid
    activation functions.

    OptimizationAgent(module, learner)
    EpisodicExperiment.optimizer = learner
    learner.setEvaluator(task, module)
    optimizer.learn()
    """
    task = LanderTask(batchSize=1)
    net = buildNetwork(task.indim, 5, task.outdim)
    learner = StochasticHillClimber()
    agent = OptimizationAgent(net, learner)
    experiment = EpisodicExperiment(task, agent)
    experiment.doEpisodes(100000)

    tasks = [LanderTask(environment=Lander(acceleration=float(i)))
             for i in range(1, 4)]
    test_size = 1000
    for task in tasks:
        print("Running task with acceleration {}".format(task.env.acceleration))
        success = 0
        for _ in range(test_size):
            task.env.reset()
            while not task.isFinished():
                observation = task.getObservation()
                action = net.activate(observation)
                task.performAction(action)
            print("Finished a simulation with result {}".format(task.env.status))
            if task.env.status == 'landed':
                success += 1
        print("Succeeded {} times out of {}".format(success, test_size))
Developer: andschwa, Project: uidaho-cs470-moonlander, Lines: 38, Source: main.py


Example 6: someEpisodes

def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100, avgOver=1, returnEvents=False):
    """ Return the fitness value for one episode of play, given the policy defined by a neural network. """
    task = GameTask(game_env)
    game_env.recordingEnabled = True        
    game_env.reset()        
    net.reset()
    task.maxSteps=maxSteps
    agent = LearningAgent(net)
    agent.learning = False
    agent.logging = False
    exper = EpisodicExperiment(task, agent)
    fitness = 0
    for _ in range(avgOver):
        rs = exper.doEpisodes(1)
        # add a slight bonus for more exploration, if rewards are identical
        fitness += len(set(game_env._allEvents)) * 1e-6
        # the true, discounted reward        
        fitness += sum([sum([v*discountFactor**step for step, v in enumerate(r)]) for r in rs])
    fitness /= avgOver
    if returnEvents:
        return fitness, game_env._allEvents
    else:
        return fitness
Developer: sarobe, Project: VGDLEntityCreator, Lines: 23, Source: nomodel_pomdp.py


Example 7: agent

#the task is the game this time
task = environment

#make the reinforcement learning agent (use a network because inputs are continuous)
controller = ActionValueNetwork(task.nsenses,task.nactions)

#use Q learning for updating the table (NFQ is for networks)
learner = NFQ()

agent = LearningAgent(controller, learner)



#set up an experiment
experiment = EpisodicExperiment(task, agent)

meanscores = []
m = 0.0
for i in xrange(learning_eps):
    print i
    experiment.doEpisodes(games_per_ep)
    meanscores.append(task.meanscore)
    if meanscores[-1] > m:
        m = meanscores[-1]
        f = open("bestRL.pkl",'w')
        pickle.dump(agent,f)
        f.close()
    agent.learn()
    agent.reset()
Developer: jskye, Project: uon.2014.comp3330.hwa2.alevmy, Lines: 29, Source: runRL.py


Example 8: CartPoleEnvironment

env = CartPoleEnvironment()
if render:
    renderer = CartPoleRenderer()
    env.setRenderer(renderer)
    renderer.start()

module = ActionValueNetwork(4, 3)

task = DiscreteBalanceTask(env, 100)
learner = NFQ()
learner.explorer.epsilon = 0.4

agent = LearningAgent(module, learner)
testagent = LearningAgent(module, None)
experiment = EpisodicExperiment(task, agent)

def plotPerformance(values, fig):
    plt.figure(fig.number)
    plt.clf()
    plt.plot(values, 'o-')
    plt.gcf().canvas.draw()
    # Without the next line, the pyplot plot won't actually show up.
    plt.pause(0.001)

performance = []

if not render:
    pf_fig = plt.figure()

while(True):
Developer: vascobailao, Project: PYTHON, Lines: 30, Source: NFQ.py


Example 9: run

def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters
    
    seed = parameters["seed"]
   

    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)


    
    
    render = False    
    plot = False
    
    plt.ion()
    
    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()
    
    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"])

    #print "dim: ", task.indim, task.outdim
    
    # to inputs state and 4 actions
    module = ActionValueNetwork(task.outdim, task.indim)
    

    learner = NFQ()
    # % of random actions
    learner.explorer.epsilon = parameters["ExplorerEpsilon"]
    
    
    agent = LearningAgent(module, learner)
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    
    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)
    
    performance = []
    
    if plot:
        pf_fig = plt.figure()
    
    m = parameters["MaxTotalEpisodes"]/parameters["EpisodesPerLearn"]
    for episode in range(0,m):
    	# one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        agent.learn(1)
    
        #renderer.drawPlot()
        
        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
        
        if (episode) % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode
            
            #experiment.agent = testagent
            r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])
            
            env.delay = False
            testagent.reset()
            #experiment.agent = agent
        
            performance.append(r)
            if plot:
                plotPerformance(performance, pf_fig)
        
#            print "reward avg", r
#            print "explorer epsilon", learner.explorer.epsilon
#            print "num episodes", agent.history.getNumSequences()
#            print "update step", len(performance)
            
#    print "done"
    return performance
            
        #print "network",   json.dumps(module.bn.net.E, indent=2)
Developer: nairboon, Project: bnrl, Lines: 94, Source: NFQ.py


Example 10: RecurrentNetwork

from pybrain.rl.environments.timeseries.timeseries import MonthlySnPEnvironment
from pybrain.rl.learners.directsearch.rrl import RRL

from pybrain.structure import RecurrentNetwork
from pybrain.structure import LinearLayer, SigmoidLayer, TanhLayer, BiasUnit
from pybrain.structure import FullConnection
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment

from numpy import sign, round
from matplotlib import pyplot

net= RecurrentNetwork()
#Single linear layer with bias unit, and single tanh layer. the linear layer is whats optimised
net.addInputModule(BiasUnit(name='bias'))
net.addOutputModule(TanhLayer(1, name='out'))
net.addRecurrentConnection(FullConnection(net['out'], net['out'], name='c3'))
net.addInputModule(LinearLayer(1,name='in'))
net.addConnection(FullConnection(net['in'],net['out'],name='c1'))
net.addConnection((FullConnection(net['bias'],net['out'],name='c2')))
net.sortModules()
net._setParameters([-8.79227886e-02, -8.29319017e+02, 1.25946474e+00])
print(net._params)
env=MonthlySnPEnvironment()
task=MaximizeReturnTask(env)
learner = RRL() # ENAC() #Q_LinFA(2,1)
agent = LearningAgent(net,learner)
exp=EpisodicExperiment(task,agent)

exp.doEpisodes(10)
Developer: samstern, Project: MSc-Project, Lines: 30, Source: episodicSnP.py


Example 11: CCRLEnvironment

__author__ = 'Stubborn'

from pybrain.rl.environments.ode import CCRLEnvironment
from pybrain.rl.environments.ode.tasks import CCRLGlasTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.optimization import PGPE
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment

environment = CCRLEnvironment()
task = CCRLGlasTask(environment)

net = buildNetwork(len(task.getObservation()), 4, environment.indim, outclass=TanhLayer)

agent = OptimizationAgent(net, PGPE())

experiment = EpisodicExperiment(task, agent)

for updates in range(20000):
    experiment.doEpisodes(1)



Developer: AkselBH, Project: QLearning, Lines: 21, Source: Pybrain+tutvideo+3+Robotarm.py


Example 12: Bool

 # create environment
 #Options: Bool(OpenGL), Bool(Realtime simu. while client is connected), ServerIP(default:localhost), Port(default:21560)
 env = ShipSteeringEnvironment(False)
 # create task
 task = GoNorthwardTask(env,maxsteps = 500)
 # create controller network
 net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)
 # create agent with controller and learner
 agent = FiniteDifferenceAgent(net, SPLA())
 # learning options
 agent.learner.gd.alpha = 0.3 #step size of \mu adaption
 agent.learner.gdSig.alpha = 0.15 #step size of \sigma adaption
 agent.learner.gd.momentum = 0.0
 batch=2 #number of samples per gradient estimate (was: 2; more here due to stochastic setting)
 #create experiment
 experiment = EpisodicExperiment(task, agent)
 prnts=1 #frequency of console output
 epis=2000/batch/prnts
 
 #actual roll outs
 filename="dataSPLA08NoRew"+repr(int(random.random()*1000000.0))+".dat"
 wf = open(filename, 'wb')
 for updates in range(epis):
     for i in range(prnts):
         experiment.doEpisodes(batch) #execute #batch episodes
         agent.learn() #learn from the gather experience
         agent.reset() #reset agent and environment
     #print out related data
     stp = (updates+1)*batch*prnts
     print "Step: ", runs, "/", stp, "Best: ", agent.learner.best, "Base: ", agent.learner.baseline, "Reward: ", agent.learner.reward   
     wf.write(repr(stp)+"\n") 
Developer: HKou, Project: pybrain, Lines: 31, Source: shipbenchSPLA.py


Example 13: GrowTask

# create task
task = GrowTask(env)
# create controller network (flat network)
net = buildNetwork(32, 10, 12)
# create agent with controller and learner
agent = FiniteDifferenceAgent(net, SPLA())
# learning options
agent.learner.gd.alpha = 0.05
agent.learner.gdSig.alpha = 0.1
agent.learner.gd.momentum = 0.0
agent.learner.epsilon = 2.0
agent.learner.initSigmas()

sr = []

experiment = EpisodicExperiment(task, agent)
for updates in range(1000):
    # training step
    for i in range(5):
        experiment.doEpisodes(10)
        agent.learn()
        print "parameters:", agent.module.params
        agent.reset()
        
    # learning step
    agent.disableLearning()
    experiment.doEpisodes(50)
    # append mean reward to sr array
    ret = []
    for n in range(agent.history.getNumSequences()):
        state, action, reward, _ = agent.history.getSequence(n)
Developer: HKou, Project: pybrain, Lines: 31, Source: flexCubeNAC.py


Example 14: run_experiment

def run_experiment():
    # Create the controller network
    HIDDEN_NODES = 4

    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None

    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it retains
        # the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                    in_nodes=env.obsLen,
                    hidden_nodes=HIDDEN_NODES,
                    out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return
Developer: evansneath, Project: surgicalsim, Lines: 80, Source: start_environment.py


Example 15: run

def run(nao,pad):





    # ################################
    # choose bottom cam, so nao can see object when standing next to it
    nao.camera.selectCam(1)
    
    env = grabbingEnvironment(nao)
    #env.connect(nao)

    task = grabbingTask(env)

    net = buildNetwork(len(task.getObservation()),8, env.indim, bias = True, recurrent=True)
    print env.indim
    #net = ActionValueNetwork(5,4)
    #, outclass=TanhLayer)
    #, hiddenclass=TanhLayer, outclass=TanhLayer

    # not correct right now..
    # TODO: train into RL Modules, dataset needs to be merged with exploration data
    #generateTraining.generateTraining().runDeltaMovements(nao,net,env,pad)


    #module = ActionValueNetwork(3, 3)
    #module = NeuronLayer(40)

    #agent = LearningAgent(net, SARSA())
    #learner = PolicyGradientLearner()
    #learner._setExplorer(StateDependentExplorer(3,3))
    #learner._setModule(module)
    #agent = LearningAgent(module, learner)
    #agent = LearningAgent(net, ENAC())
    #agent = LearningAgent(net, Reinforce())

    #learner = NFQ()
    #learner.explorer.epsilon = 0.4
    #agent = LearningAgent(net, learner)

    testagent = OptimizationAgent(net,None,env)
    #agent = LearningAgent(module, Q())
    #agent = LearningAgent(module, QLambda())
    learner = grabbingPGPE(storeAllEvaluations = True, verbose = True, epsilon = 1.0, deltamax =5.0, sigmaLearningRate = 0.1, learningRate = 0.2)
    agent = OptimizationAgent(net, learner,env)
    #agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations = True, verbose = True))
    #agent = OptimizationAgent(net, HillClimber(storeAllEvaluations = True, verbose = True))

    #agent = OptimizationAgent(net, RandomSearch(storeAllEvaluations = True, verbose = True))
    
    experiment = EpisodicExperiment(task, agent)
    # only for optimizationAgent
    #experiment.doOptimization = True

    # only for simulator!
    nao.fractionMaxSpeed = 1.0



    print "#env"
    print "  sensors:", env.outdim
    print "  actions:", env.indim
    print "  discreteStates:", env.discreteStates
    print "  discreteActions:", env.discreteActions
    
    print
    print "#task"
    print "  sensor_limits:", task.sensor_limits
    print "  actor_limits:", task.actor_limits
    print "  epilen: ", task.epiLen
    print "#EpisodicTask"
    print "  discount:", task.discount
    print "  batchsize:", task.batchSize
    

    print
    print "#PGPE"
    print "  exploration type:", grabbingPGPE().exploration
    print "  LearningRate:", grabbingPGPE().learningRate
    print "  sigmaLearningRate:", grabbingPGPE().sigmaLearningRate
    print "  epsilon:", grabbingPGPE().epsilon
    print "  wDecay:", grabbingPGPE().wDecay
    print "  momentum:", grabbingPGPE().momentum
    print "  rprop:", grabbingPGPE().rprop



#    # switch this to True if you want to see the cart balancing the pole (slower)
#    render = False
#
#    plt.ion()
#
#    env = CartPoleEnvironment()
#    if render:
#        renderer = CartPoleRenderer()
#        env.setRenderer(renderer)
#        renderer.start()
#
#    module = ActionValueNetwork(4, 3)
#......... (remainder of this function omitted) .........
Developer: c0de2014, Project: nao-control, Lines: 101, Source: grabbingTest.py


Example 16: run

def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters
    
    seed = parameters["seed"]
   

    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)


    
    
    render = False    
    plot = False
    
    plt.ion()
    
    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()
    
    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"],desiredValue=None)

    #print "dim: ", task.indim, task.outdim

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.rl.agents import OptimizationAgent
    from pybrain.optimization import PGPE

    module = buildNetwork(task.outdim, task.indim, bias=False)
    # create agent with controller and learner (and its options)

    # % of random actions
    #learner.explorer.epsilon = parameters["ExplorerEpsilon"]
    
    
    agent = OptimizationAgent(module, PGPE(storeAllEvaluations = True,storeAllEvaluated=False, maxEvaluations=None,desiredEvaluation=1, verbose=False))
#
#    print agent
#    from pprint import pprint
#    pprint (vars(agent.learner))
    
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    
    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)
    
    performance = []
    
    if plot:
        pf_fig = plt.figure()
    
    m = parameters["MaxTotalEpisodes"]/parameters["EpisodesPerLearn"]
    for episode in range(0,m):
    	# one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        #agent.learn(1)
    
        #renderer.drawPlot()
        
        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
        
        if (episode) % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode
            
            #experiment.agent = testagent
            #r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])
            #for i in range(0,parameters["TestWith"]):
#            y = testexperiment.doEpisodes(1)
#            print (agent.learner._allEvaluated)
#                
#            
#            from pprint import pprint
#            pprint (vars(task))
                
            l = parameters["TestWith"]
            
            task.N = parameters["MaxRunsPerEpisodeTest"]
            experiment.doEpisodes(l)
            task.N = parameters["MaxRunsPerEpisode"]

            resList = (agent.learner._allEvaluations)[-l:-1]
            
#......... (remainder of this function omitted) .........
Developer: nairboon, Project: bnrl, Lines: 101, Source: PGPE.py


Example 17: mazeEnv

                         [1, 0, 0, 0, 0, 0, 0, 0, 1],
                         [1, 1, 1, 1, 1, 1, 1, 1, 1]])

  side = 9
  goal = 3,2

  env = mazeEnv(structure, goal)   #use maze environment for now; note pos is Y,X

  # our own task and environment for later
  #env = policyEnv()
  thetask = MDPMazeTaskEpisodic(env)

  # create neural net; create and train agent
  theparams = buildNetwork(thetask.outdim, thetask.indim, bias=False)
  agent = OptimizationAgent(theparams, CMAES())
  exp = EpisodicExperiment(thetask, agent)

  # train agent        
  exp.doEpisodes(NUM_EPISODES)
  print "\ntotal reward = ",thetask.getTotalReward()

  #print "\n"
  #print "initial weights: "; print theparams.params
  print "\n"
  print "NOTE positions below are (Y,X)"

  print "\n"
  print "getting observation 1"
  print "robot = ",thetask.getObservation()
  print "goal  = ",goal
  print "reward: ", thetask.getReward()
Developer: krylenko, Project: python, Lines: 31, Source: INFOMAX__policyWrapper.py


Example 18: train

	def train(self, size, goal, initPose, mapSelect, envSelect, episodes, maxSteps, goalTol, randomizeInitPose):
 	
		avgReward = 0

		# set up environment and task
		self.env = mazeEnv(size, goal, initPose, mapSelect, envSelect, randomizeInitPose)
		self.task = MDPMazeTaskEpisodic(self.env, maxSteps, goalTol)

		# create neural net and learning agent
		self.params = buildNetwork(self.task.outdim, 48, self.task.indim, \
		bias=True, outclass=SoftmaxLayer)

		if self._PGPE:
			self.agent = OptimizationAgent(self.params, PGPE(minimize=True,verbose=False))
		elif self._CMAES:
			self.agent = OptimizationAgent(self.params, CMAES(minimize=True,verbose=False))

		# init experiment
		exp = EpisodicExperiment(self.task, self.agent)

		for i in range(0, episodes):        
			exp.doEpisodes()
			avgReward += self.task.getTotalReward()
			print "reward episode ",i,self.task.getTotalReward()

		# print initial info
		print "\naverage reward over training = ",avgReward/episodes

		# import weights into network and save network
		if self._PGPE:
			for i in range(len(self.params.params)):
				self.params.params[i] = self.agent.learner.current[i]
			pickle.dump(self.params, open('policyNet.pkl','w'))

		elif self._CMAES:

			################ following code came from WWInfoMaxCMAES.py script from ICDL 2010 paper
			arz = randn(self.agent.learner.numParameters, self.agent.learner.batchSize)
			arx = tile(self.agent.learner.center.reshape(self.agent.learner.numParameters, 1),\
			(1, self.agent.learner.batchSize)) + \
			self.agent.learner.stepSize * dot(dot(self.agent.learner.B, self.agent.learner.D), arz)
			# Go through the parameters and pick the current best 
			arfitness = zeros(self.agent.learner.batchSize)
			for k in xrange(self.agent.learner.batchSize):
			  self.agent.learner.wrappingEvaluable._setParameters(arx[:, k]);
			  arfitness[k] = self.agent.learner._BlackBoxOptimizer__evaluator\
			(self.agent.learner.wrappingEvaluable)

			# Sort by fitness and compute weighted mean into center
			tmp = sorted(map(lambda (x, y): (y, x), enumerate(ravel(arfitness))))
			arfitness = array(map(lambda x: x[0], tmp))
			arindex = array(map(lambda x: int(x[1]), tmp))

			arz = arz[:, arindex]
			curparams = arx[:, arindex[0]];

			# update network weights with selected parameters
			for i in range(len(self.params.params)):
				self.params.params[i] = curparams[i]
			# save trained network
			pickle.dump(self.params, open('policyNet.pkl','w'))
Developer: krylenko, Project: python, Lines: 61, Source: INFOMAX__pybrainNode.py


Example 19: run

def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", task,parameters
    
    
    seed = parameters["seed"]
   

    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed)
    
    render = False    
    plot = False
    
    plt.ion()
    
    env = CartPoleEnvironment()
    env.randomInitialization = False
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()
    
    task_class = getattr(cp, task)
    task = task_class(env, 50)

    #print "dim: ", task.indim, task.outdim
    
    # to inputs state and 4 actions
    bmodule = ActionValueRAND(task.outdim, task.indim)
    rlearner = RAND()

    blearner = RAND()
    # % of random actions
    
    bagent = LearningAgent(bmodule, rlearner)
    
    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.rl.agents import OptimizationAgent
    from pybrain.optimization import PGPE

    module = buildNetwork(task.outdim, task.indim, bias=False)
    # create agent with controller and learner (and its options)

    # % of random actions
    #learner.explorer.epsilon = parameters["ExplorerEpsilon"]
    
    
    agent = OptimizationAgent(module, PGPE(storeAllEvaluations = True,storeAllEvaluated=True, maxEvaluations=None, verbose=False))


    
    
    testagent = LearningAgent(module, None)
    pgpeexperiment = EpisodicExperiment(task, agent)
    randexperiment = EpisodicExperiment(task, bagent)


    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)
    
    performance = []
    
    if plot:
        pf_fig = plt.figure()
    
    m = parameters["MaxTotalEpisodes"]/parameters["EpisodesPerLearn"]
    
    ## train pgpe
    for episode in range(0,50):
    	# one learning step after one episode of world-interaction
        y =pgpeexperiment.doEpisodes(1)
        
    be, bf = agent.learner._bestFound()
    print be,bf
    
    print "generate data"
    be.numActions = 1
    gdagent = LearningAgent(be, blearner)
    experiment = EpisodicExperiment(task, gdagent)
    
    for episode in range(0,1000):
#        print episode, " of 1000"
    	# one learning step after one episode of world-interaction
        y =experiment.doEpisodes(1)
        
#        print y
        x = randexperiment.doEpisodes(1)
#        print len(y[0])
        #renderer.drawPlot()
        
        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
#......... (remainder of this function omitted) .........
Developer: nairboon, Project: bnrl, Lines: 101, Source: trainnet.py


Example 20: BalanceTask

env.setRenderer(CartPoleRenderer())
env.getRenderer().start()
env.delay = (episodes == 1)

# create task
task = BalanceTask(env, epilen)

# create controller network
net = buildNetwork(4, 1, bias=False)

# create agent and set parameters from command line
agent = LearningAgent(net, None)
agent.module._setParameters([float(sys.argv[1]), float(sys.argv[2]), float(sys.argv[3]), float(sys.argv[4])])

# create experiment
experiment = EpisodicExperiment(task, agent)
experiment.doEpisodes(episodes)

# run environment
ret = []
for n in range(agent.history.getNumSequences()):
    returns = agent.history.getSequence(n)
    reward = returns[2]
    ret.append( sum(reward, 0).item() )

# print results
print ret, "mean:",mean(ret)
#env.getRenderer().stop()


Developer: Boblogic07, Project: pybrain, Lines: 28, Source: play_cartpole.py



Note: The pybrain.rl.experiments.EpisodicExperiment class examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers, and copyright remains with the original authors. For redistribution and use, please refer to the corresponding project's license; do not reproduce without permission.



扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap