This article collects typical usage examples of the Python class ray.rllib.evaluation.policy_evaluator.PolicyEvaluator. If you are wondering what the PolicyEvaluator class does, how to use it, or what real-world examples look like, the curated code samples below may help.
The following presents 20 code examples of the PolicyEvaluator class, sorted by popularity by default.
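Before walking through the individual test cases, the minimal sketch below illustrates the construction-and-sampling pattern they all share. It is an assumption-laden sketch rather than code taken from the Ray repository: RandomPolicyGraph is a hypothetical policy written only for this page, and the import path for PolicyGraph (and the availability of self.action_space on it) is inferred from the module path in the title and from the examples that follow.

import gym
from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator
from ray.rllib.evaluation.policy_graph import PolicyGraph  # assumed path


class RandomPolicyGraph(PolicyGraph):
    """Hypothetical stand-in policy; the tests below use their own mocks."""

    def compute_actions(self,
                        obs_batch,
                        state_batches,
                        prev_action_batch=None,
                        prev_reward_batch=None,
                        episodes=None,
                        **kwargs):
        # One random action per observation, no RNN state, no extra fetches.
        # self.action_space is assumed to be set by the PolicyGraph base class.
        return [self.action_space.sample() for _ in obs_batch], [], {}

    def get_initial_state(self):
        return []  # stateless policy


ev = PolicyEvaluator(
    env_creator=lambda _: gym.make("CartPole-v0"),  # how to build the env
    policy_graph=RandomPolicyGraph,                 # single-policy setup
    batch_steps=100,                                # target batch size
    batch_mode="truncate_episodes")                 # or "complete_episodes"

batch = ev.sample()  # collect one SampleBatch of experience
print(batch.count)   # number of env steps in the batch

The multi-agent examples below follow the same pattern but pass a dict of (policy_graph_cls, obs_space, act_space, config) tuples as policy_graph, together with a policy_mapping_fn that assigns each agent to a policy.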
Example 1: testMultiAgentSampleRoundRobin
def testMultiAgentSampleRoundRobin(self):
    act_space = gym.spaces.Discrete(2)
    obs_space = gym.spaces.Discrete(10)
    ev = PolicyEvaluator(
        env_creator=lambda _: RoundRobinMultiAgent(5, increment_obs=True),
        policy_graph={
            "p0": (MockPolicyGraph, obs_space, act_space, {}),
        },
        policy_mapping_fn=lambda agent_id: "p0",
        batch_steps=50)
    batch = ev.sample()
    self.assertEqual(batch.count, 50)
    # since we round robin introduce agents into the env, some of the env
    # steps don't count as proper transitions
    self.assertEqual(batch.policy_batches["p0"].count, 42)
    self.assertEqual(batch.policy_batches["p0"]["obs"].tolist()[:10], [
        one_hot(0, 10),
        one_hot(1, 10),
        one_hot(2, 10),
        one_hot(3, 10),
        one_hot(4, 10),
    ] * 2)
    self.assertEqual(batch.policy_batches["p0"]["new_obs"].tolist()[:10], [
        one_hot(1, 10),
        one_hot(2, 10),
        one_hot(3, 10),
        one_hot(4, 10),
        one_hot(5, 10),
    ] * 2)
    self.assertEqual(batch.policy_batches["p0"]["rewards"].tolist()[:10],
                     [100, 100, 100, 100, 0] * 2)
    self.assertEqual(batch.policy_batches["p0"]["dones"].tolist()[:10],
                     [False, False, False, False, True] * 2)
    self.assertEqual(batch.policy_batches["p0"]["t"].tolist()[:10],
                     [4, 9, 14, 19, 24, 5, 10, 15, 20, 25])
Developer: robertnishihara, Project: ray, Lines: 35, Source: test_multi_agent_env.py
Example 2: testCustomRNNStateValues
def testCustomRNNStateValues(self):
    h = {"some": {"arbitrary": "structure", "here": [1, 2, 3]}}

    class StatefulPolicyGraph(PolicyGraph):
        def compute_actions(self,
                            obs_batch,
                            state_batches,
                            prev_action_batch=None,
                            prev_reward_batch=None,
                            episodes=None,
                            **kwargs):
            return [0] * len(obs_batch), [[h] * len(obs_batch)], {}

        def get_initial_state(self):
            return [{}]  # empty dict

    ev = PolicyEvaluator(
        env_creator=lambda _: gym.make("CartPole-v0"),
        policy_graph=StatefulPolicyGraph,
        batch_steps=5)
    batch = ev.sample()
    self.assertEqual(batch.count, 5)
    self.assertEqual(batch["state_in_0"][0], {})
    self.assertEqual(batch["state_out_0"][0], h)
    self.assertEqual(batch["state_in_0"][1], h)
    self.assertEqual(batch["state_out_0"][1], h)
Developer: robertnishihara, Project: ray, Lines: 26, Source: test_multi_agent_env.py
Example 3: testCompleteEpisodes
def testCompleteEpisodes(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: MockEnv(10),
        policy_graph=MockPolicyGraph,
        batch_steps=5,
        batch_mode="complete_episodes")
    batch = ev.sample()
    self.assertEqual(batch.count, 10)
Developer: jamescasbon, Project: ray, Lines: 8, Source: test_policy_evaluator.py
Example 4: testExternalEnvHorizonNotSupported
def testExternalEnvHorizonNotSupported(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: SimpleServing(MockEnv(25)),
        policy_graph=MockPolicyGraph,
        episode_horizon=20,
        batch_steps=10,
        batch_mode="complete_episodes")
    self.assertRaises(ValueError, lambda: ev.sample())
Developer: robertnishihara, Project: ray, Lines: 8, Source: test_external_env.py
Example 5: testExternalEnvBadActions
def testExternalEnvBadActions(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: SimpleServing(MockEnv(25)),
        policy_graph=BadPolicyGraph,
        sample_async=True,
        batch_steps=40,
        batch_mode="truncate_episodes")
    self.assertRaises(Exception, lambda: ev.sample())
Developer: robertnishihara, Project: ray, Lines: 8, Source: test_external_env.py
Example 6: testAsync
def testAsync(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: gym.make("CartPole-v0"),
        sample_async=True,
        policy_graph=MockPolicyGraph)
    batch = ev.sample()
    for key in ["obs", "actions", "rewards", "dones", "advantages"]:
        self.assertIn(key, batch)
    self.assertGreater(batch["advantages"][0], 1)
Developer: jamescasbon, Project: ray, Lines: 9, Source: test_policy_evaluator.py
Example 7: testExternalEnvTruncateEpisodes
def testExternalEnvTruncateEpisodes(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: SimpleServing(MockEnv(25)),
        policy_graph=MockPolicyGraph,
        batch_steps=40,
        batch_mode="truncate_episodes")
    for _ in range(3):
        batch = ev.sample()
        self.assertEqual(batch.count, 40)
Developer: robertnishihara, Project: ray, Lines: 9, Source: test_external_env.py
Example 8: testCompleteEpisodesPacking
def testCompleteEpisodesPacking(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: MockEnv(10),
        policy_graph=MockPolicyGraph,
        batch_steps=15,
        batch_mode="complete_episodes")
    batch = ev.sample()
    self.assertEqual(batch.count, 20)
    self.assertEqual(
        batch["t"].tolist(),
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
Developer: jamescasbon, Project: ray, Lines: 11, Source: test_policy_evaluator.py
Example 9: testExternalEnvOffPolicy
def testExternalEnvOffPolicy(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: SimpleOffPolicyServing(MockEnv(25), 42),
        policy_graph=MockPolicyGraph,
        batch_steps=40,
        batch_mode="complete_episodes")
    for _ in range(3):
        batch = ev.sample()
        self.assertEqual(batch.count, 50)
        self.assertEqual(batch["actions"][0], 42)
        self.assertEqual(batch["actions"][-1], 42)
Developer: robertnishihara, Project: ray, Lines: 11, Source: test_external_env.py
Example 10: testBaselinePerformance
def testBaselinePerformance(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: gym.make("CartPole-v0"),
        policy_graph=MockPolicyGraph,
        batch_steps=100)
    start = time.time()
    count = 0
    while time.time() - start < 1:
        count += ev.sample().count
    print()
    print("Samples per second {}".format(count / (time.time() - start)))
    print()
Developer: jamescasbon, Project: ray, Lines: 12, Source: test_policy_evaluator.py
Example 11: testFilterSync
def testFilterSync(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: gym.make("CartPole-v0"),
        policy_graph=MockPolicyGraph,
        sample_async=True,
        observation_filter="ConcurrentMeanStdFilter")
    time.sleep(2)
    ev.sample()
    filters = ev.get_filters(flush_after=True)
    obs_f = filters["default"]
    self.assertNotEqual(obs_f.rs.n, 0)
    self.assertNotEqual(obs_f.buffer.n, 0)
Developer: jamescasbon, Project: ray, Lines: 12, Source: test_policy_evaluator.py
Example 12: testBatchesLargerWhenVectorized
def testBatchesLargerWhenVectorized(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: MockEnv(episode_length=8),
        policy_graph=MockPolicyGraph,
        batch_mode="truncate_episodes",
        batch_steps=4,
        num_envs=4)
    batch = ev.sample()
    self.assertEqual(batch.count, 16)
    result = collect_metrics(ev, [])
    self.assertEqual(result["episodes_this_iter"], 0)
    batch = ev.sample()
    result = collect_metrics(ev, [])
    self.assertEqual(result["episodes_this_iter"], 4)
Developer: jamescasbon, Project: ray, Lines: 14, Source: test_policy_evaluator.py
Example 13: testGetFilters
def testGetFilters(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: gym.make("CartPole-v0"),
        policy_graph=MockPolicyGraph,
        sample_async=True,
        observation_filter="ConcurrentMeanStdFilter")
    self.sample_and_flush(ev)
    filters = ev.get_filters(flush_after=False)
    time.sleep(2)
    filters2 = ev.get_filters(flush_after=False)
    obs_f = filters["default"]
    obs_f2 = filters2["default"]
    self.assertGreaterEqual(obs_f2.rs.n, obs_f.rs.n)
    self.assertGreaterEqual(obs_f2.buffer.n, obs_f.buffer.n)
Developer: jamescasbon, Project: ray, Lines: 14, Source: test_policy_evaluator.py
Example 14: testMultiAgentSampleWithHorizon
def testMultiAgentSampleWithHorizon(self):
    act_space = gym.spaces.Discrete(2)
    obs_space = gym.spaces.Discrete(2)
    ev = PolicyEvaluator(
        env_creator=lambda _: BasicMultiAgent(5),
        policy_graph={
            "p0": (MockPolicyGraph, obs_space, act_space, {}),
            "p1": (MockPolicyGraph, obs_space, act_space, {}),
        },
        policy_mapping_fn=lambda agent_id: "p{}".format(agent_id % 2),
        episode_horizon=10,  # test with episode horizon set
        batch_steps=50)
    batch = ev.sample()
    self.assertEqual(batch.count, 50)
Developer: robertnishihara, Project: ray, Lines: 14, Source: test_multi_agent_env.py
Example 15: testMetrics
def testMetrics(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: MockEnv(episode_length=10),
        policy_graph=MockPolicyGraph,
        batch_mode="complete_episodes")
    remote_ev = PolicyEvaluator.as_remote().remote(
        env_creator=lambda _: MockEnv(episode_length=10),
        policy_graph=MockPolicyGraph,
        batch_mode="complete_episodes")
    ev.sample()
    ray.get(remote_ev.sample.remote())
    result = collect_metrics(ev, [remote_ev])
    self.assertEqual(result["episodes_this_iter"], 20)
    self.assertEqual(result["episode_reward_mean"], 10)
Developer: jamescasbon, Project: ray, Lines: 14, Source: test_policy_evaluator.py
Example 16: testSampleFromEarlyDoneEnv
def testSampleFromEarlyDoneEnv(self):
    act_space = gym.spaces.Discrete(2)
    obs_space = gym.spaces.Discrete(2)
    ev = PolicyEvaluator(
        env_creator=lambda _: EarlyDoneMultiAgent(),
        policy_graph={
            "p0": (MockPolicyGraph, obs_space, act_space, {}),
            "p1": (MockPolicyGraph, obs_space, act_space, {}),
        },
        policy_mapping_fn=lambda agent_id: "p{}".format(agent_id % 2),
        batch_mode="complete_episodes",
        batch_steps=1)
    self.assertRaisesRegexp(ValueError,
                            ".*don't have a last observation.*",
                            lambda: ev.sample())
Developer: robertnishihara, Project: ray, Lines: 15, Source: test_multi_agent_env.py
Example 17: testMultiAgentSampleAsyncRemote
def testMultiAgentSampleAsyncRemote(self):
    act_space = gym.spaces.Discrete(2)
    obs_space = gym.spaces.Discrete(2)
    ev = PolicyEvaluator(
        env_creator=lambda _: BasicMultiAgent(5),
        policy_graph={
            "p0": (MockPolicyGraph, obs_space, act_space, {}),
            "p1": (MockPolicyGraph, obs_space, act_space, {}),
        },
        policy_mapping_fn=lambda agent_id: "p{}".format(agent_id % 2),
        batch_steps=50,
        num_envs=4,
        async_remote_worker_envs=True)
    batch = ev.sample()
    self.assertEqual(batch.count, 200)
Developer: robertnishihara, Project: ray, Lines: 15, Source: test_multi_agent_env.py
Example 18: testVectorEnvSupport
def testVectorEnvSupport(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: MockVectorEnv(episode_length=20, num_envs=8),
        policy_graph=MockPolicyGraph,
        batch_mode="truncate_episodes",
        batch_steps=10)
    for _ in range(8):
        batch = ev.sample()
        self.assertEqual(batch.count, 10)
    result = collect_metrics(ev, [])
    self.assertEqual(result["episodes_this_iter"], 0)
    for _ in range(8):
        batch = ev.sample()
        self.assertEqual(batch.count, 10)
    result = collect_metrics(ev, [])
    self.assertEqual(result["episodes_this_iter"], 8)
Developer: jamescasbon, Project: ray, Lines: 16, Source: test_policy_evaluator.py
Example 19: testMultiAgentSample
def testMultiAgentSample(self):
    act_space = gym.spaces.Discrete(2)
    obs_space = gym.spaces.Discrete(2)
    ev = PolicyEvaluator(
        env_creator=lambda _: BasicMultiAgent(5),
        policy_graph={
            "p0": (MockPolicyGraph, obs_space, act_space, {}),
            "p1": (MockPolicyGraph, obs_space, act_space, {}),
        },
        policy_mapping_fn=lambda agent_id: "p{}".format(agent_id % 2),
        batch_steps=50)
    batch = ev.sample()
    self.assertEqual(batch.count, 50)
    self.assertEqual(batch.policy_batches["p0"].count, 150)
    self.assertEqual(batch.policy_batches["p1"].count, 100)
    self.assertEqual(batch.policy_batches["p0"]["t"].tolist(),
                     list(range(25)) * 6)
Developer: robertnishihara, Project: ray, Lines: 17, Source: test_multi_agent_env.py
Example 20: testReturningModelBasedRolloutsData
def testReturningModelBasedRolloutsData(self):
    class ModelBasedPolicyGraph(PGPolicyGraph):
        def compute_actions(self,
                            obs_batch,
                            state_batches,
                            prev_action_batch=None,
                            prev_reward_batch=None,
                            episodes=None,
                            **kwargs):
            # Pretend we did a model-based rollout and want to return
            # the extra trajectory.
            builder = episodes[0].new_batch_builder()
            rollout_id = random.randint(0, 10000)
            for t in range(5):
                builder.add_values(
                    agent_id="extra_0",
                    policy_id="p1",  # use p1 so we can easily check it
                    t=t,
                    eps_id=rollout_id,  # new id for each rollout
                    obs=obs_batch[0],
                    actions=0,
                    rewards=0,
                    dones=t == 4,
                    infos={},
                    new_obs=obs_batch[0])
            batch = builder.build_and_reset(episode=None)
            episodes[0].add_extra_batch(batch)
            # Just return zeros for actions
            return [0] * len(obs_batch), [], {}

    single_env = gym.make("CartPole-v0")
    obs_space = single_env.observation_space
    act_space = single_env.action_space
    ev = PolicyEvaluator(
        env_creator=lambda _: MultiCartpole(2),
        policy_graph={
            "p0": (ModelBasedPolicyGraph, obs_space, act_space, {}),
            "p1": (ModelBasedPolicyGraph, obs_space, act_space, {}),
        },
        policy_mapping_fn=lambda agent_id: "p0",
        batch_steps=5)
    batch = ev.sample()
    self.assertEqual(batch.count, 5)
    self.assertEqual(batch.policy_batches["p0"].count, 10)
    self.assertEqual(batch.policy_batches["p1"].count, 25)
Developer: robertnishihara, Project: ray, Lines: 46, Source: test_multi_agent_env.py
Note: The ray.rllib.evaluation.policy_evaluator.PolicyEvaluator class examples in this article were compiled by 纯净天空 from GitHub and other source code and documentation platforms. The snippets are drawn from open-source projects contributed by their respective authors, and copyright remains with the original authors. Please consult the corresponding project's license before distributing or using the code; do not republish without permission.