-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluate.py
More file actions
29 lines (25 loc) · 869 Bytes
/
Copy pathevaluate.py
File metadata and controls
29 lines (25 loc) · 869 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
import numpy as np
# Evaluation script: load a saved PPO model, report its aggregate reward with
# evaluate_policy, then roll out a few episodes by hand and print the reward
# and step count of each one.

# Settings are named here so they can be adjusted in one place.
MODEL_PATH = "models/best_model"
ENV_ID = "LunarLander-v2"
N_EVAL_EPISODES = 20    # episodes used for the mean/std summary
N_SAMPLE_EPISODES = 5   # episodes rolled out manually and printed one by one

# Load model
model = PPO.load(MODEL_PATH)
env = gym.make(ENV_ID)

# Everything that uses the environment lives inside try/finally so that
# env.close() runs even when evaluation or a rollout step raises.
try:
    # Evaluate
    mean_reward, std_reward = evaluate_policy(
        model, env, n_eval_episodes=N_EVAL_EPISODES
    )
    print(f"Mean Reward: {mean_reward:.2f} +/- {std_reward:.2f}")

    # Run a few episodes and print per-episode reward
    print("\nSample episodes:")
    for ep in range(N_SAMPLE_EPISODES):
        obs, _ = env.reset()
        total_reward = 0.0
        steps = 0
        done = False
        while not done:
            # deterministic=True: request the policy's deterministic action
            # instead of sampling one.
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            steps += 1
            # The episode is over when the environment reports either flag.
            done = terminated or truncated
        print(f" Episode {ep+1}: Reward = {total_reward:.2f}, Steps = {steps}")
finally:
    env.close()