|
43 | 43 | "source": [ |
44 | 44 | "# Install packages\n", |
45 | 45 | "\n", |
46 | | - "!pip install ksim==0.1.0 xax==0.3.0 mujoco-scenes" |
| 46 | + "!pip install ksim==0.1.2 xax==0.3.0 mujoco-scenes" |
47 | 47 | ] |
48 | 48 | }, |
49 | 49 | { |
|
474 | 474 | " value=True,\n", |
475 | 475 | " help=\"Whether to use the IMU acceleration and gyroscope observations.\",\n", |
476 | 476 | " )\n", |
477 | | - " use_domain_randomization: bool = xax.field(\n", |
478 | | - " value=True,\n", |
479 | | - " help=\"Whether to use domain randomization.\",\n", |
480 | | - " )\n", |
481 | 477 | "\n", |
482 | 478 | " # Curriculum parameters.\n", |
483 | 479 | " num_curriculum_levels: int = xax.field(\n", |
|
628 | 624 | " def get_rewards(self, physics_model: ksim.PhysicsModel) -> list[ksim.Reward]:\n", |
629 | 625 | " return [\n", |
630 | 626 | " # Standard rewards.\n", |
631 | | - " ksim.NaiveForwardReward(clip_max=2.0, in_robot_frame=False, scale=3.0),\n", |
| 627 | + " ksim.NaiveForwardReward(clip_max=1.25, in_robot_frame=False, scale=3.0),\n", |
632 | 628 | " ksim.NaiveForwardOrientationReward(scale=1.0),\n", |
633 | 629 | " ksim.StayAliveReward(scale=1.0),\n", |
634 | 630 | " ksim.UprightReward(scale=0.5),\n", |
635 | 631 | " # Avoid movement penalties.\n", |
636 | | - " ksim.AngularVelocityPenalty(index=(\"x\", \"y\"), scale=-0.005),\n", |
637 | | - " ksim.LinearVelocityPenalty(index=(\"z\"), scale=-0.005),\n", |
| 632 | + " ksim.AngularVelocityPenalty(index=(\"x\", \"y\"), scale=-0.1),\n", |
| 633 | + " ksim.LinearVelocityPenalty(index=(\"z\"), scale=-0.1),\n", |
638 | 634 | " # Normalization penalties.\n", |
639 | 635 | " ksim.AvoidLimitsPenalty.create(physics_model, scale=-0.01),\n", |
640 | | - " ksim.JointVelocityPenalty(scale=-0.01, scale_by_curriculum=True),\n", |
641 | 636 | " ksim.JointAccelerationPenalty(scale=-0.01, scale_by_curriculum=True),\n", |
642 | 637 | " ksim.JointJerkPenalty(scale=-0.01, scale_by_curriculum=True),\n", |
643 | 638 | " ksim.LinkAccelerationPenalty(scale=-0.01, scale_by_curriculum=True),\n", |
644 | 639 | " ksim.LinkJerkPenalty(scale=-0.01, scale_by_curriculum=True),\n", |
645 | 640 | " ksim.ActionAccelerationPenalty(scale=-0.01, scale_by_curriculum=True),\n", |
646 | | - " ksim.CtrlPenalty(scale=-0.01, scale_by_curriculum=True),\n", |
647 | 641 | " # Bespoke rewards.\n", |
648 | 642 | " BentArmPenalty.create_penalty(physics_model, scale=-0.1),\n", |
649 | 643 | " StraightLegPenalty.create_penalty(physics_model, scale=-0.1),\n", |
|
656 | 650 | " ]\n", |
657 | 651 | "\n", |
658 | 652 | " def get_curriculum(self, physics_model: ksim.PhysicsModel) -> ksim.Curriculum:\n", |
659 | | - " return ksim.ConstantCurriculum(\n", |
660 | | - " # We toggle domain randomization by setting the curriculum level.\n", |
661 | | - " # Since the domain randomization functions all use this level,\n", |
662 | | - " # this effectively toggles them on and off.\n", |
663 | | - " level=1.0 if self.config.use_domain_randomization else 0.0,\n", |
| 653 | + " return ksim.DistanceFromOriginCurriculum(\n", |
| 654 | + " min_level_steps=5,\n", |
664 | 655 | " )\n", |
665 | 656 | "\n", |
666 | 657 | " def get_model(self, key: PRNGKeyArray) -> Model:\n", |
|
0 commit comments