Unverified Commit ec9cb658 authored by Benjamin Beyret's avatar Benjamin Beyret Committed by GitHub
Browse files

Merge pull request #12 from beyretb/dev-v0.4

merge Dev v0.4 to master
parents 5a238567 202bd71c
......@@ -6,6 +6,4 @@ env/*
__pycache__/
UnitySDK.log
/venv
testDevs.py
testDevs.yaml
.DS_Store
\ No newline at end of file
/dev
\ No newline at end of file
......@@ -44,9 +44,9 @@ Finally download the environment for your system:
| OS | Environment link |
| --- | --- |
| Linux | [download v0.3](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_linux_v0.3.zip) |
| MacOS | [download v0.3](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_mac_v0.3.zip) |
| Windows | [download v0.3](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_windows_v0.3.zip) |
| Linux | [download v0.4](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_linux_v0.4.zip) |
| MacOS | [download v0.4](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_mac_v0.4.zip) |
| Windows | [download v0.4](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_windows_v0.4.zip) |
You can now unzip the content of the archive to the `env` folder and you're ready to go! Make sure the executable
`AnimalAI.*` is in `env/`. On Linux, you may have to make the file executable by running `chmod +x env/AnimalAI.x86_64`.
......@@ -99,19 +99,29 @@ Occasional slow frame rates in play mode. Temporary fix: reduce screen size.
- [ ] Offer a gym wrapper for training
- [ ] Add protobuf for arena spawning feedback
- [ ] Improve the way the agent spawns
- [x] Improve the way the agent spawns
- [x] Add lights out configurations.
- [x] Improve environment framerates
- [x] Add moving food
## Version History
- v0.4 - Lights off moved to Unity, color configurations, proportional goals, bug fixes
- The light is now directly switched on/off within Unity, configuration files stay the same
- Blackouts now work with infinite episodes (`t=0`)
- The `rand_colors` configurations have been removed and the user can now pass `RGB` values, see [here](documentation/configFile.md#objects)
- Rewards for goals are now proportional to their size (except for the `DeathZone`), see [here](documentation/definitionsOfObjects.md#rewards)
- The agent is now a ball rather than a cube
- Increased safety for spawning the agent to avoid infinite loops
- Bug fixes
- v0.3 - Lights off, remove Beams and add cylinder
- We added the possibility to switch the lights off at given intervals, see [here](documentation/configFile.md#blackouts)
- visualizeLightsOff.py displays an example of lights off, from the agent's point of view
- Beams objects have been removed
- A `Cylinder` object has been added (similar behaviour to the `Woodlog`)
- The immovable `Cylinder` tunnel has been renamed `CylinderTunnel`
- `UnityEnvironment.reset()` parameter `config` renamed to `arenas_configurations_input`
- v0.2 - New moving food rewards, improved Unity performance and bug fixes
- Moving rewards have been added, two for each type of reward, see
......
from animalai.envs.brain import BrainInfo
class Agent(object):
    """Template agent: fill in the loading, stepping and clean-up logic."""

    def __init__(self, configuration_to_load: str):
        """
        Load your agent here and initialize anything needed.

        :param configuration_to_load: path to your model to load
        """
        # Create the attribute here so it exists before the first call to
        # step(); the placeholder has the documented action length of 2.
        self.action = [0.0, 0.0]

    def step(self, brain_info: BrainInfo) -> list[float]:
        """
        A single step the agent should take based on the current observations.

        :param brain_info: a single BrainInfo containing the observations and reward for a single step for one agent
        :return: a list of actions to execute (of size 2)
        """
        # Placeholder action of the documented size; replace it with the
        # output of your own policy computed from `brain_info`.
        self.action = [0.0, 0.0]
        return self.action

    def destroy(self):
        """Clean-up hook; the template has nothing to release."""
        pass
......@@ -20,7 +20,7 @@ DESCRIPTOR = _descriptor.FileDescriptor(
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n:animalai/communicator_objects/arena_parameters_proto.proto\x12\x14\x63ommunicator_objects\"\x98\x03\n\x14\x41renaParametersProto\x12\t\n\x01t\x18\x01 \x01(\x05\x12\x46\n\x05items\x18\x02 \x03(\x0b\x32\x37.communicator_objects.ArenaParametersProto.ItemsToSpawn\x12\x17\n\x0frand_all_colors\x18\x03 \x01(\x08\x1a\x93\x02\n\x0cItemsToSpawn\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nrand_color\x18\x02 \x01(\x08\x12R\n\tpositions\x18\x03 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x12\x11\n\trotations\x18\x04 \x03(\x02\x12N\n\x05sizes\x18\x05 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x1a*\n\x07Vector3\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n:animalai/communicator_objects/arena_parameters_proto.proto\x12\x14\x63ommunicator_objects\"\xcf\x03\n\x14\x41renaParametersProto\x12\t\n\x01t\x18\x01 \x01(\x05\x12\x46\n\x05items\x18\x02 \x03(\x0b\x32\x37.communicator_objects.ArenaParametersProto.ItemsToSpawn\x12\x11\n\tblackouts\x18\x03 \x03(\x05\x1a\xd0\x02\n\x0cItemsToSpawn\x12\x0c\n\x04name\x18\x01 \x01(\t\x12R\n\tpositions\x18\x03 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x12\x11\n\trotations\x18\x04 \x03(\x02\x12N\n\x05sizes\x18\x05 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x12O\n\x06\x63olors\x18\x06 \x03(\x0b\x32?.communicator_objects.ArenaParametersProto.ItemsToSpawn.Vector3\x1a*\n\x07Vector3\x12\t\n\x01x\x18\x01 \x01(\x02\x12\t\n\x01y\x18\x02 \x01(\x02\x12\t\n\x01z\x18\x03 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
)
......@@ -66,8 +66,8 @@ _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3 = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=451,
serialized_end=493,
serialized_start=506,
serialized_end=548,
)
_ARENAPARAMETERSPROTO_ITEMSTOSPAWN = _descriptor.Descriptor(
......@@ -85,29 +85,29 @@ _ARENAPARAMETERSPROTO_ITEMSTOSPAWN = _descriptor.Descriptor(
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='rand_color', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.rand_color', index=1,
number=2, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
name='positions', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.positions', index=1,
number=3, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='positions', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.positions', index=2,
number=3, type=11, cpp_type=10, label=3,
name='rotations', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.rotations', index=2,
number=4, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='rotations', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.rotations', index=3,
number=4, type=2, cpp_type=6, label=3,
name='sizes', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.sizes', index=3,
number=5, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='sizes', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.sizes', index=4,
number=5, type=11, cpp_type=10, label=3,
name='colors', full_name='communicator_objects.ArenaParametersProto.ItemsToSpawn.colors', index=4,
number=6, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
......@@ -124,8 +124,8 @@ _ARENAPARAMETERSPROTO_ITEMSTOSPAWN = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=218,
serialized_end=493,
serialized_start=212,
serialized_end=548,
)
_ARENAPARAMETERSPROTO = _descriptor.Descriptor(
......@@ -150,9 +150,9 @@ _ARENAPARAMETERSPROTO = _descriptor.Descriptor(
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='rand_all_colors', full_name='communicator_objects.ArenaParametersProto.rand_all_colors', index=2,
number=3, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
name='blackouts', full_name='communicator_objects.ArenaParametersProto.blackouts', index=2,
number=3, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
......@@ -169,12 +169,13 @@ _ARENAPARAMETERSPROTO = _descriptor.Descriptor(
oneofs=[
],
serialized_start=85,
serialized_end=493,
serialized_end=548,
)
_ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3.containing_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN
_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['positions'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3
_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['sizes'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3
_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.fields_by_name['colors'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN_VECTOR3
_ARENAPARAMETERSPROTO_ITEMSTOSPAWN.containing_type = _ARENAPARAMETERSPROTO
_ARENAPARAMETERSPROTO.fields_by_name['items'].message_type = _ARENAPARAMETERSPROTO_ITEMSTOSPAWN
DESCRIPTOR.message_types_by_name['ArenaParametersProto'] = _ARENAPARAMETERSPROTO
......
......@@ -2,7 +2,6 @@ import json
import jsonpickle
import yaml
import copy
import numpy as np
from animalai.communicator_objects import UnityRLResetInput, ArenaParametersProto
......@@ -26,44 +25,41 @@ class Vector3(yaml.YAMLObject):
return res
class RGB(yaml.YAMLObject):
    """YAML-loadable colour (tag ``!RGB``) holding three channel values."""

    yaml_tag = u'!RGB'

    def __init__(self, r=0, g=0, b=0):
        """Store the three channel values as given."""
        self.r, self.g, self.b = r, g, b

    def to_proto(self):
        """
        Build the protobuf message for this colour.

        :return: an ``ArenaParametersProto.ItemsToSpawn.Vector3`` whose
                 ``x``/``y``/``z`` fields are set from ``r``/``g``/``b``
        """
        vector = ArenaParametersProto.ItemsToSpawn.Vector3()
        # The proto has no dedicated colour type: channels go in x, y, z in that order.
        vector.x, vector.y, vector.z = self.r, self.g, self.b
        return vector
class Item(yaml.YAMLObject):
yaml_tag = u'!Item'
def __init__(self, name='', rand_color=False, positions=None, rotations=None, sizes=None):
def __init__(self, name='', positions=None, rotations=None, sizes=None, colors=None):
self.name = name
self.rand_color = rand_color
self.positions = positions if positions is not None else []
self.rotations = rotations if rotations is not None else []
self.sizes = sizes if sizes is not None else []
self.colors = colors if colors is not None else []
class Arena(yaml.YAMLObject):
yaml_tag = u'!Arena'
def __init__(self, t=1000, rand_all_colors=False, items=None, blackouts=None):
def __init__(self, t=1000, items=None, blackouts=None):
self.t = t
self.rand_all_colors = rand_all_colors
self.items = items if items is not None else {}
self.blackouts = blackouts if blackouts is not None else []
self.generate_blackout_steps()
def generate_blackout_steps(self):
    """
    Expand ``self.blackouts`` into a per-step mask stored in ``self.blackouts_steps``.

    The mask holds 1 for steps where the light is on and 0 where it is off:

    * no schedule (``None`` or empty) or ``self.t <= 0``: all ones, with
      length ``max(self.t, 1)``;
    * first entry ``>= 0``: the entries are the steps at which the light is
      toggled, starting from "on"; the mask has length ``self.t``;
    * first entry ``< 0``: the light alternates on/off every
      ``-self.blackouts[0]`` steps, starting "on"; the mask has length ``self.t``.
    """
    has_schedule = self.blackouts is not None and len(self.blackouts) > 0
    if not has_schedule or self.t <= 0:
        self.blackouts_steps = np.ones(max(self.t, 1))
        return

    first = self.blackouts[0]
    # `>= 0` (not `> 0`): a schedule that starts at step 0 is an explicit list
    # of toggle steps. Treating it as a period would give a period of 0 and
    # divide by zero below.
    if first >= 0:
        steps = np.ones(self.t)
        light = True
        # Each consecutive pair of toggle steps delimits one span of constant light.
        for start, stop in zip(self.blackouts[:-1], self.blackouts[1:]):
            steps[start:stop] = not light
            light = not light
        # After the last toggle the state holds until the end of the episode.
        steps[self.blackouts[-1]:] = not light
        self.blackouts_steps = steps
    else:
        flip_every = -first
        pattern = [1] * flip_every + [0] * flip_every
        # Repeat the on/off pattern enough times to cover t steps, then trim.
        repeats = self.t // (2 * flip_every) + 1
        self.blackouts_steps = np.array(pattern * repeats)[:self.t]
class ArenaConfig(yaml.YAMLObject):
......@@ -73,8 +69,6 @@ class ArenaConfig(yaml.YAMLObject):
if yaml_path is not None:
self.arenas = yaml.load(open(yaml_path, 'r'), Loader=yaml.Loader).arenas
for arena in self.arenas.values():
arena.generate_blackout_steps()
else:
self.arenas = {}
......@@ -89,14 +83,14 @@ class ArenaConfig(yaml.YAMLObject):
for k in self.arenas:
config_out.arenas[k].CopyFrom(ArenaParametersProto())
config_out.arenas[k].t = self.arenas[k].t
config_out.arenas[k].rand_all_colors = self.arenas[k].rand_all_colors
config_out.arenas[k].blackouts.extend(self.arenas[k].blackouts)
for item in self.arenas[k].items:
to_spawn = config_out.arenas[k].items.add()
to_spawn.name = item.name
to_spawn.rand_color = item.rand_color
to_spawn.positions.extend([v.to_proto() for v in item.positions])
to_spawn.rotations.extend(item.rotations)
to_spawn.sizes.extend([v.to_proto() for v in item.sizes])
to_spawn.colors.extend([v.to_proto() for v in item.colors])
return config_out
......@@ -105,7 +99,6 @@ class ArenaConfig(yaml.YAMLObject):
if arenas_configurations_input is not None:
for arena_i in arenas_configurations_input.arenas:
self.arenas[arena_i] = copy.copy(arenas_configurations_input.arenas[arena_i])
self.arenas[arena_i].generate_blackout_steps()
def constructor_arena(loader, node):
......
......@@ -30,7 +30,6 @@ class UnityEnvironment(object):
base_port=5005,
seed=0,
docker_training=False,
no_graphics=False,
n_arenas=1,
play=False,
arenas_configurations=None):
......@@ -43,7 +42,6 @@ class UnityEnvironment(object):
:int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
:int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
:param docker_training: Informs this class whether the process is being run within a container.
:param no_graphics: Whether to run the Unity simulator in no-graphics mode
"""
atexit.register(self._close)
......@@ -56,10 +54,9 @@ class UnityEnvironment(object):
self.proc1 = None # The process that is started. If None, no process was started
self.communicator = self.get_communicator(worker_id, base_port)
self.arenas_configurations = arenas_configurations if arenas_configurations is not None else ArenaConfig()
self.check_lights = True
if file_name is not None:
self.executable_launcher(file_name, docker_training, no_graphics)
self.executable_launcher(file_name, docker_training)
else:
logger.info("Start training by pressing the Play button in the Unity Editor.")
self._loaded = True
......@@ -130,7 +127,7 @@ class UnityEnvironment(object):
def external_brain_names(self):
return self._external_brain_names
def executable_launcher(self, file_name, docker_training, no_graphics):
def executable_launcher(self, file_name, docker_training):
cwd = os.getcwd()
file_name = (file_name.strip()
.replace('.app', '').replace('.exe', '').replace('.x86_64', '').replace('.x86',
......@@ -177,17 +174,12 @@ class UnityEnvironment(object):
logger.debug("This is the launch string {}".format(launch_string))
# Launch Unity environment
if not docker_training:
if no_graphics:
if not self.play:
self.proc1 = subprocess.Popen(
[launch_string, '-nographics', '-batchmode',
'--port', str(self.port)])
[launch_string, '--port', str(self.port), '--nArenas', str(self.n_arenas)])
else:
if not self.play:
self.proc1 = subprocess.Popen(
[launch_string, '--port', str(self.port), '--nArenas', str(self.n_arenas)])
else:
self.proc1 = subprocess.Popen(
[launch_string, '--port', str(self.port)])
self.proc1 = subprocess.Popen(
[launch_string, '--port', str(self.port)])
else:
"""
......@@ -209,7 +201,7 @@ class UnityEnvironment(object):
"""
docker_ls = ("exec xvfb-run --auto-servernum"
" --server-args='-screen 0 640x480x24'"
" {0} --port {1}").format(launch_string, str(self.port))
" {0} --port {1} --nArenas {2}").format(launch_string, str(self.port), str(self.n_arenas))
self.proc1 = subprocess.Popen(docker_ls,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
......@@ -232,7 +224,6 @@ class UnityEnvironment(object):
"""
if self._loaded:
self.arenas_configurations.update(arenas_configurations_input)
self.check_lights = not np.all([e.blackouts for e in self.arenas_configurations.arenas.values()])
outputs = self.communicator.exchange(
self._generate_reset_input(train_mode, arenas_configurations_input)
......@@ -388,8 +379,6 @@ class UnityEnvironment(object):
self._global_done = state[1]
for _b in self._external_brain_names:
self._n_agents[_b] = len(state[0][_b].agents)
if self.check_lights:
state = self._apply_lights(state, step_number)
return state[0]
elif not self._loaded:
raise UnityEnvironmentException("No Unity environment is loaded.")
......@@ -436,17 +425,6 @@ class UnityEnvironment(object):
arr = [float(x) for x in arr]
return arr
def _apply_lights(self, state, step_number):
"""
Sets visual observations to zero for Arenas where the light should be off.
:return: the modified state
"""
if 'Learner' in state[0].keys():
mask = np.array([e.blackouts_steps[step_number % len(e.blackouts_steps)] \
for e in self.arenas_configurations.arenas.values()])
state[0]['Learner'].visual_observations[0] = (state[0]['Learner'].visual_observations[0].T * mask).T
return state
def _get_state(self, output: UnityRLOutput) -> (AllBrainInfo, bool):
"""
Collects experience information from all external brains in environment at current step.
......
......@@ -2,504 +2,176 @@
arenas:
0: !Arena
t: 0
rand_all_colors: true
items:
- !Item
name: CubeTunnel
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: CylinderTunnel
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Cylinder
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Ramp
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Wall
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: WallTransparent
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Cube
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: CubeTransparent
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Cardbox1
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Cardbox2
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: WoodLog
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: UObject
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: LObject
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: GoodGoal
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: BadGoal
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: DeathZone
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: GoodGoalMulti
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: MazeGenerator
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: GoodGoalMove
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: GoodGoalBounce
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: BadGoalMove
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: BadGoalBounce
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: GoodGoalMultiMove
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: GoodGoalMultiBounce
positions: []
rand_color: false
rotations: []
sizes: []
1: !Arena
t: 0
rand_all_colors: false
items:
- !Item
name: CubeTunnel
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Cylinder
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: CylinderTunnel
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Ramp
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Wall
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: WallTransparent
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Cube
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: CubeTransparent
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Cardbox1
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: Cardbox2
positions: []
rand_color: false
rotations: []
sizes: []
- !Item
name: WoodLog
positions: []
rand_color: false